
[Xen-devel] [PATCH 11/17] Scatter-gather support.

Allow a packet to be split across several fragments, rather than
requiring at most one fragment plus the inline prefix.  Each endpoint
advertises the largest number of fragments it is prepared to parse
with a new SET_MAX_FRAGMENTS_PER_PACKET message; the transmit path
linearises (or, failing that, drops) anything more fragmented than
the peer's limit.  Scatter-gather is enabled and disabled together
with TX checksum offload, since Linux does not support SG without it.

Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
---
 drivers/net/xen-netchannel2/chan.c             |   27 +++++++++--
 drivers/net/xen-netchannel2/netchannel2_core.h |   35 +++++++++++---
 drivers/net/xen-netchannel2/offload.c          |   59 ++++++++++++++++++++++++
 drivers/net/xen-netchannel2/recv_packet.c      |   23 +++++++++
 drivers/net/xen-netchannel2/rscb.c             |   18 +++++--
 drivers/net/xen-netchannel2/xmit_packet.c      |   43 ++++++++++-------
 include/xen/interface/io/netchannel2.h         |   24 ++++++++-
 7 files changed, 191 insertions(+), 38 deletions(-)

diff --git a/drivers/net/xen-netchannel2/chan.c b/drivers/net/xen-netchannel2/chan.c
index e8d3796..1317482 100644
--- a/drivers/net/xen-netchannel2/chan.c
+++ b/drivers/net/xen-netchannel2/chan.c
@@ -86,6 +86,10 @@ retry:
                case NETCHANNEL2_MSG_SET_OFFLOAD:
                        nc2_handle_set_offload(nc, ncrp, &hdr);
                        break;
+               case NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET:
+                       nc2_handle_set_max_fragments_per_packet(nc, ncrp,
+                                                               &hdr);
+                       break;
                case NETCHANNEL2_MSG_PAD:
                        break;
                default:
@@ -138,6 +142,8 @@ static void flush_rings(struct netchannel2_ring_pair *ncrp)
        send_finish_packet_messages(ncrp);
        if (ncrp->need_advertise_max_packets)
                advertise_max_packets(ncrp);
+       if (ncrp->need_advertise_max_fragments_per_packet)
+               advertise_max_fragments_per_packet(ncrp);
        if (nc->need_advertise_offloads)
                advertise_offloads(nc);
 
@@ -465,6 +471,8 @@ static void _nc2_attach_rings(struct netchannel2_ring_pair *ncrp,
        ncrp->is_attached = 1;
 
        ncrp->need_advertise_max_packets = 1;
+       ncrp->need_advertise_max_fragments_per_packet = 1;
+       ncrp->max_fragments_per_tx_packet = 1;
 }
 
 /* Attach a netchannel2 structure to a ring pair.  The endpoint is
@@ -551,8 +559,9 @@ void nc2_detach_rings(struct netchannel2 *nc)
        nc->rings.irq = -1;
 
        /* Disable all offloads */
-       nc->net_device->features &= ~NETIF_F_IP_CSUM;
+       nc->net_device->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);
        nc->allow_tx_csum_offload = 0;
+       nc->rings.max_fragments_per_tx_packet = 1;
 }
 
 #if defined(CONFIG_XEN_NETDEV2_BACKEND)
@@ -662,17 +671,25 @@ static int process_ring(struct napi_struct *napi,
                        skb = skb_peek_tail(&nc->pending_skbs);
                        if (!skb)
                                break;
-                       if (prepare_xmit_allocate_resources(nc, skb) < 0) {
-                               /* Still stuck */
+                       switch (prepare_xmit_allocate_resources(nc, skb)) {
+                       case PREP_XMIT_OKAY:
+                               __skb_unlink(skb, &nc->pending_skbs);
+                               queue_packet_to_interface(skb, ncrp);
+                               break;
+                       case PREP_XMIT_BUSY:
+                               goto still_stuck;
+                       case PREP_XMIT_DROP:
+                               __skb_unlink(skb, &nc->pending_skbs);
+                               release_tx_packet(ncrp, skb);
                                break;
                        }
-                       __skb_unlink(skb, &nc->pending_skbs);
-                       queue_packet_to_interface(skb, ncrp);
                }
                if (skb_queue_empty(&nc->pending_skbs)) {
                        nc->is_stopped = 0;
                        netif_wake_queue(nc->net_device);
                }
+still_stuck:
+               ;
        }
 
        spin_unlock(&ncrp->lock);
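
The process_ring() change above turns a boolean "could we allocate
resources?" test into a three-way result.  As a minimal sketch of the
resulting drain pattern (self-contained C; the packet type and helpers
are hypothetical stand-ins for the driver's skb queue operations):

  enum prep_result { PREP_OKAY, PREP_BUSY, PREP_DROP };

  struct pkt;                              /* stand-in for sk_buff */
  struct pkt *peek_tail(void);             /* skb_peek_tail() */
  void unlink_and_send(struct pkt *p);     /* __skb_unlink() + queue */
  void unlink_and_free(struct pkt *p);     /* __skb_unlink() + release */
  enum prep_result prepare(struct pkt *p);

  static void drain_pending(void)
  {
          struct pkt *p;

          while ((p = peek_tail()) != NULL) {
                  switch (prepare(p)) {
                  case PREP_OKAY:
                          unlink_and_send(p); /* resources available now */
                          break;
                  case PREP_BUSY:
                          return;             /* still stuck; retry later */
                  case PREP_DROP:
                          unlink_and_free(p); /* will never be sendable */
                          break;
                  }
          }
  }
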
diff --git a/drivers/net/xen-netchannel2/netchannel2_core.h b/drivers/net/xen-netchannel2/netchannel2_core.h
index 296b606..6197550 100644
--- a/drivers/net/xen-netchannel2/netchannel2_core.h
+++ b/drivers/net/xen-netchannel2/netchannel2_core.h
@@ -200,6 +200,15 @@ struct netchannel2_ring_pair {
           filtering rules would suppress the event. */
        uint8_t delayed_kick;
 
+       /* Set if we need to send a SET_MAX_FRAGMENTS_PER_PACKET
+        * message. */
+       uint8_t need_advertise_max_fragments_per_packet;
+
+       /* The maximum number of fragments which can be used in any
+          given packet.  We have to linearise anything which is more
+          fragmented than this. */
+       uint32_t max_fragments_per_tx_packet;
+
        /* A list of packet IDs which we need to return to the other
           end as soon as there is space on the ring.  Protected by
           the lock. */
@@ -309,10 +318,18 @@ struct sk_buff *handle_receiver_copy_packet(struct netchannel2 *nc,
                                            unsigned nr_frags,
                                            unsigned frags_off);
 
-int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
-                                                      struct sk_buff *skb);
-int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
-                               struct sk_buff *skb);
+enum prepare_xmit_result {
+       PREP_XMIT_OKAY = 0,
+       PREP_XMIT_BUSY = -1,
+       PREP_XMIT_DROP = -2,
+};
+
+enum prepare_xmit_result prepare_xmit_allocate_small(
+       struct netchannel2_ring_pair *ncrp,
+       struct sk_buff *skb);
+enum prepare_xmit_result prepare_xmit_allocate_grant(
+       struct netchannel2_ring_pair *ncrp,
+       struct sk_buff *skb);
 void xmit_grant(struct netchannel2_ring_pair *ncrp,
                struct sk_buff *skb,
                volatile void *msg);
@@ -348,9 +365,9 @@ void nc2_rscb_on_gntcopy_fail(void *ctxt, struct gnttab_copy *gop);
 
 int nc2_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int nc2_really_start_xmit(struct netchannel2_ring_pair *ncrp,
-                          struct sk_buff *skb);
-int prepare_xmit_allocate_resources(struct netchannel2 *nc,
-                                   struct sk_buff *skb);
+                         struct sk_buff *skb);
+enum prepare_xmit_result prepare_xmit_allocate_resources(struct netchannel2 *nc,
+                                                        struct sk_buff *skb);
 void nc2_handle_finish_packet_msg(struct netchannel2 *nc,
                                  struct netchannel2_ring_pair *ncrp,
                                  struct netchannel2_msg_hdr *hdr);
@@ -364,6 +381,10 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
                           struct netchannel2_msg_hdr *hdr,
                           struct sk_buff_head *pending_rx_queue);
 void advertise_max_packets(struct netchannel2_ring_pair *ncrp);
+void nc2_handle_set_max_fragments_per_packet(struct netchannel2 *nc,
+                                            struct netchannel2_ring_pair *ncrp,
+                                            struct netchannel2_msg_hdr *hdr);
+void advertise_max_fragments_per_packet(struct netchannel2_ring_pair *ncrp);
 void receive_pending_skbs(struct sk_buff_head *rx_queue);
 void nc2_queue_purge(struct netchannel2_ring_pair *ncrp,
                     struct sk_buff_head *queue);
diff --git a/drivers/net/xen-netchannel2/offload.c b/drivers/net/xen-netchannel2/offload.c
index 90d0a54..552b0ad 100644
--- a/drivers/net/xen-netchannel2/offload.c
+++ b/drivers/net/xen-netchannel2/offload.c
@@ -5,6 +5,7 @@
 #include "netchannel2_core.h"
 
 static int nc2_set_tx_csum(struct net_device *nd, u32 val);
+static int nc2_set_sg(struct net_device *nd, u32 val);
 
 /* ---------------- Interface to the other domain ----------------------- */
 void nc2_handle_set_offload(struct netchannel2 *nc,
@@ -25,6 +26,14 @@ void nc2_handle_set_offload(struct netchannel2 *nc,
        if (msg.csum != nc->allow_tx_csum_offload) {
                nc->allow_tx_csum_offload = msg.csum;
                nc2_set_tx_csum(nc->net_device, msg.csum);
+               /* Linux doesn't support scatter-gather mode without
+                  TX csum offload.  We therefore need to disable SG
+                  support whenever the remote turns off csum support.
+                  We also elect to enable SG support whenever the
+                  remote turns on csum support, since that's more
+                  likely to be useful than requiring the user to
+                  manually enable it every time. */
+               nc2_set_sg(nc->net_device, msg.csum);
        }
 }
 
@@ -47,6 +56,37 @@ void advertise_offloads(struct netchannel2 *nc)
        }
 }
 
+/* Not really offload-related, but it interacts with checksum offload
+   and is easiest to do here. */
+void nc2_handle_set_max_fragments_per_packet(struct netchannel2 *nc,
+                                            struct netchannel2_ring_pair *ncrp,
+                                            struct netchannel2_msg_hdr *hdr)
+{
+       struct netchannel2_msg_set_max_fragments_per_packet msg;
+
+       if (hdr->size != sizeof(msg)) {
+               pr_debug("Set max fragments per packet message had strange size %d\n",
+                        hdr->size);
+               return;
+       }
+       nc2_copy_from_ring(&ncrp->cons_ring, &msg, sizeof(msg));
+       if (msg.max_frags_per_packet < 1) {
+               pr_debug("set max fragments per packet to %d?\n",
+                        msg.max_frags_per_packet);
+               return;
+       }
+       if (ncrp == &nc->rings &&
+           ncrp->max_fragments_per_tx_packet == 1 &&
+           msg.max_frags_per_packet > 1) {
+               /* Turning on scatter-gather mode.  Linux only
+                  supports it if you've got TX csum offload,
+                  though. */
+               if (nc->net_device->features & NETIF_F_IP_CSUM)
+                       nc->net_device->features |= NETIF_F_SG;
+       }
+       ncrp->max_fragments_per_tx_packet = msg.max_frags_per_packet;
+}
+
 
 
 /* ---------------------- Ethtool interface ---------------------------- */
@@ -85,9 +125,28 @@ static int nc2_set_tx_csum(struct net_device *nd, u32 val)
        return ethtool_op_set_tx_csum(nd, val);
 }
 
+/* ethtool set_sg() handler.  Linux makes sure that scatter-gather
+   mode is only enabled when TX csum offload is, so we don't have to
+   worry about that here. */
+static int nc2_set_sg(struct net_device *nd, u32 val)
+{
+       struct netchannel2 *nc = netdev_priv(nd);
+
+       if (nc->rings.max_fragments_per_tx_packet <= 1)
+               return -EOPNOTSUPP;
+
+       if (val)
+               nd->features |= NETIF_F_SG;
+       else
+               nd->features &= ~NETIF_F_SG;
+       return 0;
+}
+
 struct ethtool_ops nc2_ethtool_ops = {
        .get_tx_csum = ethtool_op_get_tx_csum,
        .set_tx_csum = nc2_set_tx_csum,
        .get_rx_csum = nc2_get_rx_csum,
        .set_rx_csum = nc2_set_rx_csum,
+       .get_sg      = ethtool_op_get_sg,
+       .set_sg      = nc2_set_sg,
 };
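
The SET_OFFLOAD handler earlier in this file couples the two features
in both directions: when the peer turns checksum offload off we must
drop SG, and when it turns checksum offload on we opportunistically
enable SG.  On the ethtool side, the core already refuses (in the
2.6-era interface, at least) to enable SG while TX checksumming is
off, which is why nc2_set_sg() only checks the peer's fragment limit.
A driver whose set_tx_csum handler can disable checksumming generally
needs to clear SG itself, roughly like this (a sketch, not this
driver's code):

  /* Assumes the 2.6-era ethtool_ops interface. */
  static int example_set_tx_csum(struct net_device *nd, u32 val)
  {
          if (!val)
                  /* SG without TX csum offload is unsupported. */
                  nd->features &= ~NETIF_F_SG;
          return ethtool_op_set_tx_csum(nd, val);
  }
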
diff --git a/drivers/net/xen-netchannel2/recv_packet.c b/drivers/net/xen-netchannel2/recv_packet.c
index 0d4e593..958a3a6 100644
--- a/drivers/net/xen-netchannel2/recv_packet.c
+++ b/drivers/net/xen-netchannel2/recv_packet.c
@@ -83,6 +83,13 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
        frags_bytes = hdr->size - sizeof(msg) - msg.prefix_size;
        nr_frags = frags_bytes / sizeof(struct netchannel2_fragment);
 
+       if (nr_frags > MAX_SKB_FRAGS) {
+               pr_debug("otherend misbehaving: %d frags > %ld\n",
+                        nr_frags, MAX_SKB_FRAGS);
+               nc->stats.tx_errors++;
+               return;
+       }
+
        switch (msg.type) {
        case NC2_PACKET_TYPE_small:
                if (nr_frags != 0) {
@@ -218,6 +225,22 @@ void advertise_max_packets(struct netchannel2_ring_pair *ncrp)
        ncrp->pending_time_sensitive_messages = 1;
 }
 
+void advertise_max_fragments_per_packet(struct netchannel2_ring_pair *ncrp)
+{
+       struct netchannel2_msg_set_max_fragments_per_packet msg;
+
+       if (!nc2_can_send_payload_bytes(&ncrp->prod_ring, sizeof(msg)))
+               return;
+       msg.max_frags_per_packet = MAX_SKB_FRAGS;
+       nc2_send_message(&ncrp->prod_ring,
+                        NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET,
+                        0,
+                        &msg,
+                        sizeof(msg));
+       ncrp->need_advertise_max_fragments_per_packet = 0;
+       ncrp->pending_time_sensitive_messages = 1;
+}
+
 void receive_pending_skbs(struct sk_buff_head *pending_rx_queue)
 {
        struct sk_buff *skb;
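
The fragment count checked above is never carried explicitly on the
wire; it is derived from the message size, as the header comment in
netchannel2.h documents.  A self-contained worked example of that
arithmetic (all sizes invented for illustration; struct fragment
stands in for struct netchannel2_fragment):

  #include <stdint.h>
  #include <stdio.h>

  struct fragment { uint32_t off; uint32_t size; };  /* 8 bytes */

  int main(void)
  {
          uint32_t hdr_size = 72;     /* total size from the message header */
          uint32_t fixed_part = 32;   /* sizeof the fixed packet message */
          uint32_t prefix_size = 16;  /* msg.prefix_size */

          uint32_t frags_bytes = hdr_size - fixed_part - prefix_size;
          uint32_t nr_frags = frags_bytes / sizeof(struct fragment);

          printf("%u fragments\n", nr_frags);  /* prints "3 fragments" */
          return 0;
  }
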
diff --git a/drivers/net/xen-netchannel2/rscb.c b/drivers/net/xen-netchannel2/rscb.c
index 5cc8cd9..de6e8c6 100644
--- a/drivers/net/xen-netchannel2/rscb.c
+++ b/drivers/net/xen-netchannel2/rscb.c
@@ -230,8 +230,8 @@ static inline int nfrags_skb(struct sk_buff *skb, int prefix_size)
                + skb_shinfo(skb)->nr_frags;
 }
 
-int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
-                               struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
+                                                    struct sk_buff *skb)
 {
        struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
        unsigned nr_fragments;
@@ -240,7 +240,7 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
        unsigned inline_prefix_size;
 
        if (allocate_txp_slot(ncrp, skb) < 0)
-               return -1;
+               return PREP_XMIT_BUSY;
 
        /* We're going to have to get the remote to issue a grant copy
           hypercall anyway, so there's no real benefit to shoving the
@@ -257,6 +257,14 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
                 * policy grant. */
                BUG_ON(nr_fragments == 0);
 
+               if (nr_fragments > ncrp->max_fragments_per_tx_packet) {
+                       if (skb_linearize(skb) < 0)
+                               return PREP_XMIT_DROP;
+                       nr_fragments = nfrags_skb(skb, inline_prefix_size);
+                       if (nr_fragments > ncrp->max_fragments_per_tx_packet)
+                               return PREP_XMIT_DROP;
+               }
+
                skb_co->nr_fragments = nr_fragments;
        }
 
@@ -268,14 +276,14 @@ int prepare_xmit_allocate_grant(struct netchannel2_ring_pair *ncrp,
                release_txp_slot(ncrp, skb);
                /* Leave skb_co->nr_fragments set, so that we don't
                   have to recompute it next time around. */
-               return -1;
+               return PREP_XMIT_BUSY;
        }
        skb_co->gref_pool = gref_pool;
        skb_co->inline_prefix_size = inline_prefix_size;
 
        skb_co->type = NC2_PACKET_TYPE_receiver_copy;
 
-       return 0;
+       return PREP_XMIT_OKAY;
 }
 
 static void prepare_subpage_grant(struct netchannel2_ring_pair *ncrp,
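
One subtlety in the linearise hunk above: linearising is not
guaranteed to bring the packet under the limit, because even a
completely linear buffer consumes one fragment per page it touches
(which is what nfrags_skb() counts, along with the inline prefix),
hence the second check and the PREP_XMIT_DROP fallback.  A
self-contained sketch of the page-span count involved (page size
assumed to be 4096 for illustration):

  #include <stdint.h>

  #define EXAMPLE_PAGE_SIZE 4096u

  /* Fragments needed to grant one linear buffer: one per page
   * boundary crossed.  len must be at least 1. */
  static unsigned frags_for_linear_buffer(uintptr_t start, unsigned len)
  {
          uintptr_t first_page = start / EXAMPLE_PAGE_SIZE;
          uintptr_t last_page = (start + len - 1) / EXAMPLE_PAGE_SIZE;

          return (unsigned)(last_page - first_page) + 1;
  }
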
diff --git a/drivers/net/xen-netchannel2/xmit_packet.c b/drivers/net/xen-netchannel2/xmit_packet.c
index 2783754..a3304f2 100644
--- a/drivers/net/xen-netchannel2/xmit_packet.c
+++ b/drivers/net/xen-netchannel2/xmit_packet.c
@@ -21,8 +21,9 @@ static enum transmit_policy transmit_policy(struct netchannel2 *nc,
    transmitted in the ring.  This is only called for small, linear
    SKBs.  It always succeeds, but has an int return type for symmetry
    with the other prepare_xmit_*() functions. */
-int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
-                               struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_small(
+       struct netchannel2_ring_pair *ncrp,
+       struct sk_buff *skb)
 {
        struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
 
@@ -33,7 +34,7 @@ int prepare_xmit_allocate_small(struct netchannel2_ring_pair *ncrp,
        skb_co->gref_pool = 0;
        skb_co->inline_prefix_size = skb->len;
 
-       return 0;
+       return PREP_XMIT_OKAY;
 }
 
 /* Figure out how much space @tp will take up on the ring. */
@@ -56,13 +57,13 @@ static unsigned get_transmitted_packet_msg_size(struct sk_buff *skb)
    allocated.  The expected case is that the caller will arrange for
    us to retry the allocation later, in which case we'll pick up the
    already-allocated buffers. */
-int prepare_xmit_allocate_resources(struct netchannel2 *nc,
-                                   struct sk_buff *skb)
+enum prepare_xmit_result prepare_xmit_allocate_resources(struct netchannel2 *nc,
+                                                        struct sk_buff *skb)
 {
        struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
        enum transmit_policy policy;
        unsigned msg_size;
-       int r;
+       enum prepare_xmit_result r;
 
        if (skb_co->policy == transmit_policy_unknown) {
                policy = transmit_policy(nc, skb);
@@ -76,18 +77,18 @@ int prepare_xmit_allocate_resources(struct netchannel2 *nc,
                default:
                        BUG();
                        /* Shut the compiler up. */
-                       r = -1;
+                       r = PREP_XMIT_BUSY;
                }
-               if (r < 0)
+               if (r != PREP_XMIT_OKAY)
                        return r;
                skb_co->policy = policy;
        }
 
        msg_size = get_transmitted_packet_msg_size(skb);
        if (nc2_reserve_payload_bytes(&nc->rings.prod_ring, msg_size))
-               return 0;
+               return PREP_XMIT_OKAY;
 
-       return -1;
+       return PREP_XMIT_BUSY;
 }
 
 static void set_offload_flags(struct sk_buff *skb,
@@ -219,21 +220,27 @@ int nc2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        spin_lock_bh(&nc->rings.lock);
 
-       if (!nc->rings.is_attached) {
-               spin_unlock_bh(&nc->rings.lock);
-               dev_kfree_skb(skb);
-               nc->stats.tx_dropped++;
-               return NETDEV_TX_OK;
-       }
+       if (!nc->rings.is_attached)
+               goto out_drop;
 
        r = prepare_xmit_allocate_resources(nc, skb);
-       if (r < 0)
-               goto out_busy;
+       if (r != PREP_XMIT_OKAY) {
+               if (r == PREP_XMIT_BUSY)
+                       goto out_busy;
+               else
+                       goto out_drop;
+       }
        queue_packet_to_interface(skb, &nc->rings);
        spin_unlock_bh(&nc->rings.lock);
 
        return NETDEV_TX_OK;
 
+out_drop:
+       spin_unlock_bh(&nc->rings.lock);
+       dev_kfree_skb(skb);
+       nc->stats.tx_dropped++;
+       return NETDEV_TX_OK;
+
 out_busy:
        /* Some more buffers may have arrived, so kick the worker
         * thread to go and have a look. */
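
The reworked exit paths above lean on the start_xmit ownership rule:
returning NETDEV_TX_OK means the driver has consumed the skb, whether
it was queued for transmit or deliberately freed; only a busy outcome
may leave the skb with the caller (this driver parks busy packets on
its own pending queue instead of returning NETDEV_TX_BUSY, but the
ownership rule is the same).  In generic form (a sketch with
hypothetical try_reserve()/queue_ok() helpers, not this driver's
code):

  static int example_start_xmit(struct sk_buff *skb, struct net_device *dev)
  {
          switch (try_reserve(skb)) {
          case PREP_XMIT_OKAY:
                  queue_ok(skb);
                  return NETDEV_TX_OK;    /* consumed: on its way */
          case PREP_XMIT_DROP:
                  dev_kfree_skb(skb);
                  return NETDEV_TX_OK;    /* consumed: dropped */
          case PREP_XMIT_BUSY:
          default:
                  netif_stop_queue(dev);
                  return NETDEV_TX_BUSY;  /* not consumed: core retries */
          }
  }
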
diff --git a/include/xen/interface/io/netchannel2.h b/include/xen/interface/io/netchannel2.h
index 5a56eb9..11bb469 100644
--- a/include/xen/interface/io/netchannel2.h
+++ b/include/xen/interface/io/netchannel2.h
@@ -26,6 +26,11 @@ struct netchannel2_msg_set_max_packets {
  * NETCHANNEL2_MAX_INLINE_BYTES.  Packets may contain no more than
  * NETCHANNEL2_MAX_PACKET_BYTES bytes of data, including all fragments
  * and the prefix.
+ *
+ * If a SET_MAX_FRAGMENTS_PER_PACKET message has been received, the
+ * number of fragments in the packet should respect that limit.
+ * Otherwise, there should be at most one fragment in the packet
+ * (there may be zero if the entire packet fits in the inline prefix).
  */
 #define NETCHANNEL2_MSG_PACKET 2
 #define NETCHANNEL2_MAX_PACKET_BYTES 65536
@@ -55,10 +60,8 @@ struct netchannel2_msg_packet {
        uint16_t pad2;
        uint16_t csum_start;
        uint16_t csum_offset;
-       /* Variable-size array.  The number of elements is determined
+       /* Variable-size array.  The number of elements is determined
           by the size of the message. */
-       /* Until we support scatter-gather, this will be either 0 or 1
-          element. */
        struct netchannel2_fragment frags[0];
 };
 
@@ -141,4 +144,19 @@ struct netchannel2_msg_set_offload {
        uint16_t reserved;
 };
 
+/* Set the maximum number of fragments which can be used in any packet
+ * (not including the inline prefix).  Until this is sent, there can
+ * be at most one such fragment per packet.  The maximum must not be
+ * set to zero. */
+/* Note that there is no acknowledgement for this message, and so if
+ * an endpoint tries to reduce the number of fragments then it may
+ * continue to receive over-fragmented packets for some time.  The
+ * receiving endpoint is expected to deal with this.
+ */
+#define NETCHANNEL2_MSG_SET_MAX_FRAGMENTS_PER_PACKET 5
+struct netchannel2_msg_set_max_fragments_per_packet {
+       struct netchannel2_msg_hdr hdr;
+       uint32_t max_frags_per_packet;
+};
+
 #endif /* !__NETCHANNEL2_H__ */
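
Because the message is unacknowledged, two limits are in play on the
receive side: the advertised limit only shapes what a well-behaved
peer transmits, while the hard parsing bound (MAX_SKB_FRAGS on Linux)
is the one the receive path can actually enforce, which is exactly
what the nr_frags check in recv_packet.c does.  In sketch form:

  /* Validate against the hard bound, not the currently advertised
   * one: a just-lowered advertisement may not have reached the peer
   * yet.  hard_parse_limit is MAX_SKB_FRAGS in the driver. */
  static int frags_acceptable(unsigned nr_frags, unsigned hard_parse_limit)
  {
          return nr_frags <= hard_parse_limit;
  }
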
-- 
1.6.3.1

