WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 11/22] Transmit and receive checksum offload support.

Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
---
 drivers/xen/netchannel2/Makefile           |    2 +-
 drivers/xen/netchannel2/chan.c             |   16 +++++
 drivers/xen/netchannel2/netchannel2_core.h |   19 ++++++
 drivers/xen/netchannel2/offload.c          |   93 ++++++++++++++++++++++++++++
 drivers/xen/netchannel2/recv_packet.c      |   30 +++++++++
 drivers/xen/netchannel2/xmit_packet.c      |   17 +++++
 include/xen/interface/io/netchannel2.h     |   44 ++++++++++++-
 7 files changed, 217 insertions(+), 4 deletions(-)
 create mode 100644 drivers/xen/netchannel2/offload.c

diff --git a/drivers/xen/netchannel2/Makefile b/drivers/xen/netchannel2/Makefile
index d6641a1..565ba89 100644
--- a/drivers/xen/netchannel2/Makefile
+++ b/drivers/xen/netchannel2/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_XEN_NETCHANNEL2) += netchannel2.o
 
 netchannel2-objs := chan.o netchan2.o rscb.o util.o \
-       xmit_packet.o recv_packet.o poll.o
+       xmit_packet.o offload.o recv_packet.o poll.o
 
 ifeq ($(CONFIG_XEN_NETDEV2_BACKEND),y)
 netchannel2-objs += netback2.o
diff --git a/drivers/xen/netchannel2/chan.c b/drivers/xen/netchannel2/chan.c
index a4b83a1..af8d028 100644
--- a/drivers/xen/netchannel2/chan.c
+++ b/drivers/xen/netchannel2/chan.c
@@ -82,6 +82,9 @@ retry:
                case NETCHANNEL2_MSG_FINISH_PACKET:
                        nc2_handle_finish_packet_msg(nc, ncrp, &hdr);
                        break;
+               case NETCHANNEL2_MSG_SET_OFFLOAD:
+                       nc2_handle_set_offload(nc, ncrp, &hdr);
+                       break;
                case NETCHANNEL2_MSG_PAD:
                        break;
                default:
@@ -126,6 +129,7 @@ done:
    event channel if necessary. */
 static void flush_rings(struct netchannel2_ring_pair *ncrp)
 {
+       struct netchannel2 *nc = ncrp->interface;
        int need_kick;
 
        flush_hypercall_batcher(&ncrp->pending_rx_hypercalls,
@@ -133,6 +137,8 @@ static void flush_rings(struct netchannel2_ring_pair *ncrp)
        send_finish_packet_messages(ncrp);
        if (ncrp->need_advertise_max_packets)
                advertise_max_packets(ncrp);
+       if (nc->need_advertise_offloads)
+               advertise_offloads(nc);
 
        need_kick = 0;
        if (nc2_finish_messages(&ncrp->cons_ring)) {
@@ -366,6 +372,9 @@ struct netchannel2 *nc2_new(struct xenbus_device *xd)
        nc->local_trusted = local_trusted;
        nc->rings.filter_mac = filter_mac;
 
+       /* Default to RX csum on. */
+       nc->use_rx_csum = 1;
+
        skb_queue_head_init(&nc->pending_skbs);
        if (init_ring_pair(&nc->rings, nc) < 0) {
                nc2_release(nc);
@@ -383,6 +392,7 @@ struct netchannel2 *nc2_new(struct xenbus_device *xd)
        netdev->features = NETIF_F_LLTX;
 
        SET_NETDEV_DEV(netdev, &xd->dev);
+       SET_ETHTOOL_OPS(netdev, &nc2_ethtool_ops);
 
        err = read_mac_address(xd->nodename, "remote-mac",
                               nc->rings.remote_mac);
@@ -468,6 +478,8 @@ int nc2_attach_rings(struct netchannel2 *nc,
        _nc2_attach_rings(&nc->rings, cons_sring, cons_payload, cons_size,
                          prod_sring, prod_payload, prod_size, otherend_id);
 
+       nc->need_advertise_offloads = 1;
+
        spin_unlock_bh(&nc->rings.lock);
 
        netif_carrier_on(nc->net_device);
@@ -532,6 +544,10 @@ void nc2_detach_rings(struct netchannel2 *nc)
        if (nc->rings.irq >= 0)
                unbind_from_irqhandler(nc->rings.irq, &nc->rings);
        nc->rings.irq = -1;
+
+       /* Disable all offloads */
+       nc->net_device->features &= ~NETIF_F_IP_CSUM;
+       nc->allow_tx_csum_offload = 0;
 }
 
 #if defined(CONFIG_XEN_NETDEV2_BACKEND)
diff --git a/drivers/xen/netchannel2/netchannel2_core.h b/drivers/xen/netchannel2/netchannel2_core.h
index 7304451..7e00daf 100644
--- a/drivers/xen/netchannel2/netchannel2_core.h
+++ b/drivers/xen/netchannel2/netchannel2_core.h
@@ -242,6 +242,19 @@ struct netchannel2 {
        /* Packets which we need to transmit soon */
        struct sk_buff_head pending_skbs;
 
+       /* Task offload control.  These are all protected by the
+        * lock. */
+       /* Ethtool allows us to use RX checksumming */
+       uint8_t use_rx_csum;
+       /* The remote endpoint allows us to use TX checksumming.
+          Whether we actually use TX checksumming is controlled by
+          the net device feature bits. */
+       uint8_t allow_tx_csum_offload;
+       /* At some point in the past, we tried to tell the other end
+          what our current offload policy is and failed.  Try again
+          as soon as possible. */
+       uint8_t need_advertise_offloads;
+
        /* Flag to indicate that the interface is stopped
           When the interface is stopped we need to run the tasklet
           after we receive an interrupt so that we can wake it up */
@@ -354,6 +367,12 @@ void receive_pending_skbs(struct sk_buff_head *rx_queue);
 void nc2_queue_purge(struct netchannel2_ring_pair *ncrp,
                     struct sk_buff_head *queue);
 
+void advertise_offloads(struct netchannel2 *nc);
+void nc2_handle_set_offload(struct netchannel2 *nc,
+                           struct netchannel2_ring_pair *ncrp,
+                           struct netchannel2_msg_hdr *hdr);
+extern struct ethtool_ops nc2_ethtool_ops;
+
 void nc2_init_poller(struct netchannel2_ring_pair *ncrp);
 void nc2_start_polling(struct netchannel2_ring_pair *ncrp);
 void nc2_stop_polling(struct netchannel2_ring_pair *ncrp);
diff --git a/drivers/xen/netchannel2/offload.c b/drivers/xen/netchannel2/offload.c
new file mode 100644
index 0000000..90d0a54
--- /dev/null
+++ b/drivers/xen/netchannel2/offload.c
@@ -0,0 +1,93 @@
+/* All the bits used to handle enabling and disabling the various
+ * offloads. */
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include "netchannel2_core.h"
+
+static int nc2_set_tx_csum(struct net_device *nd, u32 val);
+
+/* ---------------- Interface to the other domain ----------------------- */
+void nc2_handle_set_offload(struct netchannel2 *nc,
+                           struct netchannel2_ring_pair *ncrp,
+                           struct netchannel2_msg_hdr *hdr)
+{ /* Handle a NETCHANNEL2_MSG_SET_OFFLOAD message from the other end. */
+       struct netchannel2_msg_set_offload msg; /* filled from cons_ring */
+       if (hdr->size != sizeof(msg)) { /* wrong length: log and ignore */
+               pr_debug("Strange sized offload message: %d\n",
+                        hdr->size);
+               return;
+       }
+       if (ncrp != &nc->rings) { /* accepted on nc's own ring pair only */
+               pr_debug("Setting offloads on an ancillary ring!\n");
+               return;
+       }
+       nc2_copy_from_ring(&nc->rings.cons_ring, &msg, hdr->size);
+       if (msg.csum != nc->allow_tx_csum_offload) { /* act only on change */
+               nc->allow_tx_csum_offload = msg.csum;
+               nc2_set_tx_csum(nc->net_device, msg.csum); /* result unused */
+       }
+}
+
+/* Send a SET_OFFLOAD message carrying use_rx_csum, or flag a retry. */
+void advertise_offloads(struct netchannel2 *nc)
+{
+       struct netchannel2_msg_set_offload msg;
+
+       memset(&msg, 0, sizeof(msg)); /* fields not set below stay zero */
+
+       if (nc2_can_send_payload_bytes(&nc->rings.prod_ring, sizeof(msg))) {
+               msg.csum = nc->use_rx_csum;
+               nc2_send_message(&nc->rings.prod_ring,
+                                NETCHANNEL2_MSG_SET_OFFLOAD,
+                                0, &msg, sizeof(msg));
+               nc->need_advertise_offloads = 0; /* nothing left to send */
+               nc->rings.pending_time_sensitive_messages = 1;
+       } else { /* no room: flush_rings() retries while the flag is set */
+               nc->need_advertise_offloads = 1;
+       }
+}
+
+
+
+/* ---------------------- Ethtool interface ---------------------------- */
+
+static int nc2_set_rx_csum(struct net_device *nd, u32 val)
+{ /* ethtool hook: store the RX checksum setting and re-advertise it. */
+       struct netchannel2 *nc = netdev_priv(nd);
+
+       spin_lock_bh(&nc->rings.lock);
+       if (nc->use_rx_csum != !!val) { /* u32 -> uint8_t flag: keep 0/1 */
+               nc->use_rx_csum = !!val;
+               nc->need_advertise_offloads = 1; /* sent by flush_rings() */
+               spin_unlock_bh(&nc->rings.lock);
+               nc2_kick(&nc->rings); /* called with the lock dropped */
+       } else {
+               spin_unlock_bh(&nc->rings.lock);
+       }
+
+       return 0; /* never fails */
+}
+
+static u32 nc2_get_rx_csum(struct net_device *nd)
+{ /* ethtool hook: report the stored RX checksum setting. */
+       struct netchannel2 *nc = netdev_priv(nd);
+       return nc->use_rx_csum; /* read without taking rings.lock */
+}
+
+static int nc2_set_tx_csum(struct net_device *nd, u32 val)
+{ /* ethtool hook; also called from nc2_handle_set_offload(). */
+       struct netchannel2 *nc = netdev_priv(nd);
+
+       /* Enabling TX csum offload is refused with -EOPNOTSUPP until the
+          other end has allowed it; disabling is always passed through. */
+       if (val != 0 && !nc->allow_tx_csum_offload)
+               return -EOPNOTSUPP;
+       return ethtool_op_set_tx_csum(nd, val); /* generic ethtool helper */
+}
+
+struct ethtool_ops nc2_ethtool_ops = { /* set via SET_ETHTOOL_OPS() */
+       .get_tx_csum = ethtool_op_get_tx_csum, /* generic ethtool helper */
+       .set_tx_csum = nc2_set_tx_csum, /* on needs allow_tx_csum_offload */
+       .get_rx_csum = nc2_get_rx_csum, /* returns use_rx_csum */
+       .set_rx_csum = nc2_set_rx_csum, /* sets need_advertise_offloads */
+};
diff --git a/drivers/xen/netchannel2/recv_packet.c b/drivers/xen/netchannel2/recv_packet.c
index 4678c28..0d4e593 100644
--- a/drivers/xen/netchannel2/recv_packet.c
+++ b/drivers/xen/netchannel2/recv_packet.c
@@ -132,6 +132,36 @@ void nc2_handle_packet_msg(struct netchannel2 *nc,
                        goto err;
                }
 
+               switch (msg.flags & (NC2_PACKET_FLAG_data_validated |
+                                    NC2_PACKET_FLAG_csum_blank)) {
+               case 0:
+                       skb->ip_summed = CHECKSUM_NONE;
+                       break;
+               case NC2_PACKET_FLAG_data_validated:
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       break;
+               default:
+                       /* csum_blank implies data_validated, so
+                          csum_blank and csum_blank|data_validated
+                          are equivalent. */
+                       skb->ip_summed = CHECKSUM_PARTIAL;
+                       if (msg.csum_offset + 2 > skb->len) {
+                               /* Whoops.  Assuming no bugs in our
+                                  receive methods, the other end just
+                                  requested checksum calculation
+                                  beyond the end of the packet. */
+                               if (net_ratelimit())
+                                       dev_warn(&nc->net_device->dev,
+                                                "csum field too far through packet (%d, skb len %d, headlen %d)\n",
+                                                msg.csum_offset, skb->len,
+                                                skb_headlen(skb));
+                               goto err;
+                       }
+                       skb->csum_start = msg.csum_start + skb_headroom(skb);
+                       skb->csum_offset = msg.csum_offset - msg.csum_start;
+                       break;
+               }
+
                __skb_queue_tail(pending_rx_queue, skb);
 
                if (ncrp->pending_rx_hypercalls.nr_pending_gops >=
diff --git a/drivers/xen/netchannel2/xmit_packet.c b/drivers/xen/netchannel2/xmit_packet.c
index a693a75..5b0ba6b 100644
--- a/drivers/xen/netchannel2/xmit_packet.c
+++ b/drivers/xen/netchannel2/xmit_packet.c
@@ -90,6 +90,21 @@ int prepare_xmit_allocate_resources(struct netchannel2 *nc,
        return -1;
 }
 
+static void set_offload_flags(struct sk_buff *skb,
+                             volatile struct netchannel2_msg_packet *msg)
+{ /* Fill in msg's checksum flags and offsets from skb. */
+       if (skb->ip_summed == CHECKSUM_PARTIAL) { /* csum left to receiver */
+               msg->flags |=
+                       NC2_PACKET_FLAG_csum_blank |
+                       NC2_PACKET_FLAG_data_validated;
+               msg->csum_start = skb->csum_start - (skb->data - skb->head);
+               msg->csum_offset = msg->csum_start + skb->csum_offset;
+       } /* both message offsets are counted from skb->data */
+
+       if (skb->proto_data_valid) /* may be set without csum_blank */
+               msg->flags |= NC2_PACKET_FLAG_data_validated;
+}
+
 /* Transmit a packet which has previously been prepared with
    prepare_xmit_allocate_resources(). */
 /* Once this has been called, the ring must not be flushed until the
@@ -139,6 +154,8 @@ int nc2_really_start_xmit(struct netchannel2_ring_pair *ncrp,
               skb_co->inline_prefix_size);
        barrier();
 
+       set_offload_flags(skb, msg);
+
        switch (skb_co->policy) {
        case transmit_policy_small:
                /* Nothing to do */
diff --git a/include/xen/interface/io/netchannel2.h b/include/xen/interface/io/netchannel2.h
index c45963e..5a56eb9 100644
--- a/include/xen/interface/io/netchannel2.h
+++ b/include/xen/interface/io/netchannel2.h
@@ -53,15 +53,31 @@ struct netchannel2_msg_packet {
        uint8_t pad1;
        uint16_t prefix_size;
        uint16_t pad2;
-       uint16_t pad3;
-       uint16_t pad4;
-       /* Variable-size array.  The number of elements is determined
+       uint16_t csum_start;
+       uint16_t csum_offset;
+       /* Variable-size array.  The number of elements is determined
           by the size of the message. */
        /* Until we support scatter-gather, this will be either 0 or 1
           element. */
        struct netchannel2_fragment frags[0];
 };
 
+/* TX csum offload.  The transmitting domain has skipped a checksum
+ * calculation.  Before forwarding the packet on, the receiving domain
+ * must first perform a 16 bit IP checksum on everything from
+ * csum_start to the end of the packet, and then write the result to
+ * an offset csum_offset in the packet.  This should only be set if
+ * the transmitting domain has previously received a SET_OFFLOAD
+ * message with csum = 1.
+ */
+#define NC2_PACKET_FLAG_csum_blank 1
+/* RX csum offload.  The transmitting domain has already validated the
+ * protocol-level checksum on this packet (i.e. TCP or UDP), so the
+ * receiving domain shouldn't bother.  This does not tell you anything
+ * about the IP-level checksum.  This can be set on any packet,
+ * regardless of any SET_OFFLOAD messages which may or may not have
+ * been sent. */
+#define NC2_PACKET_FLAG_data_validated 2
 /* If set, the transmitting domain requires an event urgently when
  * this packet's finish message is sent.  Otherwise, the event can be
  * delayed. */
@@ -103,4 +119,26 @@ struct netchannel2_msg_finish_packet {
        uint32_t id;
 };
 
+/* Tell the other end what sort of offloads we're going to let it use.
+ * An endpoint must not use any offload unless it has been enabled
+ * by a previous SET_OFFLOAD message. */
+/* Note that there is no acknowledgement for this message.  This means
+ * that an endpoint can continue to receive PACKET messages which
+ * require offload support for some time after it disables task
+ * offloading.  The endpoint is expected to handle this case correctly
+ * (which may just mean dropping the packet and returning a FINISH
+ * message, if appropriate).
+ */
+#define NETCHANNEL2_MSG_SET_OFFLOAD 4
+struct netchannel2_msg_set_offload {
+       struct netchannel2_msg_hdr hdr;
+       /* Checksum offload.  If this is 0, the other end must
+        * calculate checksums before sending the packet.  If it is 1,
+        * the other end does not have to perform the calculation.
+        */
+       uint8_t csum;
+       uint8_t pad; /* advertise_offloads() sends this as zero */
+       uint16_t reserved; /* advertise_offloads() sends this as zero */
+};
+
 #endif /* !__NETCHANNEL2_H__ */
-- 
1.6.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>