[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [Patch] vnet-module ( for 3.1 && unstable )


  • To: "Robert Valentan" <R.Valentan@xxxxxxxxxxxxx>
  • From: "Zhu Han" <schumi.han@xxxxxxxxx>
  • Date: Mon, 16 Jul 2007 10:15:33 +0800
  • Cc: Mike Wray <mike.wray@xxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
  • Delivery-date: Sun, 15 Jul 2007 19:13:21 -0700
  • Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=beta; h=received:message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:content-transfer-encoding:content-disposition:references; b=MgzKmYdJesixAiH7vqUSsnOOjX6tvh34+6r+mon9G8wa1QWdVlePCYa+fzN0KR6H2anRpU6SXzmPtkAqD0mBVgukSO+JjSPfVwh1+CNA1+UXE7jHeF6qroWfbNR8QgeiwN44g+NdfdRYygFeerZ2TBwcFFBvK22ZgoBoNciuKA0=
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

Hi, Robert,

Here are some questions:

1) If we just pull the Ethernet header and the IP header, does the pull
reach a position outside the head room?  I haven't observed that before.

2) You don't need move_addr_to_kernel/move_addr_to_user; you just need
to change fs to KERNEL_DS (set_fs) and then change it back.

3) Why not use "struct sock" directly to manipulate the socket instead
of using int file descriptor?

On 7/16/07, Robert Valentan <R.Valentan@xxxxxxxxxxxxx> wrote:
Hallo!

The attached patch makes the vnet-module working for i386
and x86_64. (I had no other systems for testing)
(My patch from 6.6.2007 is included)

 - x86_64: the x86-64 kernel has __ARCH_WANT_SYS_SOCKETCALL
   defined, but has no socketcall. The _syscall macros are
   replaced with working implementations.

 - copies the "skb_pull" function from the kernel into skb_util.h
   (as "skb_pull_vn"), because pulling into the data will raise a
   "BUG_ON" in the kernel. The kernel function does not allow
   pulling into the payload!

 - it is not always possible to modify a received sk_buff and
   send it again to the network, so the data is now copied into a
   newly allocated buffer (vnet_forward.c and etherip.c). The code
   now works, I think with little or no time penalty.


Summary:
Solved:
  With this patch the vnet-module is working on i386 and x86_64,
  including peering-option.   (except the following "open")


Open:
 - Only vnets with the security option "none" are working.
 - The module option "vnet_encaps" is ignored; "udp" is always used.



Signed-off-by: Robert Valentan <R.Valentan@xxxxxxxxxxxxx>


--
regards
Robert Valentan

diff -r aee991c7723a tools/vnet/vnet-module/esp.c
--- a/tools/vnet/vnet-module/esp.c      Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/esp.c      Sun Jul 15 17:03:47 2007
@@ -341,12 +341,13 @@
         dprintf("> ETH header pull...\n");
         memmove(skb->data, skb->mac.raw, ETH_HLEN);
         skb->mac.raw = skb->data;
-        __skb_pull(skb, ETH_HLEN);
+        skb_pull_vn(skb, ETH_HLEN);
     }
     dprintf("> IP header pull...\n");
     memmove(skb->data, skb->nh.raw, ip_n);
     skb->nh.raw = skb->data;
-    __skb_pull(skb, ip_n);
+    skb_pull_vn(skb, ip_n);
+    skb->h.raw = skb->data;
     esph = (void*)skb->data;
     // Add spi and sequence number.
     esph->spi = sa->ident.spi;
@@ -457,7 +458,7 @@
     // Move skb->data back to ethernet header.
     // Do in 2 moves to ensure offsets are +ve,
     // since args to skb_pull/skb_push are unsigned.
-    __skb_pull(skb, head_n);
+    skb_pull_vn(skb, head_n);
     __skb_push(skb, skb->data - skb->mac.raw);
     // After this esph is invalid.
     esph = NULL;
@@ -763,7 +764,7 @@
     dprintf(">\n");
 #ifdef DEBUG
     dprintf("> recv skb=\n");
-    skb_print_bits(skb, 0, skb->len);
+    skb_print_bits("", skb, 0, skb->len);
 #endif
     ip_n = (skb->nh.iph->ihl << 2);
     if(skb->data == skb->mac.raw){
@@ -773,7 +774,7 @@
             err = -EINVAL;
             goto exit;
         }
-        skb_pull(skb, eth_n + ip_n);
+        skb_pull_vn(skb, eth_n + ip_n);
     }
     addr = skb->nh.iph->daddr;
     err = esp_skb_header(skb, &esph);
diff -r aee991c7723a tools/vnet/vnet-module/etherip.c
--- a/tools/vnet/vnet-module/etherip.c  Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/etherip.c  Sun Jul 15 17:03:47 2007
@@ -270,6 +270,7 @@
     u32 saddr, daddr;
     char vnetbuf[VNET_ID_BUF];
     struct ethhdr *eth;
+    struct sk_buff *newskb;

     dprintf(">\n");
     saddr = skb->nh.iph->saddr;
@@ -293,7 +294,7 @@
             err = -EINVAL;
             goto exit;
         }
-        skb_pull(skb, pull_n);
+        skb_pull_vn(skb, pull_n);
     }
     // Assume skb->data points at etherip header.
     etheriph = (void*)skb->data;
@@ -318,7 +319,18 @@
         goto exit;
     }
     // Point at the headers in the contained ethernet frame.
-    skb->mac.raw = skb_pull(skb, etherip_n);
+    skb->mac.raw = skb_pull_vn(skb, etherip_n);
+
+    newskb = alloc_skb(skb->len, GFP_ATOMIC);
+    if (!newskb) {
+        wprintf("> alloc new sk_buff failed \n");
+        goto exit;
+    }
+    newskb->mac.raw = skb_put(newskb, skb->len);
+    skb_copy_bits(skb, 0, newskb->data, skb->len);
+    kfree_skb(skb);
+    skb = newskb;
+
     eth = eth_hdr(skb);

     // Simulate the logic from eth_type_trans()
@@ -340,27 +352,12 @@

     // Assuming a standard Ethernet frame.
     // Should check for protocol? Support ETH_P_8021Q too.
-    skb->nh.raw = skb_pull(skb, ETH_HLEN);
-
-#ifdef __KERNEL__
-    // Fix IP options, checksum, skb dst, netfilter state.
-    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-    if (skb->ip_summed == CHECKSUM_HW){
-        skb->ip_summed = CHECKSUM_NONE;
-    }
-    dst_release(skb->dst);
-    skb->dst = NULL;
-    nf_reset(skb);
-#ifdef CONFIG_BRIDGE_NETFILTER
-    if(skb->nf_bridge){
-        // Stop the eth header being clobbered by 
nf_bridge_maybe_copy_header().
-        _nf_bridge_save_header(skb);
-    }
-#endif
-#endif // __KERNEL__
-
-    dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT 
"\n",
+    skb->nh.raw = skb_pull_vn(skb, ETH_HLEN);
+    skb->h.raw = newskb->nh.raw + sizeof(struct iphdr);
+
+    dprintf("> Unpacked srcaddr=" IPFMT " dstaddr=" IPFMT " vnet=%s srcmac=" MACFMT " 
dstmac=" MACFMT "\n",
             NIPQUAD(skb->nh.iph->saddr),
+            NIPQUAD(skb->nh.iph->daddr),
             VnetId_ntoa(&vnet, vnetbuf),
             MAC6TUPLE(eth->h_source),
             MAC6TUPLE(eth->h_dest));
diff -r aee991c7723a tools/vnet/vnet-module/skb_util.h
--- a/tools/vnet/vnet-module/skb_util.h Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/skb_util.h Sun Jul 15 17:03:47 2007
@@ -66,6 +66,21 @@
 }

 #endif
+
+/*
+ * It's a copy from {kernel}/include/linux/skbuff.h func '__skb_pull' and 
'skb_pull'
+ * to avoid the BUG_ON when pulling into the data (getting forwarded ip-frames)
+ */
+static inline unsigned char *__skb_pull_vn(struct sk_buff *skb, unsigned int 
len)
+{
+        skb->len -= len;
+        //BUG_ON(skb->len < skb->data_len);
+        return skb->data += len;
+}
+static inline unsigned char *skb_pull_vn(struct sk_buff *skb, unsigned int len)
+{
+        return unlikely(len > skb->len) ? NULL : __skb_pull_vn(skb, len);
+}


 #ifdef __KERNEL__
diff -r aee991c7723a tools/vnet/vnet-module/varp.c
--- a/tools/vnet/vnet-module/varp.c     Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/varp.c     Sun Jul 15 17:03:47 2007
@@ -1365,7 +1365,7 @@
             goto exit;
         }
     }
-    varph = (void*)skb_pull(skb, sizeof(struct udphdr));
+    varph = (void*)skb_pull_vn(skb, sizeof(struct udphdr));
     if(skb->len < sizeof(struct VnetMsgHdr)){
         wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct 
VnetMsgHdr));
         goto exit;
@@ -1378,11 +1378,11 @@
         }
         break;
     case VUDP_ID: // Etherip-in-udp packet.
-        skb_pull(skb, sizeof(struct VnetMsgHdr));
+        skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
         err = etherip_protocol_recv(skb);
         goto exit;
     case VFWD_ID: // Forwarded.
-        skb_pull(skb, sizeof(struct VnetMsgHdr));
+        skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
         err = vnet_forward_recv(skb);
         goto exit;
     default:
diff -r aee991c7723a tools/vnet/vnet-module/varp_socket.c
--- a/tools/vnet/vnet-module/varp_socket.c      Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/varp_socket.c      Sun Jul 15 17:03:47 2007
@@ -79,8 +79,8 @@
  * Architectures using socketcall() define __ARCH_WANT_SYS_SOCKETCALL.
  */

-#ifdef __ARCH_WANT_SYS_SOCKETCALL
-
+#if defined(__ARCH_WANT_SYS_SOCKETCALLxx) && defined(__NR_socketcall)
+
 /* Define the socketcall() syscall.
  * Multiplexes all the socket-related calls.
  *
@@ -190,59 +190,260 @@

 /* No socketcall - define the individual syscalls. */

-static inline _syscall3(int, socket,
-                        int, family,
-                        int, type,
-                        int, protocol);
-
-static inline _syscall3(int, bind,
-                        int, fd,
-                        struct sockaddr *, umyaddr,
-                        int, addrlen);
-
-static inline _syscall3(int, connect,
-                        int, fd,
-                        struct sockaddr *, uservaddr,
-                        int, addrlen);
-
-static inline _syscall6(int, sendto,
-                        int, fd,
-                        void *, buff,
-                        size_t, len,
-                        unsigned, flags,
-                        struct sockaddr *, addr,
-                        int, addr_len);
-
-static inline _syscall6(int, recvfrom,
-                        int, fd,
-                        void *, ubuf,
-                        size_t, size,
-                        unsigned, flags,
-                        struct sockaddr *, addr,
-                        int *, addr_len);
-
-static inline _syscall5(int, setsockopt,
-                        int, fd,
-                        int, level,
-                        int, optname,
-                        void *, optval,
-                        int, optlen);
-
-static inline _syscall5(int, getsockopt,
-                        int, fd,
-                        int, level,
-                        int, optname,
-                        void *, optval,
-                        int *, optlen);
-
-static inline _syscall2(int, shutdown,
-                        int, fd,
-                        int, how);
-
-static inline _syscall3(int, getsockname,
-                        int, fd,
-                        struct sockaddr *, usockaddr,
-                        int *, usockaddr_len);
+/* The following code is copied from linux-kernel/net/socket.c
+ * as a replacement for __NR_socketcall, which does not exist on x86_64 and
+ * some other systems.
+ * An alternative would be to export the copied functions from net/socket.c
+ */
+#define MAX_SOCK_ADDR 128
+
+int socket(int family, int type, int protocol){
+
+       int retval;
+       struct socket *sock;
+       retval = sock_create(family, type, protocol, &sock);
+       if (retval < 0)
+               goto out;
+
+       retval = sock_map_fd(sock);
+       if (retval < 0)
+               goto out_release;
+
+out:
+       /* It may be already another descriptor 8) Not kernel problem. */
+       return retval;
+
+out_release:
+       sock_release(sock);
+       return retval;
+
+}
+
+
+int bind(int fd, struct sockaddr *umyaddr, int addrlen){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int err;
+
+       if((sock = sockfd_lookup(fd,&err))!=NULL)
+       {
+               if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
+                       err = security_socket_bind(sock, (struct sockaddr 
*)address, addrlen);
+                       if (err) {
+                               sockfd_put(sock);
+                               return err;
+                       }
+                       err = sock->ops->bind(sock, (struct sockaddr *)address, 
addrlen);
+               }
+               sockfd_put(sock);
+       }
+       return err;
+}
+
+
+int connect(int fd, struct sockaddr *uservaddr, int addrlen){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int err;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+       err = move_addr_to_kernel(uservaddr, addrlen, address);
+       if (err < 0)
+               goto out_put;
+
+       err = security_socket_connect(sock, (struct sockaddr *)address, 
addrlen);
+       if (err)
+               goto out_put;
+
+       err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+                                sock->file->f_flags);
+out_put:
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+int sendto(int fd, void * buff, size_t len,
+           unsigned flags, struct sockaddr *addr,
+           int addr_len){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int err;
+       struct msghdr msg;
+       struct iovec iov;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+       iov.iov_base=buff;
+       iov.iov_len=len;
+       msg.msg_name=NULL;
+       msg.msg_iov=&iov;
+       msg.msg_iovlen=1;
+       msg.msg_control=NULL;
+       msg.msg_controllen=0;
+       msg.msg_namelen=0;
+       if(addr)
+       {
+               err = move_addr_to_kernel(addr, addr_len, address);
+               if (err < 0)
+                       goto out_put;
+               msg.msg_name=address;
+               msg.msg_namelen=addr_len;
+       }
+       if (sock->file->f_flags & O_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       msg.msg_flags = flags;
+       err = sock_sendmsg(sock, &msg, len);
+
+out_put:
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+
+int recvfrom(int fd, void * ubuf, size_t size,
+             unsigned flags, struct sockaddr *addr,
+             int *addr_len){
+
+       struct socket *sock;
+       struct iovec iov;
+       struct msghdr msg;
+       char address[MAX_SOCK_ADDR];
+       int err,err2;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+
+       msg.msg_control=NULL;
+       msg.msg_controllen=0;
+       msg.msg_iovlen=1;
+       msg.msg_iov=&iov;
+       iov.iov_len=size;
+       iov.iov_base=ubuf;
+       msg.msg_name=address;
+       msg.msg_namelen=MAX_SOCK_ADDR;
+       if (sock->file->f_flags & O_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       err=sock_recvmsg(sock, &msg, size, flags);
+
+       if(err >= 0 && addr != NULL)
+       {
+               err2=move_addr_to_user(address, msg.msg_namelen, addr, 
addr_len);
+               if(err2<0)
+                       err=err2;
+       }
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+int setsockopt(int fd, int level, int optname, void *optval, int optlen){
+
+       int err;
+       struct socket *sock;
+
+       if (optlen < 0)
+               return -EINVAL;
+
+       if ((sock = sockfd_lookup(fd, &err))!=NULL)
+       {
+               err = security_socket_setsockopt(sock,level,optname);
+               if (err) {
+                       sockfd_put(sock);
+                       return err;
+               }
+
+               if (level == SOL_SOCKET)
+                       err=sock_setsockopt(sock,level,optname,optval,optlen);
+               else
+                       err=sock->ops->setsockopt(sock, level, optname, optval, 
optlen);
+               sockfd_put(sock);
+       }
+       return err;
+}
+
+
+/*  not possible, because sock_getsockopt is not exported ...
+int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
+
+       int err;
+       struct socket *sock;
+
+       if ((sock = sockfd_lookup(fd, &err))!=NULL)
+       {
+               err = security_socket_getsockopt(sock, level, optname);
+               if (err) {
+                       sockfd_put(sock);
+                       return err;
+               }
+
+               if (level == SOL_SOCKET)
+                       err=sock_getsockopt(sock,level,optname,optval,optlen);
+               else
+                       err=sock->ops->getsockopt(sock, level, optname, optval, 
optlen);
+               sockfd_put(sock);
+       }
+       return err;
+}
+*/
+
+
+int shutdown(int fd, int how){
+
+       int err;
+       struct socket *sock;
+
+       if ((sock = sockfd_lookup(fd, &err))!=NULL)
+       {
+               err = security_socket_shutdown(sock, how);
+               if (err) {
+                       sockfd_put(sock);
+                       return err;
+               }
+
+               err=sock->ops->shutdown(sock, how);
+               sockfd_put(sock);
+       }
+       return err;
+}
+
+int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int len, err;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+
+       err = security_socket_getsockname(sock);
+       if (err)
+               goto out_put;
+
+       err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+       if (err)
+               goto out_put;
+       err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+
+out_put:
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+/**
+ * End of copy from net/socket.c
+ */
+

 #endif /* __ARCH_WANT_SYS_SOCKETCALL */

diff -r aee991c7723a tools/vnet/vnet-module/vnet_forward.c
--- a/tools/vnet/vnet-module/vnet_forward.c     Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/vnet_forward.c     Sun Jul 15 17:03:47 2007
@@ -186,7 +186,7 @@
     printk("\nWrapped packet:\n");
     print_iphdr(__FUNCTION__, skb);
     print_udphdr(__FUNCTION__, skb);
-    skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len);
+    skb_print_bits(__FUNCTION__, skb, 0, skb->len);
 #endif

     err = _skb_xmit(skb, saddr);
@@ -304,7 +304,7 @@
     peer->rx_packets++;
     skb->mac.raw = NULL;
     skb->nh.raw = skb->data;
-    skb->h.raw = (void*)(skb->nh.iph + 1);
+    skb->h.raw = skb->data + sizeof(struct iphdr);
     if(!skb->nh.iph->saddr){
         skb->nh.iph->saddr = addr.u.ip4.s_addr;
     }
@@ -328,12 +328,17 @@

     // Handle (a copy of) it ourselves, because
     // if it is looped-back by xmit it will be ignored.
-    //recvskb = skb_clone(skb, GFP_ATOMIC);
-    recvskb = pskb_copy(skb, GFP_ATOMIC);
+    recvskb = alloc_skb(skb->len, GFP_ATOMIC);
     if(recvskb){
+        recvskb->protocol = htons(ETH_P_IP);
+
+        recvskb->nh.raw = skb_put(recvskb, skb->len);
+        recvskb->h.raw = recvskb->data + sizeof(struct iphdr);
+        skb_copy_bits(skb, 0, recvskb->data, skb->len);
+
         // Data points at the unwrapped iphdr, but varp_handle_message()
         // expects it to point at the udphdr, so pull.
-        skb_pull(recvskb, sizeof(struct iphdr));
+        skb_pull_vn(recvskb, sizeof(struct iphdr));
         if(varp_handle_message(recvskb) <= 0){
             kfree_skb(recvskb);
         }

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel




--
best regards,
hanzhu

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.