[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 3/4] Xen/netfront: Implement persistent grant in netfront.





On 2012-11-15 18:52, Roger Pau Monné wrote:
On 15/11/12 08:05, Annie Li wrote:
Tx/rx page pool are maintained. New grant is mapped and put into
pool, unmap only happens when releasing/removing device.

Signed-off-by: Annie Li<annie.li@xxxxxxxxxx>
---
  drivers/net/xen-netfront.c |  372 +++++++++++++++++++++++++++++++++++++-------
  1 files changed, 315 insertions(+), 57 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 0ebbb19..17b81c0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -79,6 +79,13 @@ struct netfront_stats {
         struct u64_stats_sync   syncp;
  };

+struct gnt_list {
+       grant_ref_t             gref;
+       struct                  page *gnt_pages;
+       void                    *gnt_target;
+       struct                  gnt_list *tail;
+};
This could also be shared with blkfront.

Netfront does not have the shadow like blkfront, and it needs the gnt_target to save skb address of rx path. So we can share this too? blkfront would not use it actually.


+
  struct netfront_info {
         struct list_head list;
         struct net_device *netdev;
@@ -109,6 +116,10 @@ struct netfront_info {
         grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
         unsigned tx_skb_freelist;

+       struct gnt_list *tx_grant[NET_TX_RING_SIZE];
+       struct gnt_list *tx_gnt_list;
+       unsigned int tx_gnt_cnt;
I don't understand this, why do you need both an array and a list?

The array tx_grant is just like other tx_skbs, grant_tx_ref. It saves grant entries corresponding every request in the ring. This is what netfront different from blkfront, netfront does not have shadow ring, and it only uses a ring size array to track every request in the ring.
The list is like a pool to save all available persistent grants.

I'm
not familiar with net code, so I don't know if this is some kind of
special netfront thing?

Yes, this is different from blkfront. netfront uses ring size arrays to track every request in the ring.


Anyway if you have to use a list I would recommend using one of the list
constructions that's already in the kernel, it simplifies the code and
makes it more easy to understand than creating your own list structure.

Ok, thanks.


+
         spinlock_t   rx_lock ____cacheline_aligned_in_smp;
         struct xen_netif_rx_front_ring rx;
         int rx_ring_ref;
@@ -126,6 +137,10 @@ struct netfront_info {
         grant_ref_t gref_rx_head;
         grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];

+       struct gnt_list *rx_grant[NET_RX_RING_SIZE];
+       struct gnt_list *rx_gnt_list;
+       unsigned int rx_gnt_cnt;
Same comment above here.

Same as above.


+
         unsigned long rx_pfn_array[NET_RX_RING_SIZE];
         struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
         struct mmu_update rx_mmu[NET_RX_RING_SIZE];
@@ -134,6 +149,7 @@ struct netfront_info {
         struct netfront_stats __percpu *stats;

         unsigned long rx_gso_checksum_fixup;
+       u8 persistent_gnt:1;
  };

  struct netfront_rx_info {
@@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info 
*np,
         return ref;
  }

+static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np,
+                                           RING_IDX ri)
+{
+       int i = xennet_rxidx(ri);
+       struct gnt_list *gntlist = np->rx_grant[i];
+       np->rx_grant[i] = NULL;
Ok, I think I get why do you need both an array and a list, is that
because netfront doesn't have some kind of shadow ring to keep track of
issued requests?

Yes.


So each issued request has an associated gnt_list with the list of used
grants?

gnt_list is kind of free grants. It is like a pool of free grants. If free grants exist in this list, free grant will be gotten from this list. If no, new grant will be allocated. In xennet_tx_buf_gc, free grants will be put into the list again if response status is OK.

If so it would be good to add a comment about it.

+       return gntlist;
+}
+
  #ifdef CONFIG_SYSFS
  static int xennet_sysfs_addif(struct net_device *netdev);
  static void xennet_sysfs_delif(struct net_device *netdev);
@@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev)
                 netif_wake_queue(dev);
  }

+static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev,
+                                      unsigned long mfn, void *vaddr,
+                                      unsigned int id,
+                                      grant_ref_t ref)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       grant_ref_t gnt_ref;
+       struct gnt_list *gnt_list_entry;
+
+       if (np->persistent_gnt&&  np->rx_gnt_cnt) {
+               gnt_list_entry = np->rx_gnt_list;
+               np->rx_gnt_list = np->rx_gnt_list->tail;
+               np->rx_gnt_cnt--;
+
+               gnt_list_entry->gnt_target = vaddr;
+               gnt_ref = gnt_list_entry->gref;
+               np->rx_grant[id] = gnt_list_entry;
+       } else {
+               struct page *page;
+
+               BUG_ON(!np->persistent_gnt&&  np->rx_gnt_cnt);
+               if (!ref)
+                       gnt_ref =
+                               gnttab_claim_grant_reference(&np->gref_rx_head);
+               else
+                       gnt_ref = ref;
+               BUG_ON((signed short)gnt_ref<  0);
+
+               if (np->persistent_gnt) {
So you are only using persistent grants if the backend also supports
them.

Current implementation is:
If netback supports persistent grant, the frontend will work with persistent grant feature too. If netback does not support persistent grant, the frontend will work without persistent grant feature.

Have you benchmarked the performance of a persistent frontend with
a non-persistent backend.

I  remember I did some test before, not so sure. Will check it.

  In the block case, usign a persistent frontend
with a non-persistent backend let to an overall performance improvement,
so blkfront uses persistent grants even if blkback doesn't support them.
Take a look at the following graph:

http://xenbits.xen.org/people/royger/persistent_grants/nonpers_read.png

Good idea, that makes sense. I will change netfront too, thanks.


+                       page = alloc_page(GFP_KERNEL);
+                       if (!page) {
+                               if (!ref)
+                                       gnttab_release_grant_reference(
+&np->gref_rx_head, ref);
+                               return -ENOMEM;
+                       }
+                       mfn = pfn_to_mfn(page_to_pfn(page));
+
+                       gnt_list_entry = kmalloc(sizeof(struct gnt_list),
+                                                GFP_KERNEL);
+                       if (!gnt_list_entry) {
+                               __free_page(page);
+                               if (!ref)
+                                       gnttab_release_grant_reference(
+&np->gref_rx_head, ref);
+                               return -ENOMEM;
+                       }
+                       gnt_list_entry->gref = gnt_ref;
+                       gnt_list_entry->gnt_pages = page;
+                       gnt_list_entry->gnt_target = vaddr;
+
+                       np->rx_grant[id] = gnt_list_entry;
+               }
+
+               gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id,
+                                               mfn, 0);
+       }
+       np->grant_rx_ref[id] = gnt_ref;
+
+       return gnt_ref;
+}
+
  static void xennet_alloc_rx_buffers(struct net_device *dev)
  {
         unsigned short id;
@@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev)
         int i, batch_target, notify;
         RING_IDX req_prod = np->rx.req_prod_pvt;
         grant_ref_t ref;
-       unsigned long pfn;
-       void *vaddr;
         struct xen_netif_rx_request *req;

         if (unlikely(!netif_carrier_ok(dev)))
@@ -306,19 +392,16 @@ no_skb:
                 BUG_ON(np->rx_skbs[id]);
                 np->rx_skbs[id] = skb;

-               ref = gnttab_claim_grant_reference(&np->gref_rx_head);
-               BUG_ON((signed short)ref<  0);
-               np->grant_rx_ref[id] = ref;
+               page = skb_frag_page(&skb_shinfo(skb)->frags[0]);

-               pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
-               vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
+               ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
+                                         page_address(page), id, 0);
+               if ((signed short)ref<  0) {
+                       __skb_queue_tail(&np->rx_batch, skb);
+                       break;
+               }

                 req = RING_GET_REQUEST(&np->rx, req_prod + i);
-               gnttab_grant_foreign_access_ref(ref,
-                                               np->xbdev->otherend_id,
-                                               pfn_to_mfn(pfn),
-                                               0);
-
                 req->id = id;
                 req->gref = ref;
         }
@@ -375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev)

                         id  = txrsp->id;
                         skb = np->tx_skbs[id].skb;
-                       if (unlikely(gnttab_query_foreign_access(
-                               np->grant_tx_ref[id]) != 0)) {
-                               printk(KERN_ALERT "xennet_tx_buf_gc: warning "
-                                      "-- grant still in use by backend "
-                                      "domain.\n");
-                               BUG();
+
+                       if (np->persistent_gnt) {
+                               struct gnt_list *gnt_list_entry;
+
+                               gnt_list_entry = np->tx_grant[id];
+                               BUG_ON(!gnt_list_entry);
+
+                               gnt_list_entry->tail = np->tx_gnt_list;
+                               np->tx_gnt_list = gnt_list_entry;
+                               np->tx_gnt_cnt++;
+                       } else {
+                               if (unlikely(gnttab_query_foreign_access(
+                                       np->grant_tx_ref[id]) != 0)) {
+                                       printk(KERN_ALERT "xennet_tx_buf_gc: warning 
"
+                                              "-- grant still in use by backend 
"
+                                              "domain.\n");
+                                       BUG();
+                               }
+
+                               gnttab_end_foreign_access_ref(
+                                       np->grant_tx_ref[id], GNTMAP_readonly);
If I've read the code correctly, you are giving this frame both
read/write permissions to the other end on xennet_alloc_tx_ref, but then
you are only removing the read permissions? (see comment below on the
xennet_alloc_tx_ref function).

Yes, this is a bug.
For non persistent grant, it should remove the read permissions. For persistent grant, it should remove both. As mentioned above, it is better to enable persistent grant, I will change code and not consider non persistent grant. See comments below about why needing both permissions in xennet_alloc_tx_ref.


+                               gnttab_release_grant_reference(
+&np->gref_tx_head, np->grant_tx_ref[id]);
                         }
-                       gnttab_end_foreign_access_ref(
-                               np->grant_tx_ref[id], GNTMAP_readonly);
-                       gnttab_release_grant_reference(
-&np->gref_tx_head, np->grant_tx_ref[id]);
                         np->grant_tx_ref[id] = GRANT_INVALID_REF;
                         add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, 
id);
                         dev_kfree_skb_irq(skb);
@@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev)
         xennet_maybe_wake_tx(dev);
  }

+static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev,
+                                      unsigned long mfn,
+                                      unsigned int id)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       grant_ref_t ref;
+       struct page *granted_page;
+
+       if (np->persistent_gnt&&  np->tx_gnt_cnt) {
+               struct gnt_list *gnt_list_entry;
+
+               gnt_list_entry = np->tx_gnt_list;
+               np->tx_gnt_list = np->tx_gnt_list->tail;
+               np->tx_gnt_cnt--;
+
+               ref = gnt_list_entry->gref;
+               np->tx_grant[id] = gnt_list_entry;
+       } else {
+               struct gnt_list *gnt_list_entry;
+
+               BUG_ON(!np->persistent_gnt&&  np->tx_gnt_cnt);
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref<  0);
+
+               if (np->persistent_gnt) {
+                       granted_page = alloc_page(GFP_KERNEL);
+                       if (!granted_page) {
+                               gnttab_release_grant_reference(
+&np->gref_tx_head, ref);
+                               return -ENOMEM;
+                       }
+
+                       mfn = pfn_to_mfn(page_to_pfn(granted_page));
+                       gnt_list_entry = kmalloc(sizeof(struct gnt_list),
+                                                GFP_KERNEL);
+                       if (!gnt_list_entry) {
+                               __free_page(granted_page);
+                               gnttab_release_grant_reference(
+&np->gref_tx_head, ref);
+                               return -ENOMEM;
+                       }
+
+                       gnt_list_entry->gref = ref;
+                       gnt_list_entry->gnt_pages = granted_page;
+                       np->tx_grant[id] = gnt_list_entry;
+               }
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, 0);
If you are not always using persistent grants I guess you need to give
read only permissions to this frame (GNTMAP_readonly).

We can not use GNTMAP_readonly here because tx path packet data will be copied into these persistent grant pages. Mabbe it is better to use GNTMAP_readonly for nonpersistent and 0 for persistent grant. As mentioned above, it is better to enable persistent grant, I will change code and not consider non persistent grant.

  Also, for keeping
things in logical order, isn't it best that this function comes before
xennet_tx_buf_gc?

xennet_alloc_tx_ref is called by following function xennet_make_frags, so I assume xennet_alloc_tx_ref is better to be close to xennet_make_frags. Xennet_tx_buf_gc does not have any connection with xennet_alloc_tx_ref, did I miss something?


+       }
+
+       return ref;
+}
+
@@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info 
*np)
                 }

                 skb = np->rx_skbs[id];
-               mfn = gnttab_end_foreign_transfer_ref(ref);
-               gnttab_release_grant_reference(&np->gref_rx_head, ref);
+               if (!np->persistent_gnt) {
+                       mfn = gnttab_end_foreign_transfer_ref(ref);
+                       gnttab_release_grant_reference(&np->gref_rx_head, ref);
+               }
                 np->grant_rx_ref[id] = GRANT_INVALID_REF;

                 if (0 == mfn) {
@@ -1607,6 +1834,13 @@ again:
                 goto abort_transaction;
         }

+       err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants",
+                           "%u", info->persistent_gnt);
As in netback, I think "feature-persistent" should be used.

Same in blkback, I assume it is  "feature-persistent-grants", right?
I referred your RFC patch, did you change it later? Or I missed something?

Thanks
Annie

+       if (err) {
+               message = "writing feature-persistent-grants";
+               xenbus_dev_fatal(dev, err, "%s", message);
+       }
+
         err = xenbus_transaction_end(xbt, 0);
         if (err) {
                 if (err == -EAGAIN)
@@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev)
         grant_ref_t ref;
         struct xen_netif_rx_request *req;
         unsigned int feature_rx_copy;
+       int ret, val;

         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
                            "feature-rx-copy", "%u",&feature_rx_copy);
@@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev)
                 return -ENODEV;
         }

+       err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+                          "feature-persistent-grants", "%u",&val);
+       if (err != 1)
+               val = 0;
+
+       np->persistent_gnt = !!val;
+
         err = talk_to_netback(np->xbdev, np);
         if (err)
                 return err;
@@ -1657,9 +1899,24 @@ static int xennet_connect(struct net_device *dev)
         spin_lock_bh(&np->rx_lock);
         spin_lock_irq(&np->tx_lock);

+       np->tx_gnt_cnt = 0;
+       np->rx_gnt_cnt = 0;
+
         /* Step 1: Discard all pending TX packet fragments. */
         xennet_release_tx_bufs(np);

+       if (np->persistent_gnt) {
+               struct gnt_list *gnt_list_entry;
+
+               while (np->rx_gnt_list) {
+                       gnt_list_entry = np->rx_gnt_list;
+                       np->rx_gnt_list = np->rx_gnt_list->tail;
+                       gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+                       __free_page(gnt_list_entry->gnt_pages);
+                       kfree(gnt_list_entry);
+               }
+       }
+
         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
         for (requeue_idx = 0, i = 0; i<  NET_RX_RING_SIZE; i++) {
                 skb_frag_t *frag;
@@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev)

                 frag =&skb_shinfo(skb)->frags[0];
                 page = skb_frag_page(frag);
-               gnttab_grant_foreign_access_ref(
-                       ref, np->xbdev->otherend_id,
-                       pfn_to_mfn(page_to_pfn(page)),
-                       0);
+               ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
+                                         page_address(page), requeue_idx, ref);
+               if ((signed short)ret<  0)
+                       break;
+
                 req->gref = ref;
                 req->id   = requeue_idx;

--
1.7.3.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.