WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Because of a bug with reference counting against the target guest page
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 14 Oct 2005 01:40:14 +0000
Delivery-date: Fri, 14 Oct 2005 01:38:06 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID b41e51ffa5eacf07ed7843e5f85ae1748e19c370
# Parent  3fd239d8b640968c56fbd5aacfe3564f12203f54
Because of a bug with reference counting against the target guest page,
the search of the hash list for L1 shadow pages to write-protect that
page (at shadow_promote(), which is called by alloc_shadow_page()) was
always scanning _all_ the entries in the list. The hash list can be
longer than 500 entries for L1 shadow pages, and for each page we
needed to check all the PTEs in the page.
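
To make the cost concrete, here is a minimal, self-contained C sketch
of the scan (stand-in types and names such as struct pte and
fix_writable_ptes are hypothetical simplifications, not the actual Xen
structures): the loop can only exit early if the count of outstanding
writable references is trustworthy, which is exactly what the bug broke.

#include <stdint.h>

/* Hypothetical stand-in for l1_pgentry_t, for illustration only. */
struct pte {
    uint32_t pfn;
    uint32_t flags;
};

#define PTE_RW            0x2
#define ENTRIES_PER_PAGE  512

/* Write-protect every PTE in one shadow L1 page that maps target_pfn,
 * stopping as soon as all known writable references have been fixed. */
static uint32_t fix_writable_ptes(struct pte *pt, uint32_t target_pfn,
                                  uint32_t max_refs_to_find)
{
    uint32_t found = 0;

    for (int i = 0; i < ENTRIES_PER_PAGE; i++) {
        if (pt[i].pfn == target_pfn && (pt[i].flags & PTE_RW)) {
            pt[i].flags &= ~PTE_RW;
            /* The early exit only fires if max_refs_to_find is accurate;
             * with the refcount bug it never matched, so the scan always
             * ran to the end of every page on the hash chain. */
            if (++found == max_refs_to_find)
                break;
        }
    }
    return found;
}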

The attached patch does the following:
- Corrects the reference count (for the target guest page) so that
  the loop can exit as soon as all the L1 shadow pages to modify have
  been found. Even with this, we can still end up searching the entire
  list if the page happens to be at the end.
- Tries to avoid the hash-list search altogether by keeping a back
  pointer (as a hint) to the shadow page pfn; in the common case there
  is a single translation for the guest page in the shadow (see the
  sketch after this list).
- Cleanups: removes the nested function fix_entry, making it a regular
  static function.
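
The back-pointer hint itself is small; the following illustrative C
sketch shows the idea (struct page_hint, record_back_ptr and
lookup_back_ptr are hypothetical names; the real patch reuses
tlbflush_timestamp for the smfn and the PGT_va bits of type_info for
the entry index):

#include <stdint.h>

/* Hypothetical stand-in for a frame_table entry; the real patch packs
 * these two fields into tlbflush_timestamp and the PGT_va type bits. */
struct page_hint {
    uint32_t back_smfn;    /* shadow L1 page that last mapped this gmfn */
    uint32_t back_index;   /* PTE index within that shadow L1 page      */
};

static struct page_hint hint_table[1u << 16];   /* indexed by gmfn; sized
                                                   for illustration only */

/* Record the hint whenever a shadow L1 entry is written. */
static void record_back_ptr(uint32_t gmfn, uint32_t smfn, uint32_t index)
{
    hint_table[gmfn].back_smfn  = smfn;
    hint_table[gmfn].back_index = index;
}

/* On write-protect, consult the hint first; fall back to the full
 * hash-list search only if no hint was recorded or references remain. */
static int lookup_back_ptr(uint32_t gmfn, uint32_t *smfn, uint32_t *index)
{
    if (hint_table[gmfn].back_smfn == 0)
        return 0;                       /* no hint recorded */
    *smfn  = hint_table[gmfn].back_smfn;
    *index = hint_table[gmfn].back_index;
    return 1;
}

Since a guest page-table page normally has exactly one shadow
translation, the hint hits in the common case and the >500-entry hash
chain is never walked.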

With these changes, kernel build performance, for example, improved by
approximately 20% on 32-bit and 40% on 64-bit unmodified Linux guests.
Log-dirty mode was also tested for plain 32-bit.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Asit Mallick <asit.k.mallick@xxxxxxxxx>

diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow.c     Thu Oct 13 16:58:01 2005
@@ -616,13 +616,6 @@
         pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
         spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
 
-        /*
-        gpl1e = &(linear_pg_table[l1_linear_offset(va) &
-                              ~(L1_PAGETABLE_ENTRIES-1)]);
-
-        spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
-                                     ~(L1_PAGETABLE_ENTRIES-1)]);*/
-
         for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
         {
             l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
@@ -645,7 +638,8 @@
                 min = i;
             if ( likely(i > max) )
                 max = i;
-        }
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
+          }
 
         frame_table[sl1mfn].tlbflush_timestamp =
             SHADOW_ENCODE_MIN_MAX(min, max);
@@ -716,8 +710,8 @@
         }
     }
 
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     __shadow_set_l1e(v, va, &new_spte);
-
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 
@@ -1133,6 +1127,24 @@
         delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
         perfc_decr(writable_pte_predictions);
     }
+}
+
+static int fix_entry(
+    struct domain *d, 
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
 }
 
 static u32 remove_all_write_access_in_ptpage(
@@ -1156,49 +1168,28 @@
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
-
-    i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask) 
+            >> PGT_va_shift;
+
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
     }
  
     for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
 
     return found;
-#undef MATCH_ENTRY
 }
 
 static int remove_all_write_access(
@@ -1206,8 +1197,8 @@
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -1238,26 +1229,18 @@
         return 1;
     }
 
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0) 
+            return 0;
+
+         // Use the back pointer to locate the shadow page that can contain
+         // the PTE of interest
+         if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+             found += remove_all_write_access_in_ptpage(
+                 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+             if ( found == write_refs )
+                 return 0;
+         }
     }
 
     // Search all the shadow L1 page tables...
@@ -1276,7 +1259,7 @@
             {
                 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
                 if ( found == write_refs )
-                    return 1;
+                    return 0;
             }
 
             a = a->next;
@@ -1376,7 +1359,7 @@
                      guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
-
+                    set_guest_back_ptr(d, shadow1[i], smfn, i);
                     // can't update snapshots of linear page tables -- they
                     // are used multiple times...
                     //
@@ -1604,6 +1587,8 @@
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT, 
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow32.c   Thu Oct 13 16:58:01 2005
@@ -1032,7 +1032,12 @@
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as a back pointer to the smfn, and
+             * need to clean it up here.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
@@ -1390,18 +1395,6 @@
     return rc;
 }
 
-/*
- * XXX KAF: Why is this VMX specific?
- */
-void vmx_shadow_clear_state(struct domain *d)
-{
-    SH_VVLOG("%s:", __func__);
-    shadow_lock(d);
-    free_shadow_pages(d);
-    shadow_unlock(d);
-    update_pagetables(d->vcpu[0]);
-}
-
 unsigned long
 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
 {
@@ -1462,14 +1455,10 @@
 
     hl2 = map_domain_page(hl2mfn);
 
-#ifdef __i386__
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
         limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
-    limit = 0; /* XXX x86/64 XXX */
-#endif
 
     memset(hl2, 0, limit * sizeof(l1_pgentry_t));
 
@@ -1665,6 +1654,7 @@
                 min = i;
             if ( likely(i > max) )
                 max = i;
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
         }
 
         frame_table[sl1mfn].tlbflush_timestamp =
@@ -2070,6 +2060,24 @@
 
         xfree(gpfn_list);
     }
+}
+
+static int fix_entry(
+    struct domain *d, 
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
 }
 
 static u32 remove_all_write_access_in_ptpage(
@@ -2088,49 +2096,28 @@
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
-
-    i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
-    }
- 
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask) 
+            >> PGT_va_shift;
+
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
+    }
+
     for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
 
     return found;
-#undef MATCH_ENTRY
 }
 
 int shadow_remove_all_write_access(
@@ -2138,8 +2125,8 @@
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -2169,27 +2156,19 @@
         perfc_incrc(remove_write_no_work);
         return 1;
     }
-
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+    
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0) 
+            return 0;
+
+         // Use the back pointer to locate the shadow page that can contain
+         // the PTE of interest
+         if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+             found += remove_all_write_access_in_ptpage(
+                 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+             if ( found == write_refs )
+                 return 0;
+         }
     }
 
     // Search all the shadow L1 page tables...
@@ -2203,7 +2182,7 @@
             {
                 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
                 if ( found == write_refs )
-                    return 1;
+                    return 0;
             }
 
             a = a->next;
@@ -2376,12 +2355,12 @@
                      l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
+                    set_guest_back_ptr(d, shadow1[i], smfn, i);
 
                     // can't update snapshots of linear page tables -- they
                     // are used multiple times...
                     //
                     // snapshot[i] = new_pte;
-
                     changed++;
                 }
             }
@@ -2530,6 +2509,8 @@
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT, 
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow_public.c      Thu Oct 13 16:58:01 2005
@@ -1095,7 +1095,12 @@
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as a back pointer to the smfn, and
+             * need to clean it up here.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Oct 13 16:55:15 2005
+++ b/xen/include/asm-x86/shadow.h      Thu Oct 13 16:58:01 2005
@@ -718,6 +718,23 @@
     put_shadow_ref(smfn);
 }
 
+/*
+ * SMP issue. The following code assumes the shadow lock is held. Re-visit
+ * when working on finer-grained locks for shadow.
+ */
+static inline void set_guest_back_ptr(
+    struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
+{
+    if ( shadow_mode_external(d) ) {
+        unsigned long gmfn;
+
+        ASSERT(shadow_lock_is_acquired(d));
+        gmfn = l1e_get_pfn(spte);
+        frame_table[gmfn].tlbflush_timestamp = smfn;
+        frame_table[gmfn].u.inuse.type_info &= ~PGT_va_mask;
+        frame_table[gmfn].u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
+    }
+}
 
 /************************************************************************/
 #if CONFIG_PAGING_LEVELS <= 2
@@ -1611,10 +1628,11 @@
             if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
                 shadow_put_page_from_l1e(old_spte, d);
         }
-    }
-
+
+    }
+
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
-
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 #endif

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
