# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID b41e51ffa5eacf07ed7843e5f85ae1748e19c370
# Parent 3fd239d8b640968c56fbd5aacfe3564f12203f54
Because of a reference-counting bug against the target guest page, the
search for L1 shadow pages in which to write-protect that page (done in
shadow_promote(), which is called by alloc_shadow_page()) always scanned
_all_ the entries in the hash list. The hash list can hold more than 500
L1 shadow pages, and for each of them we had to check every PTE in the
page.
The attached patch does the following:
- Correct the reference count (for the target guest page) so that
the loop can exit as soon as all the L1 shadow pages to modify have
been found. Even with this fix, we may still search the entire list
if the page happens to be at the end.
- Try to avoid the hash-list search altogether by keeping a back
pointer (as a hint) to the shadow page pfn; a simplified sketch of
this idea appears below. In most cases there is a single translation
for the guest page in the shadow.
- Cleanups: remove the nested function fix_entry().
With these changes, kernel-build performance, for example, improved by
roughly 20% on 32-bit and 40% on 64-bit unmodified Linux guests.
Log-dirty mode was also tested for plain 32-bit.
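
For readers unfamiliar with the trick: the back pointer simply caches, on
the guest page itself, the location of the (usually unique) shadow L1
entry that maps it, so the write-protect path can try that slot first and
only fall back to the full hash-list walk when the hint misses. The
following is a minimal, self-contained C sketch of that pattern, not Xen
code; the names shadow_l1, page_hint, set_back_ptr and find_mapping are
invented for the illustration, standing in for what set_guest_back_ptr()
and the new fast path in remove_all_write_access() do with
tlbflush_timestamp and the PGT_va bits of type_info.

    #include <stdio.h>

    #define NSHADOWS 8
    #define NPTES    4
    #define NO_HINT  -1

    /* One "shadow L1 page": a small array of PTEs (just frame numbers here). */
    struct shadow_l1 {
        int pte[NPTES];
    };

    /* Per guest-page metadata: where we last saw this page mapped. */
    struct page_hint {
        int smfn;    /* which shadow L1 page        */
        int index;   /* which slot within that page */
    };

    static struct shadow_l1 shadows[NSHADOWS];
    static struct page_hint hint = { NO_HINT, NO_HINT };

    /* Record the back pointer whenever a mapping for gmfn is (re)written. */
    static void set_back_ptr(int smfn, int index)
    {
        hint.smfn  = smfn;
        hint.index = index;
    }

    /* Find a mapping of gmfn: try the hint first, then fall back to a scan. */
    static int find_mapping(int gmfn, int *out_smfn, int *out_index)
    {
        if ( hint.smfn != NO_HINT && shadows[hint.smfn].pte[hint.index] == gmfn )
        {
            *out_smfn  = hint.smfn;
            *out_index = hint.index;
            return 1;                   /* fast path: no scan needed */
        }
        for ( int s = 0; s < NSHADOWS; s++ )
            for ( int i = 0; i < NPTES; i++ )
                if ( shadows[s].pte[i] == gmfn )
                {
                    *out_smfn  = s;
                    *out_index = i;
                    return 1;           /* slow path: full scan */
                }
        return 0;
    }

    int main(void)
    {
        shadows[5].pte[2] = 42;         /* map guest frame 42 ...  */
        set_back_ptr(5, 2);             /* ... and remember where  */

        int s, i;
        if ( find_mapping(42, &s, &i) )
            printf("gmfn 42 found at shadow %d, slot %d\n", s, i);
        return 0;
    }

In the real patch the hint is stored per guest frame (in
frame_table[gmfn]) rather than in a single global, and the slow path is
the existing scan over the shadow hash list.
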
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Asit Mallick <asit.k.mallick@xxxxxxxxx>
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow.c Thu Oct 13 16:58:01 2005
@@ -616,13 +616,6 @@
pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) <<
L1_PAGETABLE_SHIFT;
spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
- /*
- gpl1e = &(linear_pg_table[l1_linear_offset(va) &
- ~(L1_PAGETABLE_ENTRIES-1)]);
-
- spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
- ~(L1_PAGETABLE_ENTRIES-1)]);*/
-
for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
{
l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
@@ -645,7 +638,8 @@
min = i;
if ( likely(i > max) )
max = i;
- }
+ set_guest_back_ptr(d, sl1e, sl1mfn, i);
+ }
frame_table[sl1mfn].tlbflush_timestamp =
SHADOW_ENCODE_MIN_MAX(min, max);
@@ -716,8 +710,8 @@
}
}
+ set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
__shadow_set_l1e(v, va, &new_spte);
-
shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
}
@@ -1133,6 +1127,24 @@
delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
perfc_decr(writable_pte_predictions);
}
+}
+
+static int fix_entry(
+ struct domain *d,
+ l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+ l1_pgentry_t old = *pt;
+ l1_pgentry_t new = old;
+
+ l1e_remove_flags(new,_PAGE_RW);
+ if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+ BUG();
+ (*found)++;
+ *pt = new;
+ if ( is_l1_shadow )
+ shadow_put_page_from_l1e(old, d);
+
+ return (*found == max_refs_to_find);
}
static u32 remove_all_write_access_in_ptpage(
@@ -1156,49 +1168,28 @@
match = l1e_from_pfn(readonly_gmfn, flags);
- // returns true if all refs have been found and fixed.
- //
- int fix_entry(int i)
- {
- l1_pgentry_t old = pt[i];
- l1_pgentry_t new = old;
-
- l1e_remove_flags(new,_PAGE_RW);
- if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
- BUG();
- found++;
- pt[i] = new;
- if ( is_l1_shadow )
- shadow_put_page_from_l1e(old, d);
-
-#if 0
- printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
- "is_l1_shadow=%d\n",
- readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
- return (found == max_refs_to_find);
- }
-
- i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
- if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
- {
- perfc_incrc(remove_write_fast_exit);
- increase_writable_pte_prediction(d, readonly_gpfn, prediction);
- unmap_domain_page(pt);
- return found;
+ if ( shadow_mode_external(d) ) {
+ i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask)
+ >> PGT_va_shift;
+
+ if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+ !l1e_has_changed(pt[i], match, flags) &&
+ fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+ !prediction )
+ goto out;
}
for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
{
- if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+ if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
+ fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
break;
}
+out:
unmap_domain_page(pt);
return found;
-#undef MATCH_ENTRY
}
static int remove_all_write_access(
@@ -1206,8 +1197,8 @@
{
int i;
struct shadow_status *a;
- u32 found = 0, fixups, write_refs;
- unsigned long prediction, predicted_gpfn, predicted_smfn;
+ u32 found = 0, write_refs;
+ unsigned long predicted_smfn;
ASSERT(shadow_lock_is_acquired(d));
ASSERT(VALID_MFN(readonly_gmfn));
@@ -1238,26 +1229,18 @@
return 1;
}
- // Before searching all the L1 page tables, check the typical culprit first
- //
- if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
- {
- predicted_gpfn = prediction & PGT_mfn_mask;
- if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
- (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
- {
- found += fixups;
- if ( found == write_refs )
- {
- perfc_incrc(remove_write_predicted);
- return 1;
- }
- }
- else
- {
- perfc_incrc(remove_write_bad_prediction);
- decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
- }
+ if ( shadow_mode_external(d) ) {
+ if (write_refs-- == 0)
+ return 0;
+
+ // Use the back pointer to locate the shadow page that can contain
+ // the PTE of interest
+ if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+ found += remove_all_write_access_in_ptpage(
+ d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+ if ( found == write_refs )
+ return 0;
+ }
}
// Search all the shadow L1 page tables...
@@ -1276,7 +1259,7 @@
{
found += remove_all_write_access_in_ptpage(d,
a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
if ( found == write_refs )
- return 1;
+ return 0;
}
a = a->next;
@@ -1376,7 +1359,7 @@
guest_l1e_has_changed(guest1[i], snapshot1[i],
PAGE_FLAG_MASK) )
{
need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
-
+ set_guest_back_ptr(d, shadow1[i], smfn, i);
// can't update snapshots of linear page tables -- they
// are used multiple times...
//
@@ -1604,6 +1587,8 @@
!shadow_get_page_from_l1e(npte, d) )
BUG();
*ppte = npte;
+ set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
+ (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
shadow_put_page_from_l1e(opte, d);
unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow32.c Thu Oct 13 16:58:01 2005
@@ -1032,7 +1032,12 @@
if ( !get_page_type(page, PGT_writable_page) )
BUG();
put_page_type(page);
-
+ /*
+ * We use tlbflush_timestamp as a back pointer to smfn, and need to
+ * clean it up.
+ */
+ if ( shadow_mode_external(d) )
+ page->tlbflush_timestamp = 0;
list_ent = page->list.next;
}
}
@@ -1390,18 +1395,6 @@
return rc;
}
-/*
- * XXX KAF: Why is this VMX specific?
- */
-void vmx_shadow_clear_state(struct domain *d)
-{
- SH_VVLOG("%s:", __func__);
- shadow_lock(d);
- free_shadow_pages(d);
- shadow_unlock(d);
- update_pagetables(d->vcpu[0]);
-}
-
unsigned long
gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
{
@@ -1462,14 +1455,10 @@
hl2 = map_domain_page(hl2mfn);
-#ifdef __i386__
if ( shadow_mode_external(d) )
limit = L2_PAGETABLE_ENTRIES;
else
limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
- limit = 0; /* XXX x86/64 XXX */
-#endif
memset(hl2, 0, limit * sizeof(l1_pgentry_t));
@@ -1665,6 +1654,7 @@
min = i;
if ( likely(i > max) )
max = i;
+ set_guest_back_ptr(d, sl1e, sl1mfn, i);
}
frame_table[sl1mfn].tlbflush_timestamp =
@@ -2070,6 +2060,24 @@
xfree(gpfn_list);
}
+}
+
+static int fix_entry(
+ struct domain *d,
+ l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+ l1_pgentry_t old = *pt;
+ l1_pgentry_t new = old;
+
+ l1e_remove_flags(new,_PAGE_RW);
+ if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+ BUG();
+ (*found)++;
+ *pt = new;
+ if ( is_l1_shadow )
+ shadow_put_page_from_l1e(old, d);
+
+ return (*found == max_refs_to_find);
}
static u32 remove_all_write_access_in_ptpage(
@@ -2088,49 +2096,28 @@
match = l1e_from_pfn(readonly_gmfn, flags);
- // returns true if all refs have been found and fixed.
- //
- int fix_entry(int i)
- {
- l1_pgentry_t old = pt[i];
- l1_pgentry_t new = old;
-
- l1e_remove_flags(new,_PAGE_RW);
- if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
- BUG();
- found++;
- pt[i] = new;
- if ( is_l1_shadow )
- shadow_put_page_from_l1e(old, d);
-
-#if 0
- printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
- "is_l1_shadow=%d\n",
- readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
- return (found == max_refs_to_find);
- }
-
- i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
- if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
- {
- perfc_incrc(remove_write_fast_exit);
- increase_writable_pte_prediction(d, readonly_gpfn, prediction);
- unmap_domain_page(pt);
- return found;
- }
-
+ if ( shadow_mode_external(d) ) {
+ i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask)
+ >> PGT_va_shift;
+
+ if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+ !l1e_has_changed(pt[i], match, flags) &&
+ fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+ !prediction )
+ goto out;
+ }
+
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
{
- if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+ if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
+ fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
break;
}
+out:
unmap_domain_page(pt);
return found;
-#undef MATCH_ENTRY
}
int shadow_remove_all_write_access(
@@ -2138,8 +2125,8 @@
{
int i;
struct shadow_status *a;
- u32 found = 0, fixups, write_refs;
- unsigned long prediction, predicted_gpfn, predicted_smfn;
+ u32 found = 0, write_refs;
+ unsigned long predicted_smfn;
ASSERT(shadow_lock_is_acquired(d));
ASSERT(VALID_MFN(readonly_gmfn));
@@ -2169,27 +2156,19 @@
perfc_incrc(remove_write_no_work);
return 1;
}
-
- // Before searching all the L1 page tables, check the typical culprit first
- //
- if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
- {
- predicted_gpfn = prediction & PGT_mfn_mask;
- if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
- (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
- {
- found += fixups;
- if ( found == write_refs )
- {
- perfc_incrc(remove_write_predicted);
- return 1;
- }
- }
- else
- {
- perfc_incrc(remove_write_bad_prediction);
- decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
- }
+
+ if ( shadow_mode_external(d) ) {
+ if (write_refs-- == 0)
+ return 0;
+
+ // Use the back pointer to locate the shadow page that can contain
+ // the PTE of interest
+ if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+ found += remove_all_write_access_in_ptpage(
+ d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+ if ( found == write_refs )
+ return 0;
+ }
}
// Search all the shadow L1 page tables...
@@ -2203,7 +2182,7 @@
{
found += remove_all_write_access_in_ptpage(d,
a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
if ( found == write_refs )
- return 1;
+ return 0;
}
a = a->next;
@@ -2376,12 +2355,12 @@
l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
{
need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
+ set_guest_back_ptr(d, shadow1[i], smfn, i);
// can't update snapshots of linear page tables -- they
// are used multiple times...
//
// snapshot[i] = new_pte;
-
changed++;
}
}
@@ -2530,6 +2509,8 @@
!shadow_get_page_from_l1e(npte, d) )
BUG();
*ppte = npte;
+ set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
+ (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
shadow_put_page_from_l1e(opte, d);
unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow_public.c Thu Oct 13 16:58:01 2005
@@ -1095,7 +1095,12 @@
if ( !get_page_type(page, PGT_writable_page) )
BUG();
put_page_type(page);
-
+ /*
+ * We use tlbflush_timestamp as a back pointer to smfn, and need to
+ * clean it up.
+ */
+ if ( shadow_mode_external(d) )
+ page->tlbflush_timestamp = 0;
list_ent = page->list.next;
}
}
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Thu Oct 13 16:55:15 2005
+++ b/xen/include/asm-x86/shadow.h Thu Oct 13 16:58:01 2005
@@ -718,6 +718,23 @@
put_shadow_ref(smfn);
}
+/*
+ * SMP issue. The following code assumes the shadow lock is held. Re-visit
+ * when working on finer-grained locks for shadow.
+ */
+static inline void set_guest_back_ptr(
+ struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
+{
+ if ( shadow_mode_external(d) ) {
+ unsigned long gmfn;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ gmfn = l1e_get_pfn(spte);
+ frame_table[gmfn].tlbflush_timestamp = smfn;
+ frame_table[gmfn].u.inuse.type_info &= ~PGT_va_mask;
+ frame_table[gmfn].u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
+ }
+}
/************************************************************************/
#if CONFIG_PAGING_LEVELS <= 2
@@ -1611,10 +1628,11 @@
if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
shadow_put_page_from_l1e(old_spte, d);
}
- }
-
+
+ }
+
+ set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
-
shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
}
#endif