# HG changeset patch
# User Michael.Fetterman@xxxxxxxxxxxx
# Node ID c665ab5a6b442710d9543e002d430d1319eabb15
# Parent 6d298cac0e8d851331096f16a52cdd7208cbc95c
Cleanup various shadow mode asserts.

Separate the ability of a domain to write to its own page tables (a la
"writable page tables", which uses write-protected PTEs to address the
page tables: this is shadow_mode_write_all()) from the right of a domain
to create a PTE with write permission that points at a page table page
(this is shadow_mode_wr_pt_pte()); see the sketch below.

Minor cleanup of SHADOW_DEBUG in shadow.c (at least make it compilable).
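
To illustrate the split, here is a minimal sketch (not part of the patch,
written in terms of the shadow_mode_write_l1/write_all/wr_pt_pte predicates
introduced in shadow.h below) of the two separate questions the shadow code
now asks:

    /* Sketch only: question 1 -- may the guest *write into* a page that
     * is currently serving as a page table of the given PGT_* type? */
    static inline int guest_may_write_pt_page(struct domain *d, u32 type)
    {
        /* "Writable page tables" (the VM assist): L1 pages only. */
        if ( shadow_mode_write_l1(d) && (type == PGT_l1_page_table) )
            return 1;
        /* SHM_write_all: every page table level. */
        if ( shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table) )
            return 1;
        return 0;
    }

    /* Sketch only: question 2 -- may the guest *create* a PTE with
     * _PAGE_RW set that points at a page table page?  After this patch
     * that is a separate right, governed by SHM_wr_pt_pte alone. */
    static inline int guest_may_map_pt_writable(struct domain *d)
    {
        return shadow_mode_wr_pt_pte(d);
    }
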
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Tue Nov 8 11:26:48 2005
+++ b/xen/arch/x86/shadow.c Tue Nov 8 12:26:50 2005
@@ -37,8 +37,10 @@
extern void free_shadow_pages(struct domain *d);
+#if 0 // this code has not been updated for 32pae & 64 bit modes
#if SHADOW_DEBUG
static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
+#endif
#endif
#if CONFIG_PAGING_LEVELS == 3
@@ -898,8 +900,10 @@
entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
entry->writable_pl1e = -1;
+#if 0 // this code has not been updated for 32pae & 64 bit modes
#if SHADOW_DEBUG
mark_shadows_as_reflecting_snapshot(d, gpfn);
+#endif
#endif
// increment guest's ref count to represent the entry in the
@@ -1317,18 +1321,17 @@
if ( !smfn )
{
+ // For heavy weight shadows: no need to update refcounts if
+ // there's no shadow page.
+ //
if ( shadow_mode_refcounts(d) )
continue;
- // For light weight shadows, even when no shadow page exists,
- // we need to resync the refcounts to the new contents of the
- // guest page.
- // This only applies when we have writable page tables.
+ // For light weight shadows: we only need to resync the refcounts to
+ // the new contents of the guest page if it still has the matching
+ // page type.
//
- if ( !shadow_mode_write_all(d) &&
- !((stype == PGT_l1_shadow) &&
- VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
- // Page is not writable -- no resync necessary
+ if ( stype != (pfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
continue;
}
@@ -1365,8 +1368,8 @@
guest_l1_pgentry_t *snapshot1 = snapshot;
int unshadow_l1 = 0;
- ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
- shadow_mode_write_all(d));
+ ASSERT(shadow_mode_write_l1(d) ||
+ shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
if ( !shadow_mode_refcounts(d) )
revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
@@ -1427,7 +1430,7 @@
l2_pgentry_t *shadow2 = shadow;
l2_pgentry_t *snapshot2 = snapshot;
- ASSERT(shadow_mode_write_all(d));
+ ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
changed = 0;
@@ -1473,7 +1476,7 @@
l2_pgentry_t *snapshot2 = snapshot;
l1_pgentry_t *shadow2 = shadow;
- ASSERT(shadow_mode_write_all(d));
+ ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
changed = 0;
@@ -1822,8 +1825,13 @@
goto fail;
}
}
-
- if ( !l1pte_write_fault(v, &gpte, &spte, va) )
+ else if ( unlikely(!shadow_mode_wr_pt_pte(d) &&
+            mfn_is_page_table(l1e_get_pfn(gpte))) )
+ {
+ SH_LOG("l1pte_write_fault: no write access to page table page");
+ domain_crash_synchronous();
+ }
+
+ if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
{
SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
perfc_incrc(write_fault_bail);
@@ -2072,6 +2080,7 @@
/************************************************************************/
/************************************************************************/
+#if 0 // this code has not been updated for 32pae & 64 bit modes
#if SHADOW_DEBUG
// The following is entirely for _check_pagetable()'s benefit.
@@ -2118,8 +2127,8 @@
// BUG: these are not SMP safe...
static int sh_l2_present;
static int sh_l1_present;
-char * sh_check_name;
-int shadow_status_noswap;
+static char *sh_check_name;
+// int shadow_status_noswap; // declared in shadow32.c
#define v2m(_v, _adr) ({ \
unsigned long _a = (unsigned long)(_adr); \
@@ -2218,11 +2227,11 @@
guest_writable =
(l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
- (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
+ (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
{
- printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x
page_table_page=%d\n",
+ printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08lx
page_table_page=%d\n",
eff_guest_pfn, eff_guest_mfn, shadow_mfn,
frame_table[eff_guest_mfn].u.inuse.type_info,
page_table_page);
@@ -2233,7 +2242,7 @@
(l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
!(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
{
- printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x
page_table_page=%d\n",
+ printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08lx
page_table_page=%d\n",
eff_guest_pfn, eff_guest_mfn, shadow_mfn,
frame_table[eff_guest_mfn].u.inuse.type_info,
page_table_page);
@@ -2393,13 +2402,12 @@
}
#undef FAILPT
-static int _check_pagetable(struct vcpu *v, char *s)
+int _check_pagetable(struct vcpu *v, char *s)
{
struct domain *d = v->domain;
#if defined (__x86_64__)
pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
- pagetable_get_pfn(v->arch.guest_table) :
- pagetable_get_pfn(v->arch.guest_table_user));
+ v->arch.guest_table : v->arch.guest_table_user);
#else
pagetable_t pt = v->arch.guest_table;
#endif
@@ -2539,6 +2547,7 @@
}
#endif // SHADOW_DEBUG
+#endif // this code has not been updated for 32pae & 64 bit modes
#if CONFIG_PAGING_LEVELS == 3
static unsigned long shadow_l3_table(
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Tue Nov 8 11:26:48 2005
+++ b/xen/arch/x86/shadow32.c Tue Nov 8 12:26:50 2005
@@ -624,6 +624,14 @@
// under us... First, collect the list of pinned pages, then
// free them.
//
+ // FIXME: it would be good to just free all the pages referred to in
+ // the hash table without going through each of them to decrement their
+ // reference counts. In shadow_mode_refcount(), we've gotta do the hard
+ // work, but only for L1 shadows. If we're not in refcount mode, then
+ // there's no real hard work to do at all. Need to be careful with the
+ // writable_pte_predictions and snapshot entries in the hash table, but
+ // that's about it.
+ //
for ( i = 0; i < shadow_ht_buckets; i++ )
{
u32 count;
@@ -634,17 +642,51 @@
continue;
count = 0;
- for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( MFN_PINNED(x->smfn) )
- count++;
+
+ for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
+ /* Skip entries that are snapshots or writable_pred */
+ switch(x->gpfn_and_flags & PGT_type_mask){
+ case PGT_l1_shadow:
+ case PGT_l2_shadow:
+ case PGT_l3_shadow:
+ case PGT_l4_shadow:
+ case PGT_hl2_shadow:
+ if ( MFN_PINNED(x->smfn) )
+ count++;
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ break;
+ default:
+ BUG();
+ }
+ }
+
if ( !count )
continue;
mfn_list = xmalloc_array(unsigned long, count);
count = 0;
- for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( MFN_PINNED(x->smfn) )
- mfn_list[count++] = x->smfn;
+ for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
+ /* Skip entries that are snapshots or writable_pred */
+ switch(x->gpfn_and_flags & PGT_type_mask){
+ case PGT_l1_shadow:
+ case PGT_l2_shadow:
+ case PGT_l3_shadow:
+ case PGT_l4_shadow:
+ case PGT_hl2_shadow:
+ if ( MFN_PINNED(x->smfn) )
+ mfn_list[count++] = x->smfn;
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ break;
+ default:
+ BUG();
+ }
+ }
while ( count )
{
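
The two passes above apply the same filter; as a sketch (the hash-entry
type name struct shadow_status is assumed from this file's usage), the
shared test could read:

    /* Sketch only: is this hash entry a real shadow page that may be
     * pinned?  Snapshots and writable-PTE predictions have no shadow
     * page of their own to unpin. */
    static int entry_is_pinned_shadow(struct shadow_status *x)
    {
        switch ( x->gpfn_and_flags & PGT_type_mask )
        {
        case PGT_l1_shadow:
        case PGT_l2_shadow:
        case PGT_l3_shadow:
        case PGT_l4_shadow:
        case PGT_hl2_shadow:
            return MFN_PINNED(x->smfn);
        case PGT_snapshot:
        case PGT_writable_pred:
            return 0;
        default:
            BUG();
        }
        return 0;
    }
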
@@ -779,6 +821,7 @@
unsigned long va = pfn << PAGE_SHIFT;
ASSERT(tabpfn != 0);
+ ASSERT(shadow_lock_is_acquired(d));
l2 = map_domain_page_with_cache(tabpfn, l2cache);
l2e = l2[l2_table_offset(va)];
@@ -2037,7 +2080,12 @@
while ( count )
{
count--;
+ /* delete_shadow_status() may do a shadow_audit(), so we need to
+ * keep an accurate count of writable_pte_predictions to keep it
+ * happy.
+ */
delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+ perfc_decr(writable_pte_predictions);
}
xfree(gpfn_list);
@@ -2273,18 +2321,17 @@
if ( !smfn )
{
+ // For heavy weight shadows: no need to update refcounts if
+ // there's no shadow page.
+ //
if ( shadow_mode_refcounts(d) )
continue;
- // For light weight shadows, even when no shadow page exists,
- // we need to resync the refcounts to the new contents of the
- // guest page.
- // This only applies when we have writable page tables.
+ // For light weight shadows: we only need to resync the refcounts to
+ // the new contents of the guest page if it still has the matching
+ // page type.
//
- if ( !shadow_mode_write_all(d) &&
- !((stype == PGT_l1_shadow) &&
- VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
- // Page is not writable -- no resync necessary
+ if ( stype != (pfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
continue;
}
@@ -2312,8 +2359,8 @@
l1_pgentry_t *snapshot1 = snapshot;
int unshadow_l1 = 0;
- ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
- shadow_mode_write_all(d));
+ ASSERT(shadow_mode_write_l1(d) ||
+ shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
if ( !shadow_mode_refcounts(d) )
revalidate_l1(d, guest1, snapshot1);
@@ -2380,7 +2427,7 @@
l2_pgentry_t *shadow2 = shadow;
l2_pgentry_t *snapshot2 = snapshot;
- ASSERT(shadow_mode_write_all(d));
+ ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
changed = 0;
@@ -2426,7 +2473,7 @@
l2_pgentry_t *snapshot2 = snapshot;
l1_pgentry_t *shadow2 = shadow;
- ASSERT(shadow_mode_write_all(d));
+ ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
changed = 0;
@@ -2619,8 +2666,13 @@
goto fail;
}
}
-
- if ( !l1pte_write_fault(v, &gpte, &spte, va) )
+ else if ( unlikely(!shadow_mode_wr_pt_pte(d) &&
+            mfn_is_page_table(l1e_get_pfn(gpte))) )
+ {
+ SH_LOG("l1pte_write_fault: no write access to page table page");
+ domain_crash_synchronous();
+ }
+
+ if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
{
SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
perfc_incrc(write_fault_bail);
@@ -2954,7 +3006,7 @@
// BUG: these are not SMP safe...
static int sh_l2_present;
static int sh_l1_present;
-char * sh_check_name;
+static char *sh_check_name;
int shadow_status_noswap;
#define v2m(_v, _adr) ({ \
@@ -3054,7 +3106,7 @@
guest_writable =
(l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
- (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
+ (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
{
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Tue Nov 8 11:26:48 2005
+++ b/xen/arch/x86/vmx.c Tue Nov 8 12:26:50 2005
@@ -79,7 +79,7 @@
* the shared 1:1 page table initially. It shouldn't hurt */
shadow_mode_enable(v->domain,
SHM_enable|SHM_refcounts|
- SHM_translate|SHM_external);
+ SHM_translate|SHM_external|SHM_wr_pt_pte);
}
vmx_switch_on = 1;
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Tue Nov 8 11:26:48 2005
+++ b/xen/include/asm-x86/page.h Tue Nov 8 12:26:50 2005
@@ -271,6 +271,9 @@
#define _PAGE_PAT 0x080U
#define _PAGE_PSE 0x080U
#define _PAGE_GLOBAL 0x100U
+#define _PAGE_AVAIL0 0x200U
+#define _PAGE_AVAIL1 0x400U
+#define _PAGE_AVAIL2 0x800U
#define _PAGE_AVAIL 0xE00U
#define __PAGE_HYPERVISOR \
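
The three new names carve the existing _PAGE_AVAIL mask into its individual
software-available PTE bits (bits 9, 10 and 11, which the MMU ignores). A
compile-time sanity sketch (not part of the patch) of the relationship:

    /* Sketch only: the named bits exactly tile the existing mask, so
     * _PAGE_AVAIL0 (0x200) is PTE bit 9, _PAGE_AVAIL1 bit 10, etc. */
    typedef char _page_avail_bits_check[
        ((_PAGE_AVAIL0 | _PAGE_AVAIL1 | _PAGE_AVAIL2) == _PAGE_AVAIL) ? 1 : -1];

This is what lets shadow.h below redefine SHADOW_REFLECTS_SNAPSHOT as
_PAGE_AVAIL0: the value is unchanged from the old (1u << 9).
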
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Tue Nov 8 11:26:48 2005
+++ b/xen/include/asm-x86/shadow.h Tue Nov 8 12:26:50 2005
@@ -45,15 +45,21 @@
#define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
regardless of pte write permissions */
#define SHM_log_dirty (1<<3) /* enable log dirty mode */
-#define SHM_translate (1<<4) /* do p2m tranaltion on guest tables */
-#define SHM_external (1<<5) /* external page table, not used by Xen */
+#define SHM_translate (1<<4) /* Xen does p2m translation, not guest */
+#define SHM_external (1<<5) /* Xen does not steal address space from the
+ domain for its own bookkeeping; requires VT or
+ similar mechanisms */
+#define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which
+ point to page table pages. */
#define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
#define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
+#define shadow_mode_write_l1(_d) (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables))
#define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
#define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
#define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
+#define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte)
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
#define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
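
Note that shadow_mode_write_l1() differs from its neighbours: it tests the
guest-requested VM assist rather than a bit in d->arch.shadow_mode. In
summary, the three write-related predicates are backed as follows:

    shadow_mode_write_l1(d)   <- VM_ASSIST(d, VMASST_TYPE_writable_pagetables)
    shadow_mode_write_all(d)  <- SHM_write_all in d->arch.shadow_mode
    shadow_mode_wr_pt_pte(d)  <- SHM_wr_pt_pte in d->arch.shadow_mode
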
@@ -324,8 +330,7 @@
#if SHADOW_DEBUG
extern int shadow_status_noswap;
-#define _SHADOW_REFLECTS_SNAPSHOT ( 9)
-#define SHADOW_REFLECTS_SNAPSHOT (1u << _SHADOW_REFLECTS_SNAPSHOT)
+#define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0
#endif
#ifdef VERBOSE
@@ -1474,7 +1479,8 @@
if ( stype != PGT_writable_pred )
BUG(); // we should never replace entries into the hash table
x->smfn = smfn;
- put_page(pfn_to_page(gmfn)); // already had a ref...
+ if ( stype != PGT_writable_pred )
+ put_page(pfn_to_page(gmfn)); // already had a ref...
goto done;
}
@@ -1656,14 +1662,18 @@
(type == PGT_writable_page) )
type = shadow_max_pgtable_type(d, gpfn, NULL);
- if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
- (type == PGT_l1_page_table) &&
- (va < HYPERVISOR_VIRT_START) &&
- KERNEL_MODE(v, regs) )
- return 1;
-
- if ( shadow_mode_write_all(d) &&
- type && (type <= PGT_l4_page_table) &&
+ // Strange but true: writable page tables allow kernel-mode access
+ // to L1 page table pages via write-protected PTEs... Similarly, write
+ // access to all page table pages is granted for shadow_mode_write_all
+ // clients.
+ //
+ if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) ||
+ (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) &&
+ ((va < HYPERVISOR_VIRT_START)
+#if defined(__x86_64__)
+ || (va >= HYPERVISOR_VIRT_END)
+#endif
+ ) &&
KERNEL_MODE(v, regs) )
return 1;
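
The merged test above also widens the virtual-address check for 64-bit
guests; as a worked example (a sketch, not code from the patch), the
address clause now behaves as:

    /* Sketch only: guest-visible addresses sit on both sides of the
     * hypervisor hole on x86_64, but only below it on 32-bit builds. */
    static inline int va_is_guest_visible(unsigned long va)
    {
    #if defined(__x86_64__)
        return (va < HYPERVISOR_VIRT_START) || (va >= HYPERVISOR_VIRT_END);
    #else
        return va < HYPERVISOR_VIRT_START;
    #endif
    }
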