# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 0267063e050cbdb0408921c4acc68f09434bac65
# Parent c3bb51c443a7a1a78a4917fd09e602fa35d9c318
This is the initial patch for SMP PAE guest on x86-64 Xen.
For vcpus=2, the SMP PAE guest can do kernel build successfully.
And it improves the stability of SMP guests.
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
diff -r c3bb51c443a7 -r 0267063e050c xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Thu Apr 13 09:29:27 2006
+++ b/xen/arch/x86/Makefile Thu Apr 13 09:31:53 2006
@@ -76,6 +76,7 @@
$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
shadow_guest32.o: shadow.c
+shadow_guest32pae.o: shadow.c
.PHONY: clean
clean::
diff -r c3bb51c443a7 -r 0267063e050c xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Thu Apr 13 09:29:27 2006
+++ b/xen/arch/x86/shadow.c Thu Apr 13 09:31:53 2006
@@ -1531,14 +1531,10 @@
idx = get_cr3_idxval(v);
smfn = __shadow_status(
- d, ((unsigned long)(idx << PGT_score_shift) | entry->gpfn),
PGT_l4_shadow);
-
-#ifndef NDEBUG
+ d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn),
PGT_l4_shadow);
+
if ( !smfn )
- {
- BUG();
- }
-#endif
+ continue;
guest = (pgentry_64_t *)map_domain_page(entry->gmfn);
snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn);
@@ -1550,9 +1546,35 @@
if ( entry_has_changed(
guest[index], snapshot[index], PAGE_FLAG_MASK) )
{
+ unsigned long gpfn;
+
+ /*
+ * Looks like it's no longer a page table.
+ */
+ if ( unlikely(entry_get_value(guest[index]) &
PAE_PDPT_RESERVED) )
+ {
+ if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
+ put_shadow_ref(entry_get_pfn(shadow_l3[i]));
+
+ shadow_l3[i] = entry_empty();
+ continue;
+ }
+
+ gpfn = entry_get_pfn(guest[index]);
+
+ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+ {
+ if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
+ put_shadow_ref(entry_get_pfn(shadow_l3[i]));
+
+ shadow_l3[i] = entry_empty();
+ continue;
+ }
+
validate_entry_change(d, &guest[index],
&shadow_l3[i], PAGING_L3);
}
+
if ( entry_get_value(guest[index]) != 0 )
max = i;
@@ -1675,6 +1697,19 @@
guest_l1e_has_changed(guest1[i], snapshot1[i],
PAGE_FLAG_MASK) )
{
int error;
+
+#if CONFIG_PAGING_LEVELS == 4
+ unsigned long gpfn;
+
+ gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
+
+ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+ {
+ guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty();
+ validate_pte_change(d, tmp_gl1e, sl1e_p);
+ continue;
+ }
+#endif
error = validate_pte_change(d, guest1[i], sl1e_p);
if ( error == -1 )
@@ -1698,6 +1733,7 @@
perfc_incrc(resync_l1);
perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1,
PT_UPDATES);
+
if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
unshadow_l1 ) {
pgentry_64_t l2e = { 0 };
@@ -1804,18 +1840,22 @@
for ( i = min_shadow; i <= max_shadow; i++ )
{
if ( (i < min_snapshot) || (i > max_snapshot) ||
- entry_has_changed(
- guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
+ entry_has_changed(
+ guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
{
-
unsigned long gpfn;
gpfn = entry_get_pfn(guest_pt[i]);
/*
- * Looks like it's longer a page table.
+ * Looks like it's no longer a page table.
*/
if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+ {
+ if ( entry_get_flags(shadow_pt[i]) & _PAGE_PRESENT )
+ put_shadow_ref(entry_get_pfn(shadow_pt[i]));
+ shadow_pt[i] = entry_empty();
continue;
+ }
need_flush |= validate_entry_change(
d, &guest_pt[i], &shadow_pt[i],
@@ -1864,11 +1904,17 @@
unsigned long gpfn;
gpfn = l4e_get_pfn(new_root_e);
+
/*
- * Looks like it's longer a page table.
+ * Looks like it's no longer a page table.
*/
if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+ {
+ if ( l4e_get_flags(shadow4[i]) & _PAGE_PRESENT )
+ put_shadow_ref(l4e_get_pfn(shadow4[i]));
+ shadow4[i] = l4e_empty();
continue;
+ }
if ( d->arch.ops->guest_paging_levels == PAGING_L4 )
{
@@ -2372,7 +2418,7 @@
if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
{
u32 index = get_cr3_idxval(v);
- gpfn = (index << PGT_score_shift) | gpfn;
+ gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
}
#endif
@@ -3233,8 +3279,35 @@
int i;
for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
+ {
+ unsigned long gpfn;
+
+ /*
+ * Looks like it's no longer a page table.
+ */
+ if ( unlikely(entry_get_value(gple[index*4+i]) & PAE_PDPT_RESERVED) )
+ {
+ if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
+ put_shadow_ref(entry_get_pfn(sple[i]));
+
+ sple[i] = entry_empty();
+ continue;
+ }
+
+ gpfn = entry_get_pfn(gple[index*4+i]);
+
+ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+ {
+ if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
+ put_shadow_ref(entry_get_pfn(sple[i]));
+
+ sple[i] = entry_empty();
+ continue;
+ }
+
validate_entry_change(
v->domain, &gple[index*4+i], &sple[i], PAGING_L3);
+ }
unmap_domain_page(sple);
}
diff -r c3bb51c443a7 -r 0267063e050c xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Thu Apr 13 09:29:27 2006
+++ b/xen/arch/x86/shadow_public.c Thu Apr 13 09:31:53 2006
@@ -102,6 +102,15 @@
int shadow_set_guest_paging_levels(struct domain *d, int levels)
{
+ struct vcpu *v = current;
+
+ /*
+ * Need to wait for VCPU0 to complete the on-going shadow ops.
+ */
+
+ if ( v->vcpu_id )
+ return 1;
+
shadow_lock(d);
switch(levels) {
@@ -692,7 +701,6 @@
void free_shadow_page(unsigned long smfn)
{
struct page_info *page = mfn_to_page(smfn);
-
unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
struct domain *d = page_get_owner(mfn_to_page(gmfn));
unsigned long gpfn = mfn_to_gmfn(d, gmfn);
@@ -709,10 +717,9 @@
if ( !mfn )
gpfn |= (1UL << 63);
}
- if (d->arch.ops->guest_paging_levels == PAGING_L3)
- if (type == PGT_l4_shadow ) {
- gpfn = ((unsigned long)page->tlbflush_timestamp <<
PGT_score_shift) | gpfn;
- }
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ if ( type == PGT_l4_shadow )
+ gpfn = ((unsigned long)page->tlbflush_timestamp <<
PGT_pae_idx_shift) | gpfn;
#endif
delete_shadow_status(d, gpfn, gmfn, type);
@@ -743,9 +750,24 @@
#if CONFIG_PAGING_LEVELS >= 3
case PGT_l2_shadow:
case PGT_l3_shadow:
+ shadow_demote(d, gpfn, gmfn);
+ free_shadow_tables(d, smfn, shadow_type_to_level(type));
+ d->arch.shadow_page_count--;
+ break;
+
case PGT_l4_shadow:
gpfn = gpfn & PGT_mfn_mask;
- shadow_demote(d, gpfn, gmfn);
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ /*
+ * Since a single PDPT page can have multiple PDPs, it's possible
+ * that shadow_demote() has been already called for gmfn.
+ */
+ if ( mfn_is_page_table(gmfn) )
+ shadow_demote(d, gpfn, gmfn);
+ } else
+ shadow_demote(d, gpfn, gmfn);
+
free_shadow_tables(d, smfn, shadow_type_to_level(type));
d->arch.shadow_page_count--;
break;
@@ -2041,7 +2063,16 @@
void clear_all_shadow_status(struct domain *d)
{
+ struct vcpu *v = current;
+
+ /*
+ * Don't clean up while other vcpus are working.
+ */
+ if ( v->vcpu_id )
+ return;
+
shadow_lock(d);
+
free_shadow_pages(d);
free_shadow_ht_entries(d);
d->arch.shadow_ht =
@@ -2054,6 +2085,7 @@
shadow_ht_buckets * sizeof(struct shadow_status));
free_out_of_sync_entries(d);
+
shadow_unlock(d);
}
diff -r c3bb51c443a7 -r 0267063e050c xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Thu Apr 13 09:29:27 2006
+++ b/xen/include/asm-x86/mm.h Thu Apr 13 09:31:53 2006
@@ -103,11 +103,13 @@
#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift)
#define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask)
#define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift)
+#define PGT_pae_idx_shift PGT_high_mfn_shift
#else
/* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
#define PGT_mfn_mask ((1U<<23)-1)
/* NX for PAE xen is not supported yet */
#define PGT_high_mfn_nx (1ULL << 63)
+#define PGT_pae_idx_shift 23
#endif
#define PGT_score_shift 23
diff -r c3bb51c443a7 -r 0267063e050c xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h Thu Apr 13 09:29:27 2006
+++ b/xen/include/asm-x86/shadow_64.h Thu Apr 13 09:31:53 2006
@@ -119,6 +119,8 @@
#define PAE_CR3_IDX_MASK 0x7f
#define PAE_CR3_IDX_NO 128
+#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */
+
/******************************************************************************/
static inline int table_offset_64(unsigned long va, int level)
{
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|