# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1290172869 0
# Node ID d272ae0cb1a07c59f5db0ad0e96061255717184d
# Parent ff989ac7cd970598f64a05a07b037f1423f7cb47
x86/mm: Allocate log-dirty bitmaps from shadow/HAP memory.
Move the p2m alloc and free functions back into the per-domain paging
assistance structure and allow them to be called from the log-dirty
code. This makes it less likely that log-dirty code will run out of
memory populating the log-dirty bitmap.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
xen/arch/x86/mm/hap/hap.c | 42 +++++++++++++------
xen/arch/x86/mm/hap/p2m-ept.c | 4 +
xen/arch/x86/mm/p2m.c | 19 ++------
xen/arch/x86/mm/paging.c | 83 +++++++++------------------------------
xen/arch/x86/mm/shadow/common.c | 53 +++++++++++++++++-------
xen/arch/x86/mm/shadow/private.h | 6 +-
xen/include/asm-x86/domain.h | 4 +
xen/include/asm-x86/p2m.h | 11 -----
xen/include/asm-x86/paging.h | 4 -
9 files changed, 107 insertions(+), 119 deletions(-)
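
Outside the patch itself, here is a minimal, self-contained C sketch of the two ideas the change combines: per-domain alloc/free hooks that p2m and log-dirty code call through, and lock-taking that is conditional on whether the caller already holds the pool lock. The toy_domain/toy_page types, the pthread-based pool_lock helpers, and the pool_*_p2m_page/toy_enable names are illustrative stand-ins, not Xen code; the real patch uses d->arch.paging.alloc_page/free_page, hap_locked_by_me()/shadow_locked_by_me(), and the hap/shadow pool allocators.

    /* Illustrative sketch only -- NOT part of the patch.  All names are
     * made up; the real Xen structures and lock primitives differ. */
    #include <pthread.h>
    #include <stdlib.h>

    struct toy_page { int count_info; };

    struct toy_domain {
        pthread_mutex_t pool_lock;        /* init with PTHREAD_MUTEX_INITIALIZER */
        pthread_t       pool_lock_owner;
        int             pool_locked;
        /* Per-domain hooks, installed by whichever pool backend is active,
         * so p2m and log-dirty code need not know which pool backs them. */
        struct toy_page *(*alloc_page)(struct toy_domain *d);
        void             (*free_page)(struct toy_domain *d, struct toy_page *pg);
    };

    static int pool_locked_by_me(struct toy_domain *d)
    {
        return d->pool_locked && pthread_equal(d->pool_lock_owner, pthread_self());
    }

    static void pool_lock(struct toy_domain *d)
    {
        pthread_mutex_lock(&d->pool_lock);
        d->pool_lock_owner = pthread_self();
        d->pool_locked = 1;
    }

    static void pool_unlock(struct toy_domain *d)
    {
        d->pool_locked = 0;
        pthread_mutex_unlock(&d->pool_lock);
    }

    /* Mirrors hap_alloc_p2m_page()/shadow_alloc_p2m_page(): callable both
     * from p2m code (lock never held) and from log-dirty code (lock
     * sometimes held), so it only takes the lock when the caller does not
     * already hold it. */
    static struct toy_page *pool_alloc_p2m_page(struct toy_domain *d)
    {
        int do_locking = !pool_locked_by_me(d);
        struct toy_page *pg;

        if ( do_locking )
            pool_lock(d);

        pg = malloc(sizeof(*pg));      /* stands in for hap_alloc()/shadow_alloc() */
        if ( pg )
            pg->count_info = 1;        /* single ref, as in the patch */

        if ( do_locking )
            pool_unlock(d);
        return pg;
    }

    static void pool_free_p2m_page(struct toy_domain *d, struct toy_page *pg)
    {
        int do_locking = !pool_locked_by_me(d);

        if ( do_locking )
            pool_lock(d);
        free(pg);                      /* stands in for hap_free()/shadow_free() */
        if ( do_locking )
            pool_unlock(d);
    }

    /* Mirrors the hunks in hap_enable()/shadow_enable(): install the
     * backend's hooks so p2m and log-dirty code can borrow pool memory. */
    static void toy_enable(struct toy_domain *d)
    {
        d->alloc_page = pool_alloc_p2m_page;
        d->free_page  = pool_free_p2m_page;
    }

The conditional locking matters because, as the added comments note, the log-dirty paths can already hold the shadow/HAP lock when they need to grow the bitmap, while the p2m code never does.
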
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Fri Nov 19 13:21:09 2010 +0000
@@ -276,12 +276,16 @@ static void hap_free(struct domain *d, m
page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}
-static struct page_info *hap_alloc_p2m_page(struct p2m_domain *p2m)
-{
- struct domain *d = p2m->domain;
+static struct page_info *hap_alloc_p2m_page(struct domain *d)
+{
struct page_info *pg;
-
- hap_lock(d);
+ int do_locking;
+
+ /* This is called both from the p2m code (which never holds the
+ * hap lock) and the log-dirty code (which sometimes does). */
+ do_locking = !hap_locked_by_me(d);
+ if ( do_locking )
+ hap_lock(d);
pg = hap_alloc(d);
#if CONFIG_PAGING_LEVELS == 3
@@ -312,14 +316,21 @@ static struct page_info *hap_alloc_p2m_p
pg->count_info |= 1;
}
- hap_unlock(d);
+ if ( do_locking )
+ hap_unlock(d);
return pg;
}
-static void hap_free_p2m_page(struct p2m_domain *p2m, struct page_info *pg)
-{
- struct domain *d = p2m->domain;
- hap_lock(d);
+static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
+{
+ int do_locking;
+
+ /* This is called both from the p2m code (which never holds the
+ * hap lock) and the log-dirty code (which sometimes does). */
+ do_locking = !hap_locked_by_me(d);
+ if ( do_locking )
+ hap_lock(d);
+
ASSERT(page_get_owner(pg) == d);
/* Should have just the one ref we gave it in alloc_p2m_page() */
if ( (pg->count_info & PGC_count_mask) != 1 )
@@ -333,7 +344,9 @@ static void hap_free_p2m_page(struct p2m
d->arch.paging.hap.total_pages++;
hap_free(d, page_to_mfn(pg));
ASSERT(d->arch.paging.hap.p2m_pages >= 0);
- hap_unlock(d);
+
+ if ( do_locking )
+ hap_unlock(d);
}
/* Return the size of the pool, rounded up to the nearest MB */
@@ -597,11 +610,14 @@ int hap_enable(struct domain *d, u32 mod
}
}
+ /* Allow p2m and log-dirty code to borrow our memory */
+ d->arch.paging.alloc_page = hap_alloc_p2m_page;
+ d->arch.paging.free_page = hap_free_p2m_page;
+
/* allocate P2m table */
if ( mode & PG_translate )
{
- rv = p2m_alloc_table(p2m_get_hostp2m(d),
- hap_alloc_p2m_page, hap_free_p2m_page);
+ rv = p2m_alloc_table(p2m_get_hostp2m(d));
if ( rv != 0 )
goto out;
}
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri Nov 19 13:21:09 2010 +0000
@@ -125,6 +125,8 @@ static int ept_set_middle_entry(struct p
/* free ept sub tree behind an entry */
void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
{
+ struct domain *d = p2m->domain;
+
/* End if the entry is a leaf entry. */
if ( level == 0 || !is_epte_present(ept_entry) ||
is_epte_superpage(ept_entry) )
@@ -138,7 +140,7 @@ void ept_free_entry(struct p2m_domain *p
unmap_domain_page(epte);
}
- p2m->free_page(p2m, mfn_to_page(ept_entry->mfn));
+ d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
}
static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/arch/x86/mm/p2m.c Fri Nov 19 13:21:09 2010 +0000
@@ -142,8 +142,9 @@ p2m_alloc_ptp(struct p2m_domain *p2m, un
struct page_info *pg;
ASSERT(p2m);
- ASSERT(p2m->alloc_page);
- pg = p2m->alloc_page(p2m);
+ ASSERT(p2m->domain);
+ ASSERT(p2m->domain->arch.paging.alloc_page);
+ pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
if (pg == NULL)
return NULL;
@@ -167,7 +168,6 @@ p2m_next_level(struct p2m_domain *p2m, m
l1_pgentry_t new_entry;
void *next;
int i;
- ASSERT(p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
shift, max)) )
@@ -1792,14 +1792,9 @@ int set_p2m_entry(struct p2m_domain *p2m
// The structure of the p2m table is that of a pagetable for xen (i.e. it is
// controlled by CONFIG_PAGING_LEVELS).
//
-// The alloc_page and free_page functions will be used to get memory to
-// build the p2m, and to release it again at the end of day.
-//
// Returns 0 for success or -errno.
//
-int p2m_alloc_table(struct p2m_domain *p2m,
- struct page_info * (*alloc_page)(struct p2m_domain *p2m),
- void (*free_page)(struct p2m_domain *p2m, struct page_info *pg))
+int p2m_alloc_table(struct p2m_domain *p2m)
{
mfn_t mfn = _mfn(INVALID_MFN);
struct page_info *page, *p2m_top;
@@ -1816,9 +1811,6 @@ int p2m_alloc_table(struct p2m_domain *p
}
P2M_PRINTK("allocating p2m table\n");
-
- p2m->alloc_page = alloc_page;
- p2m->free_page = free_page;
p2m_top = p2m_alloc_ptp(p2m,
#if CONFIG_PAGING_LEVELS == 4
@@ -1882,6 +1874,7 @@ void p2m_teardown(struct p2m_domain *p2m
* We know we don't have any extra mappings to these pages */
{
struct page_info *pg;
+ struct domain *d = p2m->domain;
#ifdef __x86_64__
unsigned long gfn;
p2m_type_t t;
@@ -1902,7 +1895,7 @@ void p2m_teardown(struct p2m_domain *p2m
p2m->phys_table = pagetable_null();
while ( (pg = page_list_remove_head(&p2m->pages)) )
- p2m->free_page(p2m, pg);
+ d->arch.paging.free_page(d, pg);
p2m_unlock(p2m);
}
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/arch/x86/mm/paging.c Fri Nov 19 13:21:09 2010 +0000
@@ -99,10 +99,11 @@ static mfn_t paging_new_log_dirty_page(s
{
struct page_info *page;
- page = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ page = d->arch.paging.alloc_page(d);
if ( unlikely(page == NULL) )
{
d->arch.paging.log_dirty.failed_allocs++;
+ *mapping_p = NULL;
return _mfn(INVALID_MFN);
}
@@ -131,30 +132,23 @@ static mfn_t paging_new_log_dirty_node(s
return mfn;
}
-int paging_alloc_log_dirty_bitmap(struct domain *d)
+mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
{
mfn_t *mapping;
- if ( mfn_valid(d->arch.paging.log_dirty.top) )
- return 0;
+ if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
+ return map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d, &mapping);
- if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
- {
- /* Clear error indicator since we're reporting this one */
- d->arch.paging.log_dirty.failed_allocs = 0;
- return -ENOMEM;
- }
- unmap_domain_page(mapping);
-
- return 0;
+
+ return mapping;
}
static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
d->arch.paging.log_dirty.allocs--;
- free_domheap_page(mfn_to_page(mfn));
-}
+ d->arch.paging.free_page(d, mfn_to_page(mfn));
+}
void paging_free_log_dirty_bitmap(struct domain *d)
{
@@ -204,36 +198,13 @@ int paging_log_dirty_enable(struct domai
{
int ret;
+ if ( paging_mode_log_dirty(d) )
+ return -EINVAL;
+
domain_pause(d);
- log_dirty_lock(d);
-
- if ( paging_mode_log_dirty(d) )
- {
- ret = -EINVAL;
- goto out;
- }
-
- ret = paging_alloc_log_dirty_bitmap(d);
- if ( ret != 0 )
- {
- paging_free_log_dirty_bitmap(d);
- goto out;
- }
-
- log_dirty_unlock(d);
-
- /* Safe because the domain is paused. */
ret = d->arch.paging.log_dirty.enable_log_dirty(d);
-
- /* Possibility of leaving the bitmap allocated here but it'll be
- * tidied on domain teardown. */
-
domain_unpause(d);
- return ret;
-
- out:
- log_dirty_unlock(d);
- domain_unpause(d);
+
return ret;
}
@@ -271,8 +242,6 @@ void paging_mark_dirty(struct domain *d,
log_dirty_lock(d);
- ASSERT(mfn_valid(d->arch.paging.log_dirty.top));
-
/* We /really/ mean PFN here, even for non-translated guests. */
pfn = get_gpfn_from_mfn(mfn_x(gmfn));
/* Shared MFNs should NEVER be marked dirty */
@@ -291,7 +260,9 @@ void paging_mark_dirty(struct domain *d,
i3 = L3_LOGDIRTY_IDX(pfn);
i4 = L4_LOGDIRTY_IDX(pfn);
- l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+ l4 = paging_map_log_dirty_bitmap(d);
+ if ( !l4 )
+ goto out;
mfn = l4[i4];
if ( !mfn_valid(mfn) )
mfn = l4[i4] = paging_new_log_dirty_node(d, &l3);
@@ -367,12 +338,6 @@ int paging_log_dirty_op(struct domain *d
/* caller may have wanted just to clean the state or access stats. */
peek = 0;
- if ( (peek || clean) && !mfn_valid(d->arch.paging.log_dirty.top) )
- {
- rv = -EINVAL; /* perhaps should be ENOMEM? */
- goto out;
- }
-
if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
printk("%s: %d failed page allocs while logging dirty pages\n",
__FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
@@ -381,8 +346,7 @@ int paging_log_dirty_op(struct domain *d
}
pages = 0;
- l4 = (mfn_valid(d->arch.paging.log_dirty.top) ?
- map_domain_page(mfn_x(d->arch.paging.log_dirty.top)) : NULL);
+ l4 = paging_map_log_dirty_bitmap(d);
for ( i4 = 0;
(pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
@@ -469,12 +433,6 @@ int paging_log_dirty_range(struct domain
d->arch.paging.log_dirty.fault_count,
d->arch.paging.log_dirty.dirty_count);
- if ( !mfn_valid(d->arch.paging.log_dirty.top) )
- {
- rv = -EINVAL; /* perhaps should be ENOMEM? */
- goto out;
- }
-
if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
printk("%s: %d failed page allocs while logging dirty pages\n",
__FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
@@ -500,13 +458,13 @@ int paging_log_dirty_range(struct domain
b2 = L2_LOGDIRTY_IDX(begin_pfn);
b3 = L3_LOGDIRTY_IDX(begin_pfn);
b4 = L4_LOGDIRTY_IDX(begin_pfn);
- l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+ l4 = paging_map_log_dirty_bitmap(d);
for ( i4 = b4;
(pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
i4++ )
{
- l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+ l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
for ( i3 = b3;
(pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
i3++ )
@@ -590,7 +548,8 @@ int paging_log_dirty_range(struct domain
if ( l3 )
unmap_domain_page(l3);
}
- unmap_domain_page(l4);
+ if ( l4 )
+ unmap_domain_page(l4);
log_dirty_unlock(d);
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/arch/x86/mm/shadow/common.c Fri Nov 19 13:21:09 2010 +0000
@@ -1620,12 +1620,16 @@ void shadow_free(struct domain *d, mfn_t
* That's OK because the p2m table only exists for translated domains,
* and those domains can't ever turn off shadow mode. */
static struct page_info *
-shadow_alloc_p2m_page(struct p2m_domain *p2m)
-{
- struct domain *d = p2m->domain;
+shadow_alloc_p2m_page(struct domain *d)
+{
struct page_info *pg;
-
- shadow_lock(d);
+ int do_locking;
+
+ /* This is called both from the p2m code (which never holds the
+ * shadow lock) and the log-dirty code (which sometimes does). */
+ do_locking = !shadow_locked_by_me(d);
+ if ( do_locking )
+ shadow_lock(d);
if ( d->arch.paging.shadow.total_pages
< shadow_min_acceptable_pages(d) + 1 )
@@ -1639,7 +1643,8 @@ shadow_alloc_p2m_page(struct p2m_domain
d->arch.paging.shadow.p2m_pages++;
d->arch.paging.shadow.total_pages--;
- shadow_unlock(d);
+ if ( do_locking )
+ shadow_unlock(d);
/* Unlike shadow pages, mark p2m pages as owned by the domain.
* Marking the domain as the owner would normally allow the guest to
@@ -1652,9 +1657,10 @@ shadow_alloc_p2m_page(struct p2m_domain
}
static void
-shadow_free_p2m_page(struct p2m_domain *p2m, struct page_info *pg)
-{
- struct domain *d = p2m->domain;
+shadow_free_p2m_page(struct domain *d, struct page_info *pg)
+{
+ int do_locking;
+
ASSERT(page_get_owner(pg) == d);
/* Should have just the one ref we gave it in alloc_p2m_page() */
if ( (pg->count_info & PGC_count_mask) != 1 )
@@ -1666,11 +1672,18 @@ shadow_free_p2m_page(struct p2m_domain *
pg->u.sh.type = SH_type_p2m_table; /* p2m code reuses type-info */
page_set_owner(pg, NULL);
- shadow_lock(d);
+ /* This is called both from the p2m code (which never holds the
+ * shadow lock) and the log-dirty code (which sometimes does). */
+ do_locking = !shadow_locked_by_me(d);
+ if ( do_locking )
+ shadow_lock(d);
+
shadow_free(d, page_to_mfn(pg));
d->arch.paging.shadow.p2m_pages--;
d->arch.paging.shadow.total_pages++;
- shadow_unlock(d);
+
+ if ( do_locking )
+ shadow_unlock(d);
}
#if CONFIG_PAGING_LEVELS == 3
@@ -3032,6 +3045,7 @@ static void sh_new_mode(struct domain *d
ASSERT(shadow_locked_by_me(d));
ASSERT(d != current->domain);
+
d->arch.paging.mode = new_mode;
for_each_vcpu(d, v)
sh_update_paging_modes(v);
@@ -3079,12 +3093,15 @@ int shadow_enable(struct domain *d, u32
shadow_unlock(d);
}
+ /* Allow p2m and log-dirty code to borrow shadow memory */
+ d->arch.paging.alloc_page = shadow_alloc_p2m_page;
+ d->arch.paging.free_page = shadow_free_p2m_page;
+
/* Init the P2M table. Must be done before we take the shadow lock
* to avoid possible deadlock. */
if ( mode & PG_translate )
{
- rv = p2m_alloc_table(p2m,
- shadow_alloc_p2m_page, shadow_free_p2m_page);
+ rv = p2m_alloc_table(p2m);
if (rv != 0)
goto out_unlocked;
}
@@ -3095,7 +3112,7 @@ int shadow_enable(struct domain *d, u32
{
/* Get a single page from the shadow pool. Take it via the
* P2M interface to make freeing it simpler afterwards. */
- pg = shadow_alloc_p2m_page(p2m);
+ pg = shadow_alloc_p2m_page(d);
if ( pg == NULL )
{
rv = -ENOMEM;
@@ -3147,7 +3164,7 @@ int shadow_enable(struct domain *d, u32
if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
p2m_teardown(p2m);
if ( rv != 0 && pg != NULL )
- shadow_free_p2m_page(p2m, pg);
+ shadow_free_p2m_page(d, pg);
domain_unpause(d);
return rv;
}
@@ -3265,7 +3282,7 @@ void shadow_teardown(struct domain *d)
/* Must be called outside the lock */
if ( unpaged_pagetable )
- shadow_free_p2m_page(p2m_get_hostp2m(d), unpaged_pagetable);
+ shadow_free_p2m_page(d, unpaged_pagetable);
}
void shadow_final_teardown(struct domain *d)
@@ -3319,6 +3336,10 @@ static int shadow_one_bit_enable(struct
sh_set_allocation(d, 0, NULL);
return -ENOMEM;
}
+
+ /* Allow p2m and log-dirty code to borrow shadow memory */
+ d->arch.paging.alloc_page = shadow_alloc_p2m_page;
+ d->arch.paging.free_page = shadow_free_p2m_page;
}
if ( d->arch.paging.mode == 0 )
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/arch/x86/mm/shadow/private.h Fri Nov 19 13:21:09 2010 +0000
@@ -584,7 +584,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t
int rv;
ASSERT(shadow_mode_log_dirty(d));
- ASSERT(mfn_valid(d->arch.paging.log_dirty.top));
/* We /really/ mean PFN here, even for non-translated guests. */
pfn = get_gpfn_from_mfn(mfn_x(gmfn));
@@ -602,7 +601,10 @@ sh_mfn_is_dirty(struct domain *d, mfn_t
*/
return 1;
- l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+ l4 = paging_map_log_dirty_bitmap(d);
+ if ( !l4 )
+ return 0;
+
mfn = l4[L4_LOGDIRTY_IDX(pfn)];
unmap_domain_page(l4);
if ( !mfn_valid(mfn) )
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/include/asm-x86/domain.h Fri Nov 19 13:21:09 2010 +0000
@@ -201,6 +201,10 @@ struct paging_domain {
struct hap_domain hap;
/* log dirty support */
struct log_dirty_domain log_dirty;
+ /* alloc/free pages from the pool for paging-assistance structures
+ * (used by p2m and log-dirty code for their tries) */
+ struct page_info * (*alloc_page)(struct domain *d);
+ void (*free_page)(struct domain *d, struct page_info *pg);
};
struct paging_vcpu {
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/include/asm-x86/p2m.h Fri Nov 19 13:21:09 2010 +0000
@@ -179,10 +179,6 @@ struct p2m_domain {
/* Pages used to construct the p2m */
struct page_list_head pages;
- /* Functions to call to get or free pages for the p2m */
- struct page_info * (*alloc_page )(struct p2m_domain *p2m);
- void (*free_page )(struct p2m_domain *p2m,
- struct page_info *pg);
int (*set_entry )(struct p2m_domain *p2m,
unsigned long gfn,
mfn_t mfn, unsigned int page_order,
@@ -390,13 +386,8 @@ int p2m_init(struct domain *d);
/* Allocate a new p2m table for a domain.
*
- * The alloc_page and free_page functions will be used to get memory to
- * build the p2m, and to release it again at the end of day.
- *
* Returns 0 for success or -errno. */
-int p2m_alloc_table(struct p2m_domain *p2m,
- struct page_info * (*alloc_page)(struct p2m_domain *p2m),
- void (*free_page)(struct p2m_domain *p2m, struct page_info *pg));
+int p2m_alloc_table(struct p2m_domain *p2m);
/* Return all the p2m resources to Xen. */
void p2m_teardown(struct p2m_domain *p2m);
diff -r ff989ac7cd97 -r d272ae0cb1a0 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h Fri Nov 19 13:20:48 2010 +0000
+++ b/xen/include/asm-x86/paging.h Fri Nov 19 13:21:09 2010 +0000
@@ -134,8 +134,8 @@ struct paging_mode {
/*****************************************************************************
* Log dirty code */
-/* allocate log dirty bitmap resource for recording dirty pages */
-int paging_alloc_log_dirty_bitmap(struct domain *d);
+/* get the top of the log-dirty bitmap trie, allocating if necessary */
+mfn_t *paging_map_log_dirty_bitmap(struct domain *d);
/* free log dirty bitmap resource */
void paging_free_log_dirty_bitmap(struct domain *d);