# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 234939c0ec3a14cac448ec65e4dbaa173ccae16b
# Parent 7154e0416313e5dcecd7c2e78ca18e51569202eb
[BALLOON] Make the domain0 ballooning logic in xend aware of pages that are on
the page scrub list.
This fixes the case where crashing/restarting a domain can cause dom0 to
balloon more than necessary.
I changed the physinfo dictionary in Python to be in KiB, rather than
MiB, to avoid accumulating ugly rounding errors. I tried to avoid
changing units anywhere else.
Signed-off-by: Charles Coffing <ccoffing@xxxxxxxxxx>
---
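(Illustrative aside, not part of the patch.) The old pages_to_mb() helper
rounded every conversion up to the next MiB, so each physinfo quantity could
overstate reality by almost a full MiB, and sums of such quantities compounded
the error. Reporting KiB makes the conversion exact. A minimal Python sketch of
the arithmetic, assuming 4 KiB pages as on x86:

    PAGE_SIZE_KIB = 4  # assumption: 4 KiB pages

    def pages_to_mib_rounded(pages):
        # Old behaviour: round up to the next MiB.
        return (pages * PAGE_SIZE_KIB + 1023) // 1024

    def pages_to_kib(pages):
        # New behaviour: exact, no rounding.
        return pages * PAGE_SIZE_KIB

    print(pages_to_mib_rounded(3))  # 1 (MiB): 12 KiB reported as a whole MiB
    print(pages_to_kib(3))          # 12 (KiB): exact

This is also why the BALLOON_OUT_SLACK fudge factor in balloon.py (removed
below) is no longer needed: it existed only to absorb that rounding.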
tools/python/xen/lowlevel/xc/xc.c | 21 ++++++-----
tools/python/xen/xend/XendNode.py | 3 +
tools/python/xen/xend/balloon.py | 69 ++++++++++++++++++++------------------
xen/arch/x86/dom0_ops.c | 1
xen/common/page_alloc.c | 12 +++++-
xen/include/public/dom0_ops.h | 1
xen/include/xen/mm.h | 1
7 files changed, 64 insertions(+), 44 deletions(-)
diff -r 7154e0416313 -r 234939c0ec3a tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue Jun 27 11:50:57 2006 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue Jun 27 12:03:46 2006 +0100
@@ -582,6 +582,12 @@ static PyObject *pyxc_readconsolering(Xc
}
+static unsigned long pages_to_kib(unsigned long pages)
+{
+ return pages * (XC_PAGE_SIZE / 1024);
+}
+
+
static PyObject *pyxc_pages_to_kib(XcObject *self, PyObject *args)
{
unsigned long pages;
@@ -589,13 +595,7 @@ static PyObject *pyxc_pages_to_kib(XcObj
if (!PyArg_ParseTuple(args, "l", &pages))
return NULL;
- return PyLong_FromUnsignedLong(pages * (XC_PAGE_SIZE / 1024));
-}
-
-
-static unsigned long pages_to_mb(unsigned long pages)
-{
- return (pages * (XC_PAGE_SIZE / 1024) + 1023) / 1024;
+ return PyLong_FromUnsignedLong(pages_to_kib(pages));
}
@@ -618,13 +618,14 @@ static PyObject *pyxc_physinfo(XcObject
if(q>cpu_cap)
*(q-1)=0;
- return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}",
+ return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
"threads_per_core", info.threads_per_core,
"cores_per_socket", info.cores_per_socket,
"sockets_per_node", info.sockets_per_node,
"nr_nodes", info.nr_nodes,
- "total_memory", pages_to_mb(info.total_pages),
- "free_memory", pages_to_mb(info.free_pages),
+ "total_memory", pages_to_kib(info.total_pages),
+ "free_memory", pages_to_kib(info.free_pages),
+ "scrub_memory", pages_to_kib(info.scrub_pages),
"cpu_khz", info.cpu_khz,
"hw_caps", cpu_cap);
}
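As a usage sketch (not part of the patch): after this change the dictionary
returned by xc.physinfo() carries all three memory fields in KiB, including
the new scrub_memory key.

    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.xc()
    info = xc.physinfo()

    # total_memory, free_memory and scrub_memory are all in KiB now:
    print("total %(total_memory)d KiB, free %(free_memory)d KiB, "
          "awaiting scrub %(scrub_memory)d KiB" % info)

Callers that still want MiB, such as XendNode below, must divide by 1024
themselves.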
diff -r 7154e0416313 -r 234939c0ec3a tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Tue Jun 27 11:50:57 2006 +0100
+++ b/tools/python/xen/xend/XendNode.py Tue Jun 27 12:03:46 2006 +0100
@@ -64,6 +64,9 @@ class XendNode:
info['cores_per_socket'] *
info['threads_per_core'])
info['cpu_mhz'] = info['cpu_khz'] / 1000
+ # physinfo is in KiB
+ info['total_memory'] = info['total_memory'] / 1024
+ info['free_memory'] = info['free_memory'] / 1024
ITEM_ORDER = ['nr_cpus',
'nr_nodes',
diff -r 7154e0416313 -r 234939c0ec3a tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py Tue Jun 27 11:50:57 2006 +0100
+++ b/tools/python/xen/xend/balloon.py Tue Jun 27 12:03:46 2006 +0100
@@ -29,8 +29,6 @@ from XendError import VmError
PROC_XEN_BALLOON = '/proc/xen/balloon'
-BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are
- # rounded.
RETRY_LIMIT = 20
RETRY_LIMIT_INCR = 5
##
@@ -68,22 +66,22 @@ def _get_proc_balloon(label):
f.close()
def get_dom0_current_alloc():
- """Returns the current memory allocation (in MiB) of dom0."""
+ """Returns the current memory allocation (in KiB) of dom0."""
kb = _get_proc_balloon(labels['current'])
if kb == None:
raise VmError('Failed to query current memory allocation of dom0.')
- return kb / 1024
+ return kb
def get_dom0_target_alloc():
- """Returns the target memory allocation (in MiB) of dom0."""
+ """Returns the target memory allocation (in KiB) of dom0."""
kb = _get_proc_balloon(labels['target'])
if kb == None:
raise VmError('Failed to query target memory allocation of dom0.')
- return kb / 1024
+ return kb
-def free(required):
+def free(need_mem):
"""Balloon out memory from the privileged domain so that there is the
specified required amount (in KiB) free.
"""
@@ -92,9 +90,10 @@ def free(required):
# to balloon out to free some up. Memory freed by a destroyed domain may
# not appear in the free_memory field immediately, because it needs to be
# scrubbed before it can be released to the free list, which is done
- # asynchronously by Xen; ballooning is asynchronous also. No matter where
- # we expect the free memory to come from, therefore, we need to wait for
- # it to become available.
+ # asynchronously by Xen; ballooning is asynchronous also. Such memory
+ # does, however, need to be accounted for when calculating how much dom0
+ # needs to balloon. No matter where we expect the free memory to come
+ # from, we need to wait for it to become available.
#
# We are not allowed to balloon below dom0_min_mem, or if dom0_min_mem
# is 0, we cannot balloon at all. Memory can still become available
@@ -108,43 +107,49 @@ def free(required):
# usage, so we recheck the required alloc each time around the loop, but
# track the last used value so that we don't trigger too many watches.
- need_mem = (required + 1023) / 1024 + BALLOON_OUT_SLACK
-
xroot = XendRoot.instance()
xc = xen.lowlevel.xc.xc()
try:
- dom0_min_mem = xroot.get_dom0_min_mem()
+ dom0_min_mem = xroot.get_dom0_min_mem() * 1024
retries = 0
sleep_time = SLEEP_TIME_GROWTH
last_new_alloc = None
rlimit = RETRY_LIMIT
while retries < rlimit:
- free_mem = xc.physinfo()['free_memory']
+ physinfo = xc.physinfo()
+ free_mem = physinfo['free_memory']
+ scrub_mem = physinfo['scrub_memory']
if free_mem >= need_mem:
- log.debug("Balloon: free %d; need %d; done.", free_mem,
- need_mem)
+ log.debug("Balloon: %d KiB free; need %d; done.",
+ free_mem, need_mem)
return
if retries == 0:
- rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
- log.debug("Balloon: free %d; need %d; retries: %d.",
- free_mem, need_mem, rlimit)
+ rlimit += ((need_mem - free_mem)/1024/1024) * RETRY_LIMIT_INCR
+ log.debug("Balloon: %d KiB free; %d to scrub; need %d;
retries: %d.",
+ free_mem, scrub_mem, need_mem, rlimit)
if dom0_min_mem > 0:
dom0_alloc = get_dom0_current_alloc()
- new_alloc = dom0_alloc - (need_mem - free_mem)
+ new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem)
- if (new_alloc >= dom0_min_mem and
- new_alloc != last_new_alloc):
- log.debug("Balloon: setting dom0 target to %d.",
- new_alloc)
- dom0 = XendDomain.instance().privilegedDomain()
- dom0.setMemoryTarget(new_alloc)
- last_new_alloc = new_alloc
- # Continue to retry, waiting for ballooning.
+ if free_mem + scrub_mem >= need_mem:
+ if last_new_alloc == None:
+ log.debug("Balloon: waiting on scrubbing")
+ last_new_alloc = dom0_alloc
+ else:
+ if (new_alloc >= dom0_min_mem and
+ new_alloc != last_new_alloc):
+ new_alloc_mb = new_alloc / 1024 # Round down
+ log.debug("Balloon: setting dom0 target to %d MiB.",
+ new_alloc_mb)
+ dom0 = XendDomain.instance().privilegedDomain()
+ dom0.setMemoryTarget(new_alloc_mb)
+ last_new_alloc = new_alloc
+ # Continue to retry, waiting for ballooning or scrubbing.
time.sleep(sleep_time)
if retries < 2 * RETRY_LIMIT:
@@ -154,15 +159,15 @@ def free(required):
# Not enough memory; diagnose the problem.
if dom0_min_mem == 0:
raise VmError(('Not enough free memory and dom0_min_mem is 0, so '
- 'I cannot release any more. I need %d MiB but '
+ 'I cannot release any more. I need %d KiB but '
'only have %d.') %
(need_mem, free_mem))
elif new_alloc < dom0_min_mem:
raise VmError(
- ('I need %d MiB, but dom0_min_mem is %d and shrinking to '
- '%d MiB would leave only %d MiB free.') %
+ ('I need %d KiB, but dom0_min_mem is %d and shrinking to '
+ '%d KiB would leave only %d KiB free.') %
(need_mem, dom0_min_mem, dom0_min_mem,
- free_mem + dom0_alloc - dom0_min_mem))
+ free_mem + scrub_mem + dom0_alloc - dom0_min_mem))
else:
raise VmError('The privileged domain did not balloon!')
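To summarize the new control flow in free(), here is a simplified model of the
per-iteration decision (illustrative only; balloon_decision is a hypothetical
helper, all values are in KiB, and the real loop additionally retries, tracks
last_new_alloc, and sleeps between iterations):

    def balloon_decision(need_mem, free_mem, scrub_mem, dom0_alloc, dom0_min_mem):
        if free_mem >= need_mem:
            return 'done'                    # enough is already free
        if free_mem + scrub_mem >= need_mem:
            return 'wait-for-scrub'          # Xen will release it once scrubbed
        # Only the true shortfall comes out of dom0; pages awaiting scrub
        # no longer inflate the amount ballooned out:
        new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem)
        if new_alloc < dom0_min_mem:
            return 'give-up'                 # would shrink dom0 below its floor
        return ('set-dom0-target-kib', new_alloc)

Before this patch the shortfall was computed as need_mem - free_mem alone, so
memory still sitting on the scrub list was ballooned out of dom0 a second time.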
diff -r 7154e0416313 -r 234939c0ec3a xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/arch/x86/dom0_ops.c Tue Jun 27 12:03:46 2006 +0100
@@ -194,6 +194,7 @@ long arch_do_dom0_op(struct dom0_op *op,
pi->nr_nodes = 1;
pi->total_pages = total_pages;
pi->free_pages = avail_domheap_pages();
+ pi->scrub_pages = avail_scrub_pages();
pi->cpu_khz = cpu_khz;
memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
diff -r 7154e0416313 -r 234939c0ec3a xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/common/page_alloc.c Tue Jun 27 12:03:46 2006 +0100
@@ -61,6 +61,7 @@ custom_param("lowmem_emergency_pool", pa
static DEFINE_SPINLOCK(page_scrub_lock);
LIST_HEAD(page_scrub_list);
+static unsigned long scrub_pages;
/*********************
* ALLOCATION BITMAP
@@ -696,6 +697,7 @@ void free_domheap_pages(struct page_info
{
spin_lock(&page_scrub_lock);
list_add(&pg[i].list, &page_scrub_list);
+ scrub_pages++;
spin_unlock(&page_scrub_lock);
}
}
@@ -784,9 +786,10 @@ static void page_scrub_softirq(void)
/* Remove peeled pages from the list. */
ent->next->prev = &page_scrub_list;
page_scrub_list.next = ent->next;
-
+ scrub_pages -= (i+1);
+
spin_unlock(&page_scrub_lock);
-
+
/* Working backwards, scrub each page in turn. */
while ( ent != &page_scrub_list )
{
@@ -798,6 +801,11 @@ static void page_scrub_softirq(void)
free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
}
} while ( (NOW() - start) < MILLISECS(1) );
+}
+
+unsigned long avail_scrub_pages(void)
+{
+ return scrub_pages;
}
static __init int page_scrub_init(void)
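The counter discipline above is deliberately simple: scrub_pages is only
modified while page_scrub_lock is held, and avail_scrub_pages() reads it
without taking the lock, which is acceptable because xend uses the value
purely as a heuristic. A hypothetical Python model of the same pattern (the
names mirror the C code, but this is not the hypervisor implementation):

    import threading

    page_scrub_lock = threading.Lock()
    page_scrub_list = []
    scrub_pages = 0

    def queue_for_scrub(page):
        # Producer: free_domheap_pages() queues a dirty page.
        global scrub_pages
        with page_scrub_lock:
            page_scrub_list.append(page)
            scrub_pages += 1

    def scrub_a_batch(n):
        # Consumer: the softirq peels a batch off under the lock...
        global scrub_pages
        with page_scrub_lock:
            batch = page_scrub_list[:n]
            del page_scrub_list[:n]
            scrub_pages -= len(batch)
        for page in batch:
            pass  # ...then scrubs and frees each page outside the lock

    def avail_scrub_pages():
        # Lock-free read; a momentarily stale value is fine for a heuristic.
        return scrub_pages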
diff -r 7154e0416313 -r 234939c0ec3a xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/include/public/dom0_ops.h Tue Jun 27 12:03:46 2006 +0100
@@ -231,6 +231,7 @@ struct dom0_physinfo {
uint32_t cpu_khz;
uint64_t total_pages;
uint64_t free_pages;
+ uint64_t scrub_pages;
uint32_t hw_cap[8];
};
typedef struct dom0_physinfo dom0_physinfo_t;
diff -r 7154e0416313 -r 234939c0ec3a xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/include/xen/mm.h Tue Jun 27 12:03:46 2006 +0100
@@ -91,6 +91,7 @@ extern struct list_head page_scrub_list;
if ( !list_empty(&page_scrub_list) ) \
raise_softirq(PAGE_SCRUB_SOFTIRQ); \
} while ( 0 )
+unsigned long avail_scrub_pages(void);
#include <asm/mm.h>