# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1172328497 0
# Node ID 59b8d5168cc1561326f6749c79ea879093e37b0c
# Parent 0147ef7c3cd79675453525c9d61e4dd6c8d8bad5
Reduce the impact of saving/restoring/dumping large domains on Dom0 memory
usage by calling posix_fadvise64() to tell the OS to discard the page-cache
pages used for the save/dump file.
Signed-off-by: Simon Graham <Simon.Graham@xxxxxxxxxxx>
---
tools/libxc/Makefile | 3 +++
tools/libxc/xc_core.c | 9 +++++++++
tools/libxc/xc_linux.c | 33 +++++++++++++++++++++++++++++++++
tools/libxc/xc_linux_restore.c | 18 ++++++++++++++++--
tools/libxc/xc_linux_save.c | 38 ++++++++++++++++++++++++++++++++------
tools/libxc/xc_private.h | 10 ++++++++++
tools/libxc/xc_solaris.c | 7 +++++++
7 files changed, 110 insertions(+), 8 deletions(-)
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/Makefile
--- a/tools/libxc/Makefile Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/Makefile Sat Feb 24 14:48:17 2007 +0000
@@ -58,6 +58,9 @@ CFLAGS += -Werror -Wmissing-prototypes
CFLAGS += -Werror -Wmissing-prototypes
CFLAGS += -fno-strict-aliasing
CFLAGS += $(INCLUDES) -I.
+
+# Needed for posix_fadvise64() in xc_linux.c
+CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
# Define this to make it possible to run valgrind on code linked with these
# libraries.
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_core.c Sat Feb 24 14:48:17 2007 +0000
@@ -802,6 +802,12 @@ static int local_file_dump(void *args, c
}
}
+ if (length >= DUMP_INCREMENT*PAGE_SIZE) {
+ // Now dumping pages -- make sure we discard clean pages from
+ // the cache after each write
+ discard_file_cache(da->fd, 0 /* no flush */);
+ }
+
return 0;
}
@@ -821,6 +827,9 @@ xc_domain_dumpcore(int xc_handle,
sts = xc_domain_dumpcore_via_callback(
xc_handle, domid, &da, &local_file_dump);
+
+ /* flush and discard any remaining portion of the file from cache */
+ discard_file_cache(da.fd, 1/* flush first*/);
close(da.fd);
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_linux.c Sat Feb 24 14:48:17 2007 +0000
@@ -328,6 +328,39 @@ int xc_evtchn_unmask(int xce_handle, evt
return dorw(xce_handle, (char *)&port, sizeof(port), 1);
}
+/*
+ * Optionally flush a file to disk, then ask the kernel to drop the file's
+ * pages from the page cache.
+ *
+ * fd:    descriptor of the file whose cached pages should be discarded.
+ * flush: non-zero to fsync() first and discard the cache for the whole
+ *        file; zero to discard only from the start of the file up to the
+ *        last page boundary at or below the current file offset.
+ *
+ * Returns 0 on success, or a negated errno value on failure.
+ */
+int discard_file_cache(int fd, int flush)
+{
+    off_t cur = 0;
+    int err;
+
+    if ( flush && (fsync(fd) < 0) )
+    {
+        /* Latch errno now: the logging call below may overwrite it. */
+        err = errno;
+        PERROR("Failed to flush file: %s", strerror(err));
+        return -err;
+    }
+
+    /*
+     * Calculate last page boundary of amount written so far
+     * unless we are flushing in which case entire cache
+     * is discarded.
+     */
+    if ( !flush )
+    {
+        cur = lseek(fd, 0, SEEK_CUR);
+        if ( cur == (off_t)-1 )
+        {
+            /*
+             * Non-seekable descriptors (pipes, sockets) cannot be advised
+             * either, so there is nothing to do for them.
+             */
+            if ( errno == ESPIPE )
+                return 0;
+            cur = 0;
+        }
+        /*
+         * Build the mask at off_t width so that offsets beyond 4GB are not
+         * truncated should PAGE_SIZE be narrower than off_t.
+         */
+        cur &= ~((off_t)PAGE_SIZE - 1);
+    }
+
+    /*
+     * Discard from the buffer cache; a length of 0 means "to end of file".
+     * posix_fadvise64() returns the error number directly rather than
+     * returning -1 and setting errno, so test for non-zero.
+     */
+    err = posix_fadvise64(fd, 0, cur, POSIX_FADV_DONTNEED);
+    if ( err != 0 )
+    {
+        errno = err;
+        PERROR("Failed to discard cache: %s", strerror(err));
+        return -err;
+    }
+
+    return 0;
+}
+
/*
* Local variables:
* mode: C
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_linux_restore.c Sat Feb 24 14:48:17 2007 +0000
@@ -144,7 +144,7 @@ int xc_linux_restore(int xc_handle, int
unsigned int console_evtchn, unsigned long *console_mfn)
{
DECLARE_DOMCTL;
- int rc = 1, i, n, pae_extended_cr3 = 0;
+ int rc = 1, i, n, m, pae_extended_cr3 = 0;
unsigned long mfn, pfn;
unsigned int prev_pc, this_pc;
int verify = 0;
@@ -331,7 +331,7 @@ int xc_linux_restore(int xc_handle, int
*/
prev_pc = 0;
- n = 0;
+ n = m = 0;
while (1) {
int j, nr_mfns = 0;
@@ -530,6 +530,17 @@ int xc_linux_restore(int xc_handle, int
munmap(region_base, j*PAGE_SIZE);
n+= j; /* crude stats */
+
+ /*
+ * Discard cache for portion of file read so far up to last
+ * page boundary every 16MB or so.
+ */
+ m += j;
+ if ( m > MAX_PAGECACHE_USAGE )
+ {
+ discard_file_cache(io_fd, 0 /* no flush */);
+ m = 0;
+ }
}
/*
@@ -864,6 +875,9 @@ int xc_linux_restore(int xc_handle, int
free(p2m);
free(pfn_type);
+ /* discard cache for save file */
+ discard_file_cache(io_fd, 1 /*flush*/);
+
DPRINTF("Restore exit with rc=%d\n", rc);
return rc;
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_linux_save.c Sat Feb 24 14:48:17 2007 +0000
@@ -172,6 +172,28 @@ static uint64_t tv_delta(struct timeval
(new->tv_usec - old->tv_usec);
}
+/*
+ * write() wrapper that limits how much of the save file accumulates in the
+ * page cache.
+ *
+ * fd:     descriptor to write to.
+ * live:   non-zero for a live save, in which case no cache accounting or
+ *         discarding is done.
+ * buffer: data to write.
+ * len:    number of bytes to write.
+ *
+ * Returns the result of write(); errno is left as write() set it.
+ * The running byte count is a function-level static, so it is shared by
+ * every descriptor written through this helper.
+ */
+static int noncached_write(int fd, int live, void *buffer, int len)
+{
+    static int write_count = 0;
+
+    int rc = write(fd, buffer, len);
+
+    /* Only bytes that were actually written can be sitting in the cache. */
+    if (!live && rc > 0) {
+        write_count += rc;
+
+        if (write_count >= MAX_PAGECACHE_USAGE*PAGE_SIZE) {
+            int serrno = errno;
+
+            /* Time to discard cache - dont care if this fails */
+            discard_file_cache(fd, 0 /* no flush */);
+
+            write_count = 0;
+
+            /* Hide any errno change made by the discard from the caller. */
+            errno = serrno;
+        }
+    }
+    return rc;
+}
#ifdef ADAPTIVE_SAVE
@@ -205,7 +227,7 @@ static inline void initialize_mbit_rate(
}
-static int ratewrite(int io_fd, void *buf, int n)
+static int ratewrite(int io_fd, int live, void *buf, int n)
{
static int budget = 0;
static int burst_time_us = -1;
@@ -215,7 +237,7 @@ static int ratewrite(int io_fd, void *bu
long long delta;
if (START_MBIT_RATE == 0)
- return write(io_fd, buf, n);
+ return noncached_write(io_fd, live, buf, n);
budget -= n;
if (budget < 0) {
@@ -251,13 +273,13 @@ static int ratewrite(int io_fd, void *bu
}
}
}
- return write(io_fd, buf, n);
+ return noncached_write(io_fd, live, buf, n);
}
#else /* ! ADAPTIVE SAVE */
#define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+/* Non-adaptive build: ratewrite() forwards straight to noncached_write(). */
+#define ratewrite(_io_fd, _live, _buf, _n) \
+    noncached_write((_io_fd), (_live), (_buf), (_n))
#define initialize_mbit_rate()
#endif
@@ -1082,7 +1104,7 @@ int xc_linux_save(int xc_handle, int io_
if(race && !live)
goto out;
- if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
+ if (ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE) {
ERROR("Error when writing to state file (4)"
" (errno %d)", errno);
goto out;
@@ -1091,7 +1113,7 @@ int xc_linux_save(int xc_handle, int io_
} else {
/* We have a normal page: just write it directly. */
- if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) {
+ if (ratewrite(io_fd, live, spage, PAGE_SIZE) != PAGE_SIZE)
{
ERROR("Error when writing to state file (5)"
" (errno %d)", errno);
goto out;
@@ -1261,6 +1283,10 @@ int xc_linux_save(int xc_handle, int io_
DPRINTF("Warning - couldn't disable shadow mode");
}
}
+ else {
+ // flush last write and discard cache for file
+ discard_file_cache(io_fd, 1 /* flush */);
+ }
if (live_shinfo)
munmap(live_shinfo, PAGE_SIZE);
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_private.h Sat Feb 24 14:48:17 2007 +0000
@@ -40,6 +40,13 @@
#define DEBUG 1
#define INFO 1
#define PROGRESS 0
+
+/*
+** Define max dirty page cache to permit during save/restore -- need to
+** balance keeping cache usage down with CPU impact of invalidating too
+** often. The value is a count of pages (4*1024 pages, i.e. 16MB with
+** 4kB pages).
+*/
+#define MAX_PAGECACHE_USAGE (4*1024)
#if INFO
#define IPRINTF(_f, _a...) printf(_f , ## _a)
@@ -158,4 +165,7 @@ void bitmap_64_to_byte(uint8_t *bp, cons
void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits);
void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
+/* Optionally flush file to disk and discard page cache */
+int discard_file_cache(int fd, int flush);
+
#endif /* __XC_PRIVATE_H__ */
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_solaris.c
--- a/tools/libxc/xc_solaris.c Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_solaris.c Sat Feb 24 14:48:17 2007 +0000
@@ -242,3 +242,10 @@ int xc_evtchn_unmask(int xce_handle, evt
{
return dorw(xce_handle, (char *)&port, sizeof(port), 1);
}
+
+/*
+ * Solaris placeholder for discard_file_cache(): performs no flush and no
+ * cache discard, and always reports success.
+ */
+int discard_file_cache(int fd, int flush)
+{
+    /* TODO: Implement for Solaris! */
+    (void)fd;
+    (void)flush;
+
+    return 0;
+}
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|