Xen and Linux patches for 'nonlive' (non-live) save & restore.
Also improves the code that searches page_array[] while saving the htab, in xc_linux_save.c.
A modified version of htab.h is placed in a new directory, tools/libxc/xen/asm;
this probably needs a better resolution.
The htab is mapped by 'decorating' the pfn (see pfn2mfn() in xen/arch/powerpc/mm.c). However,
no range / validation checking is done on decorated pfns at this time.
...................................................
Xen diffs:
diff -r 7669fca80bfc config/powerpc64.mk
--- a/config/powerpc64.mk Mon Dec 04 11:46:53 2006 -0500
+++ b/config/powerpc64.mk Wed Dec 13 15:39:32 2006 -0500
@@ -3,3 +3,4 @@ CONFIG_POWERPC_$(XEN_OS) := y
CFLAGS += -DELFSIZE=64
LIBDIR := lib
+CONFIG_XCUTILS :=y
diff -r 7669fca80bfc tools/libxc/powerpc64/Makefile
--- a/tools/libxc/powerpc64/Makefile Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/powerpc64/Makefile Wed Dec 13 15:39:32 2006 -0500
@@ -2,5 +2,7 @@ GUEST_SRCS-y += powerpc64/xc_linux_build
GUEST_SRCS-y += powerpc64/xc_linux_build.c
GUEST_SRCS-y += powerpc64/xc_prose_build.c
GUEST_SRCS-y += powerpc64/utils.c
+GUEST_SRCS-y += powerpc64/xc_linux_save.c
+GUEST_SRCS-y += powerpc64/xc_linux_restore.c
CTRL_SRCS-y += powerpc64/xc_memory.c
diff -r 7669fca80bfc tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/xc_private.c Wed Dec 13 15:39:32 2006 -0500
@@ -306,6 +306,23 @@ int xc_get_pfn_list(int xc_handle,
return (ret < 0) ? -1 : domctl.u.getmemlist.num_pfns;
}
+
+/*
+ * Query the hypervisor for the domain's hash page table (htab).
+ *
+ * On success stores the htab address reported by Xen in *htab_raddr and
+ * returns the number of PTEs in the htab.  On failure returns -1 and
+ * leaves *htab_raddr untouched: the domctl output fields carry no
+ * meaningful value after a failed hypercall.
+ */
+int xc_get_shadow_list(int xc_handle,
+                       uint32_t domid,
+                       uint64_t *htab_raddr)
+{
+    DECLARE_DOMCTL;
+    int ret;
+
+    domctl.cmd = XEN_DOMCTL_getshadowlist;
+    domctl.domain = (domid_t)domid;
+
+    ret = do_domctl(xc_handle, &domctl);
+    if (ret < 0)
+        return -1;
+
+    *htab_raddr = domctl.u.getshadowlist.htab_map;
+
+    return domctl.u.getshadowlist.htab_num_ptes;
+}
+
#endif
long xc_get_tot_pages(int xc_handle, uint32_t domid)
diff -r 7669fca80bfc tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/xenctrl.h Wed Dec 13 15:39:32 2006 -0500
@@ -518,6 +518,8 @@ int xc_get_pfn_list(int xc_handle, uint3
int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
unsigned long max_pfns);
+int xc_get_shadow_list(int xc_handle, uint32_t domid, uint64_t *mfn_htab_map);
+
unsigned long xc_ia64_fpsr_default(void);
int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
diff -r 7669fca80bfc tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Dec 13 15:39:32 2006 -0500
@@ -461,6 +461,7 @@ class Common_XendDomainInfo:
if self.state in (DOM_STATE_HALTED, DOM_STATE_SUSPENDED):
try:
self._constructDomain()
+ self._allocMem2()
self._storeVmDetails()
self._createDevices()
self._createChannels()
@@ -1237,7 +1238,67 @@ class Common_XendDomainInfo:
# Set maximum number of vcpus in domain
xc.domain_max_vcpus(self.domid, int(self.info['vcpus']))
-
+ # Use the saved architecture- and image-specific calculations
+ # the various headrooms necessary, given the raw configured
+ # values. maxmem, memory, and shadow are all in KiB.
+
+ def _allocMem2(self):
+ log.debug("allocMem2");
+
+ maxmem = self.info['maxmem'] * 1024
+ memory = self.info['memory'] * 1024
+ shadow = self.info['shadow_memory'] * 1024
+
+ # Round shadow up to a multiple of a MiB, as shadow_mem_control
+ # takes MiB and we must not round down and end up under-providing.
+ shadow = ((shadow + 1023) / 1024) * 1024
+
+ # set memory limit
+ xc.domain_setmaxmem(self.domid, maxmem)
+
+ # Make sure there's enough RAM available for the domain
+ balloon.free(memory + shadow)
+
+ # Set up the shadow memory, i.e. the PowerPC hash table
+ shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
+ self.info['shadow_memory'] = shadow_cur
+
+ rma_log = 26 ### self.info['powerpc_rma_log']
+ if rma_log == 0:
+ # use smallest RMA size available
+ rma_log = self.getRealModeLogs()[0]
+
+ if rma_log not in self.getRealModeLogs():
+ raise ValueError("rma_log(%d) must be one of" % rma_log,
+ self.getRealModeLogs())
+
+ # store info for FlatDeviceTree
+ ### self.info['powerpc_rma_log'] = rma_log
+
+ rma_kb = (1 << rma_log) / 1024
+ if memory < rma_kb:
+ raise ValueError("Domain memory must be at least %d KB" % rma_kb)
+
+ if memory % (16 << 10):
+ raise ValueError("Domain memory %dKB must be a multiple of 16MB"
+ % memory)
+
+ # allocate the RMA
+ log.debug("alloc_real_mode_area(%d, %d)", self.domid, rma_log)
+ xc.alloc_real_mode_area(self.domid, rma_log)
+
+ # now allocate the remaining memory as large-order allocations
+ memory -= rma_kb
+ extent_log = 24 # 16 MB
+ page_log = 12 # 4 KB
+ extent_order = extent_log - page_log
+ log.debug("increase_reservation(%d, 0x%x, %d)", self.domid,
+ memory, extent_order)
+ xc.domain_memory_increase_reservation(self.domid,
+ memory,
+ extent_order)
+
+
def _introduceDomain(self):
assert self.domid is not None
assert self.store_mfn is not None
diff -r 7669fca80bfc xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/domain.c Wed Dec 13 15:39:32 2006 -0500
@@ -152,7 +152,32 @@ void vcpu_destroy(struct vcpu *v)
int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
{
+ int i;
+
memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+
+ for ( i = 0; i < NUM_SLB_ENTRIES; i++) {
+ memcpy(&v->arch.slb_entries[i], &c->slb_entries[i], sizeof(struct
slb_entry));
+ }
+
+ for ( i = 0; i< 4; i++) v->arch.sprg[i] = c->sprg[i];
+
+ v->arch.timebase = c->timebase;
+ v->arch.dar = c->dar;
+ v->arch.dsisr = c->dsisr;
+
+ memcpy( &v->arch.cpu, &c->cpu, sizeof(struct cpu_vcpu));
+ v->arch.dec = c->dec;
+
+#ifdef HAS_FLOAT
+ memcpy( v->arch.fprs, c->fprs, sizeof(double)*NUM_FPRS);
+#endif /* HAS_FLOAT */
+
+#ifdef HAS_VMX
+ memcpy( &v->arch.vrs, &c->vrs, sizeof(vector128)*32);
+ memcpy( &v->arch.vscr, &c->vscr, sizeof(vector128));
+ v->arch.vrsave = c->vrsave;
+#endif /* HAS_VMX */
printk("Domain[%d].%d: initializing\n",
v->domain->domain_id, v->vcpu_id);
diff -r 7669fca80bfc xen/arch/powerpc/domctl.c
--- a/xen/arch/powerpc/domctl.c Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/domctl.c Wed Dec 13 15:39:32 2006 -0500
@@ -29,10 +29,37 @@
#include <public/sysctl.h>
#include <asm/processor.h>
+#define DECOR 0x80000000 // indicates htab address
+
+
void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
{
+ int i;
+
memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
+ for (i = 0; i < NUM_SLB_ENTRIES; i++) {
+ memcpy(&c->slb_entries[i],&v->arch.slb_entries[i],sizeof(struct
slb_entry));
+ }
+
+ for (i = 0; i < 4; i++) c->sprg[i] = v->arch.sprg[i] ;
+ c->timebase = v->arch.timebase;
+ c->dar = v->arch.dar;
+ c->dsisr = v->arch.dsisr;
+ memcpy(&c->cpu,&v->arch.cpu,sizeof(struct cpu_vcpu));
+ c->dec = v->arch.dec;
+
+#ifdef HAS_FLOAT
+ memcpy(c->fprs,v->arch.fprs,sizeof(double)*NUM_FPRS);
+#endif /* HAS_FLOAT */
+
+#ifdef HAS_VMX
+ memcpy(c->vrs, v->arch.vrs, sizeof(vector128)*32);
+ memcpy(&c->vscr, &v->arch.vscr, sizeof(vector128));
+ c->vrsave = v->arch.vrsave;
+#endif /* HAS_VMX */
+
+
/* XXX fill in rest of vcpu_guest_context_t */
}
@@ -108,6 +135,27 @@ long arch_do_domctl(struct xen_domctl *d
}
}
break;
+    case XEN_DOMCTL_getshadowlist:
+    {
+        /* Report the address and size (in PTEs) of the domain's htab. */
+        struct domain *d = find_domain_by_id(domctl->domain);
+        uint num_ptes;
+
+        ret = -EINVAL;
+        if ( d != NULL )
+        {
+            ret = 0;
+
+            domctl->u.getshadowlist.htab_map = (uint64_t)(d->arch.htab.map);
+
+            num_ptes = 1UL << d->arch.htab.log_num_ptes;
+            domctl->u.getshadowlist.htab_num_ptes = num_ptes;
+
+            /* If the copy-out fails the caller never sees the result,
+             * so do not report success. */
+            if ( copy_to_guest(u_domctl, domctl, 1) )
+                ret = -EFAULT;
+            put_domain(d);
+        }
+    }
+    break;
+
default:
ret = -ENOSYS;
diff -r 7669fca80bfc xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/mm.c Wed Dec 13 15:39:32 2006 -0500
@@ -37,6 +37,8 @@
#define MEM_LOG(_f, _a...) ((void)0)
#endif
+#define DECOR 0x80000000UL
+
/* Frame table and its size in pages. */
struct page_info *frame_table;
unsigned long max_page;
@@ -408,6 +410,11 @@ ulong pfn2mfn(struct domain *d, ulong pf
ulong foreign_map_pfn = 1UL << cpu_foreign_map_order();
/* quick tests first */
+ if (pfn & DECOR)
+ {
+ mfn = pfn & ~DECOR; //*** TBD Check for valid htab range?
+ }
+ else
if (pfn & foreign_map_pfn) {
t = PFN_TYPE_FOREIGN;
mfn = foreign_to_mfn(d, pfn);
diff -r 7669fca80bfc xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/asm-powerpc/domain.h Wed Dec 13 15:39:32 2006 -0500
@@ -51,10 +51,6 @@ struct arch_domain {
uint large_page_order[4];
} __cacheline_aligned;
-struct slb_entry {
- ulong slb_vsid;
- ulong slb_esid;
-};
#define SLB_ESID_VALID (1ULL << (63 - 36))
#define SLB_ESID_CLASS (1ULL << (63 - 56))
#define SLB_ESID_MASK (~0ULL << (63 - 35))
@@ -63,9 +59,9 @@ struct slb_entry {
struct xencomm;
-typedef struct {
- u32 u[4];
-} __attribute__((aligned(16))) vector128;
+#ifdef HAS_VMX
+typedef _vector128 vector128;
+#endif /* HAS_VMX */
struct arch_vcpu {
cpu_user_regs_t ctxt; /* User-level CPU registers */
diff -r 7669fca80bfc xen/include/asm-powerpc/htab.h
--- a/xen/include/asm-powerpc/htab.h Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/asm-powerpc/htab.h Wed Dec 13 15:39:32 2006 -0500
@@ -69,68 +69,68 @@
union pte {
struct pte_words {
- ulong vsid;
- ulong rpn;
+ uint64_t vsid;
+ uint64_t rpn;
} words;
struct pte_bits {
/* *INDENT-OFF* */
/* high word */
- ulong avpn: 57; /* [0-56] abbreviated virtual page number */
- ulong lock: 1; /* [57] hypervisor lock bit */
- ulong res: 1; /* [58] reserved for hypervisor */
- ulong bolted: 1; /* [59] XXX software-reserved; temp hack */
- ulong sw: 1; /* [60] reserved for software */
- ulong l: 1; /* [61] Large Page */
- ulong h: 1; /* [62] hash function id */
- ulong v: 1; /* [63] valid */
+ uint64_t avpn: 57; /* [0-56] abbreviated virtual page number */
+ uint64_t lock: 1; /* [57] hypervisor lock bit */
+ uint64_t res: 1; /* [58] reserved for hypervisor */
+ uint64_t bolted: 1; /* [59] XXX software-reserved; temp hack */
+ uint64_t sw: 1; /* [60] reserved for software */
+ uint64_t l: 1; /* [61] Large Page */
+ uint64_t h: 1; /* [62] hash function id */
+ uint64_t v: 1; /* [63] valid */
/* low word */
- ulong pp0: 1; /* [0] page protection bit 0 (current PowerPC
+ uint64_t pp0: 1; /* [0] page protection bit 0 (current PowerPC
* specification says it can always be 0) */
- ulong ts: 1; /* [1] tag select */
- ulong rpn: 50; /* [2-51] real page number */
- ulong res2: 2; /* [52,53] reserved */
- ulong ac: 1; /* [54] address compare */
- ulong r: 1; /* [55] referenced */
- ulong c: 1; /* [56] changed */
- ulong w: 1; /* [57] write through */
- ulong i: 1; /* [58] cache inhibited */
- ulong m: 1; /* [59] memory coherent */
- ulong g: 1; /* [60] guarded */
- ulong n: 1; /* [61] no-execute */
- ulong pp1: 2; /* [62,63] page protection bits 1:2 */
+ uint64_t ts: 1; /* [1] tag select */
+ uint64_t rpn: 50; /* [2-51] real page number */
+ uint64_t res2: 2; /* [52,53] reserved */
+ uint64_t ac: 1; /* [54] address compare */
+ uint64_t r: 1; /* [55] referenced */
+ uint64_t c: 1; /* [56] changed */
+ uint64_t w: 1; /* [57] write through */
+ uint64_t i: 1; /* [58] cache inhibited */
+ uint64_t m: 1; /* [59] memory coherent */
+ uint64_t g: 1; /* [60] guarded */
+ uint64_t n: 1; /* [61] no-execute */
+ uint64_t pp1: 2; /* [62,63] page protection bits 1:2 */
/* *INDENT-ON* */
} bits;
};
union ptel {
- ulong word;
+ uint64_t word;
struct ptel_bits {
/* *INDENT-OFF* */
- ulong pp0: 1; /* page protection bit 0 (current PPC
+ uint64_t pp0: 1; /* page protection bit 0 (current PPC
* AS says it can always be 0) */
- ulong ts: 1; /* tag select */
- ulong rpn: 50; /* real page number */
- ulong res2: 2; /* reserved */
- ulong ac: 1; /* address compare */
- ulong r: 1; /* referenced */
- ulong c: 1; /* changed */
- ulong w: 1; /* write through */
- ulong i: 1; /* cache inhibited */
- ulong m: 1; /* memory coherent */
- ulong g: 1; /* guarded */
- ulong n: 1; /* no-execute */
- ulong pp1: 2; /* page protection bits 1:2 */
+ uint64_t ts: 1; /* tag select */
+ uint64_t rpn: 50; /* real page number */
+ uint64_t res2: 2; /* reserved */
+ uint64_t ac: 1; /* address compare */
+ uint64_t r: 1; /* referenced */
+ uint64_t c: 1; /* changed */
+ uint64_t w: 1; /* write through */
+ uint64_t i: 1; /* cache inhibited */
+ uint64_t m: 1; /* memory coherent */
+ uint64_t g: 1; /* guarded */
+ uint64_t n: 1; /* no-execute */
+ uint64_t pp1: 2; /* page protection bits 1:2 */
/* *INDENT-ON* */
} bits;
};
struct domain_htab {
- ulong sdr1;
+ uint64_t sdr1;
uint log_num_ptes; /* log number of PTEs in HTAB. */
uint order; /* order for freeing. */
union pte *map; /* access the htab like an array */
- ulong *shadow; /* idx -> logical translation array */
+ uint64_t *shadow; /* idx -> logical translation array */
};
#endif
diff -r 7669fca80bfc xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/public/arch-powerpc.h Wed Dec 13 15:39:32 2006 -0500
@@ -98,11 +98,66 @@ typedef struct cpu_user_regs cpu_user_re
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
+#define NUM_SLB_ENTRIES 64
+struct slb_entry {
+ uint64_t slb_vsid;
+ uint64_t slb_esid;
+};
+typedef struct slb_entry slb_entry_t;
+
+#ifndef HAS_VMX
+#define HAS_VMX 1
+#endif
+
+#ifndef HAS_FLOAT
+#define HAS_FLOAT 1
+#endif
+
+#ifdef HAS_VMX
+typedef struct {
+ uint32_t u[4];
+} __attribute__((aligned(16))) _vector128;
+#endif /* HAS_VMX */
+
+
/* ONLY used to communicate with dom0! See also struct exec_domain. */
struct vcpu_guest_context {
cpu_user_regs_t user_regs; /* User-level CPU registers */
+ slb_entry_t slb_entries[NUM_SLB_ENTRIES]; /* Segment Lookaside
Buffer */
+
+ /* Special-Purpose Registers */
+ uint64_t sprg[4];
+ uint64_t timebase;
+ uint64_t dar;
+ uint64_t dsisr;
+
+ struct cpu_vcpu_tag {
+ uint64_t hid4;
+ } cpu; /* CPU-specific bits */
+
+ uint32_t dec;
+
+ /* XXX etc */
+#ifdef HAS_FLOAT
+#define NUM_FPRS 32
+ double fprs[NUM_FPRS];
+#endif
+#ifdef HAS_VMX
+ _vector128 vrs[32];
+ _vector128 vscr;
+ uint32_t vrsave;
+#endif
+
+#if 0
+ struct xencomm *xencomm;
+
+ /* I/O-port access bitmap. */
+ u8 *iobmp; /* Guest kernel virtual address of the bitmap. */
+ int iobmp_limit; /* Number of ports represented in the bitmap. */
+ int iopl; /* Current IOPL for this VCPU. */
+#endif
+
uint64_t sdr1; /* Pagetable base */
- /* XXX etc */
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 7669fca80bfc xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/public/domctl.h Wed Dec 13 15:39:32 2006 -0500
@@ -392,6 +392,18 @@ typedef struct xen_domctl_real_mode_area
typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+#define XEN_DOMCTL_getshadowlist 29
+struct xen_domctl_getshadowlist {
+ /* OUT variables */
+ /* Start of htab array */
+ uint64_t htab_map;
+ /* Number of ptes within htab */
+ uint32_t htab_num_ptes;
+};
+
+typedef struct xen_domctl_getshadowlist xen_domctl_getshadowlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t);
+
struct xen_domctl {
uint32_t cmd;
uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
@@ -418,6 +430,7 @@ struct xen_domctl {
struct xen_domctl_arch_setup arch_setup;
struct xen_domctl_settimeoffset settimeoffset;
struct xen_domctl_real_mode_area real_mode_area;
+ struct xen_domctl_getshadowlist getshadowlist;
uint8_t pad[128];
} u;
};
diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_restore.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/xc_linux_restore.c Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,312 @@
+/******************************************************************************
+ * xc_linux_restore.c
+ *
+ * Restore the state of a Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Rewritten for PPC: Dan Poff <poff@xxxxxxxxxx>, Yi Ge <geyi@xxxxxxxxxx>
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <xen/asm/htab.h>
+
+#include "xg_private.h"
+
+#define DECOR 0x80000000 // indicates htab address
+#define LOG_PTE_SIZE 4
+
+#define INVALID_MFN (~0ULL)
+
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/*
+ * Read exactly count bytes from fd into buf, restarting reads that are
+ * interrupted by a signal (EINTR).
+ *
+ * Returns 1 when all count bytes were read, 0 on end-of-file or on any
+ * other read error.
+ */
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    unsigned char *b = buf;
+    size_t done = 0;    /* size_t: no signed/unsigned mix against count */
+
+    while (done < count) {
+        ssize_t s = read(fd, &b[done], count - done);
+
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        if (s <= 0) {
+            break;
+        }
+        done += (size_t)s;
+    }
+
+    return (done == count) ? 1 : 0;
+}
+
+/*
+ * Read one page of guest memory from io_fd straight into guest frame mfn.
+ *
+ * The frame is mapped read/write, filled from the state file and unmapped
+ * again.  Returns 0 on success, -1 if the frame cannot be mapped or the
+ * page cannot be read in full.
+ */
+static int
+read_page(int xc_handle, int io_fd, uint32_t dom, xen_pfn_t mfn)
+{
+    void *mem;
+    int rc = 0;
+
+    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                               PROT_READ|PROT_WRITE, mfn);
+    if (mem == NULL) {
+        ERROR("cannot map page");
+        return -1;
+    }
+
+    if (!read_exact(io_fd, mem, PAGE_SIZE)) {
+        ERROR("Error when reading from state file (5)");
+        rc = -1;
+    }
+
+    /* Unmap on the failure path too, so a bad read does not leak the
+     * foreign mapping. */
+    munmap(mem, PAGE_SIZE);
+    return rc;
+}
+
+/*
+ * Restore a (non-live) saved PowerPC Linux guest from io_fd into domain dom.
+ *
+ * The stream is consumed in this order:
+ *   1. format version (unsigned long, must be 1);
+ *   2. (pfn, page contents) pairs, terminated by a pfn of INVALID_MFN;
+ *   3. the number of htab PTEs followed by the htab, one page at a time,
+ *      with each valid PTE's rpn holding a guest pfn that is translated
+ *      back to a machine frame through page_array[];
+ *   4. the vcpu 0 context;
+ *   5. the shared-info page.
+ *
+ * nr_pfns is the number of guest pages.  store_evtchn and console_evtchn
+ * are written into the start_info page; *store_mfn and *console_mfn
+ * receive the machine frames of the store and console pages.
+ *
+ * Returns 0 on success and 1 on failure.  On every failure that reaches
+ * the common exit path the domain is destroyed (unless dom is 0).
+ */
+int
+xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                 unsigned long nr_pfns, unsigned int store_evtchn,
+                 unsigned long *store_mfn, unsigned int console_evtchn,
+                 unsigned long *console_mfn)
+{
+    DECLARE_DOMCTL;
+    int rc = 1;
+    unsigned long i;
+    xen_pfn_t pfn;
+    xen_pfn_t mfn = INVALID_MFN;
+    unsigned long ver;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* Mapping of the domain's shared-info frame; NULL while unmapped. */
+    shared_info_t *shared_info = NULL;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    xen_pfn_t shared_info_pfn, *page_array = NULL;
+
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info = NULL;
+
+    /* htab bounce buffer and current htab page mapping; both are released
+     * at 'out' so that error paths do not leak them. */
+    char *htab_buf = NULL;
+    char *htab_mem = NULL;
+
+    max_pfn = nr_pfns;
+
+    DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
+
+    if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
+        ERROR("Error when reading version");
+        goto out;
+    }
+    if (ver != 1) {
+        ERROR("version of save doesn't match");
+        goto out;
+    }
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        /* needed for build domctl, but might as well do early */
+        ERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    /* Get the domain's shared-info frame. */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Get pages. */
+    page_array = malloc(max_pfn * sizeof(xen_pfn_t));
+    if (page_array == NULL) {
+        ERROR("Could not allocate memory");
+        goto out;
+    }
+
+    if (xc_get_pfn_list(xc_handle, dom,
+                        page_array, max_pfn) != max_pfn) {
+        ERROR("Could not get the page frame list");
+        goto out;
+    }
+
+    DPRINTF("Reloading memory pages: 0%%\n");
+
+    while (1) {
+        if (!read_exact(io_fd, &pfn, sizeof(xen_pfn_t))) {
+            ERROR("Error when reading batch size");
+            goto out;
+        }
+
+        if (pfn == INVALID_MFN)
+            break;
+
+        /*
+         * page_array[] has max_pfn entries, so pfn == max_pfn is already
+         * out of range.  Each pfn in the stream is followed by a page of
+         * data, so a bad pfn cannot be skipped without losing sync with
+         * the stream; treat it as a corrupt image instead.
+         */
+        if (pfn >= max_pfn) {
+            ERROR("pfn 0x%016llx out of range", pfn);
+            goto out;
+        }
+
+        mfn = page_array[pfn];
+
+        if (read_page(xc_handle, io_fd, dom, mfn) < 0)
+            goto out;
+    }
+
+    DPRINTF("Received all pages\n");
+
+    /* Read and uncanonicalise htab, page-at-a-time */
+    {
+        int N;
+        int num_ptes, htab_ptes, htab_pages;
+        unsigned long htab_mfn;
+        uint64_t htab_raddr;
+        xen_pfn_t htab_rpn;
+        union pte *ppte;
+        char *copy;
+
+        htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr);
+        if (htab_ptes == -1) {
+            ERROR("Could not get the shadow list");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, &num_ptes, sizeof(num_ptes))) {
+            ERROR("Error when reading num_ptes");
+            goto out;
+        }
+
+        if (num_ptes != htab_ptes) {
+            ERROR("num_ptes != htab_ptes: %d %d htab_raddr: 0x%016llx",
+                  num_ptes, htab_ptes, htab_raddr);
+            goto out;
+        }
+
+        /* Two pages so that a page-aligned window always fits inside. */
+        htab_buf = malloc(PAGE_SIZE * 2);
+        if (htab_buf == NULL) {
+            ERROR("Could not allocate temp memory");
+            goto out;
+        }
+
+        copy = (char *)(((ulong)htab_buf + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1)));
+
+        htab_mfn = htab_raddr >> PAGE_SHIFT;
+        htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE));
+
+        /* Replace guest pfn with rfn, then copy to htab, by page */
+        for (N = 0; N < htab_pages; N++, htab_mfn++) {
+            /* DECOR marks the frame number as an htab address. */
+            htab_mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                            PROT_READ|PROT_WRITE,
+                                            htab_mfn | DECOR);
+            if (htab_mem == NULL) {
+                ERROR("Cannot map htab_mfn 0x%08lx: %s\n",
+                      htab_mfn, strerror (errno));
+                goto out;
+            }
+
+            if (!read_exact(io_fd, copy, PAGE_SIZE)) {
+                ERROR("Error when reading htab page");
+                goto out;
+            }
+
+            ppte = (union pte *)copy;
+            for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++) {
+                if (ppte->bits.v == 1) {          /* valid htab entry */
+                    htab_rpn = ppte->bits.rpn;    /* guest's pfn */
+
+                    if (htab_rpn >= max_pfn) {
+                        ERROR("htab_rpn: 0x%016llx not found in page_array[]",
+                              htab_rpn);
+                        goto out;
+                    }
+
+                    ppte->bits.rpn = page_array[htab_rpn];  /* guest's rpn */
+
+                } else {                          /* invalid htab entry */
+                    ppte->words.rpn = 0;
+                }
+            }
+
+            memcpy(htab_mem, copy, PAGE_SIZE);
+            munmap(htab_mem, PAGE_SIZE);
+            htab_mem = NULL;
+        }
+
+        free(htab_buf);
+        htab_buf = NULL;
+    }
+
+    /* Read vcpu context and set */
+    if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+        ERROR("Error when reading ctxt");
+        goto out;
+    }
+
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.domain = (domid_t)dom;
+    domctl.u.vcpucontext.vcpu = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
+
+    if (xc_domctl(xc_handle, &domctl) != 0) {
+        ERROR("Couldn't set vcpu context");
+        goto out;
+    }
+
+    /* Read shared info. */
+    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE,
+                                       shared_info_frame);
+    if (shared_info == NULL) {
+        ERROR("cannot map page");
+        goto out;
+    }
+    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
+        ERROR("Error when reading shared_info page");
+        goto out;
+    }
+
+    /* clear any pending events */
+    memset(&(shared_info->evtchn_pending[0]), 0,
+           sizeof (shared_info->evtchn_pending));
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+
+    /* NOTE(review): assumes the start_info page sits three frames below
+     * the shared-info frame -- confirm against the domain builder. */
+    mfn = shared_info_frame - 3;
+
+    munmap(shared_info, PAGE_SIZE);
+    shared_info = NULL;
+
+    /* find pfn of shared_info_frame */
+    for (i = 0; i < max_pfn; i++)
+        if (page_array[i] == shared_info_frame)
+            break;
+    if (i >= max_pfn) {
+        ERROR("Cannot find pfn of shared_info_frame");
+        goto out;
+    }
+    shared_info_pfn = i;
+
+    /* Setup start_info page */
+    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ | PROT_WRITE, mfn);
+    if (start_info == NULL) {
+        ERROR("cannot map start_info page");
+        goto out;
+    }
+
+    /* Both values index page_array[] below and come from restored guest
+     * memory, so range-check them first. */
+    if ((start_info->store_mfn >= max_pfn) ||
+        (start_info->console.domU.mfn >= max_pfn)) {
+        ERROR("store or console pfn in start_info out of range");
+        goto out;
+    }
+
+    start_info->nr_pages = max_pfn;
+    start_info->shared_info = shared_info_pfn << PAGE_SHIFT;
+    start_info->flags = 0;
+    *store_mfn = page_array[start_info->store_mfn];
+    start_info->store_evtchn = store_evtchn;
+    *console_mfn = page_array[start_info->console.domU.mfn];
+    start_info->console.domU.evtchn = console_evtchn;
+    munmap(start_info, PAGE_SIZE);
+    start_info = NULL;
+
+    DPRINTF("Domain ready to be built.\n");
+
+    rc = 0;
+
+ out:
+    /* Release whatever an error path left mapped or allocated. */
+    if (htab_mem != NULL)
+        munmap(htab_mem, PAGE_SIZE);
+    if (shared_info != NULL)
+        munmap(shared_info, PAGE_SIZE);
+    if (start_info != NULL)
+        munmap(start_info, PAGE_SIZE);
+    free(htab_buf);
+
+    if ((rc != 0) && (dom != 0))
+        xc_domain_destroy(xc_handle, dom);
+
+    free (page_array);
+    safe_munlock(&ctxt, sizeof(ctxt));
+
+    DPRINTF("Restore exit with rc=%d\n", rc);
+
+    return rc;
+}
diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_save.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/xc_linux_save.c Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,417 @@
+/******************************************************************************
+ * xc_linux_save.c
+ *
+ * Save the state of a running Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Rewritten for PPC: Dan Poff <poff@xxxxxxxxxx>, Yi Ge <geyi@xxxxxxxxxx>
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <xen/asm/htab.h>
+
+#include "xg_private.h"
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round
loop */
+#define DEF_MAX_FACTOR 3 /* never send more
than 3x nr_pfns */
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+
+#define DECOR 0x80000000 // indicates htab
address
+#define LOG_PTE_SIZE 4
+
+#define INVALID_MFN (~0ULL)
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+static int
+suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+ int dom, xc_dominfo_t *info)
+{
+ int i = 0;
+
+ if (!(*suspend)(dom)) {
+ ERROR("Suspend request failed");
+ return -1;
+ }
+
+retry:
+ if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) {
+ ERROR("Could not get domain info");
+ return -1;
+ }
+
+ if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend)
+ return 0; // success
+
+ if (info->paused) {
+ // try unpausing domain, wait, and retest
+ xc_domain_unpause(xc_handle, dom);
+
+ ERROR("Domain was paused. Wait and re-test.");
+ usleep(10000); // 10ms
+
+ goto retry;
+ }
+
+
+ if(++i < 100) {
+ ERROR("Retry suspend domain.");
+ usleep(10000); // 10ms
+ goto retry;
+ }
+
+ ERROR("Unable to suspend domain.");
+
+ return -1;
+}
+
+/*
+ * Write exactly count bytes from buf to fd.
+ *
+ * write() may transfer fewer bytes than requested (e.g. on pipes and
+ * sockets) or be interrupted by a signal, so keep writing until the whole
+ * buffer is out.  Returns 1 on success, 0 if a write fails.
+ */
+static inline ssize_t
+write_exact(int fd, void *buf, size_t count)
+{
+    const unsigned char *b = buf;
+    size_t done = 0;
+
+    while (done < count) {
+        ssize_t s = write(fd, &b[done], count - done);
+
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        /* Treat a zero-byte write as failure so the loop cannot spin. */
+        if (s <= 0)
+            return 0;
+        done += (size_t)s;
+    }
+
+    return 1;
+}
+
+int
+xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+ xc_dominfo_t info;
+
+ int rc = 1;
+ int debug = 0;
+
+ /* The new domain's shared-info frame number. */
+ unsigned long shared_info_frame;
+
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ xen_pfn_t *page_array = NULL;
+
+ /* Live mapping of shared info structure */
+ shared_info_t *live_shinfo = NULL;
+
+ if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+ ERROR("Could not get domain info");
+ return 1;
+ }
+
+ shared_info_frame = info.shared_info_frame;
+
+ /* Map the shared info frame */
+ live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ, shared_info_frame);
+ if (!live_shinfo) {
+ ERROR("Couldn't map live_shinfo");
+ goto out;
+ }
+
+ max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
+
+ page_array = malloc(max_pfn * sizeof(xen_pfn_t));
+ if (page_array == NULL) {
+ ERROR("Could not allocate memory");
+ goto out;
+ }
+
+ /* This is expected by xm restore. */
+ if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+ ERROR("write: max_pfn");
+ goto out;
+ }
+
+ /* xc_linux_restore starts to read here. */
+ /* Write a version number. This can avoid searching for a stupid bug
+ if the format change.
+ The version is hard-coded, don't forget to change the restore code
+ too! */
+ {
+ unsigned long version = 1;
+
+ if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
+ ERROR("write: version");
+ goto out;
+ }
+ }
+
+ /* This is a non-live suspend. Issue the call back to get the
+ domain suspended */
+
+ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+ ERROR("Domain appears not to have suspended");
+ goto out;
+ }
+
+
+ {
+ char *mem;
+ xen_pfn_t pfn;
+ unsigned int total_sent = 0;
+
+ if (xc_get_pfn_list(xc_handle, dom,
+ page_array, max_pfn) != max_pfn) {
+ ERROR("Could not get the page frame list");
+ goto out;
+ }
+
+ /* Start writing out the saved-domain record. */
+ for (pfn = 0; pfn < max_pfn; pfn++){
+ if (page_array[pfn] == INVALID_MFN)
+ continue;
+
+ if (debug)
+ fprintf(stderr, "xc_linux_save: page %llx (%llu/%lu)\n",
+ page_array[pfn], pfn, max_pfn);
+
+ mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, page_array[pfn]);
+ if (mem == NULL) {
+ ERROR("cannot map page %llx: %s",
+ page_array[pfn], strerror (errno));
+ goto out;
+ }
+
+ if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+ ERROR("Error when writing to state file (4)");
+ goto out;
+ }
+
+ if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
+ ERROR("Error when writing to state file (5)");
+ goto out;
+ }
+ munmap(mem, PAGE_SIZE);
+ total_sent++;
+ }
+ }
+
+ DPRINTF("All memory is saved\n");
+
+ /* terminate memory dump */
+ {
+ xen_pfn_t pfn = INVALID_MFN;
+ if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+ ERROR("Error when writing to state file (6)");
+ goto out;
+ }
+ }
+
+ /* Canonicalize htab and save */
+ {
+ int i, k, n;
+ int N, total_sent = 0;
+ int htab_ptes, htab_pages, n_chunks;
+ unsigned long htab_mfn;
+ uint64_t htab_raddr;
+ xen_pfn_t htab_rpn, pfn = 0;
+ union pte *ppte;
+ char *mem, *temp, *copy;
+ unsigned long long sizes;
+ struct chunk_array {xen_pfn_t mfn; unsigned long long size;} *p_chunk;
+
+ n_chunks = 1;
+ for (k = 0; k < max_pfn - 1; k++){ // find number of chunks
+ if (page_array[k] + 1 != page_array[k+1]){
+ n_chunks += 1;
+ }
+ }
+
+ p_chunk = malloc(n_chunks * sizeof(struct chunk_array));
+ if (p_chunk == NULL) {
+ ERROR("Could not allocate memory for chunk_array");
+ goto out;
+ }
+
+ k = 0; n = 0;
+ p_chunk[n].mfn = page_array[k];
+ p_chunk[n].size = 1;
+
+ for (k = 0; k < max_pfn - 1; k++){ // record mfn for start of each
chunk, size
+ if (page_array[k] + 1 != page_array[k+1]){
+ p_chunk[n+1].mfn = page_array[k+1];
+ p_chunk[n+1].size = 1;
+ n += 1;
+ } else {
+ p_chunk[n].size += 1;
+ }
+ }
+#if 0
+ DPRINTF("n_chunks: %d\n", n_chunks);
+ for (i = 0; i < n_chunks; i++){
+ DPRINTF("0x%016llx 0x%016llx\n", p_chunk[i].mfn, p_chunk[i].size);
+ }
+#endif
+ htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr);
+ if (htab_ptes == -1){
+ ERROR("Could not get the shadow list");
+ goto out;
+ }
+
+ temp = malloc(PAGE_SIZE * 2);
+ if (temp == NULL){
+ ERROR("Could not allocate temp memory");
+ goto out;
+ }
+
+ copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1)));
+
+ htab_mfn = htab_raddr >> PAGE_SHIFT;
+ htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE));
+ // DPRINTF("htab_pages: 0x%08lx htab_addr: %llx htab_mfn %lx\n",
+ // htab_pages,htab_raddr, htab_mfn);
+
+ if (!write_exact(io_fd, &htab_ptes, sizeof(htab_ptes))) {
+ ERROR("Error when writing to state file (6)");
+ goto out;
+ }
+
+ /* Replace rpn with guest pfn, then write out htab, by page */
+ for (N = 0; N < htab_pages; N++, htab_mfn++) {
+ mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, htab_mfn | DECOR);
+ if (mem == NULL){
+ ERROR("Cannot map htab_mfn 0x%08lx: %s\n",
+ htab_mfn, strerror (errno));
+ goto out;
+ }
+
+ memcpy(copy, mem, PAGE_SIZE);
+
+ /* Improved search of page_array[] for htab_rpn - by chunks*/
+ ppte = (union pte *)copy;
+ for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){
+ if (ppte->bits.v == 1){ //
valid htab entry
+ sizes = 0;
+ htab_rpn = ppte->bits.rpn;
+ for (n = 0; n < n_chunks; n++){ // search by memory
chunk
+ if ((htab_rpn >= p_chunk[n].mfn) &&
+ (htab_rpn < (p_chunk[n].mfn) + p_chunk[n].size)){
+ pfn = (htab_rpn - p_chunk[n].mfn) + sizes;
+ break;
+ } else {
+ sizes += p_chunk[n].size;
+ }
+ }
+
+ if (n >= n_chunks){
+ ERROR("htab_rpn: 0x%016llx not found in page_array[]",
+ htab_rpn);
+ goto out;
+ }
+
+ if (pfn >= max_pfn){
+ ERROR("pfn >= max_pfn: 0x%08llx 0x%08lx", pfn,
max_pfn);
+ goto out;
+ }
+
+//*** validation
+ for (k = 0; k < max_pfn; k++){ // linear search
+ if (htab_rpn == page_array[k])
+ break;
+ }
+
+ if (k != pfn){
+ ERROR("k != pfn: 0x%08x 0x%08llx", k, pfn);
+ ERROR("htab_rpn: 0x%016llx", htab_rpn);
+ goto out;
+ }
+
+ if (k >= max_pfn){
+ ERROR("htab_rpn: 0x%016llx not found in page_array[]
%d",
+ htab_rpn, i);
+ goto out;
+ }
+//***
+
+ ppte->bits.rpn = pfn;
+ } else { // invalid htab entry
+ ppte->words.rpn = 0;
+ }
+ }
+
+ if (write(io_fd, copy, PAGE_SIZE) != PAGE_SIZE) {
+ ERROR("Error when writing to state file (7)");
+ goto out;
+ }
+
+ munmap(mem, PAGE_SIZE);
+ total_sent++;
+ }
+
+ free(temp);
+ free(p_chunk);
+ }
+
+ /* save vcpu context only for vcpu 0; */
+ /* linux already suspended other vcpus via smp_suspend() */
+ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+ ERROR("Could not get vcpu context");
+ goto out;
+ }
+
+ if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
+ ERROR("Error when writing to state file (1)");
+ goto out;
+ }
+
+ if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
+ ERROR("Error when writing to state file (1)");
+ goto out;
+ }
+
+ /* Success! */
+ rc = 0;
+
+#if 0
+ DPRINTF("Domain ready to be built.\n");
+
+ domctl.cmd = XEN_DOMCTL_setvcpucontext;
+ domctl.domain = (domid_t)dom;
+ domctl.u.vcpucontext.vcpu = 0;
+ set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
+ rc = xc_domctl(xc_handle, &domctl);
+
+ if (rc != 0) {
+ ERROR("Couldn't build the domain");
+ goto out;
+ }
+#endif
+
+ out:
+
+ free(page_array);
+ if (live_shinfo)
+ munmap(live_shinfo, PAGE_SIZE);
+
+ DPRINTF("Save exit rc=%d\n",rc);
+
+ return !!rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 7669fca80bfc tools/libxc/xen/asm/htab.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xen/asm/htab.h Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,134 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2005
+ *
+ * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ */
+
+#ifndef _ASM_HTAB_H_
+#define _ASM_HTAB_H_
+
+
+/***** general PowerPC architecture limits ******/
+
+/* log2 of the minimum HTAB size: 2^18 = 256KB, from PowerPC Architecture specification */
+#define HTAB_MIN_LOG_SIZE 18
+/* log2 sizes: 2^3 = 8 PTEs per PTEG, 2^4 = 16 bytes per PTE, 2^7 = 128 bytes per PTEG */
+#define LOG_NUM_PTES_IN_PTEG 3
+#define NUM_PTES_IN_PTEG (1 << LOG_NUM_PTES_IN_PTEG)
+#define LOG_PTE_SIZE 4
+#define LOG_PTEG_SIZE (LOG_NUM_PTES_IN_PTEG + LOG_PTE_SIZE)
+#define LOG_HTAB_HASH (LOG_HTAB_SIZE - LOG_PTEG_SIZE) /* NOTE(review): LOG_HTAB_SIZE is not defined in this header */
+
+/* real page number shift to create the rpn field of the pte */
+#define RPN_SHIFT 12
+
+/* page protection bits in pp1 (name format: MSR:PR=0 | MSR:PR=1) */
+#define PP_RWxx 0x0UL
+#define PP_RWRW 0x2UL
+#define PP_RWRx 0x4UL
+#define PP_RxRx 0x6UL
+
+/***** 64-bit PowerPC architecture limits ******/
+
+#define SDR1_HTABORG_MASK 0xfffffffffff80000ULL
+#define SDR1_HTABSIZE_MASK 0x1fUL
+#define SDR1_HTABSIZE_MAX 46
+#define SDR1_HTABSIZE_BASEBITS 11
+
+/* used to turn a vsid into a number usable in the hash function */
+#define VSID_HASH_MASK 0x0000007fffffffffUL
+
+/* used to turn a vaddr into an api for a pte */
+#define VADDR_TO_API(vaddr) (((vaddr) & API_MASK) >> API_SHIFT)
+#define API_VEC 0x1fUL
+#define API_SHIFT 23
+#define API_MASK (API_VEC << API_SHIFT)
+
+/***** hypervisor internals ******/
+
+/* log2 of the maximum HTAB size: 2^26 = 64M; reasonable hypervisor limit? */
+#define HTAB_MAX_LOG_SIZE 26
+/* HTAB origin: a domain's SDR1 with everything but the HTABORG bits masked off */
+#define GET_HTAB(domain) ((domain)->arch.htab.sdr1 & SDR1_HTABORG_MASK)
+
+union pte { /* one 16-byte HTAB entry, viewable as two raw words or as bit fields */
+ struct pte_words {
+ uint64_t vsid;
+ uint64_t rpn;
+ } words; /* raw view: two 64-bit words */
+ struct pte_bits {
+ /* *INDENT-OFF* */
+ /* high word: widths below total 64 bits; presumably overlays words.vsid (layout is ABI dependent -- TODO confirm) */
+ uint64_t avpn: 57; /* [0-56] abbreviated virtual page number */
+ uint64_t lock: 1; /* [57] hypervisor lock bit */
+ uint64_t res: 1; /* [58] reserved for hypervisor */
+ uint64_t bolted: 1; /* [59] XXX software-reserved; temp hack */
+ uint64_t sw: 1; /* [60] reserved for software */
+ uint64_t l: 1; /* [61] Large Page */
+ uint64_t h: 1; /* [62] hash function id */
+ uint64_t v: 1; /* [63] valid */
+
+ /* low word: widths below total 64 bits; presumably overlays words.rpn (layout is ABI dependent -- TODO confirm) */
+ uint64_t pp0: 1; /* [0] page protection bit 0 (current PowerPC
+ * specification says it can always be 0) */
+ uint64_t ts: 1; /* [1] tag select */
+ uint64_t rpn: 50; /* [2-51] real page number */
+ uint64_t res2: 2; /* [52,53] reserved */
+ uint64_t ac: 1; /* [54] address compare */
+ uint64_t r: 1; /* [55] referenced */
+ uint64_t c: 1; /* [56] changed */
+ uint64_t w: 1; /* [57] write through */
+ uint64_t i: 1; /* [58] cache inhibited */
+ uint64_t m: 1; /* [59] memory coherent */
+ uint64_t g: 1; /* [60] guarded */
+ uint64_t n: 1; /* [61] no-execute */
+ uint64_t pp1: 2; /* [62,63] page protection bits 1:2 */
+ /* *INDENT-ON* */
+ } bits; /* decoded view: named bit fields */
+};
+
+union ptel { /* a single 64-bit PTE word; fields and widths match the low word of union pte's pte_bits */
+ uint64_t word; /* raw view */
+ struct ptel_bits {
+ /* *INDENT-OFF* */
+
+ uint64_t pp0: 1; /* [0] page protection bit 0 (current PPC
+ * AS says it can always be 0) */
+ uint64_t ts: 1; /* [1] tag select */
+ uint64_t rpn: 50; /* [2-51] real page number */
+ uint64_t res2: 2; /* [52,53] reserved */
+ uint64_t ac: 1; /* [54] address compare */
+ uint64_t r: 1; /* [55] referenced */
+ uint64_t c: 1; /* [56] changed */
+ uint64_t w: 1; /* [57] write through */
+ uint64_t i: 1; /* [58] cache inhibited */
+ uint64_t m: 1; /* [59] memory coherent */
+ uint64_t g: 1; /* [60] guarded */
+ uint64_t n: 1; /* [61] no-execute */
+ uint64_t pp1: 2; /* [62,63] page protection bits 1:2 */
+ /* *INDENT-ON* */
+ } bits; /* decoded view: named bit fields */
+};
+
+struct domain_htab { /* hashed page table (HTAB) bookkeeping; GET_HTAB() reaches it as (domain)->arch.htab */
+ uint64_t sdr1; /* SDR1 value; GET_HTAB() masks it with SDR1_HTABORG_MASK */
+ uint log_num_ptes; /* log number of PTEs in HTAB. */
+ uint order; /* order for freeing. */
+ union pte *map; /* access the htab like an array */
+ uint64_t *shadow; /* idx -> logical translation array */
+};
+#endif
Linux diffs:
diff -r c8d1f32fd7de arch/powerpc/platforms/xen/hcall.c
--- a/arch/powerpc/platforms/xen/hcall.c Wed Nov 22 14:51:54 2006 -0500
+++ b/arch/powerpc/platforms/xen/hcall.c Wed Dec 13 15:54:20 2006 -0500
@@ -256,6 +256,7 @@ static int xenppc_privcmd_domctl(privcmd
case XEN_DOMCTL_pausedomain:
case XEN_DOMCTL_unpausedomain:
case XEN_DOMCTL_getdomaininfo:
+ case XEN_DOMCTL_getshadowlist:
break;
case XEN_DOMCTL_getmemlist:
ret = xencomm_create(
diff -r c8d1f32fd7de arch/powerpc/platforms/xen/reboot.c
--- a/arch/powerpc/platforms/xen/reboot.c Wed Nov 22 14:51:54 2006 -0500
+++ b/arch/powerpc/platforms/xen/reboot.c Wed Dec 13 15:54:20 2006 -0500
@@ -1,10 +1,20 @@
#include <linux/module.h>
+#include <linux/kernel.h>
#include <xen/interface/xen.h>
#include <xen/interface/io/console.h>
#include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+#include <xen/gnttab.h>
+#include <xen/evtchn.h>
#include <asm/hypervisor.h>
#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#define SHUTDOWN_INVALID -1
+
+extern int shutting_down ;
+
static void domain_machine_restart(char * __unused)
{
/* We really want to get pending console data out before we die. */
@@ -31,3 +41,86 @@ void xen_reboot_init(struct machdep_call
ppc_md.halt = domain_machine_power_off;
}
}
+
+/* Move current onto init_mm and drop its previous active_mm reference. */
+static void switch_idle_mm(void)
+{
+	struct mm_struct *prev_mm = current->active_mm;
+
+	if (prev_mm != &init_mm) {
+		atomic_inc(&init_mm.mm_count);
+		switch_mm(prev_mm, &init_mm, current);
+		current->active_mm = &init_mm;
+		mmdrop(prev_mm);
+	}
+}
+
+/*
+ * Quiesce the guest, issue the suspend hypercall and, once it returns after
+ * a restore, bring everything back up.  Must run on CPU 0 outside interrupt
+ * context; @ignore is unused.  Returns 0, or the error that aborted the suspend.
+ */
+int ppc_do_suspend(void *ignore)
+{
+	int err;
+	enum system_states temp_state;
+
+	BUG_ON(smp_processor_id() != 0);
+	BUG_ON(in_interrupt());
+
+#ifndef CONFIG_PPC_XEN	/* NOTE(review): compiled out whenever CONFIG_PPC_XEN is set */
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		printk(KERN_WARNING "Cannot suspend in "
+		       "auto_translated_physmap mode.\n");
+		return -EOPNOTSUPP;
+	}
+#endif
+	err = smp_suspend();
+	if (err)
+		return err;
+
+	xenbus_suspend();
+	preempt_disable();
+	local_irq_disable();
+
+	temp_state = system_state;
+	system_state = SYSTEM_SUSPEND_DISK;
+	preempt_enable();
+
+	gnttab_suspend();
+	/* Park shared info on the zero page while the domain is suspended. */
+	HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+
+	/* Record the store and console frames as pfns rather than mfns. */
+	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+	xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn);
+
+	/*
+	 * We'll stop somewhere inside this hypercall. When it returns,
+	 * we'll start resuming after the restore.
+	 */
+	HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+	shutting_down = SHUTDOWN_INVALID;
+
+	/* Re-attach the real shared info page and scrub the stand-in. */
+	HYPERVISOR_shared_info =
+		(shared_info_t *)__va(xen_start_info->shared_info);
+	memset(empty_zero_page, 0, PAGE_SIZE);
+
+	gnttab_resume();
+	irq_resume();
+	switch_idle_mm();
+
+	system_state = temp_state;
+	/* update time base */
+	per_cpu(last_jiffy, smp_processor_id()) = get_tbl();
+	local_irq_enable();
+
+	xencons_resume();
+	xenbus_resume();
+	smp_resume();
+
+	return err;
+}
+
diff -r c8d1f32fd7de drivers/xen/core/reboot.c
--- a/drivers/xen/core/reboot.c Wed Nov 22 14:51:54 2006 -0500
+++ b/drivers/xen/core/reboot.c Wed Dec 13 15:54:20 2006 -0500
@@ -7,16 +7,16 @@
#include <linux/reboot.h>
#include <linux/sysrq.h>
#include <linux/stringify.h>
-#include <asm/irq.h>
-#include <asm/mmu_context.h>
-#include <xen/evtchn.h>
-#include <asm/hypervisor.h>
-#include <xen/xenbus.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
+#include <xen/evtchn.h>
+#include <xen/xenbus.h>
#include <xen/gnttab.h>
#include <xen/xencons.h>
#include <xen/cpu_hotplug.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/hypervisor.h>
#if defined(__i386__) || defined(__x86_64__)
/*
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(machine_power_off);
*/
/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
+int shutting_down = SHUTDOWN_INVALID;
static void __shutdown_handler(void *unused);
static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
@@ -192,13 +192,14 @@ static int __do_suspend(void *ignore)
return err;
}
-#else /* CONFIG_PPC_XEN */
+
+#else
+int ppc_do_suspend(void *ignore);
static int __do_suspend(void *ignore)
{
- printk("SUSPEND!!??\n");
- return 0;
-}
-#endif /* CONFIG_PPC_XEN */
+ return ppc_do_suspend(ignore);
+}
+#endif
static int shutdown_process(void *__unused)
{
diff -r c8d1f32fd7de include/asm-powerpc/xen/asm/hypercall.h
--- a/include/asm-powerpc/xen/asm/hypercall.h Wed Nov 22 14:51:54 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypercall.h Wed Dec 13 15:54:20 2006 -0500
@@ -60,6 +60,16 @@ static inline int HYPERVISOR_shutdown(un
return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
}
+
+/* Issue SCHEDOP_shutdown with reason SHUTDOWN_suspend; srec is unused. */
+static inline int HYPERVISOR_suspend(unsigned long srec)
+{
+	struct sched_shutdown suspend_req = {
+		.reason = SHUTDOWN_suspend,
+	};
+	return HYPERVISOR_sched_op(SCHEDOP_shutdown, &suspend_req);
+}
+
static inline int HYPERVISOR_set_timer_op(unsigned long arg)
{
return plpar_hcall_norets(XEN_MARK(__HYPERVISOR_set_timer_op), arg);
diff -r c8d1f32fd7de include/xen/interface/arch-powerpc.h
--- a/include/xen/interface/arch-powerpc.h Wed Nov 22 14:51:54 2006 -0500
+++ b/include/xen/interface/arch-powerpc.h Wed Dec 13 15:54:20 2006 -0500
@@ -29,7 +29,6 @@
#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name
#define set_xen_guest_handle(hnd, val) \
do { \
if (sizeof ((hnd).__pad)) \
@@ -42,9 +41,6 @@
#endif
#ifndef __ASSEMBLY__
-
-typedef uint64_t uint64_aligned_t;
-
/* Guest handles for primitive C types. */
__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
@@ -98,9 +94,65 @@ typedef struct cpu_user_regs cpu_user_re
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
+#define NUM_SLB_ENTRIES 64 /* length of vcpu_guest_context.slb_entries[] */
+struct slb_entry { /* one entry of the slb_entries[] array: two 64-bit words */
+ uint64_t slb_vsid;
+ uint64_t slb_esid;
+};
+typedef struct slb_entry slb_entry_t;
+/* HAS_VMX and HAS_FLOAT default to 1 unless the includer defined them first. */
+#ifndef HAS_VMX
+#define HAS_VMX 1
+#endif
+
+#ifndef HAS_FLOAT
+#define HAS_FLOAT 1
+#endif
+/* NOTE(review): HAS_VMX is always defined by this point, so the #ifdef below is always taken. */
+#ifdef HAS_VMX
+typedef struct {
+ uint32_t u[4]; /* 4 x 32 bits = 128 bits */
+} __attribute__((aligned(16))) _vector128;
+#endif /* HAS_VMX */
+
+
/* ONLY used to communicate with dom0! See also struct exec_domain. */
struct vcpu_guest_context {
cpu_user_regs_t user_regs; /* User-level CPU registers */
+ slb_entry_t slb_entries[NUM_SLB_ENTRIES]; /* Segment Lookaside Buffer */
+
+ /* Special-Purpose Registers */
+ uint64_t sprg[4];
+ uint64_t timebase;
+ uint64_t dar;
+ uint64_t dsisr;
+
+ struct cpu_vcpu_tag {
+ uint64_t hid4;
+ } cpu; /* CPU-specific bits */
+
+ uint32_t dec; /* presumably the decrementer -- TODO confirm */
+
+ /* XXX etc */
+#ifdef HAS_FLOAT /* defaulted to 1 earlier in this header */
+#define NUM_FPRS 32
+ double fprs[NUM_FPRS];
+#endif
+#ifdef HAS_VMX /* defaulted to 1 earlier in this header */
+ _vector128 vrs[32];
+ _vector128 vscr;
+ uint32_t vrsave;
+#endif
+
+#if 0 /* NOTE(review): disabled fields, never compiled */
+ struct xencomm *xencomm;
+
+ /* I/O-port access bitmap. */
+ u8 *iobmp; /* Guest kernel virtual address of the bitmap. */
+ int iobmp_limit; /* Number of ports represented in the bitmap. */
+ int iopl; /* Current IOPL for this VCPU. */
+#endif
+
uint64_t sdr1; /* Pagetable base */
/* XXX etc */
};
diff -r c8d1f32fd7de include/xen/interface/domctl.h
--- a/include/xen/interface/domctl.h Wed Nov 22 14:51:54 2006 -0500
+++ b/include/xen/interface/domctl.h Wed Dec 13 15:54:20 2006 -0500
@@ -354,6 +354,17 @@ struct xen_domctl_real_mode_area {
};
typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+#define XEN_DOMCTL_getshadowlist 29
+struct xen_domctl_getshadowlist {
+ /* OUT variables. */
+ /* Start of htab array (libxc stores this in htab_raddr and shifts it by PAGE_SHIFT to get the first htab mfn) */
+ uint64_t htab_map;
+ /* Number of ptes within htab */
+ uint htab_num_ptes; /* NOTE(review): 'uint' is not a fixed-width type; consider uint32_t */
+};
+typedef struct xen_domctl_getshadowlist xen_domctl_getshadowlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t);
struct xen_domctl {
uint32_t cmd;
@@ -381,6 +392,7 @@ struct xen_domctl {
struct xen_domctl_arch_setup arch_setup;
struct xen_domctl_settimeoffset settimeoffset;
struct xen_domctl_real_mode_area real_mode_area;
+ struct xen_domctl_getshadowlist getshadowlist;
uint8_t pad[128];
} u;
};
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel
|