# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1176459266 -3600
# Node ID f92a79e39da872c5632a8490ebb97e2e1fcbca28
# Parent 5bda20f0723daea6c4390eaa77f7860ec0cd67a7
# Parent fdbbc6aa2cbf230fbe0341a04d78dc1d55fb3244
Merge with xenppc-unstable-merge.hg
---
tools/libxc/xc_hvm_save.c | 755 ----
tools/libxc/xc_linux_save.c | 1414 --------
README | 4
docs/xen-api/xenapi-datamodel.tex | 173 +
linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c | 2
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 4
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 3
linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c | 36
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S | 6
linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S | 63
linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 3
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h | 2
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 2
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 1
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 2
tools/examples/network-bridge | 19
tools/examples/xmexample.hvm | 6
tools/examples/xmexample.vti | 5
tools/ioemu/hw/pc.c | 12
tools/ioemu/monitor.c | 2
tools/ioemu/vl.c | 10
tools/ioemu/vl.h | 5
tools/ioemu/xenstore.c | 220 -
tools/libfsimage/fat/fat.h | 14
tools/libxc/Makefile | 4
tools/libxc/ia64/xc_ia64_linux_save.c | 6
tools/libxc/xc_domain.c | 39
tools/libxc/xc_domain_restore.c | 41
tools/libxc/xc_domain_save.c | 1587 ++++++++++
tools/libxc/xc_hvm_build.c | 41
tools/libxc/xc_resume.c | 65
tools/libxc/xenctrl.h | 3
tools/libxc/xenguest.h | 24
tools/libxc/xg_private.c | 23
tools/libxen/include/xen_host_cpu.h | 8
tools/libxen/include/xen_vm.h | 22
tools/libxen/include/xen_vm_metrics.h | 9
tools/libxen/src/xen_host_cpu.c | 21
tools/libxen/src/xen_vm.c | 50
tools/libxen/src/xen_vm_metrics.c | 21
tools/pygrub/src/LiloConf.py | 147
tools/pygrub/src/pygrub | 32
tools/python/README.XendConfig | 1
tools/python/README.sxpcfg | 1
tools/python/xen/xend/XendConfig.py | 2
tools/python/xen/xend/XendDomainInfo.py | 1
tools/python/xen/xend/XendNode.py | 2
tools/python/xen/xend/image.py | 95
tools/python/xen/xm/create.dtd | 2
tools/python/xen/xm/create.py | 4
tools/python/xen/xm/main.py | 75
tools/python/xen/xm/xenapi_create.py | 30
tools/xcutils/xc_save.c | 9
tools/xm-test/lib/XmTestLib/NetConfig.py | 6
unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h | 5
unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 34
xen/arch/ia64/xen/hyperprivop.S | 14
xen/arch/ia64/xen/mm.c | 2
xen/arch/x86/hvm/hvm.c | 76
xen/arch/x86/hvm/svm/svm.c | 19
xen/arch/x86/hvm/vmx/vmx.c | 21
xen/include/public/hvm/save.h | 35
62 files changed, 2718 insertions(+), 2622 deletions(-)
diff -r 5bda20f0723d -r f92a79e39da8 README
--- a/README Thu Apr 12 16:37:32 2007 -0500
+++ b/README Fri Apr 13 11:14:26 2007 +0100
@@ -199,3 +199,7 @@ Xend (the Xen daemon) has the following
* For optional PAM support, PyPAM:
URL: http://www.pangalactic.org/PyPAM/
Debian: python-pam
+
+ * For optional XenAPI support in XM, PyXML:
+ URL: http://pyxml.sourceforge.net
+ YUM: PyXML
diff -r 5bda20f0723d -r f92a79e39da8 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Thu Apr 12 16:37:32 2007 -0500
+++ b/docs/xen-api/xenapi-datamodel.tex Fri Apr 13 11:14:26 2007 +0100
@@ -1558,6 +1558,111 @@ void
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
+\subsubsection{RPC name:~add\_to\_VCPUs\_params\_live}
+
+{\bf Overview:}
+Add the given key-value pair to VM.VCPUs\_params, and apply that value on
+the running VM.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} void add_to_VCPUs_params_live (session_id s, VM ref self, string key, string value)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & The VM \\ \hline
+
+{\tt string } & key & The key \\ \hline
+
+{\tt string } & value & The value \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_memory\_dynamic\_max\_live}
+
+{\bf Overview:}
+Set memory\_dynamic\_max in database and on running VM.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} void set_memory_dynamic_max_live (session_id s, VM ref self, int max)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & The VM \\ \hline
+
+{\tt int } & max & The memory\_dynamic\_max value \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_memory\_dynamic\_min\_live}
+
+{\bf Overview:}
+Set memory\_dynamic\_min in database and on running VM.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} void set_memory_dynamic_min_live (session_id s, VM ref self, int min)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & The VM \\ \hline
+
+{\tt int } & min & The memory\_dynamic\_min value \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
\subsubsection{RPC name:~send\_sysrq}
{\bf Overview:}
@@ -4184,6 +4289,7 @@ Quals & Field & Type & Description \\
$\mathit{RO}_\mathit{run}$ & {\tt VCPUs/utilisation} & (int $\rightarrow$ float) Map & Utilisation for all of guest's current VCPUs \\
$\mathit{RO}_\mathit{run}$ & {\tt VCPUs/CPU} & (int $\rightarrow$ int) Map & VCPU to PCPU map \\
$\mathit{RO}_\mathit{run}$ & {\tt VCPUs/params} & (string $\rightarrow$ string) Map & The live equivalent to VM.VCPUs\_params \\
+$\mathit{RO}_\mathit{run}$ & {\tt state} & string Set & The state of the guest, eg blocked, dying etc \\
$\mathit{RO}_\mathit{run}$ & {\tt start\_time} & datetime & Time at which this VM was last booted \\
$\mathit{RO}_\mathit{run}$ & {\tt last\_updated} & datetime & Time at which this information was last updated \\
\hline
@@ -4395,6 +4501,38 @@ Get the VCPUs/params field of the given
\noindent {\bf Return Type:}
{\tt
(string $\rightarrow$ string) Map
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_state}
+
+{\bf Overview:}
+Get the state field of the given VM\_metrics.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (string Set) get_state (session_id s, VM_metrics ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM\_metrics ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string Set
}
@@ -6601,7 +6739,8 @@ Quals & Field & Type & Description \\
$\mathit{RO}_\mathit{run}$ & {\tt speed} & int & the speed of the physical CPU \\
$\mathit{RO}_\mathit{run}$ & {\tt modelname} & string & the model name of the physical CPU \\
$\mathit{RO}_\mathit{run}$ & {\tt stepping} & string & the stepping of the physical CPU \\
-$\mathit{RO}_\mathit{run}$ & {\tt flags} & string & the flags of the physical CPU \\
+$\mathit{RO}_\mathit{run}$ & {\tt flags} & string & the flags of the physical CPU (a decoded version of the features field) \\
+$\mathit{RO}_\mathit{run}$ & {\tt features} & string & the physical CPU feature bitmap \\
$\mathit{RO}_\mathit{run}$ & {\tt utilisation} & float & the current CPU utilisation \\
\hline
\end{longtable}
@@ -6858,6 +6997,38 @@ Get the flags field of the given host\_c
\noindent {\bf Signature:}
\begin{verbatim} string get_flags (session_id s, host_cpu ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt host\_cpu ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_features}
+
+{\bf Overview:}
+Get the features field of the given host\_cpu.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_features (session_id s, host_cpu ref self)\end{verbatim}
\noindent{\bf Arguments:}
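
[Editor's note: the RPCs documented above are exercised from the C bindings under tools/libxen (xen_vm.c and xen_vm.h appear in the diffstat, but their hunks are not shown here). A minimal client-side sketch, assuming the new calls follow the existing xen_vm_add_to_* / xen_vm_set_* naming convention of libxen; the function names, key/value pair, and memory size below are illustrative, not taken from this patch.

#include <stdio.h>
#include <stdint.h>
#include "xen_vm.h"   /* tools/libxen header; exact prototypes not shown in this hunk */

/* Hypothetical helper: push a scheduler parameter and a new dynamic-max
 * to a running VM via the calls documented above. */
static void tweak_running_vm(xen_session *session, xen_vm vm)
{
    /* add_to_VCPUs_params_live(key, value): updates VCPUs_params and the live VM. */
    if (!xen_vm_add_to_vcpus_params_live(session, vm, "cap", "50"))
        fprintf(stderr, "add_to_VCPUs_params_live failed\n");

    /* set_memory_dynamic_max_live(max): database and running VM, value in bytes. */
    if (!xen_vm_set_memory_dynamic_max_live(session, vm, (int64_t)512 << 20))
        fprintf(stderr, "set_memory_dynamic_max_live failed\n");
}
]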
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Fri Apr 13 11:14:26 2007 +0100
@@ -290,5 +290,7 @@ void foo(void)
DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
+ DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
+ DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
#endif /* CONFIG_XEN */
}
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Apr 13 11:14:26 2007 +0100
@@ -594,6 +594,10 @@ setup_arch (char **cmdline_p)
/* enable IA-64 Machine Check Abort Handling unless disabled */
+#ifdef CONFIG_XEN
+ if (is_running_on_xen() && !is_initial_xendomain())
+ nomca = 1;
+#endif
if (!nomca)
ia64_mca_init();
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Fri Apr 13 11:14:26 2007 +0100
@@ -852,6 +852,9 @@ time_resume(void)
/* Just trigger a tick. */
ia64_cpu_local_tick();
+
+ /* Time interpolator remembers the last timer status. Forget it */
+ time_interpolator_reset();
}
///////////////////////////////////////////////////////////////////////////
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c Fri Apr 13 11:14:26 2007 +0100
@@ -418,3 +418,39 @@ xencomm_mini_hypercall_perfmon_op(unsign
return xencomm_arch_hypercall_perfmon_op(cmd, desc, count);
}
EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op);
+
+int
+xencomm_mini_hypercall_sched_op(int cmd, void *arg)
+{
+ int rc, nbr_area = 2;
+ struct xencomm_mini xc_area[2];
+ struct xencomm_handle *desc;
+ unsigned int argsize;
+
+ switch (cmd) {
+ case SCHEDOP_yield:
+ case SCHEDOP_block:
+ argsize = 0;
+ break;
+ case SCHEDOP_shutdown:
+ argsize = sizeof(sched_shutdown_t);
+ break;
+ case SCHEDOP_poll:
+ argsize = sizeof(sched_poll_t);
+ break;
+ case SCHEDOP_remote_shutdown:
+ argsize = sizeof(sched_remote_shutdown_t);
+ break;
+
+ default:
+ printk("%s: unknown sched op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_sched_op(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op);
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Fri Apr 13 11:14:26 2007 +0100
@@ -614,6 +614,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
#ifdef CONFIG_XEN
;;
// r16-r31 all now hold bank1 values
+ mov r15=ar.unat
movl r2=XSI_BANK1_R16
movl r3=XSI_BANK1_R16+8
;;
@@ -641,6 +642,11 @@ GLOBAL_ENTRY(ia64_leave_kernel)
.mem.offset 0,0; st8.spill [r2]=r30,16
.mem.offset 8,0; st8.spill [r3]=r31,16
;;
+ mov r3=ar.unat
+ movl r2=XSI_B1NAT
+ ;;
+ st8 [r2]=r3
+ mov ar.unat=r15
movl r2=XSI_BANKNUM;;
st4 [r2]=r0;
#else
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Fri Apr 13 11:14:26 2007 +0100
@@ -2013,33 +2013,6 @@ END(ia32_interrupt)
DBG_FAULT(66)
FAULT(66)
-#ifdef CONFIG_XEN
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
-
-GLOBAL_ENTRY(xen_bsw1)
- /* FIXME: THIS CODE IS NOT NaT SAFE! */
- movl r30=XSI_BANKNUM;
- mov r31=1;;
- st4 [r30]=r31;
- movl r30=XSI_BANK1_R16;
- movl r31=XSI_BANK1_R16+8;;
- ld8 r16=[r30],16; ld8 r17=[r31],16;;
- ld8 r18=[r30],16; ld8 r19=[r31],16;;
- ld8 r20=[r30],16; ld8 r21=[r31],16;;
- ld8 r22=[r30],16; ld8 r23=[r31],16;;
- ld8 r24=[r30],16; ld8 r25=[r31],16;;
- ld8 r26=[r30],16; ld8 r27=[r31],16;;
- ld8 r28=[r30],16; ld8 r29=[r31],16;;
- ld8 r30=[r30]; ld8 r31=[r31];;
- br.ret.sptk.many b0
-END(xen_bsw1)
-#endif
-
.org ia64_ivt+0x7f00
/////////////////////////////////////////////////////////////////////////////////////////
// 0x7f00 Entry 67 (size 16 bundles) Reserved
@@ -2167,4 +2140,38 @@ 1:
(p6) br.spnt.few 1b // call evtchn_do_upcall again.
br.sptk.many ia64_leave_kernel
END(xen_event_callback)
-#endif
+
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply moved the following code to a more
+ * suitable spot...
+ */
+
+GLOBAL_ENTRY(xen_bsw1)
+ /* FIXME: THIS CODE IS NOT NaT SAFE! */
+ mov r14=ar.unat
+ movl r30=XSI_B1NAT
+ ;;
+ ld8 r30=[r30];;
+ mov ar.unat=r30
+ movl r30=XSI_BANKNUM;
+ mov r31=1;;
+ st4 [r30]=r31;
+ movl r30=XSI_BANK1_R16;
+ movl r31=XSI_BANK1_R16+8;;
+ ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;;
+ ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;;
+ ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;;
+ ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;;
+ ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;;
+ ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;;
+ ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;;
+ ld8.fill r30=[r30]; ld8.fill r31=[r31];;
+ mov ar.unat=r14
+ br.ret.sptk.many b0
+END(xen_bsw1)
+
+
+#endif
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Fri Apr 13 11:14:26 2007 +0100
@@ -118,6 +118,7 @@ static void shutdown_handler(struct xenb
err = xenbus_transaction_start(&xbt);
if (err)
return;
+
str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
/* Ignore read errors and empty reads. */
if (XENBUS_IS_ERR_READ(str)) {
@@ -206,14 +207,12 @@ static int setup_shutdown_watcher(void)
printk(KERN_ERR "Failed to set shutdown watcher\n");
return err;
}
- xenbus_write(XBT_NIL, "control", "feature-reboot", "1");
err = register_xenbus_watch(&sysrq_watch);
if (err) {
printk(KERN_ERR "Failed to set sysrq watcher\n");
return err;
}
- xenbus_write(XBT_NIL, "control", "feature-sysrq", "1");
return 0;
}
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Fri Apr 13 11:14:26 2007 +0100
@@ -210,7 +210,7 @@ extern unsigned long pg0[];
/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
#define pmd_none(x) (!(unsigned long)pmd_val(x))
-#ifdef CONFIG_XEN_COMPAT_030002
+#if CONFIG_XEN_COMPAT <= 0x030002
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Fri Apr 13 11:14:26 2007 +0100
@@ -64,7 +64,6 @@ extern start_info_t *xen_start_info;
void force_evtchn_callback(void);
-#ifndef CONFIG_VMX_GUEST
/* Turn jiffies into Xen system time. XXX Implement me. */
#define jiffies_to_st(j) 0
@@ -116,6 +115,7 @@ HYPERVISOR_poll(
return rc;
}
+#ifndef CONFIG_VMX_GUEST
// for drivers/xen/privcmd/privcmd.c
#define machine_to_phys_mapping 0
struct vm_area_struct;
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Fri Apr 13 11:14:26 2007 +0100
@@ -57,6 +57,7 @@
#define XSI_PSR_IC (XSI_BASE + XSI_PSR_IC_OFS)
#define XSI_IPSR (XSI_BASE + XSI_IPSR_OFS)
#define XSI_IIP (XSI_BASE + XSI_IIP_OFS)
+#define XSI_B1NAT (XSI_BASE + XSI_B1NATS_OFS)
#define XSI_BANK1_R16 (XSI_BASE + XSI_BANK1_R16_OFS)
#define XSI_BANKNUM (XSI_BASE + XSI_BANKNUM_OFS)
#define XSI_IHA (XSI_BASE + XSI_IHA_OFS)
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Fri Apr 13 11:14:26 2007 +0100
@@ -411,7 +411,7 @@ static inline int pmd_large(pmd_t pte) {
#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
pmd_index(address))
#define pmd_none(x) (!pmd_val(x))
-#ifdef CONFIG_XEN_COMPAT_030002
+#if CONFIG_XEN_COMPAT <= 0x030002
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/network-bridge
--- a/tools/examples/network-bridge Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/examples/network-bridge Fri Apr 13 11:14:26 2007 +0100
@@ -183,12 +183,12 @@ op_start () {
return
fi
- if ! link_exists "$vdev"; then
- if link_exists "$pdev"; then
- # The device is already up.
- return
- else
- echo "
+ if link_exists "$pdev"; then
+ # The device is already up.
+ return
+ fi
+ if link_exists veth0 && ! link_exists "$vdev"; then
+ echo "
Link $vdev is missing.
This may be because you have reached the limit of the number of interfaces
that the loopback driver supports. If the loopback driver is a module, you
@@ -196,8 +196,7 @@ driver is compiled statically into the k
driver is compiled statically into the kernel, then you may set the parameter
using netloop.nloopbacks=<N> on the domain 0 kernel command line.
" >&2
- exit 1
- fi
+ exit 1
fi
create_bridge ${bridge}
@@ -224,9 +223,13 @@ using netloop.nloopbacks=<N> on the doma
add_to_bridge2 ${bridge} ${pdev}
do_ifup ${netdev}
else
+ ip link set ${bridge} arp on
+ ip link set ${bridge} multicast on
# old style without ${vdev}
transfer_addrs ${netdev} ${bridge}
transfer_routes ${netdev} ${bridge}
+ # Attach the real interface to the bridge.
+ add_to_bridge ${bridge} ${netdev}
fi
if [ ${antispoof} = 'yes' ] ; then
diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/examples/xmexample.hvm Fri Apr 13 11:14:26 2007 +0100
@@ -170,6 +170,12 @@ serial='pty'
#-----------------------------------------------------------------------------
+# Qemu Monitor, default is disabled
+# Use ctrl-alt-2 to connect
+#monitor=1
+
+
+#-----------------------------------------------------------------------------
# enable sound card support, [sb16|es1370|all|..,..], default none
#soundhw='sb16'
diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/examples/xmexample.vti Fri Apr 13 11:14:26 2007 +0100
@@ -113,6 +113,11 @@ serial='pty'
serial='pty'
#-----------------------------------------------------------------------------
+# Qemu Monitor, default is disabled
+# Use ctrl-alt-2 to connect
+#monitor=1
+
+#-----------------------------------------------------------------------------
# enable sound card support, [sb16|es1370|all|..,..], default none
#soundhw='sb16'
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/hw/pc.c Fri Apr 13 11:14:26 2007 +0100
@@ -902,7 +902,6 @@ static void pc_init1(uint64_t ram_size,
if (pci_enabled && acpi_enabled) {
piix4_pm_init(pci_bus, piix3_devfn + 3);
}
-#endif /* !CONFIG_DM */
#if 0
/* ??? Need to figure out some way for the user to
@@ -921,6 +920,17 @@ static void pc_init1(uint64_t ram_size,
lsi_scsi_attach(scsi, bdrv, -1);
}
#endif
+#else
+ if (pci_enabled) {
+ void *scsi;
+
+ scsi = lsi_scsi_init(pci_bus, -1);
+ for (i = 0; i < MAX_SCSI_DISKS ; i++) {
+ if (bs_table[i + MAX_DISKS])
+ lsi_scsi_attach(scsi, bs_table[i + MAX_DISKS], -1);
+ }
+ }
+#endif /* !CONFIG_DM */
/* must be done after all PCI devices are instanciated */
/* XXX: should be done in the Bochs BIOS */
if (pci_enabled) {
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/monitor.c Fri Apr 13 11:14:26 2007 +0100
@@ -180,7 +180,7 @@ static void do_commit(void)
{
int i;
- for (i = 0; i < MAX_DISKS; i++) {
+ for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
if (bs_table[i]) {
bdrv_commit(bs_table[i]);
}
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/vl.c Fri Apr 13 11:14:26 2007 +0100
@@ -116,7 +116,7 @@ void *ioport_opaque[MAX_IOPORTS];
void *ioport_opaque[MAX_IOPORTS];
IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
-BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD];
+BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS], *fd_table[MAX_FD];
int vga_ram_size;
int bios_size;
static DisplayState display_state;
@@ -1396,7 +1396,7 @@ static void stdio_received_byte(int ch)
case 's':
{
int i;
- for (i = 0; i < MAX_DISKS; i++) {
+ for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
if (bs_table[i])
bdrv_commit(bs_table[i]);
}
@@ -6057,7 +6057,7 @@ int main(int argc, char **argv)
int snapshot, linux_boot;
const char *initrd_filename;
#ifndef CONFIG_DM
- const char *hd_filename[MAX_DISKS];
+ const char *hd_filename[MAX_DISKS + MAX_SCSI_DISKS];
#endif /* !CONFIG_DM */
const char *fd_filename[MAX_FD];
const char *kernel_filename, *kernel_cmdline;
@@ -6126,7 +6126,7 @@ int main(int argc, char **argv)
for(i = 0; i < MAX_FD; i++)
fd_filename[i] = NULL;
#ifndef CONFIG_DM
- for(i = 0; i < MAX_DISKS; i++)
+ for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
hd_filename[i] = NULL;
#endif /* !CONFIG_DM */
ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
@@ -6724,7 +6724,7 @@ int main(int argc, char **argv)
}
/* open the virtual block devices */
- for(i = 0; i < MAX_DISKS; i++) {
+ for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
if (hd_filename[i]) {
if (!bs_table[i]) {
char buf[64];
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/vl.h Fri Apr 13 11:14:26 2007 +0100
@@ -818,8 +818,9 @@ int vnc_start_viewer(int port);
/* ide.c */
#define MAX_DISKS 4
-
-extern BlockDriverState *bs_table[MAX_DISKS];
+#define MAX_SCSI_DISKS 7
+
+extern BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS];
void isa_ide_init(int iobase, int iobase2, int irq,
BlockDriverState *hd0, BlockDriverState *hd1);
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/xenstore.c Fri Apr 13 11:14:26 2007 +0100
@@ -18,7 +18,7 @@
#include <fcntl.h>
static struct xs_handle *xsh = NULL;
-static char *media_filename[MAX_DISKS];
+static char *media_filename[MAX_DISKS + MAX_SCSI_DISKS];
static QEMUTimer *insert_timer = NULL;
#define UWAIT_MAX (30*1000000) /* thirty seconds */
@@ -30,11 +30,11 @@ static int pasprintf(char **buf, const c
int ret = 0;
if (*buf)
- free(*buf);
+ free(*buf);
va_start(ap, fmt);
if (vasprintf(buf, fmt, ap) == -1) {
- buf = NULL;
- ret = -1;
+ buf = NULL;
+ ret = -1;
}
va_end(ap);
return ret;
@@ -44,12 +44,12 @@ static void insert_media(void *opaque)
{
int i;
- for (i = 0; i < MAX_DISKS; i++) {
- if (media_filename[i] && bs_table[i]) {
- do_change(bs_table[i]->device_name, media_filename[i]);
- free(media_filename[i]);
- media_filename[i] = NULL;
- }
+ for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
+ if (media_filename[i] && bs_table[i]) {
+ do_change(bs_table[i]->device_name, media_filename[i]);
+ free(media_filename[i]);
+ media_filename[i] = NULL;
+ }
}
}
@@ -57,7 +57,7 @@ void xenstore_check_new_media_present(in
{
if (insert_timer == NULL)
- insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
+ insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout);
}
@@ -82,17 +82,17 @@ void xenstore_parse_domain_config(int do
char **e = NULL;
char *buf = NULL, *path;
char *fpath = NULL, *bpath = NULL,
- *dev = NULL, *params = NULL, *type = NULL;
- int i;
+ *dev = NULL, *params = NULL, *type = NULL;
+ int i, is_scsi;
unsigned int len, num, hd_index;
- for(i = 0; i < MAX_DISKS; i++)
+ for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
media_filename[i] = NULL;
xsh = xs_daemon_open();
if (xsh == NULL) {
- fprintf(logfile, "Could not contact xenstore for domain config\n");
- return;
+ fprintf(logfile, "Could not contact xenstore for domain config\n");
+ return;
}
path = xs_get_domain_path(xsh, domid);
@@ -102,59 +102,60 @@ void xenstore_parse_domain_config(int do
}
if (pasprintf(&buf, "%s/device/vbd", path) == -1)
- goto out;
+ goto out;
e = xs_directory(xsh, XBT_NULL, buf, &num);
if (e == NULL)
- goto out;
+ goto out;
for (i = 0; i < num; i++) {
- /* read the backend path */
- if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
- continue;
- free(bpath);
+ /* read the backend path */
+ if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
+ continue;
+ free(bpath);
bpath = xs_read(xsh, XBT_NULL, buf, &len);
- if (bpath == NULL)
- continue;
- /* read the name of the device */
- if (pasprintf(&buf, "%s/dev", bpath) == -1)
- continue;
- free(dev);
- dev = xs_read(xsh, XBT_NULL, buf, &len);
- if (dev == NULL)
- continue;
- if (strncmp(dev, "hd", 2) || strlen(dev) != 3)
- continue;
- hd_index = dev[2] - 'a';
- if (hd_index >= MAX_DISKS)
- continue;
- /* read the type of the device */
- if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
- continue;
- free(type);
- type = xs_read(xsh, XBT_NULL, buf, &len);
- if (pasprintf(&buf, "%s/params", bpath) == -1)
- continue;
- free(params);
- params = xs_read(xsh, XBT_NULL, buf, &len);
- if (params == NULL)
- continue;
+ if (bpath == NULL)
+ continue;
+ /* read the name of the device */
+ if (pasprintf(&buf, "%s/dev", bpath) == -1)
+ continue;
+ free(dev);
+ dev = xs_read(xsh, XBT_NULL, buf, &len);
+ if (dev == NULL)
+ continue;
+ is_scsi = !strncmp(dev, "sd", 2);
+ if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 )
+ continue;
+ hd_index = dev[2] - 'a';
+ if (hd_index >= (is_scsi ? MAX_SCSI_DISKS : MAX_DISKS))
+ continue;
+ /* read the type of the device */
+ if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
+ continue;
+ free(type);
+ type = xs_read(xsh, XBT_NULL, buf, &len);
+ if (pasprintf(&buf, "%s/params", bpath) == -1)
+ continue;
+ free(params);
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params == NULL)
+ continue;
/*
* check if device has a phantom vbd; the phantom is hooked
* to the frontend device (for ease of cleanup), so lookup
* the frontend device, and see if there is a phantom_vbd
* if there is, we will use resolution as the filename
*/
- if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
- continue;
- free(fpath);
+ if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
+ continue;
+ free(fpath);
fpath = xs_read(xsh, XBT_NULL, buf, &len);
- if (fpath) {
- if (pasprintf(&buf, "%s/dev", fpath) == -1)
- continue;
- free(params);
+ if (fpath) {
+ if (pasprintf(&buf, "%s/dev", fpath) == -1)
+ continue;
+ free(params);
params = xs_read(xsh, XBT_NULL, buf , &len);
- if (params) {
+ if (params) {
/*
* wait for device, on timeout silently fail because we will
* fail to open below
@@ -163,19 +164,20 @@ void xenstore_parse_domain_config(int do
}
}
- bs_table[hd_index] = bdrv_new(dev);
- /* check if it is a cdrom */
- if (type && !strcmp(type, "cdrom")) {
- bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
- if (pasprintf(&buf, "%s/params", bpath) != -1)
- xs_watch(xsh, buf, dev);
- }
- /* open device now if media present */
- if (params[0]) {
- if (bdrv_open(bs_table[hd_index], params, 0 /* snapshot */) < 0)
+ bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev);
+ /* check if it is a cdrom */
+ if (type && !strcmp(type, "cdrom")) {
+ bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
+ if (pasprintf(&buf, "%s/params", bpath) != -1)
+ xs_watch(xsh, buf, dev);
+ }
+ /* open device now if media present */
+ if (params[0]) {
+ if (bdrv_open(bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)],
+ params, 0 /* snapshot */) < 0)
fprintf(stderr, "qemu: could not open hard disk image '%s'\n",
params);
- }
+ }
}
/* Set a watch for log-dirty requests from the migration tools */
@@ -199,7 +201,7 @@ int xenstore_fd(void)
int xenstore_fd(void)
{
if (xsh)
- return xs_fileno(xsh);
+ return xs_fileno(xsh);
return -1;
}
@@ -316,7 +318,7 @@ void xenstore_process_event(void *opaque
vec = xs_read_watch(xsh, &num);
if (!vec)
- return;
+ return;
if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) {
xenstore_process_logdirty_event();
@@ -324,23 +326,23 @@ void xenstore_process_event(void *opaque
}
if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
- strlen(vec[XS_WATCH_TOKEN]) != 3)
- goto out;
+ strlen(vec[XS_WATCH_TOKEN]) != 3)
+ goto out;
hd_index = vec[XS_WATCH_TOKEN][2] - 'a';
image = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
if (image == NULL || !strcmp(image, bs_table[hd_index]->filename))
- goto out; /* gone or identical */
+ goto out; /* gone or identical */
do_eject(0, vec[XS_WATCH_TOKEN]);
bs_table[hd_index]->filename[0] = 0;
if (media_filename[hd_index]) {
- free(media_filename[hd_index]);
- media_filename[hd_index] = NULL;
+ free(media_filename[hd_index]);
+ media_filename[hd_index] = NULL;
}
if (image[0]) {
- media_filename[hd_index] = strdup(image);
- xenstore_check_new_media_present(5000);
+ media_filename[hd_index] = strdup(image);
+ xenstore_check_new_media_present(5000);
}
out:
@@ -354,7 +356,7 @@ void xenstore_write_vncport(int display)
char *portstr = NULL;
if (xsh == NULL)
- return;
+ return;
path = xs_get_domain_path(xsh, domid);
if (path == NULL) {
@@ -363,10 +365,10 @@ void xenstore_write_vncport(int display)
}
if (pasprintf(&buf, "%s/console/vnc-port", path) == -1)
- goto out;
+ goto out;
if (pasprintf(&portstr, "%d", 5900 + display) == -1)
- goto out;
+ goto out;
if (xs_write(xsh, XBT_NULL, buf, portstr, strlen(portstr)) == 0)
fprintf(logfile, "xs_write() vncport failed\n");
@@ -383,41 +385,41 @@ int xenstore_read_vncpasswd(int domid)
unsigned int i, len, rc = 0;
if (xsh == NULL) {
- return -1;
+ return -1;
}
path = xs_get_domain_path(xsh, domid);
if (path == NULL) {
- fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
- return -1;
+ fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
+ return -1;
}
pasprintf(&buf, "%s/vm", path);
uuid = xs_read(xsh, XBT_NULL, buf, &len);
if (uuid == NULL) {
- fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
- free(path);
- return -1;
+ fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
+ free(path);
+ return -1;
}
pasprintf(&buf, "%s/vncpasswd", uuid);
passwd = xs_read(xsh, XBT_NULL, buf, &len);
if (passwd == NULL) {
- fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
- free(uuid);
- free(path);
- return rc;
+ fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
+ free(uuid);
+ free(path);
+ return rc;
}
for (i=0; i<len && i<63; i++) {
- vncpasswd[i] = passwd[i];
- passwd[i] = '\0';
+ vncpasswd[i] = passwd[i];
+ passwd[i] = '\0';
}
vncpasswd[len] = '\0';
pasprintf(&buf, "%s/vncpasswd", uuid);
if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) {
- fprintf(logfile, "xs_write() vncpasswd failed.\n");
- rc = -1;
+ fprintf(logfile, "xs_write() vncpasswd failed.\n");
+ rc = -1;
}
free(passwd);
@@ -443,7 +445,7 @@ char **xenstore_domain_get_devices(struc
goto out;
if (pasprintf(&buf, "%s/device/%s", path,devtype) == -1)
- goto out;
+ goto out;
e = xs_directory(handle, XBT_NULL, buf, num);
@@ -496,13 +498,13 @@ char *xenstore_backend_read_variable(str
buf = get_device_variable_path(devtype, inst, var);
if (NULL == buf)
- goto out;
+ goto out;
value = xs_read(handle, XBT_NULL, buf, &len);
free(buf);
-out:
+ out:
return value;
}
@@ -569,27 +571,27 @@ char *xenstore_vm_read(int domid, char *
char *buf = NULL, *path = NULL, *value = NULL;
if (xsh == NULL)
- goto out;
+ goto out;
path = xs_get_domain_path(xsh, domid);
if (path == NULL) {
- fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
- goto out;
+ fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+ goto out;
}
pasprintf(&buf, "%s/vm", path);
free(path);
path = xs_read(xsh, XBT_NULL, buf, NULL);
if (path == NULL) {
- fprintf(logfile, "xs_read(%s): read error\n", buf);
- goto out;
+ fprintf(logfile, "xs_read(%s): read error\n", buf);
+ goto out;
}
pasprintf(&buf, "%s/%s", path, key);
value = xs_read(xsh, XBT_NULL, buf, len);
if (value == NULL) {
- fprintf(logfile, "xs_read(%s): read error\n", buf);
- goto out;
+ fprintf(logfile, "xs_read(%s): read error\n", buf);
+ goto out;
}
out:
@@ -604,27 +606,27 @@ int xenstore_vm_write(int domid, char *k
int rc = -1;
if (xsh == NULL)
- goto out;
+ goto out;
path = xs_get_domain_path(xsh, domid);
if (path == NULL) {
- fprintf(logfile, "xs_get_domain_path: error\n");
- goto out;
+ fprintf(logfile, "xs_get_domain_path: error\n");
+ goto out;
}
pasprintf(&buf, "%s/vm", path);
free(path);
path = xs_read(xsh, XBT_NULL, buf, NULL);
if (path == NULL) {
- fprintf(logfile, "xs_read(%s): read error\n", buf);
- goto out;
+ fprintf(logfile, "xs_read(%s): read error\n", buf);
+ goto out;
}
pasprintf(&buf, "%s/%s", path, key);
rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
if (rc) {
- fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
- goto out;
+ fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
+ goto out;
}
out:
diff -r 5bda20f0723d -r f92a79e39da8 tools/libfsimage/fat/fat.h
--- a/tools/libfsimage/fat/fat.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libfsimage/fat/fat.h Fri Apr 13 11:14:26 2007 +0100
@@ -84,17 +84,17 @@ struct fat_bpb {
#define FAT_DIRENTRY_LENGTH 32
#define FAT_DIRENTRY_ATTRIB(entry) \
- (*((unsigned char *) (entry+11)))
+ (*((__u8 *) (entry+11)))
#define FAT_DIRENTRY_VALID(entry) \
- ( ((*((unsigned char *) entry)) != 0) \
- && ((*((unsigned char *) entry)) != 0xE5) \
+ ( ((*((__u8 *) entry)) != 0) \
+ && ((*((__u8 *) entry)) != 0xE5) \
&& !(FAT_DIRENTRY_ATTRIB(entry) & FAT_ATTRIB_NOT_OK_MASK) )
#define FAT_DIRENTRY_FIRST_CLUSTER(entry) \
- ((*((unsigned short *) (entry+26)))+(*((unsigned short *) (entry+20)) << 16))
+ ((*((__u16 *) (entry+26)))+(*((__u16 *) (entry+20)) << 16))
#define FAT_DIRENTRY_FILELENGTH(entry) \
- (*((unsigned long *) (entry+28)))
+ (*((__u32 *) (entry+28)))
#define FAT_LONGDIR_ID(entry) \
- (*((unsigned char *) (entry)))
+ (*((__u8 *) (entry)))
#define FAT_LONGDIR_ALIASCHECKSUM(entry) \
- (*((unsigned char *) (entry+13)))
+ (*((__u8 *) (entry+13)))
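
[Editor's note: the switch to fixed-width __u8/__u16/__u32 matters on LP64 hosts, where "unsigned long" is 8 bytes, so the old FAT_DIRENTRY_FILELENGTH read past the 32-bit length field at offset 28 of the 32-byte directory entry. A small userspace check of the same accessor pattern, with uint32_t standing in for __u32 (illustrative sketch, little-endian host assumed):

#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Same shape as the macro above, spelled with a standard type for userspace. */
#define FAT_DIRENTRY_FILELENGTH(entry) (*((uint32_t *) (entry+28)))

int main(void)
{
    unsigned char entry[32];
    uint32_t len = 0x00012345;

    memset(entry, 0, sizeof(entry));
    memcpy(entry + 28, &len, sizeof(len));   /* file length field at offset 28 */
    assert(FAT_DIRENTRY_FILELENGTH(entry) == 0x00012345);
    return 0;
}
]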
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/Makefile
--- a/tools/libxc/Makefile Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/Makefile Fri Apr 13 11:14:26 2007 +0100
@@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
GUEST_SRCS-y :=
GUEST_SRCS-y += xg_private.c
-GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
+GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
# symlink libelf from xen/common/libelf/
LIBELF_SRCS := libelf-tools.c libelf-loader.c
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c Fri Apr 13 11:14:26 2007 +0100
@@ -134,8 +134,10 @@ retry:
}
int
-xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+ int hvm, void *(*init_qemu_maps)(int, unsigned),
+ void (*qemu_flip_buffer)(int, int))
{
DECLARE_DOMCTL;
xc_dominfo_t info;
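
[Editor's note: xc_domain_save() is now the single save entry point for PV and HVM domains, with the extra hvm flag and qemu-dm callbacks shown in the signature above. A minimal caller sketch under stated assumptions: the suspend callback is hypothetical, zero tuning values select the defaults per the header comment in xc_domain_save.c, and in this sketch the HVM-only qemu callbacks are simply passed as NULL for a PV save.

#include <stdint.h>
#include <stddef.h>
#include "xenguest.h"   /* declares xc_domain_save() */

static int my_suspend(int domid)
{
    /* Hypothetical: ask the toolstack to suspend the domain, then wait.
     * Return non-zero once the domain has actually suspended. */
    return 1;
}

static int save_pv_domain(int xc_handle, int io_fd, uint32_t domid)
{
    return xc_domain_save(xc_handle, io_fd, domid,
                          0 /* max_iters: 0 selects the default */,
                          0 /* max_factor: 0 selects the default */,
                          0 /* flags: not a live save */,
                          my_suspend,
                          0 /* hvm */, NULL, NULL);
}
]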
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_domain.c Fri Apr 13 11:14:26 2007 +0100
@@ -8,6 +8,7 @@
#include "xc_private.h"
#include <xen/memory.h>
+#include <xen/hvm/hvm_op.h>
int xc_domain_create(int xc_handle,
uint32_t ssidref,
@@ -655,6 +656,44 @@ int xc_domain_send_trigger(int xc_handle
domctl.u.sendtrigger.vcpu = vcpu;
return do_domctl(xc_handle, &domctl);
+}
+
+int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value)
+{
+ DECLARE_HYPERCALL;
+ xen_hvm_param_t arg;
+ int rc;
+
+ hypercall.op = __HYPERVISOR_hvm_op;
+ hypercall.arg[0] = HVMOP_set_param;
+ hypercall.arg[1] = (unsigned long)&arg;
+ arg.domid = dom;
+ arg.index = param;
+ arg.value = value;
+ if ( lock_pages(&arg, sizeof(arg)) != 0 )
+ return -1;
+ rc = do_xen_hypercall(handle, &hypercall);
+ unlock_pages(&arg, sizeof(arg));
+ return rc;
+}
+
+int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value)
+{
+ DECLARE_HYPERCALL;
+ xen_hvm_param_t arg;
+ int rc;
+
+ hypercall.op = __HYPERVISOR_hvm_op;
+ hypercall.arg[0] = HVMOP_get_param;
+ hypercall.arg[1] = (unsigned long)&arg;
+ arg.domid = dom;
+ arg.index = param;
+ if ( lock_pages(&arg, sizeof(arg)) != 0 )
+ return -1;
+ rc = do_xen_hypercall(handle, &hypercall);
+ unlock_pages(&arg, sizeof(arg));
+ *value = arg.value;
+ return rc;
}
/*
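
[Editor's note: a short usage sketch for the two wrappers added above, mirroring how the reworked restore path below sets HVM_PARAM_PAE_ENABLED. The helper name is illustrative and the prototypes are assumed to be exported via xenctrl.h, which this changeset also touches.

#include <stdio.h>
#include "xenctrl.h"
#include <xen/hvm/params.h>

/* Hypothetical helper: copy the PAE setting from one HVM domain to another. */
static int clone_pae_setting(int xc_handle, domid_t src, domid_t dst)
{
    unsigned long pae;

    if (xc_get_hvm_param(xc_handle, src, HVM_PARAM_PAE_ENABLED, &pae) != 0) {
        fprintf(stderr, "xc_get_hvm_param(PAE) failed\n");
        return -1;
    }
    /* Setting a parameter is symmetric, as in the restore code below. */
    return xc_set_hvm_param(xc_handle, dst, HVM_PARAM_PAE_ENABLED, pae);
}
]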
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_domain_restore.c Fri Apr 13 11:14:26 2007 +0100
@@ -688,33 +688,22 @@ int xc_domain_restore(int xc_handle, int
ERROR("error zeroing magic pages");
goto out;
}
-
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+
+ if ( (rc = xc_set_hvm_param(xc_handle, dom,
+ HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
+ || (rc = xc_set_hvm_param(xc_handle, dom,
+ HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
+ || (rc = xc_set_hvm_param(xc_handle, dom,
+ HVM_PARAM_STORE_PFN, magic_pfns[2]))
+ || (rc = xc_set_hvm_param(xc_handle, dom,
+ HVM_PARAM_PAE_ENABLED, pae))
+ || (rc = xc_set_hvm_param(xc_handle, dom,
+ HVM_PARAM_STORE_EVTCHN, store_evtchn)) )
+ {
+ ERROR("error setting HVM params: %i", rc);
+ goto out;
+ }
*store_mfn = magic_pfns[2];
-
- /* Read vcpu contexts */
- for ( i = 0; i <= max_vcpu_id; i++ )
- {
- if ( !(vcpumap & (1ULL << i)) )
- continue;
-
- if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
- {
- ERROR("error read vcpu context.\n");
- goto out;
- }
-
- if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
- {
- ERROR("Could not set vcpu context, rc=%d", rc);
- goto out;
- }
- rc = 1;
- }
/* Read HVM context */
if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain_save.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_domain_save.c Fri Apr 13 11:14:26 2007 +0100
@@ -0,0 +1,1587 @@
+/******************************************************************************
+ * xc_linux_save.c
+ *
+ * Save the state of a running Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "xc_private.h"
+#include "xc_dom.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#include <xen/hvm/params.h>
+#include <xen/hvm/e820.h>
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_domain_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
+#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
+static unsigned long *qemu_bitmaps[2];
+static int qemu_active;
+static int qemu_non_active;
+
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* Live mapping of the table mapping each PFN to its current MFN. */
+static xen_pfn_t *live_p2m = NULL;
+
+/* Live mapping of system MFN to PFN table. */
+static xen_pfn_t *live_m2p = NULL;
+static unsigned long m2p_mfn0;
+
+/* grep fodder: machine_to_phys */
+
+#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
+
+/*
+ * Returns TRUE if the given machine frame number has a unique mapping
+ * in the guest's pseudophysical map.
+ */
+#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
+ (((_mfn) < (max_mfn)) && \
+ ((mfn_to_pfn(_mfn) < (p2m_size)) && \
+ (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
+
+/* Returns TRUE if MFN is successfully converted to a PFN. */
+#define translate_mfn_to_pfn(_pmfn) \
+({ \
+ unsigned long mfn = *(_pmfn); \
+ int _res = 1; \
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \
+ _res = 0; \
+ else \
+ *(_pmfn) = mfn_to_pfn(mfn); \
+ _res; \
+})
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, to fixup, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define BITMAP_SIZE (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long))
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+ ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int test_bit (int nr, volatile void * addr)
+{
+ return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit (int nr, volatile void * addr)
+{
+ BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit ( int nr, volatile void * addr)
+{
+ BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
+/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
+static inline unsigned int hweight32(unsigned int w)
+{
+ unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+ res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+ return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+}
+
+static inline int count_bits ( int nr, volatile void *addr)
+{
+ int i, count = 0;
+ volatile unsigned long *p = (volatile unsigned long *)addr;
+ /* We know that the array is padded to unsigned long. */
+ for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
+ count += hweight32(*p);
+ return count;
+}
+
+static inline int permute( int i, int nr, int order_nr )
+{
+ /* Need a simple permutation function so that we scan pages in a
+ pseudo random order, enabling us to get a better estimate of
+ the domain's page dirtying rate as we go (there are often
+ contiguous ranges of pfns that have similar behaviour, and we
+ want to mix them up). */
+
+ /* e.g. nr->order 15->4 16->4 17->5 */
+ /* 512MB domain, 128k pages, order 17 */
+
+ /*
+ QPONMLKJIHGFEDCBA
+ QPONMLKJIH
+ GFEDCBA
+ */
+
+ /*
+ QPONMLKJIHGFEDCBA
+ EDCBA
+ QPONM
+ LKJIHGF
+ */
+
+ do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
+ while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
+
+ return i;
+}
+
+static uint64_t tv_to_us(struct timeval *new)
+{
+ return (new->tv_sec * 1000000) + new->tv_usec;
+}
+
+static uint64_t llgettimeofday(void)
+{
+ struct timeval now;
+ gettimeofday(&now, NULL);
+ return tv_to_us(&now);
+}
+
+static uint64_t tv_delta(struct timeval *new, struct timeval *old)
+{
+ return (((new->tv_sec - old->tv_sec)*1000000) +
+ (new->tv_usec - old->tv_usec));
+}
+
+static int noncached_write(int fd, int live, void *buffer, int len)
+{
+ static int write_count = 0;
+
+ int rc = write(fd,buffer,len);
+
+ write_count += len;
+ if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
+ {
+ /* Time to discard cache - don't care if this fails */
+ discard_file_cache(fd, 0 /* no flush */);
+ write_count = 0;
+ }
+
+ return rc;
+}
+
+#ifdef ADAPTIVE_SAVE
+
+/*
+** We control the rate at which we transmit (or save) to minimize impact
+** on running domains (including the target if we're doing live migrate).
+*/
+
+#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */
+#define START_MBIT_RATE 100 /* initial transmit rate for migrate */
+
+/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
+#define RATE_TO_BTU 781250
+
+/* Amount in bytes we allow ourselves to send in a burst */
+#define BURST_BUDGET (100*1024)
+
+/* We keep track of the current and previous transmission rate */
+static int mbit_rate, ombit_rate = 0;
+
+/* Have we reached the maximum transmission rate? */
+#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
+
+static inline void initialize_mbit_rate()
+{
+ mbit_rate = START_MBIT_RATE;
+}
+
+static int ratewrite(int io_fd, int live, void *buf, int n)
+{
+ static int budget = 0;
+ static int burst_time_us = -1;
+ static struct timeval last_put = { 0 };
+ struct timeval now;
+ struct timespec delay;
+ long long delta;
+
+ if ( START_MBIT_RATE == 0 )
+ return noncached_write(io_fd, live, buf, n);
+
+ budget -= n;
+ if ( budget < 0 )
+ {
+ if ( mbit_rate != ombit_rate )
+ {
+ burst_time_us = RATE_TO_BTU / mbit_rate;
+ ombit_rate = mbit_rate;
+ DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
+ mbit_rate, BURST_BUDGET, burst_time_us);
+ }
+ if ( last_put.tv_sec == 0 )
+ {
+ budget += BURST_BUDGET;
+ gettimeofday(&last_put, NULL);
+ }
+ else
+ {
+ while ( budget < 0 )
+ {
+ gettimeofday(&now, NULL);
+ delta = tv_delta(&now, &last_put);
+ while ( delta > burst_time_us )
+ {
+ budget += BURST_BUDGET;
+ last_put.tv_usec += burst_time_us;
+ if ( last_put.tv_usec > 1000000 )
+ {
+ last_put.tv_usec -= 1000000;
+ last_put.tv_sec++;
+ }
+ delta -= burst_time_us;
+ }
+ if ( budget > 0 )
+ break;
+ delay.tv_sec = 0;
+ delay.tv_nsec = 1000 * (burst_time_us - delta);
+ while ( delay.tv_nsec > 0 )
+ if ( nanosleep(&delay, &delay) == 0 )
+ break;
+ }
+ }
+ }
+ return noncached_write(io_fd, live, buf, n);
+}
+
+#else /* ! ADAPTIVE SAVE */
+
+#define RATE_IS_MAX() (0)
+#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
+#define initialize_mbit_rate()
+
+#endif
+
+static inline ssize_t write_exact(int fd, void *buf, size_t count)
+{
+ return (write(fd, buf, count) == count);
+}
+
+static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
+ xc_shadow_op_stats_t *stats, int print)
+{
+ static struct timeval wall_last;
+ static long long d0_cpu_last;
+ static long long d1_cpu_last;
+
+ struct timeval wall_now;
+ long long wall_delta;
+ long long d0_cpu_now, d0_cpu_delta;
+ long long d1_cpu_now, d1_cpu_delta;
+
+ gettimeofday(&wall_now, NULL);
+
+ d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
+ d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
+
+ if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
+ DPRINTF("ARRHHH!!\n");
+
+ wall_delta = tv_delta(&wall_now,&wall_last)/1000;
+ if ( wall_delta == 0 )
+ wall_delta = 1;
+
+ d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
+ d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
+
+ if ( print )
+ DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+ "dirtied %dMb/s %" PRId32 " pages\n",
+ wall_delta,
+ (int)((d0_cpu_delta*100)/wall_delta),
+ (int)((d1_cpu_delta*100)/wall_delta),
+ (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+ (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+ stats->dirty_count);
+
+#ifdef ADAPTIVE_SAVE
+ if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
+ {
+ mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
+ + 50;
+ if ( mbit_rate > MAX_MBIT_RATE )
+ mbit_rate = MAX_MBIT_RATE;
+ }
+#endif
+
+ d0_cpu_last = d0_cpu_now;
+ d1_cpu_last = d1_cpu_now;
+ wall_last = wall_now;
+
+ return 0;
+}
+
+
+static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
+ unsigned long *arr, int runs)
+{
+ long long start, now;
+ xc_shadow_op_stats_t stats;
+ int j;
+
+ start = llgettimeofday();
+
+ for ( j = 0; j < runs; j++ )
+ {
+ int i;
+
+ xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+ arr, p2m_size, NULL, 0, NULL);
+ DPRINTF("#Flush\n");
+ for ( i = 0; i < 40; i++ )
+ {
+ usleep(50000);
+ now = llgettimeofday();
+ xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
+ NULL, 0, NULL, 0, &stats);
+ DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
+ ((now-start)+500)/1000,
+ stats.fault_count, stats.dirty_count);
+ }
+ }
+
+ return -1;
+}
+
+
+static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+ int dom, xc_dominfo_t *info)
+{
+ int i = 0;
+
+ if ( !(*suspend)(dom) )
+ {
+ ERROR("Suspend request failed");
+ return -1;
+ }
+
+ retry:
+
+ if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
+ {
+ ERROR("Could not get domain info");
+ return -1;
+ }
+
+ if ( info->dying )
+ {
+ ERROR("domain is dying");
+ return -1;
+ }
+
+ if ( info->crashed )
+ {
+ ERROR("domain has crashed");
+ return -1;
+ }
+
+ if ( info->shutdown )
+ {
+ switch ( info->shutdown_reason )
+ {
+ case SHUTDOWN_poweroff:
+ case SHUTDOWN_reboot:
+ ERROR("domain has shut down");
+ return -1;
+ case SHUTDOWN_suspend:
+ return 0;
+ case SHUTDOWN_crash:
+ ERROR("domain has crashed");
+ return -1;
+ }
+ }
+
+ if ( info->paused )
+ {
+ /* Try unpausing domain, wait, and retest. */
+ xc_domain_unpause( xc_handle, dom );
+ ERROR("Domain was paused. Wait and re-test.");
+ usleep(10000); /* 10ms */
+ goto retry;
+ }
+
+ if ( ++i < 100 )
+ {
+ ERROR("Retry suspend domain");
+ usleep(10000); /* 10ms */
+ goto retry;
+ }
+
+ ERROR("Unable to suspend domain.");
+
+ return -1;
+}
+
+/*
+** Map the top-level page of MFNs from the guest. The guest might not have
+** finished resuming from a previous restore operation, so we wait a while for
+** it to update the MFN to a reasonable value.
+*/
+static void *map_frame_list_list(int xc_handle, uint32_t dom,
+ shared_info_t *shinfo)
+{
+ int count = 100;
+ void *p;
+
+ while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
+ usleep(10000);
+
+ if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
+ {
+ ERROR("Timed out waiting for frame list updated.");
+ return NULL;
+ }
+
+ p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
+ shinfo->arch.pfn_to_mfn_frame_list_list);
+ if ( p == NULL )
+ ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
+
+ return p;
+}
+
+/*
+** During transfer (or in the state file), all page-table pages must be
+** converted into a 'canonical' form where references to actual mfns
+** are replaced with references to the corresponding pfns.
+**
+** This function performs the appropriate conversion, taking into account
+** which entries do not require canonicalization (in particular, those
+** entries which map the virtual address reserved for the hypervisor).
+*/
+static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
+ const void *spage, void *dpage)
+{
+
+ int i, pte_last, xen_start, xen_end, race = 0;
+ uint64_t pte;
+
+ /*
+ ** We need to determine which entries in this page table hold
+ ** reserved hypervisor mappings. This depends on the current
+ ** page table type as well as the number of paging levels.
+ */
+ xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
+
+ if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
+ xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
+
+ if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
+ xen_start = L3_PAGETABLE_ENTRIES_PAE;
+
+ /*
+ ** in PAE only the L2 mapping the top 1GB contains Xen mappings.
+ ** We can spot this by looking for the guest linear mapping which
+ ** Xen always ensures is present in that L2. Guests must ensure
+ ** that this check will fail for other L2s.
+ */
+ if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
+ {
+ int hstart;
+ uint64_t he;
+
+ hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+ he = ((const uint64_t *) spage)[hstart];
+
+ if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
+ {
+ /* hvirt starts with xen stuff... */
+ xen_start = hstart;
+ }
+ else if ( hvirt_start != 0xf5800000 )
+ {
+ /* old L2s from before hole was shrunk... */
+ hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+ he = ((const uint64_t *) spage)[hstart];
+ if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
+ xen_start = hstart;
+ }
+ }
+
+ if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
+ {
+ /*
+ ** XXX SMH: should compute these from hvirt_start (which we have)
+ ** and hvirt_end (which we don't)
+ */
+ xen_start = 256;
+ xen_end = 272;
+ }
+
+ /* Now iterate through the page table, canonicalizing each PTE */
+ for (i = 0; i < pte_last; i++ )
+ {
+ unsigned long pfn, mfn;
+
+ if ( pt_levels == 2 )
+ pte = ((const uint32_t*)spage)[i];
+ else
+ pte = ((const uint64_t*)spage)[i];
+
+ if ( (i >= xen_start) && (i < xen_end) )
+ pte = 0;
+
+ if ( pte & _PAGE_PRESENT )
+ {
+ mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ {
+ /* This will happen if the type info is stale which
+ is quite feasible under live migration */
+ pfn = 0; /* zap it - we'll retransmit this page later */
+ race = 1; /* inform the caller of race; fatal if !live */
+ }
+ else
+ pfn = mfn_to_pfn(mfn);
+
+ pte &= ~MADDR_MASK_X86;
+ pte |= (uint64_t)pfn << PAGE_SHIFT;
+
+ /*
+ * PAE guest L3Es can contain these flags when running on
+ * a 64bit hypervisor. We zap these here to avoid any
+ * surprise at restore time...
+ */
+ if ( (pt_levels == 3) &&
+ (type == XEN_DOMCTL_PFINFO_L3TAB) &&
+ (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
+ pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
+ }
+
+ if ( pt_levels == 2 )
+ ((uint32_t*)dpage)[i] = pte;
+ else
+ ((uint64_t*)dpage)[i] = pte;
+ }
+
+ return race;
+}
+
+static xen_pfn_t *xc_map_m2p(int xc_handle,
+ unsigned long max_mfn,
+ int prot)
+{
+ struct xen_machphys_mfn_list xmml;
+ privcmd_mmap_entry_t *entries;
+ unsigned long m2p_chunks, m2p_size;
+ xen_pfn_t *m2p;
+ xen_pfn_t *extent_start;
+ int i, rc;
+
+ m2p_size = M2P_SIZE(max_mfn);
+ m2p_chunks = M2P_CHUNKS(max_mfn);
+
+ xmml.max_extents = m2p_chunks;
+ if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
+ {
+ ERROR("failed to allocate space for m2p mfns");
+ return NULL;
+ }
+ set_xen_guest_handle(xmml.extent_start, extent_start);
+
+ if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
+ (xmml.nr_extents != m2p_chunks) )
+ {
+ ERROR("xc_get_m2p_mfns");
+ return NULL;
+ }
+
+ if ( (m2p = mmap(NULL, m2p_size, prot,
+ MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
+ {
+ ERROR("failed to mmap m2p");
+ return NULL;
+ }
+
+ if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
+ {
+ ERROR("failed to allocate space for mmap entries");
+ return NULL;
+ }
+
+ for ( i = 0; i < m2p_chunks; i++ )
+ {
+ entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
+ entries[i].mfn = extent_start[i];
+ entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
+ }
+
+ if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
+ entries, m2p_chunks)) < 0 )
+ {
+ ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
+ return NULL;
+ }
+
+ m2p_mfn0 = entries[0].mfn;
+
+ free(extent_start);
+ free(entries);
+
+ return m2p;
+}
+
+
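+/*
+ * Editor's note: illustrative usage sketch, not part of the original change,
+ * never called, and named by the editor.  Once the caller has used
+ * xc_map_m2p() to set up live_m2p (and m2p_mfn0), a machine frame number is
+ * validated against the live P2M and translated back to a guest
+ * pseudo-physical frame as below; the rest of this file does the same thing
+ * through the mfn_to_pfn() macro.
+ */
+static inline xen_pfn_t m2p_lookup_sketch(unsigned long mfn)
+{
+    /* Refuse frames beyond the machine or not owned by this guest. */
+    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+        return INVALID_P2M_ENTRY;
+
+    return live_m2p[mfn];
+}
+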
+static xen_pfn_t *map_and_save_p2m_table(int xc_handle,
+ int io_fd,
+ uint32_t dom,
+ unsigned long p2m_size,
+ shared_info_t *live_shinfo)
+{
+ vcpu_guest_context_t ctxt;
+
+ /* Double and single indirect references to the live P2M table */
+ xen_pfn_t *live_p2m_frame_list_list = NULL;
+ xen_pfn_t *live_p2m_frame_list = NULL;
+
+ /* A copy of the pfn-to-mfn table frame list. */
+ xen_pfn_t *p2m_frame_list = NULL;
+
+ /* The mapping of the live p2m table itself */
+ xen_pfn_t *p2m = NULL;
+
+ int i, success = 0;
+
+ live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
+ live_shinfo);
+ if ( !live_p2m_frame_list_list )
+ goto out;
+
+ live_p2m_frame_list =
+ xc_map_foreign_batch(xc_handle, dom, PROT_READ,
+ live_p2m_frame_list_list,
+ P2M_FLL_ENTRIES);
+ if ( !live_p2m_frame_list )
+ {
+ ERROR("Couldn't map p2m_frame_list");
+ goto out;
+ }
+
+
+ /* Map all the frames of the pfn->mfn table. For migrate to succeed,
+ the guest must not change which frames are used for this purpose.
+       (it's not clear why it would want to change them, and we'll be OK
+       from a safety POV anyhow.) */
+
+ p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
+ live_p2m_frame_list,
+ P2M_FL_ENTRIES);
+ if ( !p2m )
+ {
+ ERROR("Couldn't map p2m table");
+ goto out;
+ }
+ live_p2m = p2m; /* So that translation macros will work */
+
+ /* Get a local copy of the live_P2M_frame_list */
+ if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
+ {
+ ERROR("Couldn't allocate p2m_frame_list array");
+ goto out;
+ }
+ memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
+
+ /* Canonicalise the pfn-to-mfn table frame-number list. */
+ for ( i = 0; i < p2m_size; i += fpp )
+ {
+ if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
+ {
+ ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
+ ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
+ (uint64_t)p2m_frame_list[i/fpp]);
+ goto out;
+ }
+ }
+
+ if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
+ {
+ ERROR("Could not get vcpu context");
+ goto out;
+ }
+
+ /*
+ * Write an extended-info structure to inform the restore code that
+ * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
+ * slow paths in the restore code.
+ */
+ if ( (pt_levels == 3) &&
+ (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
+ {
+ unsigned long signature = ~0UL;
+ uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8;
+ uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
+ char chunk_sig[] = "vcpu";
+ if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
+ !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
+ !write_exact(io_fd, &chunk_sig, 4) ||
+ !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) ||
+ !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
+ {
+ ERROR("write: extended info");
+ goto out;
+ }
+ }
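+    /*
+     * Editor's note (informative sketch derived from the writes above; not an
+     * authoritative format specification): the extended-info record is laid
+     * out on the wire as
+     *
+     *   unsigned long          ~0UL      signature; never a valid frame number
+     *   uint32_t               tot_sz    sizeof(vcpu_guest_context_t) + 8
+     *   char[4]                "vcpu"    chunk signature
+     *   uint32_t               chunk_sz  sizeof(vcpu_guest_context_t)
+     *   vcpu_guest_context_t   ctxt      VCPU0 context
+     *
+     * so the restore side can tell it apart from the p2m_frame_list that
+     * follows, whose first entry is never all-ones.
+     */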
+
+ if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
+ {
+ ERROR("write: p2m_frame_list");
+ goto out;
+ }
+
+ success = 1;
+
+ out:
+
+ if ( !success && p2m )
+ munmap(p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+
+ if ( live_p2m_frame_list_list )
+ munmap(live_p2m_frame_list_list, PAGE_SIZE);
+
+ if ( live_p2m_frame_list )
+ munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
+
+ if ( p2m_frame_list )
+ free(p2m_frame_list);
+
+ return success ? p2m : NULL;
+}
+
+
+
+int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+ int hvm, void *(*init_qemu_maps)(int, unsigned),
+ void (*qemu_flip_buffer)(int, int))
+{
+ xc_dominfo_t info;
+
+ int rc = 1, i, j, last_iter, iter = 0;
+ int live = (flags & XCFLAGS_LIVE);
+ int debug = (flags & XCFLAGS_DEBUG);
+ int race = 0, sent_last_iter, skip_this_iter;
+
+    /* The domain's shared-info frame number. */
+ unsigned long shared_info_frame;
+
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ /* A table containing the type of each PFN (/not/ MFN!). */
+ unsigned long *pfn_type = NULL;
+ unsigned long *pfn_batch = NULL;
+
+ /* A copy of one frame of guest memory. */
+ char page[PAGE_SIZE];
+
+ /* Live mapping of shared info structure */
+ shared_info_t *live_shinfo = NULL;
+
+ /* base of the region in which domain memory is mapped */
+ unsigned char *region_base = NULL;
+
+ /* power of 2 order of p2m_size */
+ int order_nr;
+
+ /* bitmap of pages:
+ - that should be sent this iteration (unless later marked as skip);
+ - to skip this iteration because already dirty;
+ - to fixup by sending at the end if not already resent; */
+ unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
+
+ xc_shadow_op_stats_t stats;
+
+ unsigned long needed_to_fix = 0;
+ unsigned long total_sent = 0;
+
+ uint64_t vcpumap = 1ULL;
+
+ /* HVM: a buffer for holding HVM context */
+ uint32_t hvm_buf_size = 0;
+ uint8_t *hvm_buf = NULL;
+
+ /* HVM: magic frames for ioreqs and xenstore comms. */
+ uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
+
+ /* If no explicit control parameters given, use defaults */
+ max_iters = max_iters ? : DEF_MAX_ITERS;
+ max_factor = max_factor ? : DEF_MAX_FACTOR;
+
+ initialize_mbit_rate();
+
+ if ( !get_platform_info(xc_handle, dom,
+ &max_mfn, &hvirt_start, &pt_levels) )
+ {
+ ERROR("Unable to get platform info.");
+ return 1;
+ }
+
+ if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
+ {
+ ERROR("Could not get domain info");
+ return 1;
+ }
+
+ shared_info_frame = info.shared_info_frame;
+
+ /* Map the shared info frame */
+ if ( !hvm )
+ {
+ live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ, shared_info_frame);
+ if ( !live_shinfo )
+ {
+ ERROR("Couldn't map live_shinfo");
+ goto out;
+ }
+ }
+
+ /* Get the size of the P2M table */
+ p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
+
+ /* Domain is still running at this point */
+ if ( live )
+ {
+ /* Live suspend. Enable log-dirty mode. */
+ if ( xc_shadow_control(xc_handle, dom,
+ XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+ NULL, 0, NULL, 0, NULL) < 0 )
+ {
+ ERROR("Couldn't enable shadow mode");
+ goto out;
+ }
+
+ if ( hvm )
+ {
+ /* Get qemu-dm logging dirty pages too */
+ void *seg = init_qemu_maps(dom, BITMAP_SIZE);
+ qemu_bitmaps[0] = seg;
+ qemu_bitmaps[1] = seg + BITMAP_SIZE;
+ qemu_active = 0;
+ qemu_non_active = 1;
+ }
+ }
+ else
+ {
+        /* This is a non-live suspend. Suspend the domain. */
+ if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
+ {
+ ERROR("Domain appears not to have suspended");
+ goto out;
+ }
+ }
+
+ last_iter = !live;
+
+ /* pretend we sent all the pages last iteration */
+ sent_last_iter = p2m_size;
+
+ /* calculate the power of 2 order of p2m_size, e.g.
+ 15->4 16->4 17->5 */
+ for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
+ continue;
+
+ /* Setup to_send / to_fix and to_skip bitmaps */
+ to_send = malloc(BITMAP_SIZE);
+ to_fix = calloc(1, BITMAP_SIZE);
+ to_skip = malloc(BITMAP_SIZE);
+
+ if ( !to_send || !to_fix || !to_skip )
+ {
+        ERROR("Couldn't allocate to_send/to_fix/to_skip arrays");
+ goto out;
+ }
+
+ memset(to_send, 0xff, BITMAP_SIZE);
+
+ if ( lock_pages(to_send, BITMAP_SIZE) )
+ {
+ ERROR("Unable to lock to_send");
+ return 1;
+ }
+
+    /* (to_fix is local only, so it does not need to be locked) */
+ if ( lock_pages(to_skip, BITMAP_SIZE) )
+ {
+ ERROR("Unable to lock to_skip");
+ return 1;
+ }
+
+ if ( hvm )
+ {
+ /* Need another buffer for HVM context */
+ hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
+ if ( hvm_buf_size == -1 )
+ {
+ ERROR("Couldn't get HVM context size from Xen");
+ goto out;
+ }
+ hvm_buf = malloc(hvm_buf_size);
+ if ( !hvm_buf )
+ {
+ ERROR("Couldn't allocate memory");
+ goto out;
+ }
+ }
+
+ analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
+
+ /* We want zeroed memory so use calloc rather than malloc. */
+ pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
+ pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
+ if ( (pfn_type == NULL) || (pfn_batch == NULL) )
+ {
+ ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
+ {
+ ERROR("Unable to lock");
+ goto out;
+ }
+
+ /* Setup the mfn_to_pfn table mapping */
+ if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
+ {
+ ERROR("Failed to map live M2P table");
+ goto out;
+ }
+
+ /* Start writing out the saved-domain record. */
+ if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
+ {
+ ERROR("write: p2m_size");
+ goto out;
+ }
+
+ if ( !hvm )
+ {
+ int err = 0;
+ unsigned long mfn;
+
+ /* Map the P2M table, and write the list of P2M frames */
+ live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom,
+ p2m_size, live_shinfo);
+ if ( live_p2m == NULL )
+ {
+ ERROR("Failed to map/save the p2m frame list");
+ goto out;
+ }
+
+ /*
+ * Quick belt and braces sanity check.
+ */
+
+ for ( i = 0; i < p2m_size; i++ )
+ {
+ mfn = live_p2m[i];
+ if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
+ {
+ DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
+ mfn, mfn_to_pfn(mfn));
+ err++;
+ }
+ }
+ DPRINTF("Had %d unexplained entries in p2m table\n", err);
+ }
+
+ print_stats(xc_handle, dom, 0, &stats, 0);
+
+ /* Now write out each data page, canonicalising page tables as we go... */
+ for ( ; ; )
+ {
+ unsigned int prev_pc, sent_this_iter, N, batch;
+
+ iter++;
+ sent_this_iter = 0;
+ skip_this_iter = 0;
+ prev_pc = 0;
+ N = 0;
+
+ DPRINTF("Saving memory pages: iter %d 0%%", iter);
+
+ while ( N < p2m_size )
+ {
+ unsigned int this_pc = (N * 100) / p2m_size;
+ int rc;
+
+ if ( (this_pc - prev_pc) >= 5 )
+ {
+ DPRINTF("\b\b\b\b%3d%%", this_pc);
+ prev_pc = this_pc;
+ }
+
+ if ( !last_iter )
+ {
+            /* Slightly wasteful to peek the whole array every time,
+               but this is fast enough for the moment. */
+ rc = xc_shadow_control(
+ xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip,
+ p2m_size, NULL, 0, NULL);
+ if ( rc != p2m_size )
+ {
+ ERROR("Error peeking shadow bitmap");
+ goto out;
+ }
+ }
+
+            /* Load pfn_type[] with the MFNs of all the pages we're doing in
+               this batch. */
+ for ( batch = 0;
+ (batch < MAX_BATCH_SIZE) && (N < p2m_size);
+ N++ )
+ {
+ int n = permute(N, p2m_size, order_nr);
+
+ if ( debug )
+ DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n",
+ iter, (unsigned long)n, hvm ? 0 : live_p2m[n],
+ test_bit(n, to_send),
+ hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF));
+
+ if ( !last_iter &&
+ test_bit(n, to_send) &&
+ test_bit(n, to_skip) )
+ skip_this_iter++; /* stats keeping */
+
+ if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
+ (test_bit(n, to_send) && last_iter) ||
+ (test_bit(n, to_fix) && last_iter)) )
+ continue;
+
+ /* Skip PFNs that aren't really there */
+ if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */
+ || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
+ && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ )
+ continue;
+
+                /*
+                ** We get here if:
+                ** 1. the page is marked to_send and hasn't been re-dirtied;
+                ** 2. this is the last iteration and it is marked to_send
+                **    (to_skip is ignored); or
+                ** 3. this is the last iteration and it is marked to_fix
+                **    (pages that still need fixup, e.g. network buffers).
+                */
+
+ pfn_batch[batch] = n;
+
+ /* Hypercall interfaces operate in PFNs for HVM guests
+ * and MFNs for PV guests */
+ if ( hvm )
+ pfn_type[batch] = n;
+ else
+ pfn_type[batch] = live_p2m[n];
+
+ if ( !is_mapped(pfn_type[batch]) )
+ {
+ /*
+                    ** not currently in pseudo-physical map -- set bit
+                    ** in to_fix since we must send this page in last_iter
+                    ** unless it's sent sooner anyhow, or it never enters
+                    ** pseudo-physical map (e.g. for ballooned-down domains)
+ */
+ set_bit(n, to_fix);
+ continue;
+ }
+
+ if ( last_iter &&
+ test_bit(n, to_fix) &&
+ !test_bit(n, to_send) )
+ {
+ needed_to_fix++;
+ DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
+ iter, n, pfn_type[batch]);
+ }
+
+ clear_bit(n, to_fix);
+
+ batch++;
+ }
+
+ if ( batch == 0 )
+ goto skip; /* vanishingly unlikely... */
+
+ region_base = xc_map_foreign_batch(
+ xc_handle, dom, PROT_READ, pfn_type, batch);
+ if ( region_base == NULL )
+ {
+ ERROR("map batch failed");
+ goto out;
+ }
+
+ if ( !hvm )
+ {
+ /* Get page types */
+ for ( j = 0; j < batch; j++ )
+ ((uint32_t *)pfn_type)[j] = pfn_type[j];
+ if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
+ (uint32_t *)pfn_type) )
+ {
+ ERROR("get_pfn_type_batch failed");
+ goto out;
+ }
+ for ( j = batch-1; j >= 0; j-- )
+ pfn_type[j] = ((uint32_t *)pfn_type)[j];
+
+ for ( j = 0; j < batch; j++ )
+ {
+
+ if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
+ XEN_DOMCTL_PFINFO_XTAB )
+ {
+ DPRINTF("type fail: page %i mfn %08lx\n",
+ j, pfn_type[j]);
+ continue;
+ }
+
+ if ( debug )
+ DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
+ " sum= %08lx\n",
+ iter,
+ (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
+ pfn_batch[j],
+ pfn_type[j],
+ mfn_to_pfn(pfn_type[j] &
+ ~XEN_DOMCTL_PFINFO_LTAB_MASK),
+ csum_page(region_base + (PAGE_SIZE*j)));
+
+ /* canonicalise mfn->pfn */
+ pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
+ pfn_batch[j];
+ }
+ }
+
+ if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
+ {
+ ERROR("Error when writing to state file (2) (errno %d)",
+ errno);
+ goto out;
+ }
+
+ if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) )
+ {
+ ERROR("Error when writing to state file (3) (errno %d)",
+ errno);
+ goto out;
+ }
+
+            /* Entering this loop, pfn_type[] holds PFNs (not MFNs). */
+ for ( j = 0; j < batch; j++ )
+ {
+ unsigned long pfn, pagetype;
+ void *spage = (char *)region_base + (PAGE_SIZE*j);
+
+ pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+ pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+ /* write out pages in batch */
+ if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
+ continue;
+
+ pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
+ (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
+ {
+ /* We have a pagetable page: need to rewrite it. */
+ race =
+ canonicalize_pagetable(pagetype, pfn, spage, page);
+
+ if ( race && !live )
+ {
+ ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
+ pagetype);
+ goto out;
+ }
+
+ if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
+ {
+ ERROR("Error when writing to state file (4)"
+ " (errno %d)", errno);
+ goto out;
+ }
+ }
+ else
+ {
+ /* We have a normal page: just write it directly. */
+ if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
+ PAGE_SIZE )
+ {
+ ERROR("Error when writing to state file (5)"
+ " (errno %d)", errno);
+ goto out;
+ }
+ }
+ } /* end of the write out for this batch */
+
+ sent_this_iter += batch;
+
+ munmap(region_base, batch*PAGE_SIZE);
+
+ } /* end of this while loop for this iteration */
+
+ skip:
+
+ total_sent += sent_this_iter;
+
+ DPRINTF("\r %d: sent %d, skipped %d, ",
+ iter, sent_this_iter, skip_this_iter );
+
+ if ( last_iter )
+ {
+ print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
+
+ DPRINTF("Total pages sent= %ld (%.2fx)\n",
+ total_sent, ((float)total_sent)/p2m_size );
+ DPRINTF("(of which %ld were fixups)\n", needed_to_fix );
+ }
+
+ if ( last_iter && debug )
+ {
+ int minusone = -1;
+ memset(to_send, 0xff, BITMAP_SIZE);
+ debug = 0;
+ DPRINTF("Entering debug resend-all mode\n");
+
+ /* send "-1" to put receiver into debug mode */
+ if ( !write_exact(io_fd, &minusone, sizeof(int)) )
+ {
+ ERROR("Error when writing to state file (6) (errno %d)",
+ errno);
+ goto out;
+ }
+
+ continue;
+ }
+
+ if ( last_iter )
+ break;
+
+ if ( live )
+ {
+ if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
+ (iter >= max_iters) ||
+ (sent_this_iter+skip_this_iter < 50) ||
+ (total_sent > p2m_size*max_factor) )
+ {
+ DPRINTF("Start last iteration\n");
+ last_iter = 1;
+
+ if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
+ {
+ ERROR("Domain appears not to have suspended");
+ goto out;
+ }
+
+ DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
+ }
+
+ if ( xc_shadow_control(xc_handle, dom,
+ XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
+ p2m_size, NULL, 0, &stats) != p2m_size )
+ {
+ ERROR("Error flushing shadow PT");
+ goto out;
+ }
+
+ if ( hvm )
+ {
+ /* Pull in the dirty bits from qemu-dm too */
+ if ( !last_iter )
+ {
+ qemu_active = qemu_non_active;
+ qemu_non_active = qemu_active ? 0 : 1;
+ qemu_flip_buffer(dom, qemu_active);
+ for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
+ {
+ to_send[j] |= qemu_bitmaps[qemu_non_active][j];
+ qemu_bitmaps[qemu_non_active][j] = 0;
+ }
+ }
+ else
+ {
+ for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
+ to_send[j] |= qemu_bitmaps[qemu_active][j];
+ }
+ }
+
+ sent_last_iter = sent_this_iter;
+
+ print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+
+ }
+ } /* end of infinite for loop */
+
+ DPRINTF("All memory is saved\n");
+
+ {
+ struct {
+ int minustwo;
+ int max_vcpu_id;
+ uint64_t vcpumap;
+ } chunk = { -2, info.max_vcpu_id };
+
+ if ( info.max_vcpu_id >= 64 )
+ {
+ ERROR("Too many VCPUS in guest!");
+ goto out;
+ }
+
+ for ( i = 1; i <= info.max_vcpu_id; i++ )
+ {
+ xc_vcpuinfo_t vinfo;
+ if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
+ vinfo.online )
+ vcpumap |= 1ULL << i;
+ }
+
+ chunk.vcpumap = vcpumap;
+ if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
+ {
+ ERROR("Error when writing to state file (errno %d)", errno);
+ goto out;
+ }
+ }
+
+ /* Zero terminate */
+ i = 0;
+ if ( !write_exact(io_fd, &i, sizeof(int)) )
+ {
+ ERROR("Error when writing to state file (6') (errno %d)", errno);
+ goto out;
+ }
+
+ if ( hvm )
+ {
+ uint32_t rec_size;
+
+ /* Save magic-page locations. */
+ memset(magic_pfns, 0, sizeof(magic_pfns));
+ xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
+ (unsigned long *)&magic_pfns[0]);
+ xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
+ (unsigned long *)&magic_pfns[1]);
+ xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
+ (unsigned long *)&magic_pfns[2]);
+ if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
+ {
+ ERROR("Error when writing to state file (7)");
+ goto out;
+ }
+
+ /* Get HVM context from Xen and save it too */
+ if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf,
+ hvm_buf_size)) == -1 )
+ {
+            ERROR("HVM: Could not get hvm buffer");
+ goto out;
+ }
+
+ if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
+ {
+            ERROR("Error writing HVM buffer size");
+ goto out;
+ }
+
+ if ( !write_exact(io_fd, hvm_buf, rec_size) )
+ {
+            ERROR("Failed to write HVM info");
+ goto out;
+ }
+
+ /* HVM guests are done now */
+ rc = 0;
+ goto out;
+ }
+
+ /* PV guests only from now on */
+
+    /* Send a list of all the PFNs that were not in the map at the end. */
+ {
+ unsigned int i,j;
+ unsigned long pfntab[1024];
+
+ for ( i = 0, j = 0; i < p2m_size; i++ )
+ {
+ if ( !is_mapped(live_p2m[i]) )
+ j++;
+ }
+
+ if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
+ {
+ ERROR("Error when writing to state file (6a) (errno %d)", errno);
+ goto out;
+ }
+
+ for ( i = 0, j = 0; i < p2m_size; )
+ {
+ if ( !is_mapped(live_p2m[i]) )
+ pfntab[j++] = i;
+
+ i++;
+ if ( (j == 1024) || (i == p2m_size) )
+ {
+ if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
+ {
+ ERROR("Error when writing to state file (6b) (errno %d)",
+ errno);
+ goto out;
+ }
+ j = 0;
+ }
+ }
+ }
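+    /*
+     * Editor's note (worked example, not part of the original change): the
+     * unmapped-PFN list above is written as a count followed by the PFNs
+     * themselves in chunks of at most 1024 unsigned longs; e.g. with 2500
+     * unmapped PFNs the inner write_exact() fires three times, for 1024,
+     * 1024 and 452 entries.
+     */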
+
+ if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
+ {
+ ERROR("Could not get vcpu context");
+ goto out;
+ }
+
+ /* Canonicalise the suspend-record frame number. */
+ if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
+ {
+ ERROR("Suspend record is not in range of pseudophys map");
+ goto out;
+ }
+
+ for ( i = 0; i <= info.max_vcpu_id; i++ )
+ {
+ if ( !(vcpumap & (1ULL << i)) )
+ continue;
+
+ if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
+ {
+ ERROR("No context for VCPU%d", i);
+ goto out;
+ }
+
+ /* Canonicalise each GDT frame number. */
+ for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
+ {
+ if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
+ {
+ ERROR("GDT frame is not in range of pseudophys map");
+ goto out;
+ }
+ }
+
+ /* Canonicalise the page table base pointer. */
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
+ {
+ ERROR("PT base is not in range of pseudophys map");
+ goto out;
+ }
+ ctxt.ctrlreg[3] =
+ xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
+
+ /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+ if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+ {
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
+ {
+ ERROR("PT base is not in range of pseudophys map");
+ goto out;
+ }
+ /* Least-significant bit means 'valid PFN'. */
+ ctxt.ctrlreg[1] = 1 |
+ xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
+ }
+
+ if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
+ {
+ ERROR("Error when writing to state file (1) (errno %d)", errno);
+ goto out;
+ }
+ }
+
+ /*
+ * Reset the MFN to be a known-invalid value. See map_frame_list_list().
+ */
+ memcpy(page, live_shinfo, PAGE_SIZE);
+ ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
+ if ( !write_exact(io_fd, page, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file (1) (errno %d)", errno);
+ goto out;
+ }
+
+ /* Success! */
+ rc = 0;
+
+ out:
+
+ if ( live )
+ {
+ if ( xc_shadow_control(xc_handle, dom,
+ XEN_DOMCTL_SHADOW_OP_OFF,
+ NULL, 0, NULL, 0, NULL) < 0 )
+ DPRINTF("Warning - couldn't disable shadow mode");
+ }
+
+ /* Flush last write and discard cache for file. */
+ discard_file_cache(io_fd, 1 /* flush */);
+
+ if ( live_shinfo )
+ munmap(live_shinfo, PAGE_SIZE);
+
+ if ( live_p2m )
+ munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+
+ if ( live_m2p )
+ munmap(live_m2p, M2P_SIZE(max_mfn));
+
+ free(pfn_type);
+ free(pfn_batch);
+ free(to_send);
+ free(to_fix);
+ free(to_skip);
+
+    DPRINTF("Save exit rc=%d\n", rc);
+
+ return !!rc;
+}
+
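+/*
+ * Editor's summary (informative sketch derived from the code above; not an
+ * authoritative description of the save format).  A PV stream produced by
+ * xc_domain_save() consists, in order, of:
+ *
+ *   unsigned long             p2m_size
+ *   extended-info record      (only for PAE guests with extended-CR3 support)
+ *   xen_pfn_t                 p2m_frame_list[]     (P2M_FL_SIZE bytes)
+ *   repeated page batches:    int batch; unsigned long pfn_type[batch]; pages
+ *   int                       -1  (optional: debug marker before a full resend)
+ *   { -2, max_vcpu_id, vcpumap }  (VCPU online-map chunk)
+ *   int                       0   (end of page data)
+ *   unsigned int count; unsigned long pfntab[]      (unmapped PFNs)
+ *   vcpu_guest_context_t      one per online VCPU
+ *   shared-info frame copy    (PAGE_SIZE bytes)
+ *
+ * An HVM stream omits the extended-info record and p2m_frame_list and, after
+ * the 0 terminator, instead carries the three magic PFNs, the HVM context
+ * size and the HVM context blob.
+ */
+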
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_hvm_build.c Fri Apr 13 11:14:26 2007 +0100
@@ -28,47 +28,6 @@ typedef union
vcpu_guest_context_x86_32_t c32;
vcpu_guest_context_t c;
} vcpu_guest_context_either_t;
-
-
-int xc_set_hvm_param(
- int handle, domid_t dom, int param, unsigned long value)
-{
- DECLARE_HYPERCALL;
- xen_hvm_param_t arg;
- int rc;
-
- hypercall.op = __HYPERVISOR_hvm_op;
- hypercall.arg[0] = HVMOP_set_param;
- hypercall.arg[1] = (unsigned long)&arg;
- arg.domid = dom;
- arg.index = param;
- arg.value = value;
- if ( lock_pages(&arg, sizeof(arg)) != 0 )
- return -1;
- rc = do_xen_hypercall(handle, &hypercall);
- unlock_pages(&arg, sizeof(arg));
- return rc;
-}
-
-int xc_get_hvm_param(
- int handle, domid_t dom, int param, unsigned long *value)
-{
- DECLARE_HYPERCALL;
- xen_hvm_param_t arg;
- int rc;
-
- hypercall.op = __HYPERVISOR_hvm_op;
- hypercall.arg[0] = HVMOP_get_param;
- hypercall.arg[1] = (unsigned long)&arg;
- arg.domid = dom;
- arg.index = param;
- if ( lock_pages(&arg, sizeof(arg)) != 0 )
- return -1;
- rc = do_xen_hypercall(handle, &hypercall);
- unlock_pages(&arg, sizeof(arg));
- *value = arg.value;
- return rc;
-}
static void build_e820map(void *e820_page, unsigned long long mem_size)
{
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Thu Apr 12 16:37:32 2007 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,755 +0,0 @@
-/******************************************************************************
- * xc_hvm_save.c
- *
- * Save the state of a running HVM guest.
- *
- * Copyright (c) 2003, K A Fraser.
- * Copyright (c) 2006 Intel Corperation
- * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include "xc_private.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-#include <xen/hvm/e820.h>
-#include <xen/hvm/params.h>
-
-/*
-** Default values for important tuning parameters. Can override by passing
-** non-zero replacement values to xc_hvm_save().
-**
-** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
-**
-*/
-#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
-#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */
-
-/* Shared-memory bitmaps for getting log-dirty bits from qemu */
-static unsigned long *qemu_bitmaps[2];
-static int qemu_active;
-static int qemu_non_active;
-
-/*
-** During (live) save/migrate, we maintain a number of bitmaps to track
-** which pages we have to send, to fixup, and to skip.
-*/
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-#define BITMAP_SIZE (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long))
-
-#define BITMAP_ENTRY(_nr,_bmap) \
- ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
-static inline int test_bit (int nr, volatile void * addr)
-{
- return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
-}
-
-static inline void clear_bit (int nr, volatile void * addr)
-{
- BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
-}
-
-static inline int permute( int i, int nr, int order_nr )
-{
- /* Need a simple permutation function so that we scan pages in a
- pseudo random order, enabling us to get a better estimate of
- the domain's page dirtying rate as we go (there are often
- contiguous ranges of pfns that have similar behaviour, and we
- want to mix them up. */
-
- /* e.g. nr->oder 15->4 16->4 17->5 */
- /* 512MB domain, 128k pages, order 17 */
-
- /*
- QPONMLKJIHGFEDCBA
- QPONMLKJIH
- GFEDCBA
- */
-
- /*
- QPONMLKJIHGFEDCBA
- EDCBA
- QPONM
- LKJIHGF
- */
-
- do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
- while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
-
- return i;
-}
-
-
-static uint64_t tv_to_us(struct timeval *new)
-{
- return (new->tv_sec * 1000000) + new->tv_usec;
-}
-
-static uint64_t llgettimeofday(void)
-{
- struct timeval now;
- gettimeofday(&now, NULL);
- return tv_to_us(&now);
-}
-
-static uint64_t tv_delta(struct timeval *new, struct timeval *old)
-{
- return (((new->tv_sec - old->tv_sec)*1000000) +
- (new->tv_usec - old->tv_usec));
-}
-
-
-#define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
-#define initialize_mbit_rate()
-
-static inline ssize_t write_exact(int fd, void *buf, size_t count)
-{
- return (write(fd, buf, count) == count);
-}
-
-static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
- xc_shadow_op_stats_t *stats, int print)
-{
- static struct timeval wall_last;
- static long long d0_cpu_last;
- static long long d1_cpu_last;
-
- struct timeval wall_now;
- long long wall_delta;
- long long d0_cpu_now, d0_cpu_delta;
- long long d1_cpu_now, d1_cpu_delta;
-
- gettimeofday(&wall_now, NULL);
-
- d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
- d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
-
- if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
- DPRINTF("ARRHHH!!\n");
-
- wall_delta = tv_delta(&wall_now,&wall_last)/1000;
- if ( wall_delta == 0 )
- wall_delta = 1;
-
- d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
- d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
-
- if ( print )
- DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
- "dirtied %dMb/s %" PRId32 " pages\n",
- wall_delta,
- (int)((d0_cpu_delta*100)/wall_delta),
- (int)((d1_cpu_delta*100)/wall_delta),
- (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
- (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
- stats->dirty_count);
-
- d0_cpu_last = d0_cpu_now;
- d1_cpu_last = d1_cpu_now;
- wall_last = wall_now;
-
- return 0;
-}
-
-static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size,
- unsigned long *arr, int runs)
-{
- long long start, now;
- xc_shadow_op_stats_t stats;
- int j;
-
- start = llgettimeofday();
-
- for ( j = 0; j < runs; j++ )
- {
- int i;
-
- xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
- arr, pfn_array_size, NULL, 0, NULL);
- DPRINTF("#Flush\n");
- for ( i = 0; i < 40; i++ )
- {
- usleep(50000);
- now = llgettimeofday();
- xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
- NULL, 0, NULL, 0, &stats);
- DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
- ((now-start)+500)/1000,
- stats.fault_count, stats.dirty_count);
- }
- }
-
- return -1;
-}
-
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
- int dom, xc_dominfo_t *info,
- vcpu_guest_context_t *ctxt)
-{
- int i = 0;
-
- if ( !(*suspend)(dom) )
- {
- ERROR("Suspend request failed");
- return -1;
- }
-
- retry:
-
- if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
- {
- ERROR("Could not get domain info");
- return -1;
- }
-
- if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
- ERROR("Could not get vcpu context");
-
- if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_suspend) )
- return 0; /* success */
-
- if ( info->paused )
- {
- /* Try unpausing domain, wait, and retest. */
- xc_domain_unpause( xc_handle, dom );
- ERROR("Domain was paused. Wait and re-test.");
- usleep(10000); /* 10ms */
- goto retry;
- }
-
- if ( ++i < 100 )
- {
- ERROR("Retry suspend domain.");
- usleep(10000); /* 10ms */
- goto retry;
- }
-
- ERROR("Unable to suspend domain.");
-
- return -1;
-}
-
-int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags, int (*suspend)(int),
- void *(*init_qemu_maps)(int, unsigned),
- void (*qemu_flip_buffer)(int, int))
-{
- xc_dominfo_t info;
-
- int rc = 1, i, j, last_iter, iter = 0;
- int live = !!(flags & XCFLAGS_LIVE);
- int debug = !!(flags & XCFLAGS_DEBUG);
- int sent_last_iter, skip_this_iter;
-
- /* The highest guest-physical frame number used by the current guest */
- unsigned long max_pfn;
-
- /* The size of an array big enough to contain all guest pfns */
- unsigned long pfn_array_size;
-
- /* Magic frames: ioreqs and xenstore comms. */
- uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
-
- /* A copy of the CPU context of the guest. */
- vcpu_guest_context_t ctxt;
-
- /* A table containg the PFNs (/not/ MFN!) to map. */
- xen_pfn_t *pfn_batch = NULL;
-
- /* A copy of hvm domain context buffer*/
- uint32_t hvm_buf_size;
- uint8_t *hvm_buf = NULL;
-
- /* base of the region in which domain memory is mapped */
- unsigned char *region_base = NULL;
-
- uint32_t rec_size, nr_vcpus;
-
- /* power of 2 order of pfn_array_size */
- int order_nr;
-
- /* bitmap of pages:
- - that should be sent this iteration (unless later marked as skip);
- - to skip this iteration because already dirty; */
- unsigned long *to_send = NULL, *to_skip = NULL;
-
- xc_shadow_op_stats_t stats;
-
- unsigned long total_sent = 0;
-
- uint64_t vcpumap = 1ULL;
-
- DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
- "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
- live, debug);
-
- /* If no explicit control parameters given, use defaults */
- max_iters = max_iters ? : DEF_MAX_ITERS;
- max_factor = max_factor ? : DEF_MAX_FACTOR;
-
- initialize_mbit_rate();
-
- if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
- {
- ERROR("HVM: Could not get domain info");
- return 1;
- }
- nr_vcpus = info.nr_online_vcpus;
-
- if ( mlock(&ctxt, sizeof(ctxt)) )
- {
- ERROR("HVM: Unable to mlock ctxt");
- return 1;
- }
-
- /* Only have to worry about vcpu 0 even for SMP */
- if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
- {
- ERROR("HVM: Could not get vcpu context");
- goto out;
- }
-
- DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n",
- info.max_memkb, info.nr_pages);
-
- if ( live )
- {
- /* Live suspend. Enable log-dirty mode. */
- if ( xc_shadow_control(xc_handle, dom,
- XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
- NULL, 0, NULL, 0, NULL) < 0 )
- {
- ERROR("Couldn't enable shadow mode");
- goto out;
- }
- }
- else
- {
- /* This is a non-live suspend. Suspend the domain .*/
- if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
- {
- ERROR("HVM Domain appears not to have suspended");
- goto out;
- }
- }
-
- last_iter = !live;
-
- max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
-
- DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, "
- "max_memkb=0x%lx, live=%d.\n",
- max_pfn, info.max_memkb, live);
-
- /* Size of any array that covers 0 ... max_pfn */
- pfn_array_size = max_pfn + 1;
- if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) )
- {
- ERROR("Error when writing to state file (1)");
- goto out;
- }
-
- /* pretend we sent all the pages last iteration */
- sent_last_iter = pfn_array_size;
-
- /* calculate the power of 2 order of pfn_array_size, e.g.
- 15->4 16->4 17->5 */
- for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
- continue;
-
- /* Setup to_send / to_fix and to_skip bitmaps */
- to_send = malloc(BITMAP_SIZE);
- to_skip = malloc(BITMAP_SIZE);
-
- if ( live )
- {
- /* Get qemu-dm logging dirty pages too */
- void *seg = init_qemu_maps(dom, BITMAP_SIZE);
- qemu_bitmaps[0] = seg;
- qemu_bitmaps[1] = seg + BITMAP_SIZE;
- qemu_active = 0;
- qemu_non_active = 1;
- }
-
- hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
- if ( hvm_buf_size == -1 )
- {
- ERROR("Couldn't get HVM context size from Xen");
- goto out;
- }
- hvm_buf = malloc(hvm_buf_size);
-
- if ( !to_send || !to_skip || !hvm_buf )
- {
- ERROR("Couldn't allocate memory");
- goto out;
- }
-
- memset(to_send, 0xff, BITMAP_SIZE);
-
- if ( lock_pages(to_send, BITMAP_SIZE) )
- {
- ERROR("Unable to lock to_send");
- return 1;
- }
-
- /* (to fix is local only) */
- if ( lock_pages(to_skip, BITMAP_SIZE) )
- {
- ERROR("Unable to lock to_skip");
- return 1;
- }
-
- analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0);
-
- /* We want zeroed memory so use calloc rather than malloc. */
- pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
- if ( pfn_batch == NULL )
- {
- ERROR("failed to alloc memory for pfn_batch array");
- errno = ENOMEM;
- goto out;
- }
-
- for ( ; ; )
- {
- unsigned int prev_pc, sent_this_iter, N, batch;
-
- iter++;
- sent_this_iter = 0;
- skip_this_iter = 0;
- prev_pc = 0;
- N=0;
-
- DPRINTF("Saving memory pages: iter %d 0%%", iter);
-
- while ( N < pfn_array_size )
- {
- unsigned int this_pc = (N * 100) / pfn_array_size;
- int rc;
-
- if ( (this_pc - prev_pc) >= 5 )
- {
- DPRINTF("\b\b\b\b%3d%%", this_pc);
- prev_pc = this_pc;
- }
-
- if ( !last_iter )
- {
- /* Slightly wasteful to peek the whole array evey time,
- but this is fast enough for the moment. */
- rc = xc_shadow_control(
- xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip,
- pfn_array_size, NULL, 0, NULL);
- if ( rc != pfn_array_size )
- {
- ERROR("Error peeking shadow bitmap");
- goto out;
- }
- }
-
- /* load pfn_batch[] with the mfn of all the pages we're doing in
- this batch. */
- for ( batch = 0;
- (batch < MAX_BATCH_SIZE) && (N < pfn_array_size);
- N++ )
- {
- int n = permute(N, pfn_array_size, order_nr);
-
- if ( 0 && debug )
- DPRINTF("%d pfn= %08lx %d \n",
- iter, (unsigned long)n, test_bit(n, to_send));
-
- if ( !last_iter &&
- test_bit(n, to_send) &&
- test_bit(n, to_skip) )
- skip_this_iter++; /* stats keeping */
-
- if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
- (test_bit(n, to_send) && last_iter)) )
- continue;
-
- /* Skip PFNs that aren't really there */
- if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */
- || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) &&
- n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ )
- continue;
-
- /*
- ** we get here if:
- ** 1. page is marked to_send & hasn't already been re-dirtied
- ** 2. (ignore to_skip in last iteration)
- */
-
- pfn_batch[batch] = n;
-
- batch++;
- }
-
- if ( batch == 0 )
- goto skip; /* vanishingly unlikely... */
-
- region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_READ, pfn_batch, batch);
- if ( region_base == 0 )
- {
- ERROR("map batch failed");
- goto out;
- }
-
- /* write num of pfns */
- if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
- {
- ERROR("Error when writing to state file (2)");
- goto out;
- }
-
- /* write all the pfns */
- if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) )
- {
- ERROR("Error when writing to state file (3)");
- goto out;
- }
-
- for ( j = 0; j < batch; j++ )
- {
- if ( pfn_batch[j] & XEN_DOMCTL_PFINFO_LTAB_MASK )
- continue;
- if ( ratewrite(io_fd, region_base + j*PAGE_SIZE,
- PAGE_SIZE) != PAGE_SIZE )
- {
- ERROR("ERROR when writing to state file (4)");
- goto out;
- }
- }
-
- sent_this_iter += batch;
-
- munmap(region_base, batch*PAGE_SIZE);
-
- } /* end of this while loop for this iteration */
-
- skip:
-
- total_sent += sent_this_iter;
-
- DPRINTF("\r %d: sent %d, skipped %d, ",
- iter, sent_this_iter, skip_this_iter );
-
- if ( last_iter )
- {
- print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
- DPRINTF("Total pages sent= %ld (%.2fx)\n",
- total_sent, ((float)total_sent)/pfn_array_size );
- }
-
- if ( last_iter && debug )
- {
- int minusone = -1;
- memset(to_send, 0xff, BITMAP_SIZE);
- debug = 0;
- DPRINTF("Entering debug resend-all mode\n");
-
- /* send "-1" to put receiver into debug mode */
- if ( !write_exact(io_fd, &minusone, sizeof(int)) )
- {
- ERROR("Error when writing to state file (6)");
- goto out;
- }
-
- continue;
- }
-
- if ( last_iter )
- break;
-
- if ( live )
- {
- if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
- (iter >= max_iters) ||
- (sent_this_iter+skip_this_iter < 50) ||
- (total_sent > pfn_array_size*max_factor) )
- {
- DPRINTF("Start last iteration for HVM domain\n");
- last_iter = 1;
-
- if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
- &ctxt))
- {
- ERROR("Domain appears not to have suspended");
- goto out;
- }
-
- DPRINTF("SUSPEND eip %08lx edx %08lx\n",
- (unsigned long)ctxt.user_regs.eip,
- (unsigned long)ctxt.user_regs.edx);
- }
-
- if ( xc_shadow_control(xc_handle, dom,
- XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
- pfn_array_size, NULL,
- 0, &stats) != pfn_array_size )
- {
- ERROR("Error flushing shadow PT");
- goto out;
- }
-
- /* Pull in the dirty bits from qemu too */
- if ( !last_iter )
- {
- qemu_active = qemu_non_active;
- qemu_non_active = qemu_active ? 0 : 1;
- qemu_flip_buffer(dom, qemu_active);
- for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
- {
- to_send[j] |= qemu_bitmaps[qemu_non_active][j];
- qemu_bitmaps[qemu_non_active][j] = 0;
- }
- }
- else
- {
- for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
- to_send[j] |= qemu_bitmaps[qemu_active][j];
- }
-
- sent_last_iter = sent_this_iter;
-
- print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
- }
- } /* end of while 1 */
-
-
- DPRINTF("All HVM memory is saved\n");
-
- {
- struct {
- int minustwo;
- int max_vcpu_id;
- uint64_t vcpumap;
- } chunk = { -2, info.max_vcpu_id };
-
- if (info.max_vcpu_id >= 64) {
- ERROR("Too many VCPUS in guest!");
- goto out;
- }
-
- for (i = 1; i <= info.max_vcpu_id; i++) {
- xc_vcpuinfo_t vinfo;
- if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
- vinfo.online)
- vcpumap |= 1ULL << i;
- }
-
- chunk.vcpumap = vcpumap;
- if(!write_exact(io_fd, &chunk, sizeof(chunk))) {
- ERROR("Error when writing to state file (errno %d)", errno);
- goto out;
- }
- }
-
- /* Zero terminate */
- i = 0;
- if ( !write_exact(io_fd, &i, sizeof(int)) )
- {
- ERROR("Error when writing to state file (6)");
- goto out;
- }
-
- /* Save magic-page locations. */
- memset(magic_pfns, 0, sizeof(magic_pfns));
- xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
- (unsigned long *)&magic_pfns[0]);
- xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
- (unsigned long *)&magic_pfns[1]);
- xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
- (unsigned long *)&magic_pfns[2]);
- if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
- {
- ERROR("Error when writing to state file (7)");
- goto out;
- }
-
- /* save vcpu/vmcs contexts */
- for ( i = 0; i < nr_vcpus; i++ )
- {
- if ( !(vcpumap & (1ULL << i)) )
- continue;
-
- if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
- {
- ERROR("HVM:Could not get vcpu context");
- goto out;
- }
-
- DPRINTF("write vcpu %d context.\n", i);
- if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
- {
- ERROR("write vcpu context failed!\n");
- goto out;
- }
- }
-
- if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf,
- hvm_buf_size)) == -1 )
- {
- ERROR("HVM:Could not get hvm buffer");
- goto out;
- }
-
- if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
- {
- ERROR("error write hvm buffer size");
- goto out;
- }
-
- if ( !write_exact(io_fd, hvm_buf, rec_size) )
- {
- ERROR("write HVM info failed!\n");
- goto out;
- }
-
- /* Success! */
- rc = 0;
-
- out:
-
- if ( live )
- {
- if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF,
- NULL, 0, NULL, 0, NULL) < 0 )
- DPRINTF("Warning - couldn't disable shadow mode");
- }
-
- free(hvm_buf);
- free(pfn_batch);
- free(to_send);
- free(to_skip);
-
- return !!rc;
-}
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Thu Apr 12 16:37:32 2007 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1414 +0,0 @@
-/******************************************************************************
- * xc_linux_save.c
- *
- * Save the state of a running Linux session.
- *
- * Copyright (c) 2003, K A Fraser.
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include "xc_private.h"
-#include "xc_dom.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-/*
-** Default values for important tuning parameters. Can override by passing
-** non-zero replacement values to xc_linux_save().
-**
-** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
-**
-*/
-#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
-#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */
-
-/* max mfn of the whole machine */
-static unsigned long max_mfn;
-
-/* virtual starting address of the hypervisor */
-static unsigned long hvirt_start;
-
-/* #levels of page tables used by the current guest */
-static unsigned int pt_levels;
-
-/* number of pfns this guest has (i.e. number of entries in the P2M) */
-static unsigned long p2m_size;
-
-/* Live mapping of the table mapping each PFN to its current MFN. */
-static xen_pfn_t *live_p2m = NULL;
-
-/* Live mapping of system MFN to PFN table. */
-static xen_pfn_t *live_m2p = NULL;
-static unsigned long m2p_mfn0;
-
-/* grep fodder: machine_to_phys */
-
-#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
-
-/*
- * Returns TRUE if the given machine frame number has a unique mapping
- * in the guest's pseudophysical map.
- */
-#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
- (((_mfn) < (max_mfn)) && \
- ((mfn_to_pfn(_mfn) < (p2m_size)) && \
- (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
-
-/* Returns TRUE if MFN is successfully converted to a PFN. */
-#define translate_mfn_to_pfn(_pmfn) \
-({ \
- unsigned long mfn = *(_pmfn); \
- int _res = 1; \
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \
- _res = 0; \
- else \
- *(_pmfn) = mfn_to_pfn(mfn); \
- _res; \
-})
-
-/*
-** During (live) save/migrate, we maintain a number of bitmaps to track
-** which pages we have to send, to fixup, and to skip.
-*/
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITMAP_SIZE ((p2m_size + BITS_PER_LONG - 1) / 8)
-
-#define BITMAP_ENTRY(_nr,_bmap) \
- ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
-static inline int test_bit (int nr, volatile void * addr)
-{
- return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
-}
-
-static inline void clear_bit (int nr, volatile void * addr)
-{
- BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
-}
-
-static inline void set_bit ( int nr, volatile void * addr)
-{
- BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
-}
-
-/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
-static inline unsigned int hweight32(unsigned int w)
-{
- unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
- res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
- res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
- res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
- return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
-}
-
-static inline int count_bits ( int nr, volatile void *addr)
-{
- int i, count = 0;
- volatile unsigned long *p = (volatile unsigned long *)addr;
- /* We know that the array is padded to unsigned long. */
- for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
- count += hweight32(*p);
- return count;
-}
-
-static inline int permute( int i, int nr, int order_nr )
-{
- /* Need a simple permutation function so that we scan pages in a
- pseudo random order, enabling us to get a better estimate of
- the domain's page dirtying rate as we go (there are often
- contiguous ranges of pfns that have similar behaviour, and we
- want to mix them up. */
-
- /* e.g. nr->oder 15->4 16->4 17->5 */
- /* 512MB domain, 128k pages, order 17 */
-
- /*
- QPONMLKJIHGFEDCBA
- QPONMLKJIH
- GFEDCBA
- */
-
- /*
- QPONMLKJIHGFEDCBA
- EDCBA
- QPONM
- LKJIHGF
- */
-
- do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
- while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
-
- return i;
-}
-
-static uint64_t tv_to_us(struct timeval *new)
-{
- return (new->tv_sec * 1000000) + new->tv_usec;
-}
-
-static uint64_t llgettimeofday(void)
-{
- struct timeval now;
- gettimeofday(&now, NULL);
- return tv_to_us(&now);
-}
-
-static uint64_t tv_delta(struct timeval *new, struct timeval *old)
-{
- return (((new->tv_sec - old->tv_sec)*1000000) +
- (new->tv_usec - old->tv_usec));
-}
-
-static int noncached_write(int fd, int live, void *buffer, int len)
-{
- static int write_count = 0;
-
- int rc = write(fd,buffer,len);
-
- write_count += len;
- if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
- {
- /* Time to discard cache - dont care if this fails */
- discard_file_cache(fd, 0 /* no flush */);
- write_count = 0;
- }
-
- return rc;
-}
-
-#ifdef ADAPTIVE_SAVE
-
-/*
-** We control the rate at which we transmit (or save) to minimize impact
-** on running domains (including the target if we're doing live migrate).
-*/
-
-#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */
-#define START_MBIT_RATE 100 /* initial transmit rate for migrate */
-
-/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
-#define RATE_TO_BTU 781250
-
-/* Amount in bytes we allow ourselves to send in a burst */
-#define BURST_BUDGET (100*1024)
-
-/* We keep track of the current and previous transmission rate */
-static int mbit_rate, ombit_rate = 0;
-
-/* Have we reached the maximum transmission rate? */
-#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
-
-static inline void initialize_mbit_rate()
-{
- mbit_rate = START_MBIT_RATE;
-}
-
-static int ratewrite(int io_fd, int live, void *buf, int n)
-{
- static int budget = 0;
- static int burst_time_us = -1;
- static struct timeval last_put = { 0 };
- struct timeval now;
- struct timespec delay;
- long long delta;
-
- if ( START_MBIT_RATE == 0 )
- return noncached_write(io_fd, live, buf, n);
-
- budget -= n;
- if ( budget < 0 )
- {
- if ( mbit_rate != ombit_rate )
- {
- burst_time_us = RATE_TO_BTU / mbit_rate;
- ombit_rate = mbit_rate;
- DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
- mbit_rate, BURST_BUDGET, burst_time_us);
- }
- if ( last_put.tv_sec == 0 )
- {
- budget += BURST_BUDGET;
- gettimeofday(&last_put, NULL);
- }
- else
- {
- while ( budget < 0 )
- {
- gettimeofday(&now, NULL);
- delta = tv_delta(&now, &last_put);
- while ( delta > burst_time_us )
- {
- budget += BURST_BUDGET;
- last_put.tv_usec += burst_time_us;
-                    if ( last_put.tv_usec > 1000000 )
- {
- last_put.tv_usec -= 1000000;
- last_put.tv_sec++;
- }
- delta -= burst_time_us;
- }
- if ( budget > 0 )
- break;
- delay.tv_sec = 0;
- delay.tv_nsec = 1000 * (burst_time_us - delta);
- while ( delay.tv_nsec > 0 )
- if ( nanosleep(&delay, &delay) == 0 )
- break;
- }
- }
- }
- return noncached_write(io_fd, live, buf, n);
-}
-
-#else /* ! ADAPTIVE SAVE */
-
-#define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
-#define initialize_mbit_rate()
-
-#endif
-
-static inline ssize_t write_exact(int fd, void *buf, size_t count)
-{
- return (write(fd, buf, count) == count);
-}
-
-static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
- xc_shadow_op_stats_t *stats, int print)
-{
- static struct timeval wall_last;
- static long long d0_cpu_last;
- static long long d1_cpu_last;
-
- struct timeval wall_now;
- long long wall_delta;
- long long d0_cpu_now, d0_cpu_delta;
- long long d1_cpu_now, d1_cpu_delta;
-
- gettimeofday(&wall_now, NULL);
-
- d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
- d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
-
- if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
- DPRINTF("ARRHHH!!\n");
-
- wall_delta = tv_delta(&wall_now,&wall_last)/1000;
- if ( wall_delta == 0 )
- wall_delta = 1;
-
- d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
- d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
-
- if ( print )
- DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
- "dirtied %dMb/s %" PRId32 " pages\n",
- wall_delta,
- (int)((d0_cpu_delta*100)/wall_delta),
- (int)((d1_cpu_delta*100)/wall_delta),
- (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
- (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
- stats->dirty_count);
-
-#ifdef ADAPTIVE_SAVE
- if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
- {
- mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
- + 50;
- if ( mbit_rate > MAX_MBIT_RATE )
- mbit_rate = MAX_MBIT_RATE;
- }
-#endif
-
- d0_cpu_last = d0_cpu_now;
- d1_cpu_last = d1_cpu_now;
- wall_last = wall_now;
-
- return 0;
-}
-
-
-static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
- unsigned long *arr, int runs)
-{
- long long start, now;
- xc_shadow_op_stats_t stats;
- int j;
-
- start = llgettimeofday();
-
- for ( j = 0; j < runs; j++ )
- {
- int i;
-
- xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
- arr, p2m_size, NULL, 0, NULL);
- DPRINTF("#Flush\n");
- for ( i = 0; i < 40; i++ )
- {
- usleep(50000);
- now = llgettimeofday();
- xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
- NULL, 0, NULL, 0, &stats);
- DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
- ((now-start)+500)/1000,
- stats.fault_count, stats.dirty_count);
- }
- }
-
- return -1;
-}
-
-
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
- int dom, xc_dominfo_t *info,
- vcpu_guest_context_t *ctxt)
-{
- int i = 0;
-
- if ( !(*suspend)(dom) )
- {
- ERROR("Suspend request failed");
- return -1;
- }
-
- retry:
-
- if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
- {
- ERROR("Could not get domain info");
- return -1;
- }
-
- if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
- ERROR("Could not get vcpu context");
-
-
- if ( info->dying )
- {
- ERROR("domain is dying");
- return -1;
- }
-
- if ( info->crashed )
- {
- ERROR("domain has crashed");
- return -1;
- }
-
- if ( info->shutdown )
- {
- switch ( info->shutdown_reason )
- {
- case SHUTDOWN_poweroff:
- case SHUTDOWN_reboot:
- ERROR("domain has shut down");
- return -1;
- case SHUTDOWN_suspend:
- return 0;
- case SHUTDOWN_crash:
- ERROR("domain has crashed");
- return -1;
- }
- }
-
- if ( info->paused )
- {
- /* Try unpausing domain, wait, and retest. */
- xc_domain_unpause( xc_handle, dom );
- ERROR("Domain was paused. Wait and re-test.");
- usleep(10000); /* 10ms */
- goto retry;
- }
-
- if ( ++i < 100 )
- {
- ERROR("Retry suspend domain");
- usleep(10000); /* 10ms */
- goto retry;
- }
-
- ERROR("Unable to suspend domain.");
-
- return -1;
-}
-
-/*
-** Map the top-level page of MFNs from the guest. The guest might not have
-** finished resuming from a previous restore operation, so we wait a while for
-** it to update the MFN to a reasonable value.
-*/
-static void *map_frame_list_list(int xc_handle, uint32_t dom,
- shared_info_t *shinfo)
-{
- int count = 100;
- void *p;
-
- while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
- usleep(10000);
-
- if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
- {
- ERROR("Timed out waiting for frame list updated.");
- return NULL;
- }
-
- p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
- shinfo->arch.pfn_to_mfn_frame_list_list);
- if ( p == NULL )
- ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
-
- return p;
-}
-
-/*
-** During transfer (or in the state file), all page-table pages must be
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-**
-** This function performs the appropriate conversion, taking into account
-** which entries do not require canonicalization (in particular, those
-** entries which map the virtual address reserved for the hypervisor).
-*/
-static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
- const void *spage, void *dpage)
-{
-
- int i, pte_last, xen_start, xen_end, race = 0;
- uint64_t pte;
-
- /*
- ** We need to determine which entries in this page table hold
- ** reserved hypervisor mappings. This depends on the current
- ** page table type as well as the number of paging levels.
- */
- xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
-
- if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
- xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
-
- if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
- xen_start = L3_PAGETABLE_ENTRIES_PAE;
-
- /*
- ** in PAE only the L2 mapping the top 1GB contains Xen mappings.
- ** We can spot this by looking for the guest linear mapping which
- ** Xen always ensures is present in that L2. Guests must ensure
- ** that this check will fail for other L2s.
- */
- if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
- {
- int hstart;
- uint64_t he;
-
- hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
- he = ((const uint64_t *) spage)[hstart];
-
- if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
- {
- /* hvirt starts with xen stuff... */
- xen_start = hstart;
- }
- else if ( hvirt_start != 0xf5800000 )
- {
- /* old L2s from before hole was shrunk... */
- hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
- he = ((const uint64_t *) spage)[hstart];
- if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
- xen_start = hstart;
- }
- }
-
- if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
- {
- /*
- ** XXX SMH: should compute these from hvirt_start (which we have)
- ** and hvirt_end (which we don't)
- */
- xen_start = 256;
- xen_end = 272;
- }
-
- /* Now iterate through the page table, canonicalizing each PTE */
- for (i = 0; i < pte_last; i++ )
- {
- unsigned long pfn, mfn;
-
- if ( pt_levels == 2 )
- pte = ((const uint32_t*)spage)[i];
- else
- pte = ((const uint64_t*)spage)[i];
-
- if ( (i >= xen_start) && (i < xen_end) )
- pte = 0;
-
- if ( pte & _PAGE_PRESENT )
- {
- mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
- {
- /* This will happen if the type info is stale which
- is quite feasible under live migration */
- pfn = 0; /* zap it - we'll retransmit this page later */
- race = 1; /* inform the caller of race; fatal if !live */
- }
- else
- pfn = mfn_to_pfn(mfn);
-
- pte &= ~MADDR_MASK_X86;
- pte |= (uint64_t)pfn << PAGE_SHIFT;
-
- /*
- * PAE guest L3Es can contain these flags when running on
- * a 64bit hypervisor. We zap these here to avoid any
- * surprise at restore time...
- */
- if ( (pt_levels == 3) &&
- (type == XEN_DOMCTL_PFINFO_L3TAB) &&
- (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
- pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
- }
-
- if ( pt_levels == 2 )
- ((uint32_t*)dpage)[i] = pte;
- else
- ((uint64_t*)dpage)[i] = pte;
- }
-
- return race;
-}
-
-static xen_pfn_t *xc_map_m2p(int xc_handle,
- unsigned long max_mfn,
- int prot)
-{
- struct xen_machphys_mfn_list xmml;
- privcmd_mmap_entry_t *entries;
- unsigned long m2p_chunks, m2p_size;
- xen_pfn_t *m2p;
- xen_pfn_t *extent_start;
- int i, rc;
-
- m2p_size = M2P_SIZE(max_mfn);
- m2p_chunks = M2P_CHUNKS(max_mfn);
-
- xmml.max_extents = m2p_chunks;
- if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
- {
- ERROR("failed to allocate space for m2p mfns");
- return NULL;
- }
- set_xen_guest_handle(xmml.extent_start, extent_start);
-
- if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
- (xmml.nr_extents != m2p_chunks) )
- {
- ERROR("xc_get_m2p_mfns");
- return NULL;
- }
-
- if ( (m2p = mmap(NULL, m2p_size, prot,
- MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
- {
- ERROR("failed to mmap m2p");
- return NULL;
- }
-
- if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
- {
- ERROR("failed to allocate space for mmap entries");
- return NULL;
- }
-
- for ( i = 0; i < m2p_chunks; i++ )
- {
- entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
- entries[i].mfn = extent_start[i];
- entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
- }
-
- if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
- entries, m2p_chunks)) < 0 )
- {
- ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
- return NULL;
- }
-
- m2p_mfn0 = entries[0].mfn;
-
- free(extent_start);
- free(entries);
-
- return m2p;
-}
-
-int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags, int (*suspend)(int))
-{
- xc_dominfo_t info;
-
- int rc = 1, i, j, last_iter, iter = 0;
- int live = (flags & XCFLAGS_LIVE);
- int debug = (flags & XCFLAGS_DEBUG);
- int race = 0, sent_last_iter, skip_this_iter;
-
- /* The new domain's shared-info frame number. */
- unsigned long shared_info_frame;
-
- /* A copy of the CPU context of the guest. */
- vcpu_guest_context_t ctxt;
-
- /* A table containing the type of each PFN (/not/ MFN!). */
- unsigned long *pfn_type = NULL;
- unsigned long *pfn_batch = NULL;
-
- /* A temporary mapping, and a copy, of one frame of guest memory. */
- char page[PAGE_SIZE];
-
- /* Double and single indirect references to the live P2M table */
- xen_pfn_t *live_p2m_frame_list_list = NULL;
- xen_pfn_t *live_p2m_frame_list = NULL;
-
- /* A copy of the pfn-to-mfn table frame list. */
- xen_pfn_t *p2m_frame_list = NULL;
-
- /* Live mapping of shared info structure */
- shared_info_t *live_shinfo = NULL;
-
- /* base of the region in which domain memory is mapped */
- unsigned char *region_base = NULL;
-
- /* power of 2 order of p2m_size */
- int order_nr;
-
- /* bitmap of pages:
- - that should be sent this iteration (unless later marked as skip);
- - to skip this iteration because already dirty;
- - to fixup by sending at the end if not already resent; */
- unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
-
- xc_shadow_op_stats_t stats;
-
- unsigned long needed_to_fix = 0;
- unsigned long total_sent = 0;
-
- uint64_t vcpumap = 1ULL;
-
- /* If no explicit control parameters given, use defaults */
- max_iters = max_iters ? : DEF_MAX_ITERS;
- max_factor = max_factor ? : DEF_MAX_FACTOR;
-
- initialize_mbit_rate();
-
- if ( !get_platform_info(xc_handle, dom,
- &max_mfn, &hvirt_start, &pt_levels) )
- {
- ERROR("Unable to get platform info.");
- return 1;
- }
-
- if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
- {
- ERROR("Could not get domain info");
- return 1;
- }
-
- if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
- {
- ERROR("Could not get vcpu context");
- goto out;
- }
- shared_info_frame = info.shared_info_frame;
-
- /* Map the shared info frame */
- if ( !(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ, shared_info_frame)) )
- {
- ERROR("Couldn't map live_shinfo");
- goto out;
- }
-
- p2m_size = live_shinfo->arch.max_pfn;
-
- live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
- live_shinfo);
- if ( !live_p2m_frame_list_list )
- goto out;
-
- live_p2m_frame_list =
- xc_map_foreign_batch(xc_handle, dom, PROT_READ,
- live_p2m_frame_list_list,
- P2M_FLL_ENTRIES);
- if ( !live_p2m_frame_list )
- {
- ERROR("Couldn't map p2m_frame_list");
- goto out;
- }
-
- /* Map all the frames of the pfn->mfn table. For migrate to succeed,
- the guest must not change which frames are used for this purpose.
- (it's not clear why it would want to change them, and we'll be OK
- from a safety POV anyhow.) */
-
- live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
- live_p2m_frame_list,
- P2M_FL_ENTRIES);
- if ( !live_p2m )
- {
- ERROR("Couldn't map p2m table");
- goto out;
- }
-
- /* Setup the mfn_to_pfn table mapping */
- if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
- {
- ERROR("Failed to map live M2P table");
- goto out;
- }
-
-
- /* Get a local copy of the live_P2M_frame_list */
- if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
- {
- ERROR("Couldn't allocate p2m_frame_list array");
- goto out;
- }
- memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
-
- /* Canonicalise the pfn-to-mfn table frame-number list. */
- for ( i = 0; i < p2m_size; i += fpp )
- {
- if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
- {
- ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
- ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
- (uint64_t)p2m_frame_list[i/fpp]);
- goto out;
- }
- }
-
- /* Domain is still running at this point */
- if ( live )
- {
- /* Live suspend. Enable log-dirty mode. */
- if ( xc_shadow_control(xc_handle, dom,
- XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
- NULL, 0, NULL, 0, NULL) < 0 )
- {
- ERROR("Couldn't enable shadow mode");
- goto out;
- }
- }
- else
- {
- /* This is a non-live suspend. Suspend the domain .*/
- if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
- {
- ERROR("Domain appears not to have suspended");
- goto out;
- }
- }
-
- last_iter = !live;
-
- /* pretend we sent all the pages last iteration */
- sent_last_iter = p2m_size;
-
- /* calculate the power of 2 order of p2m_size, e.g.
- 15->4 16->4 17->5 */
- for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
- continue;
-
- /* Setup to_send / to_fix and to_skip bitmaps */
- to_send = malloc(BITMAP_SIZE);
- to_fix = calloc(1, BITMAP_SIZE);
- to_skip = malloc(BITMAP_SIZE);
-
- if ( !to_send || !to_fix || !to_skip )
- {
- ERROR("Couldn't allocate to_send array");
- goto out;
- }
-
- memset(to_send, 0xff, BITMAP_SIZE);
-
- if ( lock_pages(to_send, BITMAP_SIZE) )
- {
- ERROR("Unable to lock to_send");
- return 1;
- }
-
- /* (to fix is local only) */
- if ( lock_pages(to_skip, BITMAP_SIZE) )
- {
- ERROR("Unable to lock to_skip");
- return 1;
- }
-
- analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
-
- /* We want zeroed memory so use calloc rather than malloc. */
- pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
- pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
- if ( (pfn_type == NULL) || (pfn_batch == NULL) )
- {
- ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
- errno = ENOMEM;
- goto out;
- }
-
- if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
- {
- ERROR("Unable to lock");
- goto out;
- }
-
- /*
- * Quick belt and braces sanity check.
- */
- {
- int err=0;
- unsigned long mfn;
- for ( i = 0; i < p2m_size; i++ )
- {
- mfn = live_p2m[i];
- if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
- {
- DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
- mfn, mfn_to_pfn(mfn));
- err++;
- }
- }
- DPRINTF("Had %d unexplained entries in p2m table\n", err);
- }
-
- /* Start writing out the saved-domain record. */
- if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
- {
- ERROR("write: p2m_size");
- goto out;
- }
-
- /*
- * Write an extended-info structure to inform the restore code that
- * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
- * slow paths in the restore code.
- */
- if ( (pt_levels == 3) &&
- (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
- {
- unsigned long signature = ~0UL;
- uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8;
- uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
- char chunk_sig[] = "vcpu";
- if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
- !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
- !write_exact(io_fd, &chunk_sig, 4) ||
- !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) ||
- !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
- {
- ERROR("write: extended info");
- goto out;
- }
- }
-
- if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
- {
- ERROR("write: p2m_frame_list");
- goto out;
- }
-
- print_stats(xc_handle, dom, 0, &stats, 0);
-
- /* Now write out each data page, canonicalising page tables as we go... */
- for ( ; ; )
- {
- unsigned int prev_pc, sent_this_iter, N, batch;
-
- iter++;
- sent_this_iter = 0;
- skip_this_iter = 0;
- prev_pc = 0;
- N = 0;
-
- DPRINTF("Saving memory pages: iter %d 0%%", iter);
-
- while ( N < p2m_size )
- {
- unsigned int this_pc = (N * 100) / p2m_size;
- int rc;
-
- if ( (this_pc - prev_pc) >= 5 )
- {
- DPRINTF("\b\b\b\b%3d%%", this_pc);
- prev_pc = this_pc;
- }
-
- if ( !last_iter )
- {
- /* Slightly wasteful to peek the whole array every time,
- but this is fast enough for the moment. */
- rc = xc_shadow_control(
- xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip,
- p2m_size, NULL, 0, NULL);
- if ( rc != p2m_size )
- {
- ERROR("Error peeking shadow bitmap");
- goto out;
- }
- }
-
- /* load pfn_type[] with the mfn of all the pages we're doing in
- this batch. */
- for ( batch = 0;
- (batch < MAX_BATCH_SIZE) && (N < p2m_size);
- N++ )
- {
- int n = permute(N, p2m_size, order_nr);
-
- if ( debug )
- DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n",
- iter, (unsigned long)n, live_p2m[n],
- test_bit(n, to_send),
- mfn_to_pfn(live_p2m[n]&0xFFFFF));
-
- if ( !last_iter &&
- test_bit(n, to_send) &&
- test_bit(n, to_skip) )
- skip_this_iter++; /* stats keeping */
-
- if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
- (test_bit(n, to_send) && last_iter) ||
- (test_bit(n, to_fix) && last_iter)) )
- continue;
-
- /*
- ** we get here if:
- ** 1. page is marked to_send & hasn't already been re-dirtied
- ** 2. (ignore to_skip in last iteration)
- ** 3. add in pages that still need fixup (net bufs)
- */
-
- pfn_batch[batch] = n;
- pfn_type[batch] = live_p2m[n];
-
- if ( !is_mapped(pfn_type[batch]) )
- {
- /*
- ** not currently in pseudo-physical map -- set bit
- ** in to_fix since we must send this page in last_iter
- ** unless it's sent sooner anyhow, or it never enters
- ** pseudo-physical map (e.g. for ballooned down domains)
- */
- set_bit(n, to_fix);
- continue;
- }
-
- if ( last_iter &&
- test_bit(n, to_fix) &&
- !test_bit(n, to_send) )
- {
- needed_to_fix++;
- DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
- iter, n, pfn_type[batch]);
- }
-
- clear_bit(n, to_fix);
-
- batch++;
- }
-
- if ( batch == 0 )
- goto skip; /* vanishingly unlikely... */
-
- region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_READ, pfn_type, batch);
- if ( region_base == NULL )
- {
- ERROR("map batch failed");
- goto out;
- }
-
- for ( j = 0; j < batch; j++ )
- ((uint32_t *)pfn_type)[j] = pfn_type[j];
- if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
- (uint32_t *)pfn_type) )
- {
- ERROR("get_pfn_type_batch failed");
- goto out;
- }
- for ( j = batch-1; j >= 0; j-- )
- pfn_type[j] = ((uint32_t *)pfn_type)[j];
-
- for ( j = 0; j < batch; j++ )
- {
-
- if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
- XEN_DOMCTL_PFINFO_XTAB )
- {
- DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]);
- continue;
- }
-
- if ( debug )
- DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
- " sum= %08lx\n",
- iter,
- (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
- pfn_batch[j],
- pfn_type[j],
- mfn_to_pfn(pfn_type[j] &
- ~XEN_DOMCTL_PFINFO_LTAB_MASK),
- csum_page(region_base + (PAGE_SIZE*j)));
-
- /* canonicalise mfn->pfn */
- pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
- pfn_batch[j];
- }
-
- if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
- {
- ERROR("Error when writing to state file (2) (errno %d)",
- errno);
- goto out;
- }
-
- if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*j) )
- {
- ERROR("Error when writing to state file (3) (errno %d)",
- errno);
- goto out;
- }
-
- /* entering this loop, pfn_type is now in pfns (Not mfns) */
- for ( j = 0; j < batch; j++ )
- {
- unsigned long pfn, pagetype;
- void *spage = (char *)region_base + (PAGE_SIZE*j);
-
- pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
- pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-
- /* write out pages in batch */
- if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
- continue;
-
- pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
- if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
- (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
- {
- /* We have a pagetable page: need to rewrite it. */
- race =
- canonicalize_pagetable(pagetype, pfn, spage, page);
-
- if ( race && !live )
- {
- ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
- pagetype);
- goto out;
- }
-
- if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
- {
- ERROR("Error when writing to state file (4)"
- " (errno %d)", errno);
- goto out;
- }
- }
- else
- {
- /* We have a normal page: just write it directly. */
- if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
- PAGE_SIZE )
- {
- ERROR("Error when writing to state file (5)"
- " (errno %d)", errno);
- goto out;
- }
- }
- } /* end of the write out for this batch */
-
- sent_this_iter += batch;
-
- munmap(region_base, batch*PAGE_SIZE);
-
- } /* end of this while loop for this iteration */
-
- skip:
-
- total_sent += sent_this_iter;
-
- DPRINTF("\r %d: sent %d, skipped %d, ",
- iter, sent_this_iter, skip_this_iter );
-
- if ( last_iter )
- {
- print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
-
- DPRINTF("Total pages sent= %ld (%.2fx)\n",
- total_sent, ((float)total_sent)/p2m_size );
- DPRINTF("(of which %ld were fixups)\n", needed_to_fix );
- }
-
- if ( last_iter && debug )
- {
- int minusone = -1;
- memset(to_send, 0xff, BITMAP_SIZE);
- debug = 0;
- DPRINTF("Entering debug resend-all mode\n");
-
- /* send "-1" to put receiver into debug mode */
- if ( !write_exact(io_fd, &minusone, sizeof(int)) )
- {
- ERROR("Error when writing to state file (6) (errno %d)",
- errno);
- goto out;
- }
-
- continue;
- }
-
- if ( last_iter )
- break;
-
- if ( live )
- {
- if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
- (iter >= max_iters) ||
- (sent_this_iter+skip_this_iter < 50) ||
- (total_sent > p2m_size*max_factor) )
- {
- DPRINTF("Start last iteration\n");
- last_iter = 1;
-
- if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
- &ctxt) )
- {
- ERROR("Domain appears not to have suspended");
- goto out;
- }
-
- DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
- info.shared_info_frame,
- (unsigned long)ctxt.user_regs.eip,
- (unsigned long)ctxt.user_regs.edx);
- }
-
- if ( xc_shadow_control(xc_handle, dom,
- XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
- p2m_size, NULL, 0, &stats) != p2m_size )
- {
- ERROR("Error flushing shadow PT");
- goto out;
- }
-
- sent_last_iter = sent_this_iter;
-
- print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
-
- }
- } /* end of infinite for loop */
-
- DPRINTF("All memory is saved\n");
-
- {
- struct {
- int minustwo;
- int max_vcpu_id;
- uint64_t vcpumap;
- } chunk = { -2, info.max_vcpu_id };
-
- if ( info.max_vcpu_id >= 64 )
- {
- ERROR("Too many VCPUS in guest!");
- goto out;
- }
-
- for ( i = 1; i <= info.max_vcpu_id; i++ )
- {
- xc_vcpuinfo_t vinfo;
- if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
- vinfo.online )
- vcpumap |= 1ULL << i;
- }
-
- chunk.vcpumap = vcpumap;
- if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
- {
- ERROR("Error when writing to state file (errno %d)", errno);
- goto out;
- }
- }
-
- /* Zero terminate */
- i = 0;
- if ( !write_exact(io_fd, &i, sizeof(int)) )
- {
- ERROR("Error when writing to state file (6') (errno %d)", errno);
- goto out;
- }
-
- /* Send through a list of all the PFNs that were not in map at the close */
- {
- unsigned int i,j;
- unsigned long pfntab[1024];
-
- for ( i = 0, j = 0; i < p2m_size; i++ )
- {
- if ( !is_mapped(live_p2m[i]) )
- j++;
- }
-
- if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
- {
- ERROR("Error when writing to state file (6a) (errno %d)", errno);
- goto out;
- }
-
- for ( i = 0, j = 0; i < p2m_size; )
- {
- if ( !is_mapped(live_p2m[i]) )
- pfntab[j++] = i;
-
- i++;
- if ( (j == 1024) || (i == p2m_size) )
- {
- if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
- {
- ERROR("Error when writing to state file (6b) (errno %d)",
- errno);
- goto out;
- }
- j = 0;
- }
- }
- }
-
- /* Canonicalise the suspend-record frame number. */
- if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
- {
- ERROR("Suspend record is not in range of pseudophys map");
- goto out;
- }
-
- for ( i = 0; i <= info.max_vcpu_id; i++ )
- {
- if ( !(vcpumap & (1ULL << i)) )
- continue;
-
- if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
- {
- ERROR("No context for VCPU%d", i);
- goto out;
- }
-
- /* Canonicalise each GDT frame number. */
- for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
- {
- if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
- {
- ERROR("GDT frame is not in range of pseudophys map");
- goto out;
- }
- }
-
- /* Canonicalise the page table base pointer. */
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
- {
- ERROR("PT base is not in range of pseudophys map");
- goto out;
- }
- ctxt.ctrlreg[3] =
- xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
-
- /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
- if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
- {
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
- {
- ERROR("PT base is not in range of pseudophys map");
- goto out;
- }
- /* Least-significant bit means 'valid PFN'. */
- ctxt.ctrlreg[1] = 1 |
- xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
- }
-
- if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
- {
- ERROR("Error when writing to state file (1) (errno %d)", errno);
- goto out;
- }
- }
-
- /*
- * Reset the MFN to be a known-invalid value. See map_frame_list_list().
- */
- memcpy(page, live_shinfo, PAGE_SIZE);
- ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
- if ( !write_exact(io_fd, page, PAGE_SIZE) )
- {
- ERROR("Error when writing to state file (1) (errno %d)", errno);
- goto out;
- }
-
- /* Success! */
- rc = 0;
-
- out:
-
- if ( live )
- {
- if ( xc_shadow_control(xc_handle, dom,
- XEN_DOMCTL_SHADOW_OP_OFF,
- NULL, 0, NULL, 0, NULL) < 0 )
- DPRINTF("Warning - couldn't disable shadow mode");
- }
-
- /* Flush last write and discard cache for file. */
- discard_file_cache(io_fd, 1 /* flush */);
-
- if ( live_shinfo )
- munmap(live_shinfo, PAGE_SIZE);
-
- if ( live_p2m_frame_list_list )
- munmap(live_p2m_frame_list_list, PAGE_SIZE);
-
- if ( live_p2m_frame_list )
- munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
-
- if ( live_p2m )
- munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
-
- if ( live_m2p )
- munmap(live_m2p, M2P_SIZE(max_mfn));
-
- free(pfn_type);
- free(pfn_batch);
- free(to_send);
- free(to_fix);
- free(to_skip);
-
- DPRINTF("Save exit rc=%d\n",rc);
-
- return !!rc;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
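The body removed above is the classic pre-copy loop that this patch folds into the unified xc_domain_save() entry point declared later (see xenguest.h and xc_save.c below): the first pass sends every page, later passes resend only pages the still-running guest re-dirtied (as reported by the log-dirty bitmap via XEN_DOMCTL_SHADOW_OP_PEEK/CLEAN), and the domain is suspended only for a short final pass once the dirty set stops shrinking or the iteration/transfer limits are hit. A rough, self-contained Python sketch of just that round structure; the random dirtier stands in for the guest and the counts/thresholds are illustrative, not libxc's defaults:

import random

def live_save(num_pages, max_iters=4, final_threshold=50):
    to_send = set(range(num_pages))            # first pass sends every page
    total_sent = 0
    for it in range(1, max_iters + 1):
        last = (it == max_iters) or (len(to_send) < final_threshold)
        total_sent += len(to_send)             # "write" this round's pages
        # While copying, the still-running guest keeps dirtying pages; in the
        # real code this set comes from the log-dirty bitmap.
        if last:
            dirtied = set()                    # guest is suspended for the last pass
        else:
            dirtied = set(random.randrange(num_pages) for _ in range(num_pages // 10))
        print("iter %d: sent %d, re-dirtied %d" % (it, len(to_send), len(dirtied)))
        if last:
            break
        to_send = dirtied                      # later passes resend only dirty pages
    return total_sent

if __name__ == "__main__":
    print("total pages written: %d" % live_save(1024))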
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_resume.c
--- a/tools/libxc/xc_resume.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_resume.c Fri Apr 13 11:14:26 2007 +0100
@@ -3,24 +3,71 @@
#include "xg_save_restore.h"
#if defined(__i386__) || defined(__x86_64__)
+
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/params.h>
+
+/* Need to provide the right flavour of vcpu context for Xen */
+typedef union
+{
+ vcpu_guest_context_x86_64_t c64;
+ vcpu_guest_context_x86_32_t c32;
+ vcpu_guest_context_t c;
+} vcpu_guest_context_either_t;
+
static int modify_returncode(int xc_handle, uint32_t domid)
{
- vcpu_guest_context_t ctxt;
+ vcpu_guest_context_either_t ctxt;
+ xc_dominfo_t info;
+ xen_capabilities_info_t caps;
int rc;
- if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt)) != 0 )
- return rc;
- ctxt.user_regs.eax = 1;
- if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt)) != 0 )
+ if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
+ {
+ PERROR("Could not get domain info");
+ return -1;
+ }
+
+ /* HVM guests without PV drivers do not have a return code to modify. */
+ if ( info.hvm )
+ {
+ unsigned long irq = 0;
+ xc_get_hvm_param(xc_handle, domid, HVM_PARAM_CALLBACK_IRQ, &irq);
+ if ( !irq )
+ return 0;
+ }
+
+ if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 )
+ {
+ PERROR("Could not get Xen capabilities\n");
+ return -1;
+ }
+
+ if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
+ return rc;
+
+ if ( !info.hvm )
+ ctxt.c.user_regs.eax = 1;
+ else if ( strstr(caps, "x86_64") )
+ ctxt.c64.user_regs.eax = 1;
+ else
+ ctxt.c32.user_regs.eax = 1;
+
+ if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
return rc;
return 0;
}
+
#else
+
static int modify_returncode(int xc_handle, uint32_t domid)
{
return 0;
-}
+
+}
+
#endif
static int xc_domain_resume_cooperative(int xc_handle, uint32_t domid)
@@ -65,6 +112,12 @@ static int xc_domain_resume_any(int xc_h
* (x86 only) Rewrite store_mfn and console_mfn back to MFN (from PFN).
*/
#if defined(__i386__) || defined(__x86_64__)
+ if ( info.hvm )
+ {
+ ERROR("Cannot resume uncooperative HVM guests");
+ return rc;
+ }
+
/* Map the shared info frame */
shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
PROT_READ, info.shared_info_frame);
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xenctrl.h Fri Apr 13 11:14:26 2007 +0100
@@ -840,6 +840,9 @@ const char *xc_error_code_to_desc(int co
*/
xc_error_handler xc_set_error_handler(xc_error_handler handler);
+int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value);
+int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value);
+
/* PowerPC specific. */
int xc_alloc_real_mode_area(int xc_handle,
uint32_t domid,
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xenguest.h Fri Apr 13 11:14:26 2007 +0100
@@ -16,26 +16,19 @@
/**
- * This function will save a domain running Linux.
+ * This function will save a running domain.
*
* @parm xc_handle a handle to an open hypervisor interface
* @parm fd the file descriptor to save a domain to
* @parm dom the id of the domain
* @return 0 on success, -1 on failure
*/
-int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
- int (*suspend)(int domid));
+int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ int (*suspend)(int domid), int hvm,
+ void *(*init_qemu_maps)(int, unsigned), /* HVM only */
+ void (*qemu_flip_buffer)(int, int)); /* HVM only */
-/**
- * This function will save a hvm domain running unmodified guest.
- * @return 0 on success, -1 on failure
- */
-int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
- int (*suspend)(int domid),
- void *(*init_qemu_maps)(int, unsigned),
- void (*qemu_flip_buffer)(int, int));
/**
* This function will restore a saved domain.
@@ -143,11 +136,6 @@ int xc_hvm_build_mem(int xc_handle,
const char *image_buffer,
unsigned long image_size);
-int xc_set_hvm_param(
- int handle, domid_t dom, int param, unsigned long value);
-int xc_get_hvm_param(
- int handle, domid_t dom, int param, unsigned long *value);
-
/* PowerPC specific. */
int xc_prose_build(int xc_handle,
uint32_t domid,
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xg_private.c Fri Apr 13 11:14:26 2007 +0100
@@ -196,29 +196,6 @@ __attribute__((weak))
{
errno = ENOSYS;
return -1;
-}
-
-__attribute__((weak))
- int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags,
- int (*suspend)(int domid),
- void *(*init_qemu_maps)(int, unsigned),
- void (*qemu_flip_buffer)(int, int))
-{
- errno = ENOSYS;
- return -1;
-}
-
-__attribute__((weak)) int xc_get_hvm_param(
- int handle, domid_t dom, int param, unsigned long *value)
-{
- return -ENOSYS;
-}
-
-__attribute__((weak)) int xc_set_hvm_param(
- int handle, domid_t dom, int param, unsigned long value)
-{
- return -ENOSYS;
}
/*
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_host_cpu.h
--- a/tools/libxen/include/xen_host_cpu.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/include/xen_host_cpu.h Fri Apr 13 11:14:26 2007 +0100
@@ -70,6 +70,7 @@ typedef struct xen_host_cpu_record
char *modelname;
char *stepping;
char *flags;
+ char *features;
double utilisation;
} xen_host_cpu_record;
@@ -223,6 +224,13 @@ xen_host_cpu_get_flags(xen_session *sess
/**
+ * Get the features field of the given host_cpu.
+ */
+extern bool
+xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu);
+
+
+/**
* Get the utilisation field of the given host_cpu.
*/
extern bool
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_vm.h
--- a/tools/libxen/include/xen_vm.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/include/xen_vm.h Fri Apr 13 11:14:26 2007 +0100
@@ -838,6 +838,28 @@ xen_vm_set_vcpus_number_live(xen_session
/**
+ * Add the given key-value pair to VM.VCPUs_params, and apply that
+ * value on the running VM.
+ */
+extern bool
+xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value);
+
+
+/**
+ * Set memory_dynamic_max in database and on running VM.
+ */
+extern bool
+xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max);
+
+
+/**
+ * Set memory_dynamic_min in database and on running VM.
+ */
+extern bool
+xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min);
+
+
+/**
* Send the given key as a sysrq to this VM. The key is specified as a
* single character (a String of length 1). This can only be called when the
* specified VM is in the Running state.
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_vm_metrics.h
--- a/tools/libxen/include/xen_vm_metrics.h Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/include/xen_vm_metrics.h Fri Apr 13 11:14:26 2007 +0100
@@ -22,6 +22,7 @@
#include "xen_common.h"
#include "xen_int_float_map.h"
#include "xen_int_int_map.h"
+#include "xen_string_set.h"
#include "xen_string_string_map.h"
#include "xen_vm_metrics_decl.h"
@@ -70,6 +71,7 @@ typedef struct xen_vm_metrics_record
xen_int_float_map *vcpus_utilisation;
xen_int_int_map *vcpus_cpu;
xen_string_string_map *vcpus_params;
+ struct xen_string_set *state;
time_t start_time;
time_t last_updated;
} xen_vm_metrics_record;
@@ -210,6 +212,13 @@ xen_vm_metrics_get_vcpus_params(xen_sess
/**
+ * Get the state field of the given VM_metrics.
+ */
+extern bool
+xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics);
+
+
+/**
* Get the start_time field of the given VM_metrics.
*/
extern bool
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_host_cpu.c
--- a/tools/libxen/src/xen_host_cpu.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/src/xen_host_cpu.c Fri Apr 13 11:14:26 2007 +0100
@@ -61,6 +61,9 @@ static const struct_member xen_host_cpu_
{ .key = "flags",
.type = &abstract_type_string,
.offset = offsetof(xen_host_cpu_record, flags) },
+ { .key = "features",
+ .type = &abstract_type_string,
+ .offset = offsetof(xen_host_cpu_record, features) },
{ .key = "utilisation",
.type = &abstract_type_float,
.offset = offsetof(xen_host_cpu_record, utilisation) }
@@ -90,6 +93,7 @@ xen_host_cpu_record_free(xen_host_cpu_re
free(record->modelname);
free(record->stepping);
free(record->flags);
+ free(record->features);
free(record);
}
@@ -252,6 +256,23 @@ xen_host_cpu_get_flags(xen_session *sess
bool
+xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu)
+{
+ abstract_value param_values[] =
+ {
+ { .type = &abstract_type_string,
+ .u.string_val = host_cpu }
+ };
+
+ abstract_type result_type = abstract_type_string;
+
+ *result = NULL;
+ XEN_CALL_("host_cpu.get_features");
+ return session->ok;
+}
+
+
+bool
xen_host_cpu_get_utilisation(xen_session *session, double *result, xen_host_cpu host_cpu)
{
abstract_value param_values[] =
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_vm.c
--- a/tools/libxen/src/xen_vm.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/src/xen_vm.c Fri Apr 13 11:14:26 2007 +0100
@@ -1610,6 +1610,56 @@ xen_vm_set_vcpus_number_live(xen_session
bool
+xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value)
+{
+ abstract_value param_values[] =
+ {
+ { .type = &abstract_type_string,
+ .u.string_val = self },
+ { .type = &abstract_type_string,
+ .u.string_val = key },
+ { .type = &abstract_type_string,
+ .u.string_val = value }
+ };
+
+ xen_call_(session, "VM.add_to_VCPUs_params_live", param_values, 3, NULL,
NULL);
+ return session->ok;
+}
+
+
+bool
+xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max)
+{
+ abstract_value param_values[] =
+ {
+ { .type = &abstract_type_string,
+ .u.string_val = self },
+ { .type = &abstract_type_int,
+ .u.int_val = max }
+ };
+
+ xen_call_(session, "VM.set_memory_dynamic_max_live", param_values, 2,
NULL, NULL);
+ return session->ok;
+}
+
+
+bool
+xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min)
+{
+ abstract_value param_values[] =
+ {
+ { .type = &abstract_type_string,
+ .u.string_val = self },
+ { .type = &abstract_type_int,
+ .u.int_val = min }
+ };
+
+ xen_call_(session, "VM.set_memory_dynamic_min_live", param_values, 2,
NULL, NULL);
+ return session->ok;
+}
+
+
+bool
xen_vm_send_sysrq(xen_session *session, xen_vm vm, char *key)
{
abstract_value param_values[] =
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_vm_metrics.c
--- a/tools/libxen/src/xen_vm_metrics.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/src/xen_vm_metrics.c Fri Apr 13 11:14:26 2007 +0100
@@ -57,6 +57,9 @@ static const struct_member xen_vm_metric
{ .key = "VCPUs_params",
.type = &abstract_type_string_string_map,
.offset = offsetof(xen_vm_metrics_record, vcpus_params) },
+ { .key = "state",
+ .type = &abstract_type_string_set,
+ .offset = offsetof(xen_vm_metrics_record, state) },
{ .key = "start_time",
.type = &abstract_type_datetime,
.offset = offsetof(xen_vm_metrics_record, start_time) },
@@ -87,6 +90,7 @@ xen_vm_metrics_record_free(xen_vm_metric
xen_int_float_map_free(record->vcpus_utilisation);
xen_int_int_map_free(record->vcpus_cpu);
xen_string_string_map_free(record->vcpus_params);
+ xen_string_set_free(record->state);
free(record);
}
@@ -215,6 +219,23 @@ xen_vm_metrics_get_vcpus_params(xen_sess
bool
+xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics)
+{
+ abstract_value param_values[] =
+ {
+ { .type = &abstract_type_string,
+ .u.string_val = vm_metrics }
+ };
+
+ abstract_type result_type = abstract_type_string_set;
+
+ *result = NULL;
+ XEN_CALL_("VM_metrics.get_state");
+ return session->ok;
+}
+
+
+bool
xen_vm_metrics_get_start_time(xen_session *session, time_t *result, xen_vm_metrics vm_metrics)
{
abstract_value param_values[] =
diff -r 5bda20f0723d -r f92a79e39da8 tools/pygrub/src/LiloConf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/pygrub/src/LiloConf.py Fri Apr 13 11:14:26 2007 +0100
@@ -0,0 +1,147 @@
+#
+#LiloConf.py
+#
+
+import sys, re, os
+import logging
+import GrubConf
+
+class LiloImage(object):
+ def __init__(self, lines, path):
+ self.reset(lines, path)
+
+ def __repr__(self):
+ return ("title: %s\n"
+ " root: %s\n"
+ " kernel: %s\n"
+ " args: %s\n"
+ " initrd: %s\n" %(self.title, self.root, self.kernel,
+ self.args, self.initrd))
+ def reset(self, lines, path):
+ self._root = self._initrd = self._kernel = self._args = None
+ self.title = ""
+ self.lines = []
+ self.path = path
+ map(self.set_from_line, lines)
+ self.root = "" # dummy
+
+ def set_from_line(self, line, replace = None):
+ (com, arg) = GrubConf.grub_exact_split(line, 2)
+
+ if self.commands.has_key(com):
+ if self.commands[com] is not None:
+ exec("%s = r\'%s\'" %(self.commands[com], re.sub('^"(.+)"$',
r"\1", arg.strip())))
+ else:
+ logging.info("Ignored image directive %s" %(com,))
+ else:
+ logging.warning("Unknown image directive %s" %(com,))
+
+ # now put the line in the list of lines
+ if replace is None:
+ self.lines.append(line)
+ else:
+ self.lines.pop(replace)
+ self.lines.insert(replace, line)
+
+ def set_kernel(self, val):
+ self._kernel = (None, self.path + "/" + val)
+ def get_kernel(self):
+ return self._kernel
+ kernel = property(get_kernel, set_kernel)
+
+ def set_initrd(self, val):
+ self._initrd = (None, self.path + "/" + val)
+ def get_initrd(self):
+ return self._initrd
+ initrd = property(get_initrd, set_initrd)
+
+ # set up command handlers
+ commands = { "label": "self.title",
+ "root": "self.root",
+ "rootnoverify": "self.root",
+ "image": "self.kernel",
+ "initrd": "self.initrd",
+ "append": "self.args",
+ "read-only": None,
+ "chainloader": None,
+ "module": None}
+
+class LiloConfigFile(object):
+ def __init__(self, fn = None):
+ self.filename = fn
+ self.images = []
+ self.timeout = -1
+ self._default = 0
+
+ if fn is not None:
+ self.parse()
+
+ def parse(self, buf = None):
+ if buf is None:
+ if self.filename is None:
+ raise ValueError, "No config file defined to parse!"
+
+ f = open(self.filename, 'r')
+ lines = f.readlines()
+ f.close()
+ else:
+ lines = buf.split("\n")
+
+ path = os.path.dirname(self.filename)
+ img = []
+ for l in lines:
+ l = l.strip()
+ # skip blank lines
+ if len(l) == 0:
+ continue
+ # skip comments
+ if l.startswith('#'):
+ continue
+ # new image
+ if l.startswith("image"):
+ if len(img) > 0:
+ self.add_image(LiloImage(img, path))
+ img = [l]
+ continue
+
+ if len(img) > 0:
+ img.append(l)
+ continue
+
+ (com, arg) = GrubConf.grub_exact_split(l, 2)
+ if self.commands.has_key(com):
+ if self.commands[com] is not None:
+ exec("%s = r\"%s\"" %(self.commands[com], arg.strip()))
+ else:
+ logging.info("Ignored directive %s" %(com,))
+ else:
+ logging.warning("Unknown directive %s" %(com,))
+
+ if len(img) > 0:
+ self.add_image(LiloImage(img, path))
+
+ def add_image(self, image):
+ self.images.append(image)
+
+ def _get_default(self):
+ for i in range(0, len(self.images)):
+ if self.images[i].title == self._default:
+ return i
+ return 0
+ def _set_default(self, val):
+ self._default = val
+ default = property(_get_default, _set_default)
+
+ commands = { "default": "self.default",
+ "timeout": "self.timeout",
+ "prompt": None,
+ "relocatable": None,
+ }
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ raise RuntimeError, "Need a lilo.conf to read"
+ g = LiloConfigFile(sys.argv[1])
+ for i in g.images:
+ print i #, i.title, i.root, i.kernel, i.args, i.initrd
+ print g.default
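Besides the file-based __main__ path above, pygrub (next hunk) drives this parser by setting cf.filename and then handing parse() a buffer read out of the guest image. A hedged, standalone usage sketch, assuming the module is importable as grub.LiloConf as pygrub does; the elilo.conf contents and the /etc/elilo.conf path are made up for the example:

from grub.LiloConf import LiloConfigFile

sample = """
default=linux
timeout=20
image=vmlinuz-2.6.18-xen
    label=linux
    initrd=initrd-2.6.18-xen.img
    append="root=/dev/VolGroup00/LogVol00 ro"
"""

cf = LiloConfigFile()
cf.filename = "/etc/elilo.conf"   # hypothetical path; only its dirname is used
cf.parse(sample)
for img in cf.images:
    print("%s -> kernel %s, initrd %s" % (img.title, img.kernel, img.initrd))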
diff -r 5bda20f0723d -r f92a79e39da8 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/pygrub/src/pygrub Fri Apr 13 11:14:26 2007 +0100
@@ -16,6 +16,7 @@ import os, sys, string, struct, tempfile
import os, sys, string, struct, tempfile, re
import copy
import logging
+import platform
import curses, _curses, curses.wrapper, curses.textpad, curses.ascii
import getopt
@@ -24,6 +25,7 @@ sys.path = [ '/usr/lib/python' ] + sys.p
import fsimage
import grub.GrubConf
+import grub.LiloConf
PYGRUB_VER = 0.5
@@ -58,6 +60,13 @@ def get_active_partition(file):
# active partition has 0x80 as the first byte
if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',):
return buf[poff:poff+16]
+
+ # type=0xee: GUID partition table
+ # XXX assume the first partition is active
+ if struct.unpack("<c", buf[poff+4:poff+5]) == ('\xee',):
+ os.lseek(fd, 0x400, 0)
+ buf = os.read(fd, 512)
+ return buf[24:40] # XXX buf[32:40]
# if there's not a partition marked as active, fall back to
# the first partition
@@ -346,7 +355,13 @@ class Grub:
if not os.access(fn, os.R_OK):
raise RuntimeError, "Unable to access %s" %(fn,)
- self.cf = grub.GrubConf.GrubConfigFile()
+ if platform.machine() == 'ia64':
+ self.cf = grub.LiloConf.LiloConfigFile()
+ file_list = ("/efi/redhat/elilo.conf",)
+ else:
+ self.cf = grub.GrubConf.GrubConfigFile()
+ file_list = ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
+ "/grub/menu.lst", "/grub/grub.conf")
if not fs:
# set the config file and parse it
@@ -354,18 +369,15 @@ class Grub:
self.cf.parse()
return
- grubfile = None
- for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
- "/grub/menu.lst", "/grub/grub.conf"):
+ for f in file_list:
if fs.file_exists(f):
- grubfile = f
- break
- if grubfile is None:
- raise RuntimeError, "we couldn't find grub config file in the
image provided."
- f = fs.open_file(grubfile)
+ self.cf.filename = f
+ break
+ if self.cf.filename is None:
+ raise RuntimeError, "couldn't find bootloader config file in the
image provided."
+ f = fs.open_file(self.cf.filename)
buf = f.read()
del f
- # then parse the grub config
self.cf.parse(buf)
def run(self):
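The get_active_partition() hunk above also copes with GPT disks, which present a protective MBR whose partition entry has type 0xee. A small, self-contained sketch of that classification step, using the standard MBR layout (table at offset 0x1be, 16-byte entries, boot flag at byte 0, type at byte 4); the fake sector built under __main__ is purely illustrative:

MBR_TABLE_OFFSET = 0x1be
MBR_ENTRY_SIZE = 16

def classify_mbr(sector0):
    """Decide how the first 512-byte sector should be treated."""
    for i in range(4):
        off = MBR_TABLE_OFFSET + i * MBR_ENTRY_SIZE
        entry = sector0[off:off + MBR_ENTRY_SIZE]
        if entry[0:1] == b'\x80':             # active flag set
            return ("active", i)
        if entry[4:5] == b'\xee':             # GPT protective MBR entry
            return ("gpt-protective", i)
    return ("none", None)

if __name__ == "__main__":
    fake = bytearray(512)
    fake[MBR_TABLE_OFFSET + 4] = 0xee         # fabricate a protective-MBR entry
    print("classified as: %s" % (classify_mbr(bytes(fake)),))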
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/README.XendConfig
--- a/tools/python/README.XendConfig Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/README.XendConfig Fri Apr 13 11:14:26 2007 +0100
@@ -115,6 +115,7 @@ otherConfig
image.nographic
image.vnc
image.sdl
+ image.monitor
image.vncdisplay
image.vncunused
image.hvm.device_model
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/README.sxpcfg
--- a/tools/python/README.sxpcfg Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/README.sxpcfg Fri Apr 13 11:14:26 2007 +0100
@@ -63,6 +63,7 @@ image
- fdb
- soundhw
- localtime
+ - monitor
- serial
- stdvga
- isa
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/XendConfig.py Fri Apr 13 11:14:26 2007 +0100
@@ -117,7 +117,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
# Platform configuration keys.
XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display',
- 'fda', 'fdb', 'keymap', 'isa', 'localtime',
+ 'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor',
'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
'vncconsole', 'vncdisplay', 'vnclisten',
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Apr 13 11:14:26 2007 +0100
@@ -1601,7 +1601,6 @@ class XendDomainInfo:
self.image = image.create(self, self.info)
if self.image:
self.image.createDeviceModel(True)
- self.image.register_shutdown_watch()
self._storeDomDetails()
self._registerWatches()
self.refreshShutdown()
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/XendNode.py Fri Apr 13 11:14:26 2007 +0100
@@ -603,7 +603,7 @@ class XendNode:
return [[k, info[k]] for k in ITEM_ORDER]
def xendinfo(self):
- return [['xend_config_format', 3]]
+ return [['xend_config_format', 4]]
#
# utilisation tracking
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/image.py Fri Apr 13 11:14:26 2007 +0100
@@ -284,17 +284,16 @@ class HVMImageHandler(ImageHandler):
log.debug("acpi = %d", self.acpi)
log.debug("apic = %d", self.apic)
- self.register_shutdown_watch()
- self.register_reboot_feature_watch()
-
- return xc.hvm_build(domid = self.vm.getDomid(),
- image = self.kernel,
- store_evtchn = store_evtchn,
- memsize = mem_mb,
- vcpus = self.vm.getVCpuCount(),
- pae = self.pae,
- acpi = self.acpi,
- apic = self.apic)
+ rc = xc.hvm_build(domid = self.vm.getDomid(),
+ image = self.kernel,
+ store_evtchn = store_evtchn,
+ memsize = mem_mb,
+ vcpus = self.vm.getVCpuCount(),
+ pae = self.pae,
+ acpi = self.acpi,
+ apic = self.apic)
+ rc['notes'] = { 'SUSPEND_CANCEL': 1 }
+ return rc
# Return a list of cmd line args to the device models based on the
# xm config file
@@ -418,6 +417,8 @@ class HVMImageHandler(ImageHandler):
else:
ret.append('-nographic')
+ if int(vmConfig['platform'].get('monitor', 0)) != 0:
+ ret.append('-monitor vc')
return ret
def createDeviceModel(self, restore = False):
@@ -448,13 +449,9 @@ class HVMImageHandler(ImageHandler):
log.info("device model pid: %d", self.pid)
def recreate(self):
- self.register_shutdown_watch()
- self.register_reboot_feature_watch()
self.pid = self.vm.gatherDom(('image/device-model-pid', int))
def destroy(self, suspend = False):
- self.unregister_shutdown_watch()
- self.unregister_reboot_feature_watch();
if self.pid:
try:
sig = signal.SIGKILL
@@ -473,74 +470,6 @@ class HVMImageHandler(ImageHandler):
pass
self.pid = None
- def register_shutdown_watch(self):
- """ add xen store watch on control/shutdown """
- self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown",
- self.hvm_shutdown)
- log.debug("hvm shutdown watch registered")
-
- def unregister_shutdown_watch(self):
- """Remove the watch on the control/shutdown, if any. Nothrow
- guarantee."""
-
- try:
- if self.shutdownWatch:
- self.shutdownWatch.unwatch()
- except:
- log.exception("Unwatching hvm shutdown watch failed.")
- self.shutdownWatch = None
- log.debug("hvm shutdown watch unregistered")
-
- def hvm_shutdown(self, _):
- """ watch call back on node control/shutdown,
- if node changed, this function will be called
- """
- xd = xen.xend.XendDomain.instance()
- try:
- vm = xd.domain_lookup( self.vm.getDomid() )
- except XendError:
- # domain isn't registered, no need to clean it up.
- return False
-
- reason = vm.getShutdownReason()
- log.debug("hvm_shutdown fired, shutdown reason=%s", reason)
- if reason in REVERSE_DOMAIN_SHUTDOWN_REASONS:
- vm.info['shutdown'] = 1
- vm.info['shutdown_reason'] = \
- REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
- vm.refreshShutdown(vm.info)
-
- return True # Keep watching
-
- def register_reboot_feature_watch(self):
- """ add xen store watch on control/feature-reboot """
- self.rebootFeatureWatch = xswatch(self.vm.dompath + "/control/feature-reboot", \
- self.hvm_reboot_feature)
- log.debug("hvm reboot feature watch registered")
-
- def unregister_reboot_feature_watch(self):
- """Remove the watch on the control/feature-reboot, if any. Nothrow
- guarantee."""
-
- try:
- if self.rebootFeatureWatch:
- self.rebootFeatureWatch.unwatch()
- except:
- log.exception("Unwatching hvm reboot feature watch failed.")
- self.rebootFeatureWatch = None
- log.debug("hvm reboot feature watch unregistered")
-
- def hvm_reboot_feature(self, _):
- """ watch call back on node control/feature-reboot,
- if node changed, this function will be called
- """
- status = self.vm.readDom('control/feature-reboot')
- log.debug("hvm_reboot_feature fired, module status=%s", status)
- if status == '1':
- self.unregister_shutdown_watch()
-
- return True # Keep watching
-
class IA64_HVM_ImageHandler(HVMImageHandler):
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/create.dtd
--- a/tools/python/xen/xm/create.dtd Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/create.dtd Fri Apr 13 11:14:26 2007 +0100
@@ -95,7 +95,7 @@
src %URI; #REQUIRED
type %VDI_TYPE; #REQUIRED
size CDATA #REQUIRED
- shareable CDATA #REQUIRED
+ sharable CDATA #REQUIRED
read_only CDATA #REQUIRED>
<!ELEMENT name (label,
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/create.py Fri Apr 13 11:14:26 2007 +0100
@@ -420,6 +420,10 @@ gopts.var('serial', val='FILE',
gopts.var('serial', val='FILE',
fn=set_value, default='',
use="Path to serial or pty or vc")
+
+gopts.var('monitor', val='no|yes',
+ fn=set_bool, default=0,
+ use="""Should the device model use monitor?""")
gopts.var('localtime', val='no|yes',
fn=set_bool, default=0,
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/main.py Fri Apr 13 11:14:26 2007 +0100
@@ -1544,34 +1544,59 @@ def xm_info(args):
host_metrics_record = server.xenapi.host_metrics.get_record(host_record["metrics"])
+ def getVal(keys, default=""):
+ data = host_record
+ for key in keys:
+ if key in data:
+ data = data[key]
+ else:
+ return default
+ return data
+
+ def getCpuMhz():
+ cpu_speeds = [int(host_cpu_record["speed"])
+ for host_cpu_record in host_cpu_records
+ if "speed" in host_cpu_record]
+ if len(cpu_speeds) > 0:
+ return sum(cpu_speeds) / len(cpu_speeds)
+ else:
+ return 0
+
+ getCpuMhz()
+
+ def getCpuFeatures():
+ if len(host_cpu_records) > 0:
+ return host_cpu_records[0].get("features", "")
+ else:
+ return ""
+
info = {
- "host": host_record["name_label"],
- "release": host_record["software_version"]["release"],
- "version": host_record["software_version"]["version"],
- "machine": host_record["software_version"]["machine"],
- "nr_cpus": len(host_record["host_CPUs"]),
- "nr_nodes": host_record["cpu_configuration"]["nr_nodes"],
- "sockets_per_node":
host_record["cpu_configuration"]["sockets_per_node"],
- "cores_per_socket":
host_record["cpu_configuration"]["cores_per_socket"],
- "threads_per_core":
host_record["cpu_configuration"]["threads_per_core"],
- "cpu_mhz": sum([int(host_cpu_record["speed"]) for
host_cpu_record in host_cpu_records])
- / len(host_cpu_records),
- "hw_caps": host_cpu_records[0]["features"],
+ "host": getVal(["name_label"]),
+ "release": getVal(["software_version", "release"]),
+ "version": getVal(["software_version", "version"]),
+ "machine": getVal(["software_version", "machine"]),
+ "nr_cpus": len(getVal(["host_CPUs"], [])),
+ "nr_nodes": getVal(["cpu_configuration", "nr_nodes"]),
+ "sockets_per_node": getVal(["cpu_configuration",
"sockets_per_node"]),
+ "cores_per_socket": getVal(["cpu_configuration",
"cores_per_socket"]),
+ "threads_per_core": getVal(["cpu_configuration",
"threads_per_core"]),
+ "cpu_mhz": getCpuMhz(),
+ "hw_caps": getCpuFeatures(),
"total_memory":
int(host_metrics_record["memory_total"])/1024/1024,
"free_memory":
int(host_metrics_record["memory_free"])/1024/1024,
- "xen_major": host_record["software_version"]["xen_major"],
- "xen_minor": host_record["software_version"]["xen_minor"],
- "xen_extra": host_record["software_version"]["xen_extra"],
- "xen_caps": " ".join(host_record["capabilities"]),
- "xen_scheduler": host_record["sched_policy"],
- "xen_pagesize": host_record["other_config"]["xen_pagesize"],
- "platform_params":
host_record["other_config"]["platform_params"],
- "xen_changeset":
host_record["software_version"]["xen_changeset"],
- "cc_compiler":
host_record["software_version"]["cc_compiler"],
- "cc_compile_by":
host_record["software_version"]["cc_compile_by"],
- "cc_compile_domain":
host_record["software_version"]["cc_compile_domain"],
- "cc_compile_date":
host_record["software_version"]["cc_compile_date"],
-
"xend_config_format":host_record["software_version"]["xend_config_format"]
+ "xen_major": getVal(["software_version", "xen_major"]),
+ "xen_minor": getVal(["software_version", "xen_minor"]),
+ "xen_extra": getVal(["software_version", "xen_extra"]),
+ "xen_caps": " ".join(getVal(["capabilities"], [])),
+ "xen_scheduler": getVal(["sched_policy"]),
+ "xen_pagesize": getVal(["other_config", "xen_pagesize"]),
+ "platform_params": getVal(["other_config", "platform_params"]),
+ "xen_changeset": getVal(["software_version", "xen_changeset"]),
+ "cc_compiler": getVal(["software_version", "cc_compiler"]),
+ "cc_compile_by": getVal(["software_version", "cc_compile_by"]),
+ "cc_compile_domain": getVal(["software_version",
"cc_compile_domain"]),
+ "cc_compile_date": getVal(["software_version",
"cc_compile_date"]),
+ "xend_config_format":getVal(["software_version",
"xend_config_format"])
}
sorted = info.items()
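The getVal() helper introduced above lets xm info tolerate host records that lack optional fields (for example other_config/xen_pagesize), returning a default instead of raising KeyError part-way through building the table. A tiny standalone sketch of the same lookup pattern, with an invented record:

def get_val(record, keys, default=""):
    data = record
    for key in keys:
        if key in data:
            data = data[key]
        else:
            return default
    return data

host_record = {"software_version": {"xen_major": "3", "xen_minor": "0"}}
print(get_val(host_record, ["software_version", "xen_major"]))         # -> 3
print(get_val(host_record, ["other_config", "xen_pagesize"], "4096"))  # -> default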
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/xenapi_create.py Fri Apr 13 11:14:26 2007 +0100
@@ -48,7 +48,7 @@ def get_name_description(node):
def get_text_in_child_node(node, child):
tag_node = node.getElementsByTagName(child)[0]
- return tag_node.nodeValue
+ return " ".join([child.nodeValue for child in tag_node.childNodes])
def get_child_node_attribute(node, child, attribute):
tag_node = node.getElementsByTagName(child)[0]
@@ -212,8 +212,8 @@ class xenapi_create:
"SR": self.DEFAULT_STORAGE_REPOSITORY,
"virtual_size": vdi.attributes["size"].value,
"type": vdi.attributes["type"].value,
- "shareable": vdi.attributes["shareable"].value,
- "read_only": vdi.attributes["read_only"].value,
+ "sharable": bool(vdi.attributes["sharable"].value),
+ "read_only": bool(vdi.attributes["read_only"].value),
"other_config": {"location":
vdi.attributes["src"].value}
}
@@ -264,7 +264,23 @@ class xenapi_create:
"platform":
get_child_nodes_as_dict(vm, "platform", "key", "value"),
"other_config":
- get_child_nodes_as_dict(vm, "other_config", "key", "value")
+ get_child_nodes_as_dict(vm, "other_config", "key", "value"),
+ "PV_bootloader":
+ "",
+ "PV_kernel":
+ "",
+ "PV_ramdisk":
+ "",
+ "PV_args":
+ "",
+ "PV_bootloader_args":
+ "",
+ "HVM_boot_policy":
+ "",
+ "HVM_boot_params":
+ {},
+ "PCI_bus":
+ ""
}
if len(vm.getElementsByTagName("pv")) > 0:
@@ -494,7 +510,7 @@ class sxp2xml:
# Make version tag
version = document.createElement("version")
- version.appendChild(document.createTextNode("1.0"))
+ version.appendChild(document.createTextNode("0"))
vm.appendChild(version)
# Make pv or hvm tag
@@ -629,10 +645,10 @@ class sxp2xml:
vdi.attributes["src"] = src
vdi.attributes["read_only"] \
= (get_child_by_name(vbd_sxp, "mode") != "w") \
- and "true" or "false"
+ and "True" or "False"
vdi.attributes["size"] = '-1'
vdi.attributes["type"] = "system"
- vdi.attributes["shareable"] = "false"
+ vdi.attributes["sharable"] = "False"
vdi.attributes["name"] = name
vdi.appendChild(self.make_name_tag(name, document))
diff -r 5bda20f0723d -r f92a79e39da8 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/xcutils/xc_save.c Fri Apr 13 11:14:26 2007 +0100
@@ -174,12 +174,9 @@ main(int argc, char **argv)
max_f = atoi(argv[4]);
flags = atoi(argv[5]);
- if (flags & XCFLAGS_HVM)
- ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags,
- &suspend, &init_qemu_maps, &qemu_flip_buffer);
- else
- ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags,
- &suspend);
+ ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags,
+ &suspend, !!(flags & XCFLAGS_HVM),
+ &init_qemu_maps, &qemu_flip_buffer);
xc_interface_close(xc_fd);
diff -r 5bda20f0723d -r f92a79e39da8 tools/xm-test/lib/XmTestLib/NetConfig.py
--- a/tools/xm-test/lib/XmTestLib/NetConfig.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/NetConfig.py Fri Apr 13 11:14:26 2007 +0100
@@ -44,7 +44,11 @@ def getXendNetConfig():
if not xconfig:
xconfig = "/etc/xen/xend-config.sxp"
- configfile = open(xconfig, 'r')
+ try:
+ configfile = open(xconfig, 'r')
+ except:
+ return "bridge"
+
S = configfile.read()
pin = Parser()
pin.input(S)
diff -r 5bda20f0723d -r f92a79e39da8 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
--- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Thu Apr 12 16:37:32 2007 -0500
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Fri Apr 13 11:14:26 2007 +0100
@@ -2,8 +2,8 @@
#define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
#include <linux/version.h>
-
#include <linux/spinlock.h>
+#include <asm/maddr.h>
#if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
#define __always_inline inline
@@ -98,8 +98,6 @@ extern char *kasprintf(gfp_t gfp, const
#if defined(_PAGE_PRESENT) && !defined(_PAGE_NX)
#define _PAGE_NX 0
-#endif
-
/*
* This variable at present is referenced by netfront, but only in code that
* is dead when running in hvm guests. To detect potential active uses of it
@@ -107,5 +105,6 @@ extern char *kasprintf(gfp_t gfp, const
* mappings created with it will fault when accessed.
*/
#define __supported_pte_mask ((maddr_t)0)
+#endif
#endif
diff -r 5bda20f0723d -r f92a79e39da8 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Thu Apr 12 16:37:32 2007 -0500
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Fri Apr 13 11:14:26 2007 +0100
@@ -6,21 +6,32 @@
#include "platform-pci.h"
#include <asm/hypervisor.h>
+struct ap_suspend_info {
+ int do_spin;
+ atomic_t nr_spinning;
+};
+
/*
* Spinning prevents, for example, APs touching grant table entries while
* the shared grant table is not mapped into the address space immediately
* after resume.
*/
-static void ap_suspend(void *_ap_spin)
+static void ap_suspend(void *_info)
{
- int *ap_spin = _ap_spin;
+ struct ap_suspend_info *info = _info;
BUG_ON(!irqs_disabled());
- while (*ap_spin) {
+ atomic_inc(&info->nr_spinning);
+ mb();
+
+ while (info->do_spin) {
cpu_relax();
HYPERVISOR_yield();
}
+
+ mb();
+ atomic_dec(&info->nr_spinning);
}
static int bp_suspend(void)
@@ -42,7 +53,8 @@ static int bp_suspend(void)
int __xen_suspend(int fast_suspend)
{
- int err, suspend_cancelled, ap_spin;
+ int err, suspend_cancelled, nr_cpus;
+ struct ap_suspend_info info;
xenbus_suspend();
@@ -51,22 +63,30 @@ int __xen_suspend(int fast_suspend)
/* Prevent any races with evtchn_interrupt() handler. */
disable_irq(xen_platform_pdev->irq);
- ap_spin = 1;
+ info.do_spin = 1;
+ atomic_set(&info.nr_spinning, 0);
smp_mb();
- err = smp_call_function(ap_suspend, &ap_spin, 0, 0);
+ nr_cpus = num_online_cpus() - 1;
+
+ err = smp_call_function(ap_suspend, &info, 0, 0);
if (err < 0) {
preempt_enable();
xenbus_suspend_cancel();
return err;
}
+ while (atomic_read(&info.nr_spinning) != nr_cpus)
+ cpu_relax();
+
local_irq_disable();
suspend_cancelled = bp_suspend();
local_irq_enable();
smp_mb();
- ap_spin = 0;
+ info.do_spin = 0;
+ while (atomic_read(&info.nr_spinning) != 0)
+ cpu_relax();
enable_irq(xen_platform_pdev->irq);
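The reworked __xen_suspend() above replaces the single ap_spin flag with a counted rendezvous: the boot processor waits until every AP has parked itself in ap_suspend(), performs the suspend, then clears do_spin and waits for the spinning count to drain back to zero before re-enabling the IRQ. A user-space Python sketch of that handshake, with threads standing in for CPUs; thread count and sleep intervals are illustrative only:

import threading, time

class SuspendInfo(object):
    def __init__(self):
        self.do_spin = True
        self.nr_spinning = 0
        self.lock = threading.Lock()

def ap_suspend(info):
    with info.lock:
        info.nr_spinning += 1          # check in: "I am parked"
    while info.do_spin:                # spin until the BP releases us
        time.sleep(0.001)
    with info.lock:
        info.nr_spinning -= 1          # check out on the way back

def xen_suspend(nr_aps=3):
    info = SuspendInfo()
    aps = [threading.Thread(target=ap_suspend, args=(info,)) for _ in range(nr_aps)]
    for t in aps:
        t.start()
    while info.nr_spinning != nr_aps:  # wait for every AP to park
        time.sleep(0.001)
    print("all %d APs parked; BP does the suspend work here" % nr_aps)
    info.do_spin = False               # release the APs
    while info.nr_spinning != 0:       # wait for them to leave the spin loop
        time.sleep(0.001)
    for t in aps:
        t.join()

if __name__ == "__main__":
    xen_suspend()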
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/ia64/xen/hyperprivop.S Fri Apr 13 11:14:26 2007 +0100
@@ -304,6 +304,8 @@ ENTRY(hyper_ssm_i)
;;
adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18;
adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
+ // temporarily save ar.unat
+ mov r28=ar.unat
bsw.1;;
// FIXME?: ar.unat is not really handled correctly,
// but may not matter if the OS is NaT-clean
@@ -324,6 +326,12 @@ ENTRY(hyper_ssm_i)
.mem.offset 0,0; st8.spill [r2]=r30,16;
.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
bsw.0 ;;
+ mov r27=ar.unat
+ adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ //save bank1 ar.unat
+ st8 [r26]=r27
+ //restore ar.unat
+ mov ar.unat=r28
mov r2=r30
mov r3=r29
adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
@@ -1518,8 +1526,10 @@ ENTRY(hyper_get_psr)
adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;;
ld8 r20=[r20];;
ld1 r21=[r20];;
- dep r8=r21,r8,IA64_PSR_I_BIT,1
- ;;
+ cmp.eq p8,p9=r0,r21
+ ;;
+(p8) dep r8=-1,r8,IA64_PSR_I_BIT,1
+(p9) dep r8=0,r8,IA64_PSR_I_BIT,1
// set vpsr.dfh
adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
ld1 r21=[r20];;
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/ia64/xen/mm.c Fri Apr 13 11:14:26 2007 +0100
@@ -673,7 +673,7 @@ unsigned long lookup_domain_mpa(struct d
} else if (mpaddr - IO_PORTS_PADDR < IO_PORTS_SIZE) {
/* Log I/O port probing, but complain less loudly about it */
gdprintk(XENLOG_INFO, "vcpu %d iip 0x%016lx: bad I/O port access "
- "0x%lx\n ", current->vcpu_id, PSCB(current, iip),
+ "0x%lx\n", current->vcpu_id, PSCB(current, iip),
IO_SPACE_SPARSE_DECODING(mpaddr - IO_PORTS_PADDR));
} else {
gdprintk(XENLOG_WARNING, "vcpu %d iip 0x%016lx: bad mpa 0x%lx "
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/x86/hvm/hvm.c Fri Apr 13 11:14:26 2007 +0100
@@ -191,6 +191,7 @@ static int hvm_save_cpu_ctxt(struct doma
{
struct vcpu *v;
struct hvm_hw_cpu ctxt;
+ struct vcpu_guest_context *vc;
for_each_vcpu(d, v)
{
@@ -199,7 +200,40 @@ static int hvm_save_cpu_ctxt(struct doma
if ( test_bit(_VPF_down, &v->pause_flags) )
continue;
+ /* Architecture-specific vmcs/vmcb bits */
hvm_funcs.save_cpu_ctxt(v, &ctxt);
+
+ /* Other vcpu register state */
+ vc = &v->arch.guest_context;
+ if ( vc->flags & VGCF_i387_valid )
+ memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
+ else
+ memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
+ ctxt.rax = vc->user_regs.eax;
+ ctxt.rbx = vc->user_regs.ebx;
+ ctxt.rcx = vc->user_regs.ecx;
+ ctxt.rdx = vc->user_regs.edx;
+ ctxt.rbp = vc->user_regs.ebp;
+ ctxt.rsi = vc->user_regs.esi;
+ ctxt.rdi = vc->user_regs.edi;
+ /* %rsp handled by arch-specific call above */
+#ifdef __x86_64__
+ ctxt.r8 = vc->user_regs.r8;
+ ctxt.r9 = vc->user_regs.r9;
+ ctxt.r10 = vc->user_regs.r10;
+ ctxt.r11 = vc->user_regs.r11;
+ ctxt.r12 = vc->user_regs.r12;
+ ctxt.r13 = vc->user_regs.r13;
+ ctxt.r14 = vc->user_regs.r14;
+ ctxt.r15 = vc->user_regs.r15;
+#endif
+ ctxt.dr0 = vc->debugreg[0];
+ ctxt.dr1 = vc->debugreg[1];
+ ctxt.dr2 = vc->debugreg[2];
+ ctxt.dr3 = vc->debugreg[3];
+ ctxt.dr6 = vc->debugreg[6];
+ ctxt.dr7 = vc->debugreg[7];
+
if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
return 1;
}
@@ -208,9 +242,10 @@ static int hvm_save_cpu_ctxt(struct doma
static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
- int vcpuid;
+ int vcpuid, rc;
struct vcpu *v;
struct hvm_hw_cpu ctxt;
+ struct vcpu_guest_context *vc;
/* Which vcpu is this? */
vcpuid = hvm_load_instance(h);
@@ -219,12 +254,51 @@ static int hvm_load_cpu_ctxt(struct doma
gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
return -EINVAL;
}
+ vc = &v->arch.guest_context;
+
+ /* Need to init this vcpu before loading its contents */
+ LOCK_BIGLOCK(d);
+ if ( !v->is_initialised )
+ if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
+ return rc;
+ UNLOCK_BIGLOCK(d);
if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
return -EINVAL;
+ /* Architecture-specific vmcs/vmcb bits */
if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
return -EINVAL;
+
+ /* Other vcpu register state */
+ memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
+ vc->user_regs.eax = ctxt.rax;
+ vc->user_regs.ebx = ctxt.rbx;
+ vc->user_regs.ecx = ctxt.rcx;
+ vc->user_regs.edx = ctxt.rdx;
+ vc->user_regs.ebp = ctxt.rbp;
+ vc->user_regs.esi = ctxt.rsi;
+ vc->user_regs.edi = ctxt.rdi;
+ vc->user_regs.esp = ctxt.rsp;
+#ifdef __x86_64__
+ vc->user_regs.r8 = ctxt.r8;
+ vc->user_regs.r9 = ctxt.r9;
+ vc->user_regs.r10 = ctxt.r10;
+ vc->user_regs.r11 = ctxt.r11;
+ vc->user_regs.r12 = ctxt.r12;
+ vc->user_regs.r13 = ctxt.r13;
+ vc->user_regs.r14 = ctxt.r14;
+ vc->user_regs.r15 = ctxt.r15;
+#endif
+ vc->debugreg[0] = ctxt.dr0;
+ vc->debugreg[1] = ctxt.dr1;
+ vc->debugreg[2] = ctxt.dr2;
+ vc->debugreg[3] = ctxt.dr3;
+ vc->debugreg[6] = ctxt.dr6;
+ vc->debugreg[7] = ctxt.dr7;
+
+ vc->flags = VGCF_i387_valid | VGCF_online;
+ v->fpu_initialised = 1;
/* Auxiliary processors should be woken immediately. */
if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
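One point worth noting about the general-purpose register copies above: on x86_64 the public ABI declares the e-named and r-named GPR fields as a union over the same 64-bit slot, so reading user_regs.eax yields the full 64-bit value and no truncation occurs. A hedged illustration of that aliasing (a paraphrase, not the actual Xen header text):

#include <stdint.h>

/* Anonymous unions need C11 or GNU C, which the Xen tree uses. */
#define DEMO_DECL_REG(name) union { uint64_t r ## name, e ## name; }

struct demo_user_regs {
    DEMO_DECL_REG(ax);
    DEMO_DECL_REG(bx);
    /* ...remaining GPRs elided for brevity... */
};

/* regs.eax and regs.rax name the same 64-bit slot. */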
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c Fri Apr 13 11:14:26 2007 +0100
@@ -233,7 +233,7 @@ int svm_vmcb_save(struct vcpu *v, struct
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- c->eip = vmcb->rip;
+ c->rip = vmcb->rip;
#ifdef HVM_DEBUG_SUSPEND
printk("%s: eip=0x%"PRIx64".\n",
@@ -241,10 +241,11 @@ int svm_vmcb_save(struct vcpu *v, struct
inst_len, c->eip);
#endif
- c->esp = vmcb->rsp;
- c->eflags = vmcb->rflags;
+ c->rsp = vmcb->rsp;
+ c->rflags = vmcb->rflags;
c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+ c->cr2 = v->arch.hvm_svm.cpu_cr2;
c->cr3 = v->arch.hvm_svm.cpu_cr3;
c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
@@ -315,14 +316,16 @@ int svm_vmcb_restore(struct vcpu *v, str
unsigned long mfn, old_base_mfn;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- vmcb->rip = c->eip;
- vmcb->rsp = c->esp;
- vmcb->rflags = c->eflags;
+ vmcb->rip = c->rip;
+ vmcb->rsp = c->rsp;
+ vmcb->rflags = c->rflags;
v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0;
vmcb->cr0 = c->cr0 | X86_CR0_WP | X86_CR0_ET;
if ( !paging_mode_hap(v->domain) )
vmcb->cr0 |= X86_CR0_PG;
+
+ v->arch.hvm_svm.cpu_cr2 = c->cr2;
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
@@ -421,6 +424,9 @@ int svm_vmcb_restore(struct vcpu *v, str
vmcb->sysenter_esp = c->sysenter_esp;
vmcb->sysenter_eip = c->sysenter_eip;
+ vmcb->dr6 = c->dr6;
+ vmcb->dr7 = c->dr7;
+
paging_update_paging_modes(v);
return 0;
@@ -440,6 +446,7 @@ void svm_save_cpu_state(struct vcpu *v,
data->msr_cstar = vmcb->cstar;
data->msr_syscall_mask = vmcb->sfmask;
data->msr_efer = v->arch.hvm_svm.cpu_shadow_efer;
+ data->msr_flags = -1ULL;
data->tsc = hvm_get_guest_time(v);
}
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Apr 13 11:14:26 2007 +0100
@@ -370,11 +370,12 @@ static inline void __restore_debug_regis
int vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
{
- c->eip = __vmread(GUEST_RIP);
- c->esp = __vmread(GUEST_RSP);
- c->eflags = __vmread(GUEST_RFLAGS);
+ c->rip = __vmread(GUEST_RIP);
+ c->rsp = __vmread(GUEST_RSP);
+ c->rflags = __vmread(GUEST_RFLAGS);
c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+ c->cr2 = v->arch.hvm_vmx.cpu_cr2;
c->cr3 = v->arch.hvm_vmx.cpu_cr3;
c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
@@ -444,12 +445,14 @@ int vmx_vmcs_restore(struct vcpu *v, str
vmx_vmcs_enter(v);
- __vmwrite(GUEST_RIP, c->eip);
- __vmwrite(GUEST_RSP, c->esp);
- __vmwrite(GUEST_RFLAGS, c->eflags);
+ __vmwrite(GUEST_RIP, c->rip);
+ __vmwrite(GUEST_RSP, c->rsp);
+ __vmwrite(GUEST_RFLAGS, c->rflags);
v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+ v->arch.hvm_vmx.cpu_cr2 = c->cr2;
#ifdef HVM_DEBUG_SUSPEND
printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64",
cr4=0x%"PRIx64".\n",
@@ -555,6 +558,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
__vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
__vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+ __vmwrite(GUEST_DR7, c->dr7);
+
vmx_vmcs_exit(v);
paging_update_paging_modes(v);
@@ -590,7 +595,7 @@ void vmx_save_cpu_state(struct vcpu *v,
data->shadow_gs = guest_state->shadow_gs;
/* save msrs */
- data->flags = guest_flags;
+ data->msr_flags = guest_flags;
data->msr_lstar = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR];
data->msr_cstar = guest_state->msrs[VMX_INDEX_MSR_CSTAR];
@@ -607,7 +612,7 @@ void vmx_load_cpu_state(struct vcpu *v,
struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
/* restore msrs */
- guest_state->flags = data->flags;
+ guest_state->flags = data->msr_flags;
guest_state->msrs[VMX_INDEX_MSR_LSTAR] = data->msr_lstar;
guest_state->msrs[VMX_INDEX_MSR_STAR] = data->msr_star;
guest_state->msrs[VMX_INDEX_MSR_CSTAR] = data->msr_cstar;
diff -r 5bda20f0723d -r f92a79e39da8 xen/include/public/hvm/save.h
--- a/xen/include/public/hvm/save.h Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/include/public/hvm/save.h Fri Apr 13 11:14:26 2007 +0100
@@ -87,12 +87,39 @@ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct
*/
struct hvm_hw_cpu {
- uint64_t eip;
- uint64_t esp;
- uint64_t eflags;
+ uint8_t fpu_regs[512];
+
+ uint64_t rax;
+ uint64_t rbx;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t rsp;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+
+ uint64_t rip;
+ uint64_t rflags;
+
uint64_t cr0;
+ uint64_t cr2;
uint64_t cr3;
uint64_t cr4;
+
+ uint64_t dr0;
+ uint64_t dr1;
+ uint64_t dr2;
+ uint64_t dr3;
+ uint64_t dr6;
+ uint64_t dr7;
uint32_t cs_sel;
uint32_t ds_sel;
@@ -142,9 +169,9 @@ struct hvm_hw_cpu {
/* msr for em64t */
uint64_t shadow_gs;
- uint64_t flags;
/* msr content saved/restored. */
+ uint64_t msr_flags;
uint64_t msr_lstar;
uint64_t msr_star;
uint64_t msr_cstar;
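Each struct hvm_hw_cpu instance is written into the flat save buffer behind a small descriptor (typecode, instance, length) declared earlier in this header, which is how the loader locates and length-checks per-vcpu CPU records. A hedged consumer-side sketch under that assumption; the names demo_save_descriptor and find_entry are illustrative only:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Descriptor layout assumed from earlier in this header (not shown here). */
struct demo_save_descriptor {
    uint16_t typecode;
    uint16_t instance;
    uint32_t length;
};

/* Return a pointer to the payload of the matching record, or NULL. */
static const uint8_t *find_entry(const uint8_t *buf, size_t size,
                                 uint16_t typecode, uint16_t instance,
                                 uint32_t expect_len)
{
    size_t off = 0;
    struct demo_save_descriptor d;

    while (off + sizeof(d) <= size) {
        memcpy(&d, buf + off, sizeof(d));
        if (d.length > size - off - sizeof(d))
            return NULL;                          /* truncated buffer */
        if (d.typecode == typecode && d.instance == instance)
            return d.length == expect_len ? buf + off + sizeof(d) : NULL;
        off += sizeof(d) + d.length;              /* skip to next record */
    }
    return NULL;                                  /* not found */
}

Because the CPU record grows here (FPU image, renamed 64-bit GPRs, cr2 and the debug registers), save images produced before this change will fail such a length check on restore.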
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog