# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1194455961 25200
# Node ID 74b40a9f4c0a27217f093f4172ef68d783644fbb
# Parent 828cb584c1ccd9a1ed42ae856f3ee86b2dfa8ace
[IA64] vti save-restore: clean up arch_get/set_info_guest()
- Update comment in copy_rbs()
- Don't warn when rbs_size = 0 for cpu initialization case.
- Remove struct vcpu_guest_context_regs::rbs_nat member which isn't used.
and add num_phys_stacked to struct vcpu_guest_context_regs.
so far rbs_nat and rbs_rnat isn't, so it is allowed to change the offset
of rbs_rnat.
- Add check when setting vRR[].
- Don't set vRR[] if val is zero.
Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
---
xen/arch/ia64/xen/domain.c | 93 ++++++++++++++++++++++++++++++-----------
xen/include/public/arch-ia64.h | 8 +++
2 files changed, 77 insertions(+), 24 deletions(-)
diff -r 828cb584c1cc -r 74b40a9f4c0a xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Wed Nov 07 10:10:20 2007 -0700
+++ b/xen/arch/ia64/xen/domain.c Wed Nov 07 10:19:21 2007 -0700
@@ -636,6 +636,7 @@ int arch_vcpu_reset(struct vcpu *v)
return 0;
}
+/* Here it is assumed that all of the CPUs has same RSE.N_STACKED_PHYS */
static unsigned long num_phys_stacked;
static int __init
init_num_phys_stacked(void)
@@ -822,8 +823,9 @@ void arch_get_info_guest(struct vcpu *v,
COPY_FPREG(&c.nat->regs.f[30], &sw->f30);
COPY_FPREG(&c.nat->regs.f[31], &sw->f31);
- for (i = 0; i < 96; i++)
- COPY_FPREG(&c.nat->regs.f[i + 32], &v->arch._thread.fph[i]);
+ // f32 - f127
+ memcpy(&c.nat->regs.f[32], &v->arch._thread.fph[0],
+ sizeof(v->arch._thread.fph));
#define NATS_UPDATE(reg) \
nats_update(&c.nat->regs.nats, (reg), \
@@ -939,6 +941,8 @@ void arch_get_info_guest(struct vcpu *v,
c.nat->regs.rbs_rnat &= ~((1UL << bottom_slot) - 1);
}
+ c.nat->regs.num_phys_stacked = num_phys_stacked;
+
if (VMX_DOMAIN(v))
c.nat->privregs_pfn = VGC_PRIVREGS_HVM;
else
@@ -1101,7 +1105,6 @@ copy_rbs(struct vcpu* v, unsigned long*
if (((unsigned long)dst_bsp & ~PAGE_MASK) > KERNEL_STACK_SIZE / 2)
goto out;
- //XXX TODO
// ia64_copy_rbs() uses real cpu's stack register.
// So it may fault with an Illigal Operation fault resulting
// in panic if rbs_size is too large to load compared to
@@ -1113,10 +1116,9 @@ copy_rbs(struct vcpu* v, unsigned long*
// we need to copy them by hand without loadrs and flushrs
// However even if we implement that, similar issue still occurs
// when running guest. CPU context restore routine issues loadrs
- // resulting in Illegal Operation fault. For such a case,
- // we need to emulate RSE store.
- // So it would be better to implement only RSE store emulation
- // and copy stacked registers directly into guest RBS.
+ // resulting in Illegal Operation fault. And what if the vRSE is in
+ // enforced lazy mode? We can't store any dirty stacked registers
+ // into RBS without cover or br.call.
if (num_regs > num_phys_stacked) {
rc = -ENOSYS;
gdprintk(XENLOG_WARNING,
@@ -1240,10 +1242,6 @@ int arch_set_info_guest(struct vcpu *v,
uregs->pr = c.nat->regs.pr;
uregs->b0 = c.nat->regs.b[0];
- if (((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
- gdprintk(XENLOG_INFO,
- "rbs stack offset is different! xen 0x%x given 0x%x",
- (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);
num_regs = ia64_rse_num_regs((unsigned long*)c.nat->regs.ar.bspstore,
(unsigned long*)c.nat->regs.ar.bsp);
rbs_size = (unsigned long)ia64_rse_skip_regs(rbs_bottom, num_regs) -
@@ -1254,6 +1252,11 @@ int arch_set_info_guest(struct vcpu *v,
rbs_size, sizeof (c.nat->regs.rbs));
return -EINVAL;
}
+ if (rbs_size > 0 &&
+ ((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
+ gdprintk(XENLOG_INFO,
+ "rbs stack offset is different! xen 0x%x given 0x%x",
+ (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);
/* Protection against crazy user code. */
if (!was_initialised)
@@ -1279,6 +1282,49 @@ int arch_set_info_guest(struct vcpu *v,
sw->ar_bspstore = (unsigned long)((char*)rbs_bottom +
dst_rbs_size);
}
+ }
+
+ // inhibit save/restore between cpus of different RSE.N_STACKED_PHYS.
+ // to avoid nasty issues.
+ //
+ // The number of physical stacked general register(RSE.N_STACKED_PHYS)
+ // isn't virtualized. Guest OS utilizes it via PAL_RSE_INFO call and
+ // the value might be exported to user/user process.
+ // (Linux does via /proc/cpuinfo)
+ // The SDM says only that the number is cpu implementation specific.
+ //
+ // If the number of restoring cpu is different from one of saving cpu,
+ // the following, or something worse, might happen.
+ // - Xen VMM itself may panic when issuing loadrs to run guest with
+ // illegal operation fault
+ // When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of
+ // restoring CPU
+ // This case is detected to refuse restore by rbs_copy()
+ // - guest kernel may panic with illegal operation fault
+ // When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of
+ // restoring CPU
+ // - infomation leak from guest kernel to user process
+ // When RSE.N_STACKED_PHYS of saving CPU < RSE.N_STACKED_PHYS of
+ // restoring CPU
+ // Before returning to user process, kernel should zero clear all
+ // physical stacked resgisters to prevent kernel bits leak.
+ // It would be based on RSE.N_STACKED_PHYS (Linux does.).
+ // On the restored environtment the kernel clears only a part
+ // of the physical stacked registers.
+ // - user processes or human operators would be confused.
+ // RSE.N_STACKED_PHYS might be exported to user process or human
+ // operators. Actually on linux it is exported via /proc/cpuinfo.
+ // user processes might use it.
+ // I don't know any concrete example, but it's possible in theory.
+ // e.g. thread libraly may allocate RBS area based on the value.
+ // (Fortunately glibc nptl doesn't)
+ if (c.nat->regs.num_phys_stacked != 0 && /* COMPAT */
+ c.nat->regs.num_phys_stacked != num_phys_stacked) {
+ gdprintk(XENLOG_WARNING,
+ "num phys stacked is different! "
+ "xen 0x%lx given 0x%lx",
+ num_phys_stacked, c.nat->regs.num_phys_stacked);
+ return -EINVAL;
}
uregs->r1 = c.nat->regs.r[1];
@@ -1342,9 +1388,9 @@ int arch_set_info_guest(struct vcpu *v,
COPY_FPREG(&sw->f30, &c.nat->regs.f[30]);
COPY_FPREG(&sw->f31, &c.nat->regs.f[31]);
- for (i = 0; i < 96; i++)
- COPY_FPREG(&v->arch._thread.fph[i], &c.nat->regs.f[i + 32]);
-
+ // f32 - f127
+ memcpy(&v->arch._thread.fph[0], &c.nat->regs.f[32],
+ sizeof(v->arch._thread.fph));
#define UNAT_UPDATE(reg) \
unat_update(&uregs->eml_unat, &uregs->r ## reg, \
@@ -1439,20 +1485,21 @@ int arch_set_info_guest(struct vcpu *v,
/* rr[] must be set before setting itrs[] dtrs[] */
for (i = 0; i < 8; i++) {
- //XXX TODO integrity check.
- // if invalid value is given,
- // vmx_load_all_rr() and load_region_regs()
- // result in General exception, reserved register/field
- // failt causing panicing xen.
+ unsigned long rrval = c.nat->regs.rr[i];
+ unsigned long reg = (unsigned long)i << 61;
+ IA64FAULT fault = IA64_NO_FAULT;
+
+ if (rrval == 0)
+ continue;
if (d->arch.is_vti) {
//without VGCF_EXTRA_REGS check,
//VTi domain doesn't boot.
if (c.nat->flags & VGCF_EXTRA_REGS)
- vmx_vcpu_set_rr(v, (unsigned long)i << 61,
- c.nat->regs.rr[i]);
+ fault = vmx_vcpu_set_rr(v, reg, rrval);
} else
- vcpu_set_rr(v, (unsigned long)i << 61,
- c.nat->regs.rr[i]);
+ fault = vcpu_set_rr(v, reg, rrval);
+ if (fault != IA64_NO_FAULT)
+ return -EINVAL;
}
if (c.nat->flags & VGCF_EXTRA_REGS) {
diff -r 828cb584c1cc -r 74b40a9f4c0a xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h Wed Nov 07 10:10:20 2007 -0700
+++ b/xen/include/public/arch-ia64.h Wed Nov 07 10:19:21 2007 -0700
@@ -417,8 +417,14 @@ struct vcpu_guest_context_regs {
*/
unsigned int rbs_voff;
unsigned long rbs[2048];
- unsigned long rbs_nat;
unsigned long rbs_rnat;
+
+ /*
+ * RSE.N_STACKED_PHYS via PAL_RSE_INFO
+ * Strictly this isn't cpu context, but this value is necessary
+ * for domain save/restore. So is here.
+ */
+ unsigned long num_phys_stacked;
};
struct vcpu_guest_context {
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|