[Xen-ia64-devel] [PATCH 4/7] vti save-restore: clean up of arch_

# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1193036269 -32400
# Node ID 977f5886e288f4c6764e5e12cb6492eff54a6bca
# Parent  75280d0f64d5946bf0351a9f2cbc178b26696df1
clean up arch_get/set_info_guest()
- update comment in copy_rbs()
- don't warn when rbs_size = 0 for cpu initialization case.
- remove struct vcpu_guest_context_regs::rbs_nat member which isn't used.
  and add num_phys_stacked to struct vcpu_guest_context_regs.
  so far rbs_nat and rbs_rnat isn't, so it is allowed to change the offset
  of rbs_rnat.
- Add check when setting vRR[].
- don't set vRR[] if the value is zero
PATCHNAME: clean_up_arch_set_info_guest

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>

diff -r 75280d0f64d5 -r 977f5886e288 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Tue Oct 16 18:48:05 2007 +0900
+++ b/xen/arch/ia64/xen/domain.c        Mon Oct 22 15:57:49 2007 +0900
@@ -634,6 +634,7 @@ int arch_vcpu_reset(struct vcpu *v)
        return 0;
 }
 
+/* Here it is assumed that all of the CPUs has same RSE.N_STACKED_PHYS */
 static unsigned long num_phys_stacked;
 static int __init
 init_num_phys_stacked(void)
@@ -820,8 +821,9 @@ void arch_get_info_guest(struct vcpu *v,
        COPY_FPREG(&c.nat->regs.f[30], &sw->f30);
        COPY_FPREG(&c.nat->regs.f[31], &sw->f31);
 
-       for (i = 0; i < 96; i++)
-               COPY_FPREG(&c.nat->regs.f[i + 32], &v->arch._thread.fph[i]);
+       // f32 - f127
+       memcpy(&c.nat->regs.f[32], &v->arch._thread.fph[0],
+              sizeof(v->arch._thread.fph));
 
 #define NATS_UPDATE(reg)                                               \
        nats_update(&c.nat->regs.nats, (reg),                           \
@@ -937,6 +939,8 @@ void arch_get_info_guest(struct vcpu *v,
                c.nat->regs.rbs_rnat &= ~((1UL << bottom_slot) - 1);
        }
 
+       c.nat->regs.num_phys_stacked = num_phys_stacked;
+
        c.nat->privregs_pfn = get_gpfn_from_mfn
                (virt_to_maddr(v->arch.privregs) >> PAGE_SHIFT);
 
@@ -1096,7 +1100,6 @@ copy_rbs(struct vcpu* v, unsigned long* 
        if (((unsigned long)dst_bsp & ~PAGE_MASK) > KERNEL_STACK_SIZE / 2)
                goto out;
 
-       //XXX TODO
        // ia64_copy_rbs() uses real cpu's stack register.
        // So it may fault with an Illigal Operation fault resulting
        // in panic if rbs_size is too large to load compared to
@@ -1108,10 +1111,9 @@ copy_rbs(struct vcpu* v, unsigned long* 
        // we need to copy them by hand without loadrs and flushrs
        // However even if we implement that, similar issue still occurs
        // when running guest. CPU context restore routine issues loadrs
-       // resulting in Illegal Operation fault. For such a case,
-       // we need to emulate RSE store.
-       // So it would be better to implement only RSE store emulation
-       // and copy stacked registers directly into guest RBS.
+       // resulting in Illegal Operation fault. And what if the vRSE is in
+       // enforced lazy mode? We can't store any dirty stacked registers
+       // into RBS without cover or br.call.
        if (num_regs > num_phys_stacked) {
                rc = -ENOSYS;
                gdprintk(XENLOG_WARNING,
@@ -1235,10 +1237,6 @@ int arch_set_info_guest(struct vcpu *v, 
        
        uregs->pr = c.nat->regs.pr;
        uregs->b0 = c.nat->regs.b[0];
-       if (((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
-               gdprintk(XENLOG_INFO,
-                        "rbs stack offset is different! xen 0x%x given 0x%x",
-                        (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);
        num_regs = ia64_rse_num_regs((unsigned long*)c.nat->regs.ar.bspstore,
                                     (unsigned long*)c.nat->regs.ar.bsp);
        rbs_size = (unsigned long)ia64_rse_skip_regs(rbs_bottom, num_regs) -
@@ -1249,6 +1247,11 @@ int arch_set_info_guest(struct vcpu *v, 
                         rbs_size, sizeof (c.nat->regs.rbs));
                return -EINVAL;
        }
+       if (rbs_size > 0 &&
+           ((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
+               gdprintk(XENLOG_INFO,
+                        "rbs stack offset is different! xen 0x%x given 0x%x",
+                        (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);
        
        /* Protection against crazy user code.  */
        if (!was_initialised)
@@ -1274,6 +1277,49 @@ int arch_set_info_guest(struct vcpu *v, 
                        sw->ar_bspstore = (unsigned long)((char*)rbs_bottom +
                                                          dst_rbs_size);
                }
+       }
+
+       // inhibit save/restore between cpus of different RSE.N_STACKED_PHYS.
+       // to avoid nasty issues.
+       // 
+       // The number of physical stacked general register(RSE.N_STACKED_PHYS)
+       // isn't virtualized. Guest OS utilizes it via PAL_RSE_INFO call and
+       // the value might be exported to user/user process.
+       // (Linux does via /proc/cpuinfo)
+       // The SDM says only that the number is cpu implementation specific.
+       //
+       // If the number of restoring cpu is different from one of saving cpu,
+       // the followings or something wrose might happen.
+       // - Xen VMM itself may panic when issuing loadrs to run guest with
+       //   illegal operation fault
+       //   When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of
+       //   restoring CPU
+       //   This case is detected to refuse restore by rbs_copy()
+       // - guest kernel may panic with illegal operation fault
+       //   When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of
+       //   restoring CPU
+       // - infomation leak from guest kernel to user process
+       //   When RSE.N_STACKED_PHYS of saving CPU < RSE.N_STACKED_PHYS of
+       //   restoring CPU
+       //   Before returning to user process, kernel should zero clear all
+       //   physical stacked resgisters to prevent kernel bits leak.
+       //   It would be based on RSE.N_STACKED_PHYS (Linux does.).
+       //   On the restored environtment the kernel clears only a part
+       //   of the physical stacked registers.
+       // - user processes or human operators would be confused.
+       //   RSE.N_STACKED_PHYS might be exported to user process or human
+       //   operators. Actually on linux it is exported via /proc/cpuinfo.
+       //   user processes might use it.
+       //   I don't know any concreate example, but it's possible in theory.
+       //   e.g. thread libraly may allocate RBS area based on the value.
+       //        (Fortunately glibc nptl doesn't)
+       if (c.nat->regs.num_phys_stacked != 0 && /* COMPAT */
+           c.nat->regs.num_phys_stacked != num_phys_stacked) {
+               gdprintk(XENLOG_WARNING,
+                        "num phys stacked is different! "
+                        "xen 0x%lx given 0x%lx",
+                        num_phys_stacked, c.nat->regs.num_phys_stacked);
+               return -EINVAL;
        }
 
        uregs->r1 = c.nat->regs.r[1];
@@ -1337,9 +1383,9 @@ int arch_set_info_guest(struct vcpu *v, 
        COPY_FPREG(&sw->f30, &c.nat->regs.f[30]);
        COPY_FPREG(&sw->f31, &c.nat->regs.f[31]);
 
-       for (i = 0; i < 96; i++)
-               COPY_FPREG(&v->arch._thread.fph[i], &c.nat->regs.f[i + 32]);
-
+       // f32 - f127
+       memcpy(&v->arch._thread.fph[0], &c.nat->regs.f[32],
+              sizeof(v->arch._thread.fph));
 
 #define UNAT_UPDATE(reg)                                       \
        unat_update(&uregs->eml_unat, &uregs->r ## reg,         \
@@ -1434,20 +1480,21 @@ int arch_set_info_guest(struct vcpu *v, 
 
        /* rr[] must be set before setting itrs[] dtrs[] */
        for (i = 0; i < 8; i++) {
-               //XXX TODO integrity check.
-               //    if invalid value is given, 
-               //    vmx_load_all_rr() and load_region_regs()
-               //    result in General exception, reserved register/field
-               //    failt causing panicing xen.
+               unsigned long rrval = c.nat->regs.rr[i];
+               unsigned long reg = (unsigned long)i << 61;
+               IA64FAULT fault = IA64_NO_FAULT;
+
+               if (rrval == 0)
+                       continue;
                if (d->arch.is_vti) {
                        //without VGCF_EXTRA_REGS check,
                        //VTi domain doesn't boot.
                        if (c.nat->flags & VGCF_EXTRA_REGS)
-                               vmx_vcpu_set_rr(v, (unsigned long)i << 61,
-                                               c.nat->regs.rr[i]);
+                               fault = vmx_vcpu_set_rr(v, reg, rrval);
                } else
-                       vcpu_set_rr(v, (unsigned long)i << 61,
-                                   c.nat->regs.rr[i]);
+                       fault = vcpu_set_rr(v, reg, rrval);
+               if (fault != IA64_NO_FAULT)
+                       return -EINVAL;
        }
 
        if (c.nat->flags & VGCF_EXTRA_REGS) {
diff -r 75280d0f64d5 -r 977f5886e288 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Tue Oct 16 18:48:05 2007 +0900
+++ b/xen/include/public/arch-ia64.h    Mon Oct 22 15:57:49 2007 +0900
@@ -416,8 +416,14 @@ struct vcpu_guest_context_regs {
          */
         unsigned int rbs_voff;
         unsigned long rbs[2048];
-        unsigned long rbs_nat;
         unsigned long rbs_rnat;
+
+        /*
+         * RSE.N_STACKED_PHYS via PAL_RSE_INFO
+         * Strictly this isn't cpu context, but this value is necessary
+         * for domain save/restore. So is here.
+         */
+        unsigned long num_phys_stacked;
 };
 
 struct vcpu_guest_context {

16181_977f5886e288_clean_up_arch_set_info_guest.patch
Description: Text Data

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel

WARNING - OLD ARCHIVES

xen-ia64-devel

[Xen-ia64-devel] [PATCH 4/7] vti save-restore: clean up of arch_get/set_