[Xen-changelog] [xen-unstable] x86: Xsave support for PV guests.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: Xsave support for PV guests.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 04 Nov 2010 16:50:12 -0700
Delivery-date: Thu, 04 Nov 2010 16:50:23 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1288772120 0
# Node ID a3ec4b3b685e05e9890c37be0763b3c33f6dc722
# Parent  ee4d52f0d16a91583d511c18aa679a7fec6be86c
x86: Xsave support for PV guests.

Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Signed-off-by: Han Weidong <weidong.han@xxxxxxxxx>
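
For illustration only (this note and the sketch below are not part of the original changeset description): once the XSAVE bit is no longer filtered from the PV CPUID policy, a 64-bit PV guest kernel can enable the feature the native way; both the CR4 write and the XSETBV instruction trap to Xen, which handles them in pv_guest_cr4_fixup() and the new case added to emulate_privileged_op() below. The helper names in the sketch are invented for the example; the constants mirror the hypervisor's XSTATE_* and CR4 definitions.

#include <stdint.h>

#define X86_CR4_OSXSAVE  (1UL << 18)
#define XSTATE_FP        (1ULL << 0)
#define XSTATE_SSE       (1ULL << 1)
#define XSTATE_YMM       (1ULL << 2)

/* Hypothetical guest-side wrapper: XSETBV executed at CPL > 0 faults, and
 * Xen now emulates it via the new 0f 01 /d1 case in emulate_privileged_op(). */
static inline void guest_xsetbv(uint32_t index, uint64_t value)
{
    asm volatile ( "xsetbv"
                   : : "c" (index),
                       "a" ((uint32_t)value),
                       "d" ((uint32_t)(value >> 32)) );
}

/* Hypothetical boot-time hook in the guest kernel. */
static void guest_enable_xsave(void)
{
    unsigned long cr4;

    /* The CR4 access traps to Xen; OSXSAVE is no longer stripped by
     * pv_guest_cr4_fixup(). */
    asm volatile ( "mov %%cr4, %0" : "=r" (cr4) );
    asm volatile ( "mov %0, %%cr4" : : "r" (cr4 | X86_CR4_OSXSAVE) );

    /* Bit 0 (x87) must stay set and reserved bits clear; the emulation
     * checks the request against xfeature_mask. */
    guest_xsetbv(0 /* XCR_XFEATURE_ENABLED_MASK */,
                 XSTATE_FP | XSTATE_SSE | XSTATE_YMM);
}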
---
 tools/libxc/xc_cpuid_x86.c     |    1 
 xen/arch/x86/domain.c          |   30 +++++++++++++++++++++++-
 xen/arch/x86/hvm/hvm.c         |   13 ----------
 xen/arch/x86/hvm/vmx/vmx.c     |    8 ++----
 xen/arch/x86/i387.c            |   16 ++++++++----
 xen/arch/x86/traps.c           |   51 ++++++++++++++++++++++++++++++++++-------
 xen/include/asm-x86/domain.h   |   20 +++++++++++++++-
 xen/include/asm-x86/hvm/vcpu.h |    9 -------
 xen/include/asm-x86/i387.h     |   22 +++++++++++++----
 9 files changed, 122 insertions(+), 48 deletions(-)

diff -r ee4d52f0d16a -r a3ec4b3b685e tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Tue Nov 02 07:35:52 2010 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Wed Nov 03 08:15:20 2010 +0000
@@ -323,7 +323,6 @@ static void xc_cpuid_pv_policy(
         clear_bit(X86_FEATURE_XTPR, regs[2]);
         clear_bit(X86_FEATURE_PDCM, regs[2]);
         clear_bit(X86_FEATURE_DCA, regs[2]);
-        clear_bit(X86_FEATURE_XSAVE, regs[2]);
         set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
         break;
     case 0x80000001:
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/domain.c     Wed Nov 03 08:15:20 2010 +0000
@@ -343,10 +343,26 @@ int vcpu_initialise(struct vcpu *v)
 
     paging_vcpu_init(v);
 
+    if ( cpu_has_xsave )
+    {
+        /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
+        void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
+        if ( xsave_area == NULL )
+            return -ENOMEM;
+
+        xsave_init_save_area(xsave_area);
+        v->arch.xsave_area = xsave_area;
+        v->arch.xcr0 = XSTATE_FP_SSE;
+        v->arch.xcr0_accum = XSTATE_FP_SSE;
+    }
+
     if ( is_hvm_domain(d) )
     {
         if ( (rc = hvm_vcpu_initialise(v)) != 0 )
+        {
+            xfree(v->arch.xsave_area);
             return rc;
+        }
     }
     else
     {
@@ -376,13 +392,21 @@ int vcpu_initialise(struct vcpu *v)
 
     spin_lock_init(&v->arch.shadow_ldt_lock);
 
-    return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
+    rc = 0;
+    if ( is_pv_32on64_vcpu(v) )
+        rc = setup_compat_l4(v);
+    if ( rc )
+        xfree(v->arch.xsave_area);
+
+    return rc;
 }
 
 void vcpu_destroy(struct vcpu *v)
 {
     if ( is_pv_32on64_vcpu(v) )
         release_compat_l4(v);
+
+    xfree(v->arch.xsave_area);
 
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
@@ -592,6 +616,8 @@ unsigned long pv_guest_cr4_fixup(const s
         hv_cr4_mask &= ~X86_CR4_DE;
     if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
         hv_cr4_mask &= ~X86_CR4_FSGSBASE;
+    if ( cpu_has_xsave )
+        hv_cr4_mask &= ~X86_CR4_OSXSAVE;
 
     if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
         gdprintk(XENLOG_WARNING,
@@ -1367,6 +1393,8 @@ static void __context_switch(void)
         memcpy(stack_regs,
                &n->arch.guest_context.user_regs,
                CTXT_SWITCH_STACK_BYTES);
+        if ( cpu_has_xsave && n->arch.xcr0 != get_xcr0() )
+            set_xcr0(n->arch.xcr0);
         n->arch.ctxt_switch_to(n);
     }
 
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Wed Nov 03 08:15:20 2010 +0000
@@ -805,18 +805,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
     hvm_asid_flush_vcpu(v);
 
-    if ( cpu_has_xsave )
-    {
-        /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-        void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
-        if ( xsave_area == NULL )
-            return -ENOMEM;
-
-        xsave_init_save_area(xsave_area);
-        v->arch.hvm_vcpu.xsave_area = xsave_area;
-        v->arch.hvm_vcpu.xcr0 = XSTATE_FP_SSE;
-    }
-
     if ( (rc = vlapic_init(v)) != 0 )
         goto fail1;
 
@@ -879,7 +867,6 @@ void hvm_vcpu_destroy(struct vcpu *v)
     hvm_vcpu_cacheattr_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
-    xfree(v->arch.hvm_vcpu.xsave_area);
 
     /* Event channel is already freed by evtchn_destroy(). */
     /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Nov 03 08:15:20 2010 +0000
@@ -652,10 +652,7 @@ static void vmx_ctxt_switch_to(struct vc
     struct domain *d = v->domain;
     unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
 
-    /* HOST_CR4 in VMCS is always mmu_cr4_features and
-     * CR4_OSXSAVE(if supported). Sync CR4 now. */
-    if ( cpu_has_xsave )
-        new_cr4 |= X86_CR4_OSXSAVE;
+    /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
     if ( old_cr4 != new_cr4 )
         write_cr4(new_cr4);
 
@@ -2215,7 +2212,8 @@ static int vmx_handle_xsetbv(u64 new_bv)
     if ( (xfeature_mask & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE) )
         goto err;
 
-    v->arch.hvm_vcpu.xcr0 = new_bv;
+    v->arch.xcr0 = new_bv;
+    v->arch.xcr0_accum |= new_bv;
     set_xcr0(new_bv);
     return 0;
 err:
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c       Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/i387.c       Wed Nov 03 08:15:20 2010 +0000
@@ -33,9 +33,14 @@ void save_init_fpu(struct vcpu *v)
     if ( cr0 & X86_CR0_TS )
         clts();
 
-    if ( cpu_has_xsave && is_hvm_vcpu(v) )
-    {
+    if ( cpu_has_xsave )
+    {
+        /* XCR0 normally holds whatever the guest OS has set.  Before Xen
+         * itself does a save/restore, it sets the full accumulated mask.
+         */
+        set_xcr0(v->arch.xcr0_accum);
         xsave(v);
+        set_xcr0(v->arch.xcr0);
     }
     else if ( cpu_has_fxsr )
     {
@@ -144,6 +149,9 @@ u32 xsave_cntxt_size;
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 xfeature_mask;
 
+/* Cached xcr0 for fast read */
+DEFINE_PER_CPU(uint64_t, xcr0);
+
 void xsave_init(void)
 {
     u32 eax, ebx, ecx, edx;
@@ -171,13 +179,11 @@ void xsave_init(void)
     BUG_ON(ecx < min_size);
 
     /*
-     * We will only enable the features we know for hvm guest. Here we use
-     * set/clear CR4_OSXSAVE and re-run cpuid to get xsave_cntxt_size.
+     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
      */
     set_in_cr4(X86_CR4_OSXSAVE);
     set_xcr0(eax & XCNTXT_MASK);
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-    clear_in_cr4(X86_CR4_OSXSAVE);
 
     if ( cpu == 0 )
     {
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/traps.c      Wed Nov 03 08:15:20 2010 +0000
@@ -795,7 +795,6 @@ static void pv_cpuid(struct cpu_user_reg
         __clear_bit(X86_FEATURE_XTPR % 32, &c);
         __clear_bit(X86_FEATURE_PDCM % 32, &c);
         __clear_bit(X86_FEATURE_DCA % 32, &c);
-        __clear_bit(X86_FEATURE_XSAVE % 32, &c);
         if ( !cpu_has_apic )
            __clear_bit(X86_FEATURE_X2APIC % 32, &c);
         __set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
@@ -1715,7 +1714,7 @@ static int emulate_privileged_op(struct 
     enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
     int rc;
     unsigned int port, i, data_sel, ar, data, bpmatch = 0;
-    unsigned int op_bytes, op_default, ad_bytes, ad_default;
+    unsigned int op_bytes, op_default, ad_bytes, ad_default, opsize_prefix= 0;
 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
                     ? regs->reg \
                     : ad_bytes == 4 \
@@ -1751,6 +1750,7 @@ static int emulate_privileged_op(struct 
         switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) )
         {
         case 0x66: /* operand-size override */
+            opsize_prefix = 1;
             op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
             continue;
         case 0x67: /* address-size override */
@@ -2051,13 +2051,48 @@ static int emulate_privileged_op(struct 
         goto fail;
     switch ( opcode )
     {
-    case 0x1: /* RDTSCP */
-        if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
-             !guest_kernel_mode(v, regs) )
+    case 0x1: /* RDTSCP and XSETBV */
+        switch ( insn_fetch(u8, code_base, eip, code_limit) )
+        {
+        case 0xf9: /* RDTSCP */
+            if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
+                 !guest_kernel_mode(v, regs) )
+                goto fail;
+            pv_soft_rdtsc(v, regs, 1);
+            break;
+        case 0xd1: /* XSETBV */
+        {
+            u64 new_xfeature = (u32)regs->eax | ((u64)regs->edx << 32);
+
+            if ( lock || rep_prefix || opsize_prefix
+                 || !(v->arch.guest_context.ctrlreg[4] & X86_CR4_OSXSAVE) )
+            {
+                do_guest_trap(TRAP_invalid_op, regs, 0);
+                goto skip;
+            }
+
+            if ( !guest_kernel_mode(v, regs) )
+                goto fail;
+
+            switch ( (u32)regs->ecx )
+            {
+                case XCR_XFEATURE_ENABLED_MASK:
+                    /* bit 0 of XCR0 must be set and reserved bit must not be set */
+                    if ( !(new_xfeature & XSTATE_FP) || (new_xfeature & ~xfeature_mask) )
+                        goto fail;
+
+                    v->arch.xcr0 = new_xfeature;
+                    v->arch.xcr0_accum |= new_xfeature;
+                    set_xcr0(new_xfeature);
+                    break;
+                default:
+                    goto fail;
+            }
+            break;
+        }
+        default:
             goto fail;
-        if ( insn_fetch(u8, code_base, eip, code_limit) != 0xf9 )
-            goto fail;
-        pv_soft_rdtsc(v, regs, 1);
+        }
         break;
 
     case 0x06: /* CLTS */
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/domain.h      Wed Nov 03 08:15:20 2010 +0000
@@ -400,6 +400,23 @@ struct arch_vcpu
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                  /* (MA) value to install in HW CR3 */
 
+    /*
+     * The save area for Processor Extended States and the bitmask of the
+     * XSAVE/XRSTOR features.  They are used in two places: 1) when a vcpu
+     * that has dirtied FPU/SSE state is scheduled out, we XSAVE the state
+     * here; 2) in the #NM handler, we XRSTOR the state we saved earlier.
+     */
+    void *xsave_area;
+    uint64_t xcr0;
+    /* Accumulated mask of the eXtended features enabled so far, used when
+     * Xen itself does XSAVE/XRSTOR: we can never know whether the guest OS
+     * depends on the content being preserved when it clears a feature flag
+     * (for example, only temporarily).
+     * However, the guest cannot touch an eXtended state component before it
+     * has explicitly enabled it via XCR0.
+     */
+    uint64_t xcr0_accum;
+
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
@@ -435,7 +452,8 @@ unsigned long pv_guest_cr4_fixup(const s
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.guest_context.ctrlreg[4]                    \
       | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))    \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)         \
+      | ((cpu_has_xsave)? X86_CR4_OSXSAVE : 0))              \
       & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c) \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Nov 03 08:15:20 2010 +0000
@@ -48,15 +48,6 @@ struct hvm_vcpu {
      *  CR3:      Always used and kept up to date by paging subsystem.
      */
     unsigned long       hw_cr[5];
-
-    /*
-     * The save area for Processor Extended States and the bitmask of the
-     * XSAVE/XRSTOR features. They are used by: 1) when a vcpu (which has
-     * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in
-     * #NM handler, we XRSTOR the states we XSAVE-ed;
-     */
-    void *xsave_area;
-    uint64_t xcr0;
 
     struct vlapic       vlapic;
     s64                 cache_tsc_offset;
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h        Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/i387.h        Wed Nov 03 08:15:20 2010 +0000
@@ -49,6 +49,8 @@ struct xsave_struct
 #define REX_PREFIX
 #endif
 
+DECLARE_PER_CPU(uint64_t, xcr0);
+
 static inline void xsetbv(u32 index, u64 xfeatures)
 {
     u32 hi = xfeatures >> 32;
@@ -60,14 +62,20 @@ static inline void xsetbv(u32 index, u64
 
 static inline void set_xcr0(u64 xfeatures)
 {
+    this_cpu(xcr0) = xfeatures;
     xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures);
+}
+
+static inline uint64_t get_xcr0(void)
+{
+    return this_cpu(xcr0);
 }
 
 static inline void xsave(struct vcpu *v)
 {
     struct xsave_struct *ptr;
 
-    ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+    ptr =(struct xsave_struct *)v->arch.xsave_area;
 
     asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
         :
@@ -79,7 +87,7 @@ static inline void xrstor(struct vcpu *v
 {
     struct xsave_struct *ptr;
 
-    ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+    ptr =(struct xsave_struct *)v->arch.xsave_area;
 
     asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
         :
@@ -108,14 +116,18 @@ static inline void setup_fpu(struct vcpu
     if ( !v->fpu_dirtied )
     {
         v->fpu_dirtied = 1;
-        if ( cpu_has_xsave && is_hvm_vcpu(v) )
+        if ( cpu_has_xsave )
         {
             if ( !v->fpu_initialised )
                 v->fpu_initialised = 1;
 
-            set_xcr0(v->arch.hvm_vcpu.xcr0 | XSTATE_FP_SSE);
+            /* XCR0 normally holds whatever the guest OS has set.  Before
+             * Xen itself does a save/restore, it sets the full accumulated
+             * feature mask.
+             */
+            set_xcr0(v->arch.xcr0_accum);
             xrstor(v);
-            set_xcr0(v->arch.hvm_vcpu.xcr0);
+            set_xcr0(v->arch.xcr0);
         }
         else
         {

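For illustration only (not part of the changeset): the patch keeps two masks per vcpu because a guest may enable a state component, dirty it, and later clear the corresponding XCR0 bit while still expecting the contents to be preserved; Xen therefore widens XCR0 to the accumulated mask around its own XSAVE/XRSTOR (see save_init_fpu() and setup_fpu() above). Below is a minimal, self-contained sketch of that bookkeeping, using simplified stand-in types rather than the real Xen structures.

#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP  (1ULL << 0)
#define XSTATE_SSE (1ULL << 1)
#define XSTATE_YMM (1ULL << 2)

/* Simplified stand-ins for the two fields added to struct arch_vcpu. */
struct vcpu_xstate {
    uint64_t xcr0;        /* what the guest currently has enabled in XCR0 */
    uint64_t xcr0_accum;  /* every component the guest has ever enabled */
};

/* Mirrors the bookkeeping done by the emulated XSETBV paths above. */
static void emulated_xsetbv(struct vcpu_xstate *v, uint64_t new_bv)
{
    v->xcr0 = new_bv;
    v->xcr0_accum |= new_bv;    /* only ever grows */
}

int main(void)
{
    struct vcpu_xstate v = { XSTATE_FP | XSTATE_SSE, XSTATE_FP | XSTATE_SSE };

    emulated_xsetbv(&v, XSTATE_FP | XSTATE_SSE | XSTATE_YMM); /* enable AVX  */
    emulated_xsetbv(&v, XSTATE_FP | XSTATE_SSE);              /* disable it  */

    /* Xen saves/restores with xcr0_accum (0x7 here), not xcr0 (0x3), so the
     * YMM contents survive even though the guest has YMM disabled right now. */
    printf("xcr0=%#llx xcr0_accum=%#llx\n",
           (unsigned long long)v.xcr0, (unsigned long long)v.xcr0_accum);
    return 0;
}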