[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests



# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1327942850 -3600
# Node ID 1361d9895cb17aa8fce1054120311285144b8399
# Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
x86/AMD: Add support for AMD's OSVW feature in guests.

In some cases guests should not provide workarounds for errata even when the
physical processor is affected. For example, because of erratum 400 on family
10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
going to idle in order to avoid getting stuck in a non-C0 state. This is not
necessary: HLT and IO instructions are intercepted and therefore there is no
reason for erratum 400 workaround in the guest.

This patch allows us to present a guest with certain errata as fixed,
regardless of the state of actual hardware.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>

diff -r e2722b24dc09 -r 1361d9895cb1 tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Mon Jan 30 18:00:50 2012 +0100
@@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
                     bitmaskof(X86_FEATURE_SSE4A) |
                     bitmaskof(X86_FEATURE_MISALIGNSSE) |
                     bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
+                    bitmaskof(X86_FEATURE_OSVW) |
                     bitmaskof(X86_FEATURE_XOP) |
                     bitmaskof(X86_FEATURE_FMA4) |
                     bitmaskof(X86_FEATURE_TBM) |
diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Jan 30 18:00:50 2012 +0100
@@ -83,6 +83,9 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
 
 static bool_t amd_erratum383_found __read_mostly;
 
+/* OSVW bits */
+static uint64_t osvw_length, osvw_status;
+
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
 {
     struct vcpu *curr = current;
@@ -902,6 +905,60 @@ static void svm_do_resume(struct vcpu *v
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
+static void svm_guest_osvw_init(struct vcpu *vcpu)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /*
+     * Guests should see errata 400 and 415 as fixed (assuming that
+     * HLT and IO instructions are intercepted).
+     */
+    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
+    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
+
+    /*
+     * By increasing VCPU's osvw.length to 3 we are telling the guest that
+     * all osvw.status bits inside that length, including bit 0 (which is
+     * reserved for erratum 298), are valid. However, if host processor's
+     * osvw_len is 0 then osvw_status[0] carries no information. We need to
+     * be conservative here and therefore we tell the guest that erratum 298
+     * is present (because we really don't know).
+     */
+    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
+        vcpu->arch.hvm_svm.osvw.status |= 1;
+}
+
+void svm_host_osvw_reset()
+{
+    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
+    osvw_status = 0;
+}
+
+void svm_host_osvw_init()
+{
+    /* 
+     * Get OSVW bits. If bits are not the same on different processors then
+     * choose the worst case (i.e. if erratum is present on one processor and
+     * not on another assume that the erratum is present everywhere).
+     */
+    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
+    {
+        uint64_t len, status;
+         
+        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
+             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
+            len = status = 0;
+
+        if (len < osvw_length)
+            osvw_length = len;
+
+        osvw_status |= status;
+        osvw_status &= (1ULL << osvw_length) - 1;
+    } else
+        osvw_length = osvw_status = 0;
+}
+
 static int svm_domain_initialise(struct domain *d)
 {
     return 0;
@@ -930,6 +987,9 @@ static int svm_vcpu_initialise(struct vc
     }
 
     vpmu_initialise(v);
+
+    svm_guest_osvw_init(v);
+
     return 0;
 }
 
@@ -1044,6 +1104,27 @@ static void svm_init_erratum_383(struct 
     }
 }
 
+static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t 
read)
+{
+    uint eax, ebx, ecx, edx;
+     
+    /* Guest OSVW support */
+    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
+        return -1;
+
+    if ( read )
+    {
+        if (msr == MSR_AMD_OSVW_ID_LENGTH)
+            *val = v->arch.hvm_svm.osvw.length;
+        else
+            *val = v->arch.hvm_svm.osvw.status;
+    } 
+    /* Writes are ignored */
+
+    return 0;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -1094,6 +1175,9 @@ static int svm_cpu_up(void)
     }
 #endif
 
+    /* Initialize OSVW bits to be used by guests */
+    svm_host_osvw_init();
+
     return 0;
 }
 
@@ -1104,6 +1188,8 @@ struct hvm_function_table * __init start
     if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
         return NULL;
 
+    svm_host_osvw_reset();
+
     if ( svm_cpu_up() )
     {
         printk("SVM: failed to initialise.\n");
@@ -1388,6 +1474,13 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, msr_content, 1);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_rdmsr(v, msr, msr_content);
         if ( ret < 0 )
@@ -1512,6 +1605,13 @@ static int svm_msr_write_intercept(unsig
          */
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, &msr_content, 0);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_wrmsr(v, msr, msr_content);
         if ( ret < 0 )
diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/microcode.c  Mon Jan 30 18:00:50 2012 +0100
@@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
     info->error = 0;
     info->cpu = cpumask_first(&cpu_online_map);
 
+    if ( microcode_ops->start_update )
+    {
+       ret = microcode_ops->start_update();
+       if ( ret != 0 )
+        {
+           xfree(info);
+           return ret;
+       }
+    }
+
     return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
 }
 
diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/microcode_amd.c
--- a/xen/arch/x86/microcode_amd.c      Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/microcode_amd.c      Mon Jan 30 18:00:50 2012 +0100
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
+#include <asm/hvm/svm/svm.h>
 
 struct equiv_cpu_entry {
     uint32_t installed_cpu;
@@ -287,7 +288,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! Wrong "
                "microcode patch file magic\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_amd = xmalloc(struct microcode_amd);
@@ -295,7 +297,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! "
                "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
+        error = -ENOMEM;
+        goto out;
     }
 
     error = install_equiv_cpu_table(mc_amd, buf, &offset);
@@ -303,7 +306,8 @@ static int cpu_request_microcode(int cpu
     {
         xfree(mc_amd);
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_old = uci->mc.mc_amd;
@@ -337,13 +341,18 @@ static int cpu_request_microcode(int cpu
     /* On success keep the microcode patch for
      * re-apply on resume.
      */
-    if (error == 1)
+    if ( error == 1 )
     {
         xfree(mc_old);
-        return 0;
+        error = 0;
+    } else 
+    {
+        xfree(mc_amd);
+        uci->mc.mc_amd = mc_old;
     }
-    xfree(mc_amd);
-    uci->mc.mc_amd = mc_old;
+
+  out:
+    svm_host_osvw_init();
 
     return error;
 }
@@ -395,11 +404,19 @@ err1:
     return -ENOMEM;
 }
 
+static int start_update(void) 
+{
+    svm_host_osvw_reset();
+
+    return 0;
+}
+
 static const struct microcode_ops microcode_amd_ops = {
     .microcode_resume_match           = microcode_resume_match,
     .cpu_request_microcode            = cpu_request_microcode,
     .collect_cpu_info                 = collect_cpu_info,
     .apply_microcode                  = apply_microcode,
+    .start_update                     = start_update,
 };
 
 static __init int microcode_init_amd(void)
diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/platform_hypercall.c Mon Jan 30 18:00:50 2012 +0100
@@ -166,7 +166,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;
 
         guest_from_compat_handle(data, op->u.microcode.data);
+
+        while ( !domctl_lock_acquire() )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
+                goto out;
+            }
+
         ret = microcode_update(data, op->u.microcode.length);
+        domctl_lock_release();
     }
     break;
 
diff -r e2722b24dc09 -r 1361d9895cb1 xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/hvm/svm/svm.h Mon Jan 30 18:00:50 2012 +0100
@@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
                                   ~TSC_RATIO_RSVD_BITS )
 #define vcpu_tsc_ratio(v)       TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
 
+extern void svm_host_osvw_reset(void);
+extern void svm_host_osvw_init(void);
+
 #endif /* __ASM_X86_HVM_SVM_H__ */
diff -r e2722b24dc09 -r 1361d9895cb1 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Jan 30 18:00:50 2012 +0100
@@ -516,6 +516,12 @@ struct arch_svm_struct {
     
     /* AMD lightweight profiling MSR */
     uint64_t guest_lwp_cfg;
+
+    /* OSVW MSRs */
+    struct {
+        u64 length;
+        u64 status;
+    } osvw;
 };
 
 struct vmcb_struct *alloc_vmcb(void);
diff -r e2722b24dc09 -r 1361d9895cb1 xen/include/asm-x86/microcode.h
--- a/xen/include/asm-x86/microcode.h   Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/microcode.h   Mon Jan 30 18:00:50 2012 +0100
@@ -11,6 +11,7 @@ struct microcode_ops {
     int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
     int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
     int (*apply_microcode)(int cpu);
+    int (*start_update)(void);
 };
 
 struct cpu_signature {


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.