[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4] x86/viridian: Add Partition Reference Time enlightenment



The presence of the partition reference time enlightenment persuades newer
versions of Windows to prefer the TSC as their primary time source. Hence,
if rdtsc is not being emulated and is invariant then many vmexits (for
alternative time sources such as the HPET or reference counter MSR) can
be avoided.

The implementation is not yet complete as no attempt is made to prevent
emulation of rdtsc if the enlightenment is active and guest and host
TSC frequencies differ. To do that requires invasive changes in the core
x86 time code and hence a lot more testing.

This patch avoids the issue by disabling the enlightenment if rdtsc is
being emulated, causing Windows to choose another time source. This is
safe, but may cause a big variation in performance of guests migrated
between hosts of differing TSC frequency. Thus the enlightenment is not
enabled in the default set, but may be enabled to improve guest performance
where such migrations are not a concern.

See section 15.4 of the Microsoft Hypervisor Top Level Functional
Specification v4.0a for details.

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Cc: Christoph Egger <chegger@xxxxxxxxx>
---
v4:
- Get rid of loop, as suggested by Matt Wilson

v3:
- Prevent TscSequence increment loop from being exposed to a running
  domain.

v2:
- Addressed comments from Jan:
  - Leave enlightenment out of default set and add a comment explaining
    why.
  - De-dup code surrounding adjustment of sequence number on resume.
  - Remove accidental code deletion.

 docs/man/xl.cfg.pod.5                  |    6 ++
 tools/libxl/libxl_dom.c                |    3 +
 tools/libxl/libxl_types.idl            |    1 +
 xen/arch/x86/hvm/viridian.c            |  113 +++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/viridian.h     |   35 ++++++++++
 xen/include/public/arch-x86/hvm/save.h |   11 ++++
 xen/include/public/hvm/params.h        |    7 +-
 7 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 8bba21c..925adbb 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -1163,6 +1163,12 @@ This group incorporates Partition Time Reference Counter 
MSR. This
 enlightenment can improve performance of Windows 8 and Windows
 Server 2012 onwards.
 
+=item B<reference_tsc>
+
+This set incorporates the Partition Reference TSC MSR. This
+enlightenment can improve performance of Windows 7 and Windows
+Server 2008 R2 onwards.
+
 =item B<defaults>
 
 This is a special value that enables the default set of groups, which
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index bd21841..d04fc0d 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -262,6 +262,9 @@ static int hvm_set_viridian_features(libxl__gc *gc, 
uint32_t domid,
     if (libxl_bitmap_test(&enlightenments, 
LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
         mask |= HVMPV_time_ref_count;
 
+    if (libxl_bitmap_test(&enlightenments, 
LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
+        mask |= HVMPV_reference_tsc;
+
     if (mask != 0 &&
         xc_hvm_param_set(CTX->xch,
                          domid,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 90d152f..da75a40 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -183,6 +183,7 @@ libxl_viridian_enlightenment = 
Enumeration("viridian_enlightenment", [
     (0, "base"),
     (1, "freq"),
     (2, "time_ref_count"),
+    (3, "reference_tsc"),
     ])
 
 #
diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
index 6726168..5008121 100644
--- a/xen/arch/x86/hvm/viridian.c
+++ b/xen/arch/x86/hvm/viridian.c
@@ -21,6 +21,7 @@
 #define VIRIDIAN_MSR_HYPERCALL                  0x40000001
 #define VIRIDIAN_MSR_VP_INDEX                   0x40000002
 #define VIRIDIAN_MSR_TIME_REF_COUNT             0x40000020
+#define VIRIDIAN_MSR_REFERENCE_TSC              0x40000021
 #define VIRIDIAN_MSR_TSC_FREQUENCY              0x40000022
 #define VIRIDIAN_MSR_APIC_FREQUENCY             0x40000023
 #define VIRIDIAN_MSR_EOI                        0x40000070
@@ -40,6 +41,7 @@
 #define CPUID3A_MSR_APIC_ACCESS    (1 << 4)
 #define CPUID3A_MSR_HYPERCALL      (1 << 5)
 #define CPUID3A_MSR_VP_INDEX       (1 << 6)
+#define CPUID3A_MSR_REFERENCE_TSC  (1 << 9)
 #define CPUID3A_MSR_FREQ           (1 << 11)
 
 /* Viridian CPUID 4000004, Implementation Recommendations. */
@@ -95,6 +97,8 @@ int cpuid_viridian_leaves(unsigned int leaf, unsigned int 
*eax,
             *eax |= CPUID3A_MSR_FREQ;
         if ( viridian_feature_mask(d) & HVMPV_time_ref_count )
             *eax |= CPUID3A_MSR_TIME_REF_COUNT;
+        if ( viridian_feature_mask(d) & HVMPV_reference_tsc )
+            *eax |= CPUID3A_MSR_REFERENCE_TSC;
         break;
     case 4:
         /* Recommended hypercall usage. */
@@ -155,6 +159,17 @@ static void dump_apic_assist(const struct vcpu *v)
            v, aa->fields.enabled, (unsigned long)aa->fields.pfn);
 }
 
+static void dump_reference_tsc(const struct domain *d)
+{
+    const union viridian_reference_tsc *rt;
+
+    rt = &d->arch.hvm_domain.viridian.reference_tsc;
+    
+    printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: enabled: %x pfn: %lx\n",
+           d->domain_id,
+           rt->fields.enabled, (unsigned long)rt->fields.pfn);
+}
+
 static void enable_hypercall_page(struct domain *d)
 {
     unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
@@ -224,6 +239,78 @@ static void initialize_apic_assist(struct vcpu *v)
     put_page_and_type(page);
 }
 
+static void update_reference_tsc(struct domain *d, bool_t initialize)
+{
+    unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
+    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
+    HV_REFERENCE_TSC_PAGE *p;
+
+    if ( !page || !get_page_type(page, PGT_writable_page) )
+    {
+        if ( page )
+            put_page(page);
+        gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn,
+                 page ? page_to_mfn(page) : INVALID_MFN);
+        return;
+    }
+
+    p = __map_domain_page(page);
+
+    if ( initialize )
+        clear_page(p);
+
+    /*
+     * This enlightenment must be disabled is the host TSC is not invariant.
+     * However it is also disabled if vtsc is true (which means rdtsc is being
+     * emulated). This generally happens when guest TSC freq and host TSC freq
+     * don't match. The TscScale value could be adjusted to cope with this,
+     * allowing vtsc to be turned off, but support for this is not yet present
+     * in the hypervisor. Thus is it is possible that migrating a Windows VM
+     * between hosts of differing TSC frequencies may result in large
+     * differences in guest performance.
+     */
+    if ( !host_tsc_is_safe() || d->arch.vtsc )
+    {
+        /*
+         * The specification states that valid values of TscSequence range
+         * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
+         * this mechanism is no longer a reliable source of time and that
+         * the VM should fall back to a different source.
+         *
+         * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate
+         * the spec. and rely on a value of 0 to indicate that this
+         * enlightenment should no longer be used. These two kernel
+         * versions are currently the only ones to make use of this
+         * enlightenment, so just use 0 here.
+         */
+        p->TscSequence = 0;
+
+        printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
+               d->domain_id);
+        return;
+    }
+
+    /*
+     * The guest will calculate reference time according to the following
+     * formula:
+     *
+     * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
+     *
+     * Windows uses a 100ns tick, so we need a scale which is cpu
+     * ticks per 100ns shifted left by 64.
+     */
+    p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
+
+    p->TscSequence++;
+    if ( p->TscSequence == 0xFFFFFFFF ||
+         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
+        p->TscSequence = 1;
+
+    unmap_domain_page(p);
+
+    put_page_and_type(page);
+}
+
 int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
 {
     struct vcpu *v = current;
@@ -282,6 +369,17 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
             initialize_apic_assist(v);
         break;
 
+    case VIRIDIAN_MSR_REFERENCE_TSC:
+        if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
+            return 0;
+
+        perfc_incr(mshv_wrmsr_tsc_msr);
+        d->arch.hvm_domain.viridian.reference_tsc.raw = val;
+        dump_reference_tsc(d);
+        if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
+            update_reference_tsc(d, 1);
+        break;
+
     default:
         return 0;
     }
@@ -346,6 +444,14 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
         *val = v->arch.hvm_vcpu.viridian.apic_assist.raw;
         break;
 
+    case VIRIDIAN_MSR_REFERENCE_TSC:
+        if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
+            return 0;
+
+        perfc_incr(mshv_rdmsr_tsc_msr);
+        *val = d->arch.hvm_domain.viridian.reference_tsc.raw;
+        break;
+
     case VIRIDIAN_MSR_TIME_REF_COUNT:
     {
         uint64_t tsc;
@@ -452,6 +558,7 @@ static int viridian_save_domain_ctxt(struct domain *d, 
hvm_domain_context_t *h)
 
     ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
     ctxt.guest_os_id   = d->arch.hvm_domain.viridian.guest_os_id.raw;
+    ctxt.reference_tsc = d->arch.hvm_domain.viridian.reference_tsc.raw;
 
     return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
 }
@@ -460,11 +567,15 @@ static int viridian_load_domain_ctxt(struct domain *d, 
hvm_domain_context_t *h)
 {
     struct hvm_viridian_domain_context ctxt;
 
-    if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
+    if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
         return -EINVAL;
 
     d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
     d->arch.hvm_domain.viridian.guest_os_id.raw   = ctxt.guest_os_id;
+    d->arch.hvm_domain.viridian.reference_tsc.raw = ctxt.reference_tsc;
+
+    if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
+        update_reference_tsc(d, 0);
 
     return 0;
 }
diff --git a/xen/include/asm-x86/hvm/viridian.h 
b/xen/include/asm-x86/hvm/viridian.h
index 496da33..35a22f5 100644
--- a/xen/include/asm-x86/hvm/viridian.h
+++ b/xen/include/asm-x86/hvm/viridian.h
@@ -48,10 +48,35 @@ union viridian_hypercall_gpa
     } fields;
 };
 
+union viridian_reference_tsc
+{
+    uint64_t raw;
+    struct
+    {
+        uint64_t enabled:1;
+        uint64_t reserved_preserved:11;
+        uint64_t pfn:48;
+    } fields;
+};
+
+/*
+ * Type defintion as in Microsoft Hypervisor Top-Level Functional
+ * Specification v4.0a, section 15.4.2.
+ */
+typedef struct _HV_REFERENCE_TSC_PAGE
+{
+    uint32_t TscSequence;
+    uint32_t Reserved1;
+    uint64_t TscScale;
+    int64_t  TscOffset;
+    uint64_t Reserved2[509];
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
 struct viridian_domain
 {
     union viridian_guest_os_id guest_os_id;
     union viridian_hypercall_gpa hypercall_gpa;
+    union viridian_reference_tsc reference_tsc;
 };
 
 int
@@ -76,3 +101,13 @@ int
 viridian_hypercall(struct cpu_user_regs *regs);
 
 #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/public/arch-x86/hvm/save.h 
b/xen/include/public/arch-x86/hvm/save.h
index 16d85a3..006924b 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
 struct hvm_viridian_domain_context {
     uint64_t hypercall_gpa;
     uint64_t guest_os_id;
+    uint64_t reference_tsc;
 };
 
 DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
@@ -616,3 +617,13 @@ struct hvm_msr {
 #define HVM_SAVE_CODE_MAX 20
 
 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h
index 3c51072..a2d43bc 100644
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -92,10 +92,15 @@
 #define _HVMPV_time_ref_count 2
 #define HVMPV_time_ref_count  (1 << _HVMPV_time_ref_count)
 
+/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */
+#define _HVMPV_reference_tsc 3
+#define HVMPV_reference_tsc  (1 << _HVMPV_reference_tsc)
+
 #define HVMPV_feature_mask \
        (HVMPV_base_freq | \
         HVMPV_no_freq | \
-        HVMPV_time_ref_count)
+        HVMPV_time_ref_count | \
+        HVMPV_reference_tsc)
 
 #endif
 
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.