[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] softtsc for PV domains



Attached patch implements softtsc (TSC emulation) for userland
code in PV domains.  It is currently tied to the existing
"softtsc" Xen boot option (which does the same thing, but for
HVM domains).  Later it should be tied to a
vm.cfg option, but this is sufficient for now to obtain
performance-degradation data for PV environments that
heavily utilize rdtsc.  To record emulation frequency,
additional output has been added to debug-key "t".

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>

diff -r 5619bed51ec4 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/arch/x86/domain.c     Wed Aug 26 11:23:21 2009 -0600
@@ -369,7 +369,7 @@ int vcpu_initialise(struct vcpu *v)
         }
 
         v->arch.guest_context.ctrlreg[4] =
-            real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+            real_cr4_to_pv_guest_cr4(v,mmu_cr4_features);
     }
 
     v->arch.perdomain_ptes = perdomain_ptes(d, v);
@@ -509,6 +509,10 @@ int arch_domain_create(struct domain *d,
         d->arch.cpuids[i].input[1] = XEN_CPUID_INPUT_UNUSED;
     }
 
+    d->arch.vtsc = opt_softtsc; /* FIXME, should be decided by vm.cfg */
+    d->arch.vtsc_last = d->arch.vtsc_stime_offset = 0;
+    spin_lock_init(&d->arch.vtsc_lock);
+
     return 0;
 
  fail:
@@ -565,11 +569,11 @@ void arch_domain_destroy(struct domain *
     xfree(d->arch.pirq_vector);
 }
 
-unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4)
+unsigned long pv_guest_cr4_fixup(struct vcpu *v, unsigned long guest_cr4)
 {
-    unsigned long hv_cr4_mask, hv_cr4 = real_cr4_to_pv_guest_cr4(read_cr4());
+    unsigned long hv_cr4_mask, hv_cr4 = real_cr4_to_pv_guest_cr4(v,read_cr4());
 
-    hv_cr4_mask = ~X86_CR4_TSD;
+    hv_cr4_mask = (v->domain->arch.vtsc ? ~0L : ~X86_CR4_TSD);
     if ( cpu_has_de )
         hv_cr4_mask &= ~X86_CR4_DE;
 
@@ -682,8 +686,8 @@ int arch_set_info_guest(
     v->arch.guest_context.user_regs.eflags |= X86_EFLAGS_IF;
 
     cr4 = v->arch.guest_context.ctrlreg[4];
-    v->arch.guest_context.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(cr4) :
-        real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+    v->arch.guest_context.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(v,cr4) :
+        real_cr4_to_pv_guest_cr4(v,mmu_cr4_features);
 
     memset(v->arch.guest_context.debugreg, 0,
            sizeof(v->arch.guest_context.debugreg));
@@ -1250,7 +1254,7 @@ static void paravirt_ctxt_switch_to(stru
     set_int80_direct_trap(v);
     switch_kernel_stack(v);
 
-    cr4 = pv_guest_cr4_to_real_cr4(v->arch.guest_context.ctrlreg[4]);
+    cr4 = pv_guest_cr4_to_real_cr4(v,v->arch.guest_context.ctrlreg[4]);
     if ( unlikely(cr4 != read_cr4()) )
         write_cr4(cr4);
 
diff -r 5619bed51ec4 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Wed Aug 26 11:23:21 2009 -0600
@@ -61,8 +61,7 @@ unsigned int opt_hvm_debug_level __read_
 unsigned int opt_hvm_debug_level __read_mostly;
 integer_param("hvm_debug", opt_hvm_debug_level);
 
-int opt_softtsc;
-boolean_param("softtsc", opt_softtsc);
+extern int opt_softtsc;
 
 struct hvm_function_table hvm_funcs __read_mostly;
 
diff -r 5619bed51ec4 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/arch/x86/time.c       Wed Aug 26 11:23:21 2009 -0600
@@ -34,6 +34,9 @@
 /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
 static char opt_clocksource[10];
 string_param("clocksource", opt_clocksource);
+
+int opt_softtsc;
+boolean_param("softtsc", opt_softtsc);
 
 /*
  * opt_consistent_tscs: All TSCs tick at the exact same rate, allowing
@@ -1429,6 +1432,36 @@ struct tm wallclock_time(void)
     return gmtime(seconds);
 }
 
+static unsigned long rdtsc_kerncount = 0, rdtsc_usercount = 0;
+
+void do_rdtsc(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    s_time_t now;
+
+    if ( guest_kernel_mode(v, regs) || !v->domain->arch.vtsc )
+    {
+        rdtsc(regs->eax, regs->edx);
+        rdtsc_kerncount++;
+    } else {
+        spin_lock(&v->domain->arch.vtsc_lock);
+        rdtsc_usercount++;
+        now = get_s_time() + v->domain->arch.vtsc_stime_offset;
+        if ( (int64_t)(now - v->domain->arch.vtsc_last) >= 0 )
+            v->domain->arch.vtsc_last = now;
+        else
+            now = v->domain->arch.vtsc_last;
+        spin_unlock(&v->domain->arch.vtsc_lock);
+        regs->eax = now & 0xffffffff;
+        regs->edx = now >> 32;
+    }
+}
+
+void arch_read_clocks(void)
+{
+    printk("softtsc count:%lu kernel, %lu user\n",
+        rdtsc_kerncount, rdtsc_usercount);
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 5619bed51ec4 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/arch/x86/traps.c      Wed Aug 26 11:23:21 2009 -0600
@@ -2005,12 +2005,13 @@ static int emulate_privileged_op(struct 
     goto fail;
 
  twobyte_opcode:
-    /* Two-byte opcodes only emulated from guest kernel. */
-    if ( !guest_kernel_mode(v, regs) )
-        goto fail;
+    /* Privileged (ring 0) instructions... except rdtsc */
+    opcode = insn_fetch(u8, code_base, eip, code_limit);
 
-    /* Privileged (ring 0) instructions. */
-    opcode = insn_fetch(u8, code_base, eip, code_limit);
+    /* Two-byte opcodes only emulated from guest kernel... except rdtsc */
+    if ( !guest_kernel_mode(v, regs) && opcode != 0x31 )
+            goto fail;
+
     if ( lock && (opcode & ~3) != 0x20 )
         goto fail;
     switch ( opcode )
@@ -2126,8 +2127,8 @@ static int emulate_privileged_op(struct 
             break;
 
         case 4: /* Write CR4 */
-            v->arch.guest_context.ctrlreg[4] = pv_guest_cr4_fixup(*reg);
-            write_cr4(pv_guest_cr4_to_real_cr4(
+            v->arch.guest_context.ctrlreg[4] = pv_guest_cr4_fixup(v,*reg);
+            write_cr4(pv_guest_cr4_to_real_cr4(v,
                 v->arch.guest_context.ctrlreg[4]));
             break;
 
@@ -2266,7 +2267,7 @@ static int emulate_privileged_op(struct 
     }
 
     case 0x31: /* RDTSC */
-        rdtsc(regs->eax, regs->edx);
+        do_rdtsc(v,regs);
         break;
 
     case 0x32: /* RDMSR */
diff -r 5619bed51ec4 xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Wed Aug 26 11:23:21 2009 -0600
@@ -47,6 +47,8 @@
 #define Mov         (1<<7)
 /* All operands are implicit in the opcode. */
 #define ImplicitOps (DstImplicit|SrcImplicit)
+
+extern int opt_softtsc;
 
 static uint8_t opcode_table[256] = {
     /* 0x00 - 0x07 */
@@ -3717,7 +3719,8 @@ x86_emulate(
         fail_if(ops->read_cr == NULL);
         if ( (rc = ops->read_cr(4, &cr4, ctxt)) )
             goto done;
-        generate_exception_if((cr4 & CR4_TSD) && !mode_ring0(), EXC_GP, 0);
+        generate_exception_if((cr4 & CR4_TSD) && !opt_softtsc &&
+            !mode_ring0(), EXC_GP, 0);
         fail_if(ops->read_msr == NULL);
         if ( (rc = ops->read_msr(MSR_TSC, &val, ctxt)) != 0 )
             goto done;
diff -r 5619bed51ec4 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/common/keyhandler.c   Wed Aug 26 11:23:21 2009 -0600
@@ -318,6 +318,7 @@ static void read_clocks(unsigned char ke
     printk("Synced cycles skew: max=%"PRIu64" avg=%"PRIu64" "
            "samples=%"PRIu32" current=%"PRIu64"\n",
            maxdif_cycles, sumdif_cycles/count, count, dif_cycles);
+    arch_read_clocks();
 }
 
 static struct keyhandler read_clocks_keyhandler = {
diff -r 5619bed51ec4 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/include/asm-x86/domain.h      Wed Aug 26 11:23:21 2009 -0600
@@ -2,6 +2,7 @@
 #define __ASM_DOMAIN_H__
 
 #include <xen/config.h>
+#include <xen/mm.h>
 #include <xen/mm.h>
 #include <asm/hvm/vcpu.h>
 #include <asm/hvm/domain.h>
@@ -299,6 +300,13 @@ struct arch_domain
 
     /* For Guest vMCA handling */
     struct domain_mca_msrs vmca_msrs;
+
+    /* tsc emulation */
+    bool_t vtsc;
+    s_time_t vtsc_last;
+    spinlock_t vtsc_lock;
+    int64_t vtsc_stime_offset;
+
 } __cacheline_aligned;
 
 #define has_arch_pdevs(d)    (!list_empty(&(d)->arch.pdev_list))
@@ -423,13 +431,14 @@ void vcpu_show_registers(const struct vc
 void vcpu_show_registers(const struct vcpu *);
 
 /* Clean up CR4 bits that are not under guest control. */
-unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4);
+unsigned long pv_guest_cr4_fixup(struct vcpu *v, unsigned long guest_cr4);
 
 /* Convert between guest-visible and real CR4 values. */
-#define pv_guest_cr4_to_real_cr4(c) \
-    (((c) | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))) & ~X86_CR4_DE)
-#define real_cr4_to_pv_guest_cr4(c) \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE))
+#define pv_guest_cr4_to_real_cr4(v,c) \
+    ((((c) | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))) & ~X86_CR4_DE) \
+        | (v->domain->arch.vtsc ? X86_CR4_TSD : 0))
+#define real_cr4_to_pv_guest_cr4(v,c) \
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | (v->domain->arch.vtsc ? X86_CR4_TSD : 0)))
 
 void domain_cpuid(struct domain *d,
                   unsigned int  input,
diff -r 5619bed51ec4 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/include/asm-x86/time.h        Wed Aug 26 11:23:21 2009 -0600
@@ -41,4 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
 uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
 uint64_t ns_to_acpi_pm_tick(uint64_t ns);
 
+void do_rdtsc(struct vcpu *v, struct cpu_user_regs *regs);
+
 #endif /* __X86_TIME_H__ */
diff -r 5619bed51ec4 xen/include/xen/time.h
--- a/xen/include/xen/time.h    Fri Aug 14 17:26:23 2009 +0100
+++ b/xen/include/xen/time.h    Wed Aug 26 11:23:21 2009 -0600
@@ -63,6 +63,8 @@ extern void send_timer_event(struct vcpu
 
 void domain_set_time_offset(struct domain *d, int32_t time_offset_seconds);
 
+void arch_read_clocks(void);
+
 #endif /* __XEN_TIME_H__ */
 
 /*

Attachment: vtsc-090826.patch
Description: Binary data

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.