[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] cpufreq/PowerNow! implementation - time keeps changing



I'm working on getting cpufreq (specifically PowerNow!) support
for Xen.  My current approach is to have a platform hypervisor
call occur after the processor switches frequency.  The platform
call adjusts the CPU's time records to a scale of the new
frequency.

This usually seems to work, but sometimes Xen seems to alter
its record of the CPU frequency in a way that is very wrong.
The bad frequency causes subsequent cpufreq changes to scale
the new frequency wrong also, and sooner or later time briefly
goes wrong.  I had thought this was possibly happening in the
timer interrupt but I couldn't confirm it.  Does anyone have
any suggestions on what else I need to implement?

Thanks in advance for any advice.

-Mark Langsdorf
Operating System Research Center
AMD

diff -r a70de77dd8d3 arch/i386/kernel/time-xen.c
--- a/arch/i386/kernel/time-xen.c       Tue Jul 10 10:18:24 2007 +0100
+++ b/arch/i386/kernel/time-xen.c       Fri Aug 03 15:01:00 2007 -0500
@@ -50,6 +50,7 @@
 #include <linux/percpu.h>
 #include <linux/kernel_stat.h>
 #include <linux/posix-timers.h>
+#include <linux/cpufreq.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -1118,6 +1122,74 @@ void local_teardown_timer(unsigned int c
        BUG_ON(cpu == 0);
        unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
 }
+#endif
+
+#ifdef CONFIG_CPU_FREQ
+/* 
+ * cpufreq scaling handling
+ */
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 
+                               void *data)
+{
+       struct cpufreq_freqs *freq = data;
+        struct vcpu_time_info *info = &vcpu_info(freq->cpu)->time;
+       struct xen_platform_op op;
+
+       if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+               return 0;
+
+       if (val == CPUFREQ_PRECHANGE)
+               return 0;
+
+       /* re order time per Ian's instructions */
+       op.cmd = XENPF_change_freq;
+       op.u.change_freq.info = info;
+       op.u.change_freq.old = freq->old;
+       op.u.change_freq.new = freq->new;
+       HYPERVISOR_platform_op(&op);
+       update_wallclock();
+
+       return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+       .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_time_setup(void)
+{
+       if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+                       CPUFREQ_TRANSITION_NOTIFIER)) {
+               printk(KERN_ERR "failed to set up cpufreq notifier\n");
+               return -ENODEV;
+       }
+       return 0;
+}
+
+core_initcall(cpufreq_time_setup);
 #endif
 
 /*
diff -r 5682f899c7ae xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Fri Jul 27 09:06:58 2007 +0100
+++ b/xen/arch/x86/platform_hypercall.c Fri Aug 03 14:51:11 2007 -0500
@@ -252,6 +252,13 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         ret = acpi_enter_sleep(&op->u.enter_acpi_sleep);
         break;
 
+    case XENPF_change_freq:
+    {
+        do_change_freq((struct vcpu_time_info *) op->u.change_freq.info, 
op->u.change_freq.old, op->u.change_freq.new);
+        ret = 0;
+    }
+    break;
+
     default:
         ret = -ENOSYS;
         break;
diff -r 5682f899c7ae xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Jul 27 09:06:58 2007 +0100
+++ b/xen/arch/x86/time.c       Fri Aug 03 14:51:11 2007 -0500
@@ -723,6 +723,35 @@ void update_domain_wallclock_time(struct
     spin_unlock(&wc_lock);
 }
 
+void do_change_freq(struct vcpu_time_info *info, unsigned int old, unsigned 
int new)
+{
+    u64 new_mult;
+    s8 new_shift;
+    struct cpu_time *t;
+
+    t = &this_cpu(cpu_time);
+    new_mult = info->tsc_to_system_mul;
+    do_div(new_mult, new / 1000);
+    new_mult *= (old / 1000);
+    new_shift = info->tsc_shift;
+    while (new_mult > (1LL <<32))  {
+        new_shift += 1;
+        new_mult = new_mult >> 1;
+    }
+    while (new_mult < (1LL << 31)) {
+        new_shift -= 1;
+        new_mult = new_mult << 1;
+    }
+    version_update_begin(&info->version);
+    info->tsc_timestamp = t->local_tsc_stamp;
+    info->system_time   = t->stime_local_stamp;
+    info->tsc_to_system_mul = new_mult; 
+    t->tsc_scale.mul_frac = new_mult;
+    t->tsc_scale.shift = new_shift;
+    info->tsc_shift     =  new_shift;
+    version_update_end(&info->version);
+}
+
 /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
 void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
 {
diff -r 5682f899c7ae xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Jul 27 09:06:58 2007 +0100
+++ b/xen/arch/x86/traps.c      Fri Aug 03 14:51:11 2007 -0500
@@ -1724,6 +1724,15 @@ static int emulate_privileged_op(struct 
                 ((u64)regs->edx << 32) | regs->eax;
             break;
 #endif
+       case MSR_K8_FIDVID_STATUS:
+       case MSR_K8_FIDVID_CTL:
+           if ( IS_COMPAT(v->domain) )
+               goto fail;
+           if ( wrmsr_safe(regs->ecx, regs->eax, regs->edx) )
+               goto fail;
+            v->arch.guest_context.gs_base_user =
+               ((u64)regs->edx << 32) | regs->eax;
+           break;
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx) )
                 break;
@@ -1760,6 +1769,13 @@ static int emulate_privileged_op(struct 
             regs->edx = v->arch.guest_context.gs_base_user >> 32;
             break;
 #endif
+       case MSR_K8_FIDVID_CTL:
+       case MSR_K8_FIDVID_STATUS:
+           if ( IS_COMPAT(v->domain) )
+               goto fail;
+            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
+               goto fail;
+           break;
         case MSR_EFER:
             if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
                 goto fail;
diff -r 5682f899c7ae xen/include/asm-x86/msr.h
--- a/xen/include/asm-x86/msr.h Fri Jul 27 09:06:58 2007 +0100
+++ b/xen/include/asm-x86/msr.h Fri Aug 03 14:51:11 2007 -0500
@@ -352,6 +352,9 @@ static inline void write_efer(__u64 val)
 #define MSR_K8_VM_CR                   0xC0010114
 #define MSR_K8_VM_HSAVE_PA             0xC0010117
 
+#define MSR_K8_FIDVID_CTL              0xC0010041
+#define MSR_K8_FIDVID_STATUS           0xC0010042
+
 /* MSR_K8_VM_CR bits: */
 #define _K8_VMCR_SVME_DISABLE          4
 #define K8_VMCR_SVME_DISABLE           (1 << _K8_VMCR_SVME_DISABLE)
diff -r 5682f899c7ae xen/include/public/platform.h
--- a/xen/include/public/platform.h     Fri Jul 27 09:06:58 2007 +0100
+++ b/xen/include/public/platform.h     Fri Aug 03 14:51:11 2007 -0500
@@ -164,6 +164,16 @@ typedef struct xenpf_enter_acpi_sleep xe
 typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t);
 
+#define XENPF_change_freq       52
+struct xenpf_change_freq {
+    /* IN variables */
+    struct vcpu_time_info *info; /* vcpu time info for changing vcpu */
+    uint32_t old;  /* original frequency */
+    uint32_t new;  /* new frequency */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -176,6 +186,7 @@ struct xen_platform_op {
         struct xenpf_platform_quirk    platform_quirk;
         struct xenpf_firmware_info     firmware_info;
         struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
+        struct xenpf_change_freq       change_freq;
         uint8_t                        pad[128];
     } u;
 };
diff -r 5682f899c7ae xen/include/xen/time.h
--- a/xen/include/xen/time.h    Fri Jul 27 09:06:58 2007 +0100
+++ b/xen/include/xen/time.h    Fri Aug 03 14:51:11 2007 -0500
@@ -74,6 +74,8 @@ extern void do_settime(
 extern void do_settime(
     unsigned long secs, unsigned long nsecs, u64 system_time_base);
 
+extern void do_change_freq(struct vcpu_time_info *info, unsigned int old, 
unsigned int new);
+
 extern void send_timer_event(struct vcpu *v);
 
 #endif /* __XEN_TIME_H__ */




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.