WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

RE: [Xen-devel] Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: RE: [Xen-devel] Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
From: "Wei, Gang" <gang.wei@xxxxxxxxx>
Date: Mon, 15 Dec 2008 11:06:52 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
Delivery-date: Sun, 14 Dec 2008 19:07:48 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <C5698841.20366%keir.fraser@xxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <8FED46E8A9CA574792FC7AACAC38FE7701C7C99515@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <C5698841.20366%keir.fraser@xxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AclWoeM2xc+X6mj6QOaQHsyxDmpzagAFRoNpAAEP6tAAAWmCgAAATdDsAAIJ5qAAAvL7GQFMKVOwAA1xISwAJTEgYAANxIuJAAtYh4AAAOh3kwBJ7/4Q
Thread-topic: [Xen-devel] Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
Here is the updated patch for constant-tsc case. -Jimmy

CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus

Originally, the sequence for each cpu was [tsc-save, enter deepC, break-evt, 
exit deepC, tsc-restore], so the system error accumulates quite easily. 
Once the workloads between cpus are not balanced, the tsc skew between cpus 
will eventually become bigger & bigger - more than 10 seconds can be observed.

Then we removed the tsc-save step, and did the tsc-restore after exit from 
deepC based only on the percpu t->stime_master_stamp, t->tsc_scale, & 
t->local_tsc_stamp. This makes the error accumulate more slowly, but can't 
eliminate it.

Now, for the constant-tsc case, we just keep an initial stamp via 
cstate_init_stamp during booting/s3 resume, which is based on the platform 
stime. All cpus need only do the tsc-restore relative to the initial stamp 
after exiting deepC. The base and the tsc->ns scale are fixed and the same for 
all cpus, so this avoids accumulated tsc-skew. BTW, the percpu tsc scale 
calibration is bypassed for the constant-tsc case.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>

diff -r 045f70d1acdb xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/time.c       Mon Dec 15 10:35:11 2008 +0800
@@ -69,8 +69,11 @@ static DEFINE_PER_CPU(struct cpu_time, c
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
 
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
+/* TSC will not stop during deep C state? */
+static bool_t tsc_nostop;
+/* TSC will be constant rate, independent with P/T state? */
+static int constant_tsc = 0;
+boolean_param("const_tsc", constant_tsc);
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -551,6 +554,10 @@ static u64 plt_stamp;            /* hard
 static u64 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+/* following 2 variables are for deep C state TSC restore usage */
+static u64 initial_tsc_stamp;    /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
 static void plt_overflow(void *unused)
 {
     u64 count;
@@ -664,25 +671,41 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+static void cstate_init_stamp(void)
+{
+    if ( tsc_nostop || !constant_tsc )
+        return;
+
+    initial_stime_platform_stamp = read_platform_stime();
+    rdtscll(initial_tsc_stamp);
+}
+
+static inline void __restore_tsc(s_time_t plt_stime)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( tsc_nostop )
         return;
 
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
+    stime_delta = plt_stime - 
+        (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp);
+
     if ( stime_delta < 0 )
         stime_delta = 0;
 
     tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
 
-    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
+    wrmsrl(MSR_IA32_TSC, 
+        (constant_tsc ? initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta);
 }
 
+void cstate_restore_tsc(void)
+{
+    __restore_tsc(read_platform_stime());
+}
 /***************************************************************************
  * CMOS Timer functions
  ***************************************************************************/
@@ -960,6 +983,18 @@ static void local_time_calibration(void)
            curr_master_stime - curr_local_stime);
 #endif
 
+    if ( constant_tsc )
+    {
+        local_irq_disable();
+        t->local_tsc_stamp    = curr_tsc;
+        t->stime_local_stamp  = curr_master_stime;
+        t->stime_master_stamp = curr_master_stime;
+        local_irq_enable();
+
+        update_vcpu_system_time(current);
+        goto out;
+    }
+
     /* Local time warps forward if it lags behind master time. */
     if ( curr_local_stime < curr_master_stime )
         curr_local_stime = curr_master_stime;
@@ -1082,6 +1117,8 @@ static void time_calibration_rendezvous(
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
+    __restore_tsc(r->master_stime);
+
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1162,23 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
+    /* for recent intel x86 model, the tsc increments at a constant rate */
+    if ( (current_cpu_data.x86 == 0xf && current_cpu_data.x86_model >= 0x03) ||
+         (current_cpu_data.x86 == 0x6 && current_cpu_data.x86_model >= 0x0e) )
+    {
+        int cpu;
+
+        constant_tsc = 1;
+
+        for_each_cpu(cpu)
+        {
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+        }
+    }
+
+    /* Is TSC not stop during deep C state ? */
     if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+        tsc_nostop = 1;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1139,6 +1190,8 @@ int __init init_xen_time(void)
 
     stime_platform_stamp = NOW();
     init_platform_timer();
+
+    cstate_init_stamp();
 
     init_percpu_time();
 
@@ -1260,6 +1313,8 @@ int time_resume(void)
     disable_pit_irq();
 
     init_percpu_time();
+
+    cstate_init_stamp();
 
     do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
 

Attachment: tsc-skew-20081213-1.patch
Description: tsc-skew-20081213-1.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>