[Xen-devel] [PATCH] verify TSC sync

To: "Xen-Devel (E-mail)" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] verify TSC sync
From: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
Date: Fri, 18 Dec 2009 10:52:30 -0800 (PST)
Cc: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Verify TSC sync even on systems with constant and non-stop TSC.
We now reserve X86_FEATURE_TSC_RELIABLE for systems whose TSCs
have been verified to stay in sync (no observed warp).

For the record... Jeremy was right!  (there, I said it ;-)

See the Linux patch described here:
http://patchwork.kernel.org/patch/68397/

(Note: the bulk of this patch is a move of 100 lines within the
same file, to avoid forward references to the warp-check code.)

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
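
(For anyone who wants to play with the warp-check idea outside the
hypervisor, below is a minimal user-space sketch of the same algorithm.
It is an illustration only, not the code in this patch: the pthread
scaffolding, the ITERATIONS bound, and the __rdtsc()/_mm_lfence()
intrinsics are assumptions of the sketch; the warp comparison is kept
inside the lock for brevity, unlike check_tsc_warp(), which does it
outside; and a real test would also pin each thread to a distinct CPU,
e.g. with pthread_setaffinity_np().)

/*
 * Minimal user-space sketch of the warp check (illustration only; the
 * real implementation is check_tsc_warp() in the patch below).  Two
 * threads hammer a shared last-TSC slot under a lock; if either ever
 * reads a TSC older than the one last recorded (possibly by the other
 * thread), the TSCs have warped.  Build with: gcc -O2 -pthread
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>                    /* __rdtsc(), _mm_lfence() */

#define ITERATIONS 1000000                /* arbitrary bound for the demo */

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t last_tsc;                 /* last TSC seen by any thread */
static uint64_t max_warp;                 /* worst backwards jump seen */

static void *warp_check(void *unused)
{
    for ( long i = 0; i < ITERATIONS; i++ )
    {
        pthread_mutex_lock(&sync_lock);
        uint64_t prev = last_tsc;         /* possibly from the other thread */
        _mm_lfence();                     /* keep rdtsc from reordering */
        uint64_t now = __rdtsc();
        _mm_lfence();
        last_tsc = now;
        if ( prev > now && prev - now > max_warp )
            max_warp = prev - now;        /* TSC appeared to go backwards */
        pthread_mutex_unlock(&sync_lock);
    }
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, warp_check, NULL);
    warp_check(NULL);                     /* main thread participates too */
    pthread_join(t, NULL);
    printf("max observed TSC warp: %llu cycles\n",
           (unsigned long long)max_warp);
    return 0;
}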

diff -r 1a911fd65e52 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/arch/x86/cpu/amd.c    Fri Dec 18 11:40:25 2009 -0700
@@ -465,8 +465,6 @@ static void __devinit init_amd(struct cp
                if (c->x86_power & (1<<8)) {
                        set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
                        set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
-                       if (c->x86 != 0x11)
-                               set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
                }
        }
 
diff -r 1a911fd65e52 xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/arch/x86/cpu/intel.c  Fri Dec 18 11:40:25 2009 -0700
@@ -212,7 +212,6 @@ static void __devinit init_intel(struct 
        if (cpuid_edx(0x80000007) & (1u<<8)) {
                set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
                set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
-               set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
        }
        if ((c->cpuid_level >= 0x00000006) &&
            (cpuid_eax(0x00000006) & (1u<<2)))
diff -r 1a911fd65e52 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/arch/x86/time.c       Fri Dec 18 11:40:25 2009 -0700
@@ -1135,6 +1135,107 @@ static void local_time_calibration(void)
 }
 
 /*
+ * TSC Reliability check
+ */
+
+/*
+ * The Linux original version of this function is
+ * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
+ */
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
+{
+#define rdtsc_barrier() mb()
+    static DEFINE_SPINLOCK(sync_lock);
+    static cycles_t last_tsc;
+
+    cycles_t start, now, prev, end;
+    int i;
+
+    rdtsc_barrier();
+    start = get_cycles();
+    rdtsc_barrier();
+
+    /* The measurement runs for 20 msecs: */
+    end = start + tsc_khz * 20ULL;
+    now = start;
+
+    for ( i = 0; ; i++ )
+    {
+        /*
+         * We take the global lock, measure TSC, save the
+         * previous TSC that was measured (possibly on
+         * another CPU) and update the previous TSC timestamp.
+         */
+        spin_lock(&sync_lock);
+        prev = last_tsc;
+        rdtsc_barrier();
+        now = get_cycles();
+        rdtsc_barrier();
+        last_tsc = now;
+        spin_unlock(&sync_lock);
+
+        /*
+         * Be nice every now and then (and also check whether the measurement
+         * is done [we also insert a 10-million-loop safety exit, so we don't
+         * lock up in case the TSC readout is totally broken]):
+         */
+        if ( unlikely(!(i & 7)) )
+        {
+            if ( (now > end) || (i > 10000000) )
+                break;
+            cpu_relax();
+            /*touch_nmi_watchdog();*/
+        }
+
+        /*
+         * Outside the critical section we can now see whether we saw a 
+         * time-warp of the TSC going backwards:
+         */
+        if ( unlikely(prev > now) )
+        {
+            spin_lock(&sync_lock);
+            if ( *max_warp < prev - now )
+                *max_warp = prev - now;
+            spin_unlock(&sync_lock);
+        }
+    }
+}
+
+static unsigned long tsc_max_warp, tsc_check_count;
+static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;
+
+static void tsc_check_slave(void *unused)
+{
+    unsigned int cpu = smp_processor_id();
+    local_irq_disable();
+    while ( !cpu_isset(cpu, tsc_check_cpumask) )
+        mb();
+    check_tsc_warp(cpu_khz, &tsc_max_warp);
+    cpu_clear(cpu, tsc_check_cpumask);
+    local_irq_enable();
+}
+
+void tsc_check_reliability(void)
+{
+    unsigned int cpu = smp_processor_id();
+    static DEFINE_SPINLOCK(lock);
+
+    spin_lock(&lock);
+
+    tsc_check_count++;
+    smp_call_function(tsc_check_slave, NULL, 0);
+    tsc_check_cpumask = cpu_online_map;
+    local_irq_disable();
+    check_tsc_warp(cpu_khz, &tsc_max_warp);
+    cpu_clear(cpu, tsc_check_cpumask);
+    local_irq_enable();
+    while ( !cpus_empty(tsc_check_cpumask) )
+        cpu_relax();
+
+    spin_unlock(&lock);
+}
+
+/*
  * Rendezvous for all CPUs in IRQ context.
  * Master CPU snapshots the platform timer.
  * All CPUS snapshot their local TSC and extrapolation of system time.
@@ -1271,16 +1372,30 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
+    extern unsigned int max_cstate;
+
     /* If we have constant-rate TSCs then scale factor can be shared. */
     if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
     {
         int cpu;
         for_each_possible_cpu ( cpu )
             per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
-        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
-        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+    }
+    if ( (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && max_cstate <= 2) ||
+         boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
+    {
+        /*
+         * Sadly, despite processor vendors' best design guidance efforts,
+         * on some systems, CPUs may come out of reset improperly
+         * synchronized.  So we must verify there is no warp, and we
+         * can't do that until all CPUs are booted.
+         */
+        tsc_check_reliability();
+        if ( tsc_max_warp == 0 )
+            set_boot_cpu_bit(X86_FEATURE_TSC_RELIABLE);
+    }
+    if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
             time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
-    }
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1481,107 +1596,6 @@ struct tm wallclock_time(void)
 }
 
 /*
- * TSC Reliability check
- */
-
-/*
- * The Linux original version of this function is
- * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
- */
-void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
-{
-#define rdtsc_barrier() mb()
-    static DEFINE_SPINLOCK(sync_lock);
-    static cycles_t last_tsc;
-
-    cycles_t start, now, prev, end;
-    int i;
-
-    rdtsc_barrier();
-    start = get_cycles();
-    rdtsc_barrier();
-
-    /* The measurement runs for 20 msecs: */
-    end = start + tsc_khz * 20ULL;
-    now = start;
-
-    for ( i = 0; ; i++ )
-    {
-        /*
-         * We take the global lock, measure TSC, save the
-         * previous TSC that was measured (possibly on
-         * another CPU) and update the previous TSC timestamp.
-         */
-        spin_lock(&sync_lock);
-        prev = last_tsc;
-        rdtsc_barrier();
-        now = get_cycles();
-        rdtsc_barrier();
-        last_tsc = now;
-        spin_unlock(&sync_lock);
-
-        /*
-         * Be nice every now and then (and also check whether measurement is 
-         * done [we also insert a 10 million loops safety exit, so we dont 
-         * lock up in case the TSC readout is totally broken]):
-         */
-        if ( unlikely(!(i & 7)) )
-        {
-            if ( (now > end) || (i > 10000000) )
-                break;
-            cpu_relax();
-            /*touch_nmi_watchdog();*/
-        }
-
-        /*
-         * Outside the critical section we can now see whether we saw a 
-         * time-warp of the TSC going backwards:
-         */
-        if ( unlikely(prev > now) )
-        {
-            spin_lock(&sync_lock);
-            if ( *max_warp < prev - now )
-                *max_warp = prev - now;
-            spin_unlock(&sync_lock);
-        }
-    }
-}
-
-static unsigned long tsc_max_warp, tsc_check_count;
-static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;
-
-static void tsc_check_slave(void *unused)
-{
-    unsigned int cpu = smp_processor_id();
-    local_irq_disable();
-    while ( !cpu_isset(cpu, tsc_check_cpumask) )
-        mb();
-    check_tsc_warp(cpu_khz, &tsc_max_warp);
-    cpu_clear(cpu, tsc_check_cpumask);
-    local_irq_enable();
-}
-
-void tsc_check_reliability(void)
-{
-    unsigned int cpu = smp_processor_id();
-    static DEFINE_SPINLOCK(lock);
-
-    spin_lock(&lock);
-
-    tsc_check_count++;
-    smp_call_function(tsc_check_slave, NULL, 0);
-    tsc_check_cpumask = cpu_online_map;
-    local_irq_disable();
-    check_tsc_warp(cpu_khz, &tsc_max_warp);
-    cpu_clear(cpu, tsc_check_cpumask);
-    local_irq_enable();
-    while ( !cpus_empty(tsc_check_cpumask) )
-        cpu_relax();
-
-    spin_unlock(&lock);
-}
-
-/*
  * PV SoftTSC Emulation.
  */
 
@@ -1616,19 +1630,10 @@ void pv_soft_rdtsc(struct vcpu *v, struc
 
 static int host_tsc_is_safe(void)
 {
-    extern unsigned int max_cstate;
-
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
         return 1;
     if ( num_online_cpus() == 1 )
         return 1;
-    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && max_cstate <= 2 )
-    {
-        if ( !tsc_check_count )
-            tsc_check_reliability();
-        if ( tsc_max_warp == 0 )
-            return 1;
-    }
     return 0;
 }
 
diff -r 1a911fd65e52 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h  Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/include/asm-x86/cpufeature.h  Fri Dec 18 11:40:25 2009 -0700
@@ -132,6 +132,7 @@
 
 #define cpu_has(c, bit)                test_bit(bit, (c)->x86_capability)
 #define boot_cpu_has(bit)      test_bit(bit, boot_cpu_data.x86_capability)
+#define set_boot_cpu_bit(bit)  set_bit(bit, boot_cpu_data.x86_capability)
 
 #ifdef __i386__
 #define cpu_has_vme            boot_cpu_has(X86_FEATURE_VME)
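
For a sense of scale on the measurement window: tsc_khz is TSC ticks
per millisecond, so check_tsc_warp() spins for tsc_khz * 20 ticks,
i.e. 20 msecs, with the 10-million-iteration bound as a safety net
against a wedged TSC.  A trivial stand-alone sketch of that arithmetic,
assuming a hypothetical 2.4 GHz TSC:

#include <stdio.h>

int main(void)
{
    /* Assumed clock rate for illustration; the real code reads cpu_khz. */
    unsigned long long tsc_khz = 2400000ULL;          /* 2.4 GHz TSC */
    unsigned long long window = tsc_khz * 20ULL;      /* as in check_tsc_warp() */
    printf("20 msec window = %llu TSC ticks\n", window);  /* 48000000 */
    return 0;
}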

Attachment: verify-tsc-sync.patch
Description: Binary data
