[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Problems with APIC on versions 4.9 and later (4.8 works)



On 28.01.2021 14:08, Claudemir Todo Bom wrote:
> Em qui., 28 de jan. de 2021 às 06:49, Jan Beulich <jbeulich@xxxxxxxx> 
> escreveu:
>>
>> On 28.01.2021 10:47, Jan Beulich wrote:
>>> On 26.01.2021 14:03, Claudemir Todo Bom wrote:
>>>> If this information is good for more tests, please send the patch and
>>>> I will test it!
>>>
>>> Here you go. For simplifying analysis it may be helpful if you
>>> could limit the number of CPUs in use, e.g. by "maxcpus=4" or
>>> at least "smt=0". Provided the problem still reproduces with
>>> such options, of course.
>>
>> Speaking of command line options - it doesn't look like you have
>> told us what else you have on the Xen command line, and without
>> a serial log this isn't visible (e.g. in your video).
> 
> All tests are done with xen command line:
> 
> dom0_mem=1024M,max:2048M dom0_max_vcpus=4 dom0_vcpus_pin=true
> smt=false vga=text-80x50,keep
> 
> and kernel command line:
> 
> loglevel=0 earlyprintk=xen nomodeset
> 
> this way I can get all xen messages on console.
> 
> Attached are the frames I captured from a video, I manually selected
> them starting from the first readable frame.

I've just sent a pair of patches, with you Cc-ed on the 2nd one.
Please give that one a try, with or without the updated debugging
patch below. In case of problems I'd of course want to see the
output from the debugging patch as well. I think it's up to you
whether you also use the first patch from that series - afaict it
shouldn't directly affect your case, but I may be wrong.

Thanks, Jan

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1558,6 +1558,12 @@ static void local_time_calibration(void)
  * TSC Reliability check
  */
 
+static struct {//temp
+ unsigned cpu;
+ signed iter;
+ cycles_t prev, now;
+} check_log[NR_CPUS + 4];
+static unsigned check_idx;//temp
 /*
  * The Linux original version of this function is
  * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
@@ -1566,6 +1572,7 @@ static void check_tsc_warp(unsigned long
 {
     static DEFINE_SPINLOCK(sync_lock);
     static cycles_t last_tsc;
+unsigned idx, cpu = smp_processor_id();//temp
 
     cycles_t start, now, prev, end;
     int i;
@@ -1576,6 +1583,15 @@ static void check_tsc_warp(unsigned long
     end = start + tsc_khz * 20ULL;
     now = start;
 
+{//temp
+ spin_lock(&sync_lock);
+ idx = check_idx++;
+ check_log[idx].cpu = cpu;
+ check_log[idx].iter = -1;
+ check_log[idx].now = now;
+ spin_unlock(&sync_lock);
+}
+
     for ( i = 0; ; i++ )
     {
         /*
@@ -1610,7 +1626,14 @@ static void check_tsc_warp(unsigned long
         {
             spin_lock(&sync_lock);
             if ( *max_warp < prev - now )
+{//temp
                 *max_warp = prev - now;
+ idx = check_idx++;
+ check_log[idx].cpu = cpu;
+ check_log[idx].iter = i;
+ check_log[idx].prev = prev;
+ check_log[idx].now = now;
+}
             spin_unlock(&sync_lock);
         }
     }
@@ -1647,6 +1670,12 @@ static void tsc_check_reliability(void)
         cpu_relax();
 
     spin_unlock(&lock);
+{//temp
+ unsigned i;
+ printk("CHK[%2u] %lx\n", cpu, tsc_max_warp);//temp
+ for(i = 0; i < ARRAY_SIZE(check_log) && check_log[i].now; ++i)
+  printk("chk[%4u] CPU%-2u %016lx %016lx #%d\n", i, check_log[i].cpu, 
check_log[i].prev, check_log[i].now, check_log[i].iter);
+}
 }
 
 /*
@@ -1661,6 +1690,7 @@ struct calibration_rendezvous {
     uint64_t master_tsc_stamp, max_tsc_stamp;
 };
 
+static bool rdzv_log;//temp
 static void
 time_calibration_rendezvous_tail(const struct calibration_rendezvous *r)
 {
@@ -1670,6 +1700,7 @@ time_calibration_rendezvous_tail(const s
     c->local_stime  = get_s_time_fixed(c->local_tsc);
     c->master_stime = r->master_stime;
 
+if(rdzv_log) printk("RDZV[%2u] t=%016lx(%016lx) s=%012lx(%012lx)\n", 
smp_processor_id(), c->local_tsc, r->master_tsc_stamp, c->local_stime, 
r->master_stime);//temp
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
 }
 
@@ -1682,7 +1713,9 @@ static void time_calibration_tsc_rendezv
     int i;
     struct calibration_rendezvous *r = _r;
     unsigned int total_cpus = cpumask_weight(&r->cpu_calibration_map);
+uint64_t adj = 0;//temp
 
+if(rdzv_log) printk("RDZV[%2u] t=%016lx\n", smp_processor_id(), 
rdtsc_ordered());//temp
     /* Loop to get rid of cache effects on TSC skew. */
     for ( i = 4; i >= 0; i-- )
     {
@@ -1706,6 +1739,7 @@ static void time_calibration_tsc_rendezv
                  */
                 uint64_t delta = r->max_tsc_stamp - r->master_tsc_stamp;
 
+adj = delta;//temp
                 r->master_stime += scale_delta(delta,
                                                &this_cpu(cpu_time).tsc_scale);
                 r->master_tsc_stamp = r->max_tsc_stamp;
@@ -1747,6 +1781,13 @@ static void time_calibration_tsc_rendezv
     }
 
     time_calibration_rendezvous_tail(r);
+if(adj) {//temp
+ static unsigned long cnt, thr;
+ if(++cnt > thr) {
+  thr |= cnt;
+  printk("TSC adjusted by %lx\n", adj);
+ }
+}
 }
 
 /* Ordinary rendezvous function which does not modify TSC values. */
@@ -1798,6 +1839,12 @@ static void time_calibration(void *unuse
     struct calibration_rendezvous r = {
         .semaphore = ATOMIC_INIT(0)
     };
+static unsigned long cnt, thr;//temp
+if(system_state > SYS_STATE_smp_boot && ++cnt > thr) {//temp
+ thr |= cnt;
+ printk("TSC: %ps\n", time_calibration_rendezvous_fn);
+ rdzv_log = true;
+}
 
     if ( clocksource_is_tsc() )
     {
@@ -1812,6 +1859,10 @@ static void time_calibration(void *unuse
     on_selected_cpus(&r.cpu_calibration_map,
                      time_calibration_rendezvous_fn,
                      &r, 1);
+if(rdzv_log || system_state <= SYS_STATE_smp_boot) {//temp
+ rdzv_log = false;
+ printk("TSC: end rendezvous\n");
+}
 }
 
 static struct cpu_time_stamp ap_bringup_ref;
@@ -1908,6 +1959,7 @@ void init_percpu_time(void)
     }
     t->stamp.local_tsc   = tsc;
     t->stamp.local_stime = now;
+printk("INIT[%2u] t=%016lx s=%012lx m=%012lx\n", smp_processor_id(), tsc, now, 
t->stamp.master_stime);//temp
 }
 
 /*
@@ -2050,6 +2102,7 @@ static int __init verify_tsc_reliability
      * While with constant-rate TSCs the scale factor can be shared, when TSCs
      * are not marked as 'reliable', re-sync during rendezvous.
      */
+printk("TSC: c=%d r=%d\n", !!boot_cpu_has(X86_FEATURE_CONSTANT_TSC), 
!!boot_cpu_has(X86_FEATURE_TSC_RELIABLE));//temp
     if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
          !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
         time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
@@ -2065,6 +2118,7 @@ int __init init_xen_time(void)
 {
     tsc_check_writability();
 
+printk("TSC: c=%d r=%d\n", !!boot_cpu_has(X86_FEATURE_CONSTANT_TSC), 
!!boot_cpu_has(X86_FEATURE_TSC_RELIABLE));//temp
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
     /* NB. get_wallclock_time() can take over one second to execute. */



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.