[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 4/6] x86: bring up all CPUs even if not all are supposed to be used



On Wed, Jul 18, 2018 at 02:21:53AM -0600, Jan Beulich wrote:
> Reportedly Intel CPUs which can't broadcast #MC to all targeted
> cores/threads because some have CR4.MCE clear will shut down. Therefore
> we want to keep CR4.MCE enabled when offlining a CPU, and we need to
> bring up all CPUs in order to be able to set CR4.MCE in the first place.
> 
> The use of clear_in_cr4() in cpu_mcheck_disable() was ill advised
> anyway, and to avoid future similar mistakes I'm removing clear_in_cr4()
> altogether right here.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> v2: Use ROUNDUP().
> ---
> Instead of fully bringing up CPUs and then calling cpu_down(), another
> option would be to suppress/cancel full bringup in smp_callin(). But I
> guess we should try to keep things simple for now, and see later whether
> this can be "optimized".
> ---
> Note: The parked CPUs can be brought online (i.e. the meaning of
>       "maxcpus=" isn't as strict anymore as it was before), but won't
>       immediately be used for scheduling pre-existing Dom0 CPUs. That's
>       because dom0_setup_vcpu() artificially restricts the affinity. For
>       DomU-s whose affinity was not artificially restricted, no such
>       limitation exists, albeit the shown "soft" affinity appears to
>       suffer a similar issue. As that's not a goal of this patch, I've
>       put the issues on the side for now, perhaps for someone else to
>       take care of.
> Note: On one of my test systems the parked CPUs get _PSD data reported
>       by Dom0 that is different from the non-parked ones (coord_type is
>       0xFC instead of 0xFE). Giving Dom0 enough vCPU-s eliminates this

From drivers/xen/xen-acpi-processor.c:

181         /* 'acpi_processor_preregister_performance' does not parse if the
182          * num_processors <= 1, but Xen still requires it. Do it manually here.
183          */
184         if (pdomain->num_processors <= 1) {
185                 if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
186                         dst->shared_type = CPUFREQ_SHARED_TYPE_ALL;
187                 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
188                         dst->shared_type = CPUFREQ_SHARED_TYPE_HW;
189                 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
190                         dst->shared_type = CPUFREQ_SHARED_TYPE_ANY;
191
192         }

?
>       problem, so there is apparently something amiss in the processor
>       driver. I've tried to figure out what, but I couldn't, despite the
>       AML suggesting that this might be some _OSC invocation (but if it
>       is, I can't find it - acpi_run_osc() clearly does not anywhere get
>       invoked in a per-CPU fashion).
> 
> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -13,6 +13,7 @@
>  #include <public/sysctl.h> /* for XEN_INVALID_{SOCKET,CORE}_ID */
>  
>  #include "cpu.h"
> +#include "mcheck/x86_mca.h"
>  
>  bool_t opt_arat = 1;
>  boolean_param("arat", opt_arat);
> @@ -343,6 +344,9 @@ static void __init early_cpu_detect(void
>                       hap_paddr_bits = PADDR_BITS;
>       }
>  
> +     if (c->x86_vendor != X86_VENDOR_AMD)
> +             park_offline_cpus = opt_mce;
> +
>       initialize_cpu_data(0);
>  }
>  
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
> @@ -636,8 +636,6 @@ static void clear_cmci(void)
>  
>  static void cpu_mcheck_disable(void)
>  {
> -    clear_in_cr4(X86_CR4_MCE);
> -
>      if ( cmci_support && opt_mce )
>          clear_cmci();
>  }
> --- a/xen/arch/x86/mpparse.c
> +++ b/xen/arch/x86/mpparse.c
> @@ -68,19 +68,26 @@ physid_mask_t phys_cpu_present_map;
>  
>  void __init set_nr_cpu_ids(unsigned int max_cpus)
>  {
> +     unsigned int tot_cpus = num_processors + disabled_cpus;
> +
>       if (!max_cpus)
> -             max_cpus = num_processors + disabled_cpus;
> +             max_cpus = tot_cpus;
>       if (max_cpus > NR_CPUS)
>               max_cpus = NR_CPUS;
>       else if (!max_cpus)
>               max_cpus = 1;
>       printk(XENLOG_INFO "SMP: Allowing %u CPUs (%d hotplug CPUs)\n",
>              max_cpus, max_t(int, max_cpus - num_processors, 0));
> -     nr_cpu_ids = max_cpus;
> +
> +     if (!park_offline_cpus)
> +             tot_cpus = max_cpus;
> +     nr_cpu_ids = min(tot_cpus, NR_CPUS + 0u);
> +     if (park_offline_cpus && nr_cpu_ids < num_processors)
> +             printk(XENLOG_WARNING "SMP: Cannot bring up %u further CPUs\n",
> +                    num_processors - nr_cpu_ids);
>  
>  #ifndef nr_cpumask_bits
> -     nr_cpumask_bits = (max_cpus + (BITS_PER_LONG - 1)) &
> -                       ~(BITS_PER_LONG - 1);
> +     nr_cpumask_bits = ROUNDUP(nr_cpu_ids, BITS_PER_LONG);
>       printk(XENLOG_DEBUG "NR_CPUS:%u nr_cpumask_bits:%u\n",
>              NR_CPUS, nr_cpumask_bits);
>  #endif
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -665,7 +665,7 @@ void __init noreturn __start_xen(unsigne
>  {
>      char *memmap_type = NULL;
>      char *cmdline, *kextra, *loader;
> -    unsigned int initrdidx;
> +    unsigned int initrdidx, num_parked = 0;
>      multiboot_info_t *mbi;
>      module_t *mod;
>      unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
> @@ -1512,7 +1512,8 @@ void __init noreturn __start_xen(unsigne
>      else
>      {
>          set_nr_cpu_ids(max_cpus);
> -        max_cpus = nr_cpu_ids;
> +        if ( !max_cpus )
> +            max_cpus = nr_cpu_ids;
>      }
>  
>      if ( xen_guest )
> @@ -1635,16 +1636,27 @@ void __init noreturn __start_xen(unsigne
>              /* Set up node_to_cpumask based on cpu_to_node[]. */
>              numa_add_cpu(i);
>  
> -            if ( (num_online_cpus() < max_cpus) && !cpu_online(i) )
> +            if ( (park_offline_cpus || num_online_cpus() < max_cpus) &&
> +                 !cpu_online(i) )
>              {
>                  int ret = cpu_up(i);
>                  if ( ret != 0 )
>                      printk("Failed to bring up CPU %u (error %d)\n", i, ret);
> +                else if ( num_online_cpus() > max_cpus )
> +                {
> +                    ret = cpu_down(i);
> +                    if ( !ret )
> +                        ++num_parked;
> +                    else
> +                        printk("Could not re-offline CPU%u (%d)\n", i, ret);
> +                }
>              }
>          }
>      }
>  
>      printk("Brought up %ld CPUs\n", (long)num_online_cpus());
> +    if ( num_parked )
> +        printk(XENLOG_INFO "Parked %u CPUs\n", num_parked);
>      smp_cpus_done();
>  
>      do_initcalls();
> --- a/xen/include/asm-x86/processor.h
> +++ b/xen/include/asm-x86/processor.h
> @@ -342,12 +342,6 @@ static always_inline void set_in_cr4 (un
>      write_cr4(read_cr4() | mask);
>  }
>  
> -static always_inline void clear_in_cr4 (unsigned long mask)
> -{
> -    mmu_cr4_features &= ~mask;
> -    write_cr4(read_cr4() & ~mask);
> -}
> -
>  static inline unsigned int read_pkru(void)
>  {
>      unsigned int pkru;
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxxx
> https://lists.xenproject.org/mailman/listinfo/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.