Xen project Mailing List

Re: [Xen-devel] [PATCH v8 14/16] x86/microcode: Synchronize late microcode loading

Date: Mon, 5 Aug 2019 10:43:16 +0000

Accept-language: en-US

Arc-authentication-results: i=1; mx.microsoft.com 1;spf=pass smtp.mailfrom=suse.com;dmarc=pass action=none header.from=suse.com;dkim=pass header.d=suse.com;arc=none

Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=+A5crmVVTiQTHrwgJeGWQUob8UJx0u+0AQ+jmyjGYNc=; b=c7bsJfxlyug0onPcXUN4//bbrNP18jxBuHY5ybVY5NTZeNtMebPmFn1p7DcMRy/nyiYQl77Wh38UBKItAeZ9X+QBHN3uW8CI6IxH9MMpazPAzf652ypESnJiRFMOGduMed3UZG8jXPVbOJgJOpbIdTniEPUwPpFL54D/UfA9X5Kzugbhy1EJudkJgs+gd54nHRujeqZHdC+8qe0pU05K0kMzb2G8wHSP3JeblC8GA7JAxzOBg01bl0ved8gp5X22kwvNELTXGSt65zP7/BsnFXIvGO6V6VYBn434m79GTDk9lXK0bKk5kX5/4HvMxH7/nC+0EsoXoL2Q4tk95BkeGg==

Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=AlcfX9Lf1ok6cjuhUF29bNLjapxWq28SewTHllMw+7CPHBd8SUfGncNMxQ/ImguQTU6vQD5TUsLRn3QggiCn7QkwMUGV52QAwqeq3ynVbxeYyXI9Oqb1dqR553oPHp5utG5g53WRjayzMC56UnWvXKOrCPc4bwZNgpTfo7Xh03e8IyoCKhObqzjbd4AHKNHnArmKX4wqIw3Icq4aUog6d7acztbXEafRcmFBaWkORM3sRIefNmf/+NU45tGYuTEkrfSxv4uXozbULwGMaaHoagP+eHFyVslujPxZ6DpM52RjkzLldhvhjHQXubqYxNXhfLlGA0pMNQOg0o0kGmEIHg==

Authentication-results: spf=none (sender IP is ) smtp.mailfrom=JBeulich@xxxxxxxx;

Cc: Kevin Tian <kevin.tian@xxxxxxxxx>, Ashok Raj <ashok.raj@xxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Jun Nakajima <jun.nakajima@xxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>, Thomas Gleixner <tglx@xxxxxxxxxxxxx>, Borislav Petkov <bp@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>

Delivery-date: Mon, 05 Aug 2019 10:44:15 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Thread-index: AQHVSFKr3nOgmxjRgEC58Iy/dUUpk6bsZIcA

Thread-topic: [PATCH v8 14/16] x86/microcode: Synchronize late microcode loading

On 01.08.2019 12:22, Chao Gao wrote: > @@ -234,6 +267,35 @@ bool microcode_update_cache(struct microcode_patch > *patch) > } > > /* > + * Wait for a condition to be met with a timeout (us). > + */ > +static int wait_for_condition(int (*func)(void *data), void *data, > + unsigned int timeout) > +{ > + while ( !func(data) ) > + { > + if ( !timeout-- ) > + { > + printk("CPU%u: Timeout in %s\n", smp_processor_id(), __func__); I don't think __func__ is helpful here for problem analysis. Instead I think you would want to log either func or __builtin_return_address(0). > @@ -283,37 +345,105 @@ static int microcode_update_cpu(const struct > microcode_patch *patch) > return err; > } > > -static long do_microcode_update(void *patch) > +static int do_microcode_update(void *patch) > { > - unsigned int cpu; > + unsigned int cpu = smp_processor_id(); > + unsigned int cpu_nr = num_online_cpus(); > + int ret; > > - /* store the patch after a successful loading */ > - if ( !microcode_update_cpu(patch) && patch ) > + /* Mark loading an ucode is in progress */ > + cmpxchg(&loading_state, LOADING_EXITED, LOADING_ENTERED); Why cmpxchg(), especially when you don't check whether the store has actually happened? (Same further down then.) > + cpumask_set_cpu(cpu, &cpu_callin_map); > + ret = wait_for_condition(wait_cpu_callin, (void *)(unsigned long)cpu_nr, > + MICROCODE_CALLIN_TIMEOUT_US); > + if ( ret ) > { > - spin_lock(&microcode_mutex); > - microcode_update_cache(patch); > - spin_unlock(&microcode_mutex); > - patch = NULL; > + cmpxchg(&loading_state, LOADING_ENTERED, LOADING_ABORTED); > + return ret; > } > > - if ( microcode_ops->end_update ) > - microcode_ops->end_update(); > + /* > + * Load microcode update on only one logical processor per core, or in > + * AMD's term, one core per compute unit. The one with the lowest thread > + * id among all siblings is chosen to perform the loading. > + */ > + if ( (cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu))) ) > + { > + static unsigned int panicked = 0; > + bool monitor; > + unsigned int done; > + unsigned long tick = 0; > > - cpu = cpumask_next(smp_processor_id(), &cpu_online_map); > - if ( cpu < nr_cpu_ids ) > - return continue_hypercall_on_cpu(cpu, do_microcode_update, patch); > + ret = microcode_ops->apply_microcode(patch); > + if ( !ret ) > + { > + unsigned int cpu2; > > - /* Free the patch if no CPU has loaded it successfully. */ > - if ( patch ) > - microcode_free_patch(patch); > + atomic_inc(&cpu_updated); > + /* Propagate revision number to all siblings */ > + for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu)) > + per_cpu(cpu_sig, cpu2).rev = this_cpu(cpu_sig).rev; > + } > > - return 0; > + /* > + * The first CPU reaching here will monitor the progress and emit > + * warning message if the duration is too long (e.g. >1 second). > + */ > + monitor = !atomic_inc_return(&cpu_out); > + if ( monitor ) > + tick = rdtsc_ordered(); > + > + /* Waiting for all cores or computing units finishing update */ > + done = atomic_read(&cpu_out); > + while ( panicked && done != nr_cores ) Don't you mean "!panicked" here, or || instead of && ? Otherwise I don't see how the loop would ever be entered. > + { > + /* > + * During each timeout interval, at least a CPU is expected to > + * finish its update. Otherwise, something goes wrong. > + * > + * Note that RDTSC (in wait_for_condition()) is safe for threads > to > + * execute while waiting for completion of loading an update. > + */ > + if ( wait_for_condition(&wait_cpu_callout, > + (void *)(unsigned long)(done + 1), > + MICROCODE_UPDATE_TIMEOUT_US) && > + !cmpxchg(&panicked, 0, 1) ) > + panic("Timeout when finishing updating microcode (finished > %u/%u)", > + done, nr_cores); > + > + /* Print warning message once if long time is spent here */ > + if ( monitor ) > + { > + if ( rdtsc_ordered() - tick >= cpu_khz * 1000 ) > + { > + printk(XENLOG_WARNING "WARNING: UPDATING MICROCODE HAS > CONSUMED MORE THAN 1 SECOND!\n"); Please save a little bit on line length by wrapping lines before the initial quote. > + monitor = false; > + } > + } Please combine both if()-s. And please also consider dropping the "monitor" local variable - "tick" being non-zero can easily replace it afaics. > + done = atomic_read(&cpu_out); > + } > + > + /* Mark loading is done to unblock other threads */ > + loading_state = LOADING_EXITED; > + } > + else > + { > + while ( loading_state == LOADING_ENTERED ) > + rep_nop(); Instead of the "for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu))" above, wouldn't it be more logical to have each thread update its signature here? Also - do you perhaps mean cpu_relax() instead of rep_nop()? > @@ -344,19 +481,67 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) > buf, unsigned long len) > { > ret = PTR_ERR(patch); > printk(XENLOG_INFO "Parsing microcode blob error %d\n", ret); > - goto free; > + goto put; > } > > if ( !patch ) > { > printk(XENLOG_INFO "No ucode found. Update aborted!\n"); > ret = -EINVAL; > - goto free; > + goto put; > + } > + > + cpumask_clear(&cpu_callin_map); > + atomic_set(&cpu_out, 0); > + atomic_set(&cpu_updated, 0); > + loading_state = LOADING_EXITED; > + > + /* Calculate the number of online CPU core */ > + nr_cores = 0; > + for_each_online_cpu(cpu) > + if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) ) > + nr_cores++; > + > + printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores); > + > + /* > + * We intend to disable interrupt for long time, which may lead to > + * watchdog timeout. > + */ > + watchdog_disable(); > + /* > + * Late loading dance. Why the heavy-handed stop_machine effort? > + * > + * - HT siblings must be idle and not execute other code while the other > + * sibling is loading microcode in order to avoid any negative > + * interactions cause by the loading. > + * > + * - In addition, microcode update on the cores must be serialized until > + * this requirement can be relaxed in the future. Right now, this is > + * conservative and good. > + */ > + ret = stop_machine_run(do_microcode_update, patch, NR_CPUS); > + watchdog_enable(); > + > + updated = atomic_read(&cpu_updated); > + if ( updated > 0 ) > + { > + spin_lock(&microcode_mutex); > + microcode_update_cache(patch); > + spin_unlock(&microcode_mutex); > } > + else > + microcode_free_patch(patch); > > - ret = continue_hypercall_on_cpu(cpumask_first(&cpu_online_map), > - do_microcode_update, patch); > + if ( updated && updated != nr_cores ) > + printk(XENLOG_ERR > + "ERROR: Updating microcode succeeded on %u cores and failed > on\n" > + "other %u cores. A system with differing microcode revisions > is\n" > + "considered unstable. Please reboot and do not load the > microcode\n" > + "that triggers this warning!\n", updated, nr_cores - updated); Note that for such multi-line log messages you need to prefix XENLOG_* to every one of them. Jan _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.