diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index a9ca09acb2..e4439b2397 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -46,6 +46,8 @@ static void do_idle(void) { unsigned int cpu = smp_processor_id(); + rcu_quiet_enter(); + sched_tick_suspend(); /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ process_pending_softirqs(); @@ -59,6 +61,8 @@ static void do_idle(void) local_irq_enable(); sched_tick_resume(); + + rcu_quiet_exit(); } void idle_loop(void) diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c index 1d2b762e22..5158a03746 100644 --- a/xen/arch/arm/traps.c +++ b/xen/arch/arm/traps.c @@ -2007,6 +2007,8 @@ void enter_hypervisor_from_guest(void) { struct vcpu *v = current; + rcu_quiet_exit(); + /* * If we pended a virtual abort, preserve it until it gets cleared. * See ARM ARM DDI 0487A.j D1.14.3 (Virtual Interrupts) for details, @@ -2264,6 +2266,8 @@ static void check_for_vcpu_work(void) */ void leave_hypervisor_to_guest(void) { + rcu_quiet_enter(); + local_irq_disable(); check_for_vcpu_work(); diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c index 836f524ef4..3d8dcec143 100644 --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -647,7 +647,8 @@ static void acpi_processor_idle(void) cpufreq_dbs_timer_suspend(); sched_tick_suspend(); - /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ + rcu_quiet_enter(); + /* rcu_quiet_enter() can raise TIMER_SOFTIRQ. Process it now. 
*/ process_pending_softirqs(); /* @@ -660,6 +661,7 @@ static void acpi_processor_idle(void) { local_irq_enable(); sched_tick_resume(); + rcu_quiet_exit(); cpufreq_dbs_timer_resume(); return; } @@ -785,6 +787,7 @@ static void acpi_processor_idle(void) power->last_state = &power->states[0]; local_irq_enable(); sched_tick_resume(); + rcu_quiet_exit(); cpufreq_dbs_timer_resume(); return; } @@ -793,6 +796,7 @@ static void acpi_processor_idle(void) power->last_state = &power->states[0]; sched_tick_resume(); + rcu_quiet_exit(); cpufreq_dbs_timer_resume(); if ( cpuidle_current_governor->reflect ) diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c index 52413e6da1..2657ec76f4 100644 --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -756,7 +756,8 @@ static void mwait_idle(void) cpufreq_dbs_timer_suspend(); sched_tick_suspend(); - /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ + rcu_quiet_enter(); + /* rcu_quiet_enter() can raise TIMER_SOFTIRQ. Process it now. */ process_pending_softirqs(); /* Interrupts must be disabled for C2 and higher transitions. */ @@ -765,6 +766,7 @@ static void mwait_idle(void) if (!cpu_is_haltable(cpu)) { local_irq_enable(); sched_tick_resume(); + rcu_quiet_exit(); cpufreq_dbs_timer_resume(); return; } @@ -807,6 +809,7 @@ static void mwait_idle(void) lapic_timer_on(); sched_tick_resume(); + rcu_quiet_exit(); cpufreq_dbs_timer_resume(); if ( cpuidle_current_governor->reflect ) diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c index 42ab9dbbd6..a9c24b5889 100644 --- a/xen/common/rcupdate.c +++ b/xen/common/rcupdate.c @@ -52,8 +52,8 @@ static struct rcu_ctrlblk { int next_pending; /* Is the next batch already waiting? */ spinlock_t lock __cacheline_aligned; - cpumask_t cpumask; /* CPUs that need to switch in order ... */ - cpumask_t ignore_cpumask; /* ... unless they are already idle */ + cpumask_t cpumask; /* CPUs that need to switch in order... 
*/ + cpumask_t ignore_cpumask; /* ...unless already idle or in guest */ /* for current batch to proceed. */ } __cacheline_aligned rcu_ctrlblk = { .cur = -300, @@ -85,7 +85,7 @@ struct rcu_data { struct rcu_head barrier; long last_rs_qlen; /* qlen during the last resched */ - /* 3) idle CPUs handling */ + /* 3) idle (or in guest mode) CPUs handling */ struct timer cb_timer; bool cb_timer_active; }; @@ -107,6 +107,12 @@ struct rcu_data { * 3) it is stopped immediately, if the CPU wakes up from idle and * resumes 'normal' execution. * + * Note also that the same happens if a CPU starts executing a guest that + * (almost) never comes back into the hypervisor. This may be the case if + * the guest uses "idle=poll" / "vwfi=native". Therefore, we need to handle + * guest entry events in the same way as the CPU going idle, i.e., consider + * it quiesced and arm the timer. + * * About how far in the future the timer should be programmed each time, * it's hard to tell (guess!!). Since this mimics Linux's periodic timer * tick, take values used there as an indication. In Linux 2.6.21, tick @@ -304,9 +310,10 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) * Make sure the increment of rcp->cur is visible so, even if a * CPU that is about to go idle, is captured inside rcp->cpumask, * rcu_pending() will return false, which then means cpu_quiet() - * will be invoked, before the CPU would actually enter idle. + * will be invoked, before the CPU would actually go idle (or + * enter a guest). * - * This barrier is paired with the one in rcu_idle_enter(). + * This barrier is paired with the one in rcu_quiet_enter(). */ smp_mb(); cpumask_andnot(&rcp->cpumask, &cpu_online_map, &rcp->ignore_cpumask); @@ -463,14 +470,15 @@ int rcu_needs_cpu(int cpu) * periodically poke rcu_pedning(), so that it will invoke the callback * not too late after the end of the grace period. 
*/ -static void cb_timer_start(void) +static void cb_timer_start(unsigned int cpu) { - struct rcu_data *rdp = &this_cpu(rcu_data); + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); /* * Note that we don't check rcu_pending() here. In fact, we don't want * the timer armed on CPUs that are in the process of quiescing while - * going idle, unless they really are the ones with a queued callback. + * going idle or entering guest mode, unless they really have queued + * callbacks. */ if (likely(!rdp->curlist)) return; @@ -479,9 +487,9 @@ rdp->cb_timer_active = true; } -static void cb_timer_stop(void) +static void cb_timer_stop(unsigned int cpu) { - struct rcu_data *rdp = &this_cpu(rcu_data); + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); if (likely(!rdp->cb_timer_active)) return; @@ -635,11 +643,14 @@ } /* - * The CPU is becoming idle, so no more read side critical - * sections, and one more step toward grace period. + * The CPU is about to either idle or enter the guest. In any of + * these cases, it can't have any outstanding read side critical sections + * so this is one step toward the end of the grace period.
*/ -void rcu_idle_enter(unsigned int cpu) +void rcu_quiet_enter(void) { + unsigned int cpu = smp_processor_id(); + ASSERT(!cpumask_test_cpu(cpu, &rcu_ctrlblk.ignore_cpumask)); cpumask_set_cpu(cpu, &rcu_ctrlblk.ignore_cpumask); /* @@ -652,11 +663,14 @@ */ smp_mb(); - cb_timer_start(); + cb_timer_start(cpu); } -void rcu_idle_exit(unsigned int cpu) + +void rcu_quiet_exit(void) { - cb_timer_stop(); + unsigned int cpu = smp_processor_id(); + + cb_timer_stop(cpu); ASSERT(cpumask_test_cpu(cpu, &rcu_ctrlblk.ignore_cpumask)); cpumask_clear_cpu(cpu, &rcu_ctrlblk.ignore_cpumask); } diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h index 13850865ed..63db0f9887 100644 --- a/xen/include/xen/rcupdate.h +++ b/xen/include/xen/rcupdate.h @@ -145,8 +145,8 @@ void call_rcu(struct rcu_head *head, int rcu_barrier(void); -void rcu_idle_enter(unsigned int cpu); -void rcu_idle_exit(unsigned int cpu); +void rcu_quiet_enter(void); +void rcu_quiet_exit(void); void rcu_idle_timer_start(void); void rcu_idle_timer_stop(void);