$NetBSD: $ Commit df8234fd2c ("replace vCPU's dirty CPU mask by numeric ID") was too lax in two respects: First of all it didn't consider the case of a vCPU not having a valid dirty CPU in the descriptor table TLB flush case. This is the issue Manuel has run into with NetBSD. Additionally reads of ->dirty_cpu for other than the current vCPU are at risk of racing with scheduler actions, i.e. single atomic reads need to be used there. Obviously the non-init write sites then better also use atomic writes. Having to touch the descriptor table TLB flush code here anyway, take the opportunity and switch it to be at most one flush_tlb_mask() invocation. Reported-by: Manuel Bouyer Signed-off-by: Jan Beulich --- xen/arch/x86/domain.c.orig +++ xen/arch/x86/domain.c @@ -1631,7 +1631,7 @@ static void __context_switch(void) */ if ( pd != nd ) cpumask_set_cpu(cpu, nd->dirty_cpumask); - n->dirty_cpu = cpu; + write_atomic(&n->dirty_cpu, cpu); if ( !is_idle_domain(nd) ) { @@ -1687,7 +1687,7 @@ static void __context_switch(void) if ( pd != nd ) cpumask_clear_cpu(cpu, pd->dirty_cpumask); - p->dirty_cpu = VCPU_CPU_CLEAN; + write_atomic(&p->dirty_cpu, VCPU_CPU_CLEAN); per_cpu(curr_vcpu, cpu) = n; } --- xen/arch/x86/mm.c.orig +++ xen/arch/x86/mm.c @@ -1202,11 +1202,23 @@ void put_page_from_l1e(l1_pgentry_t l1e, unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) && (l1e_owner == pg_owner) ) { + cpumask_t *mask = this_cpu(scratch_cpumask); + + cpumask_clear(mask); + for_each_vcpu ( pg_owner, v ) { - if ( pv_destroy_ldt(v) ) - flush_tlb_mask(cpumask_of(v->dirty_cpu)); + unsigned int cpu; + + if ( !pv_destroy_ldt(v) ) + continue; + cpu = read_atomic(&v->dirty_cpu); + if ( is_vcpu_dirty_cpu(cpu) ) + __cpumask_set_cpu(cpu, mask); } + + if ( !cpumask_empty(mask) ) + flush_tlb_mask(mask); } put_page(page); } @@ -2979,13 +2991,18 @@ static inline int vcpumask_to_pcpumask( while ( vmask ) { + unsigned int cpu; + vcpu_id = find_first_set_bit(vmask); vmask &= ~(1UL << vcpu_id); vcpu_id += 
vcpu_bias; if ( (vcpu_id >= d->max_vcpus) ) return 0; - if ( ((v = d->vcpu[vcpu_id]) != NULL) && vcpu_cpu_dirty(v) ) - __cpumask_set_cpu(v->dirty_cpu, pmask); + if ( (v = d->vcpu[vcpu_id]) == NULL ) + continue; + cpu = read_atomic(&v->dirty_cpu); + if ( is_vcpu_dirty_cpu(cpu) ) + __cpumask_set_cpu(cpu, pmask); } } } --- xen/include/xen/sched.h.orig +++ xen/include/xen/sched.h @@ -795,10 +795,15 @@ static inline int vcpu_runnable(struct v atomic_read(&v->domain->pause_count)); } -static inline bool vcpu_cpu_dirty(const struct vcpu *v) +static inline bool is_vcpu_dirty_cpu(unsigned int cpu) { BUILD_BUG_ON(NR_CPUS >= VCPU_CPU_CLEAN); - return v->dirty_cpu != VCPU_CPU_CLEAN; + return cpu != VCPU_CPU_CLEAN; +} + +static inline bool vcpu_cpu_dirty(const struct vcpu *v) +{ + return is_vcpu_dirty_cpu(v->dirty_cpu); } void vcpu_block(void);