Re: [PATCH v2 15/18] x86/mm: introduce a per-vCPU mapcache when using ASI
On Wed Jan 8, 2025 at 2:26 PM GMT, Roger Pau Monne wrote:
> When using a unique per-vCPU root page table the per-domain region becomes
> per-vCPU, and hence the mapcache is no longer shared between all vCPUs of a
> domain. Introduce per-vCPU mapcache structures, and modify map_domain_page()
> to create per-vCPU mappings when possible. Note the lock is also not needed
> when using per-vCPU mapcaches, as the structure is no longer shared.
>
> This introduces some duplication in the domain and vcpu structures, as both
> contain a mapcache field to support running with and without per-vCPU
> page-tables.
>
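Just to make sure I'm reading the layout right: IIUC the bits that are genuinely per-cache (entries, cursor and the inuse/garbage bitmaps) move into a common struct mapcache, which is then embedded both in mapcache_domain and in mapcache_vcpu. Roughly like the sketch below; this is my reconstruction from the accessors in this patch rather than the literal asm/domain.h hunk, and the field types are guesses where the patch doesn't show them:

    /* Shared bookkeeping, embedded in both structures below (sketch). */
    struct mapcache {
        /* Number of usable slots, and the allocation cursor. */
        unsigned int entries;
        unsigned int cursor;
        /* Which slots are in use, and which are garbage to be reaped. */
        unsigned long *inuse;
        unsigned long *garbage;
    };

    struct mapcache_domain {
        struct mapcache cache;
        /* Only meaningful when the cache is shared across vCPUs. */
        spinlock_t lock;
        unsigned int epoch;
        uint32_t tlbflush_timestamp;
    };

    struct mapcache_vcpu {
        struct mapcache cache;   /* Used when d->arch.vcpu_pt is set. */
        unsigned int shadow_epoch;
        struct vcpu_maphash_entry hash[MAPHASH_ENTRIES];
    };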
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> xen/arch/x86/domain_page.c | 90 ++++++++++++++++++++-----------
> xen/arch/x86/include/asm/domain.h | 20 ++++---
> 2 files changed, 71 insertions(+), 39 deletions(-)
>
> diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> index 1372be20224e..65900d6218f8 100644
> --- a/xen/arch/x86/domain_page.c
> +++ b/xen/arch/x86/domain_page.c
> @@ -74,7 +74,9 @@ void *map_domain_page(mfn_t mfn)
> struct vcpu *v;
> struct mapcache_domain *dcache;
> struct mapcache_vcpu *vcache;
> + struct mapcache *cache;
> struct vcpu_maphash_entry *hashent;
> + struct domain *d;
>
> #ifdef NDEBUG
> if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> @@ -85,9 +87,12 @@ void *map_domain_page(mfn_t mfn)
> if ( !v || !is_pv_vcpu(v) )
> return mfn_to_virt(mfn_x(mfn));
>
> - dcache = &v->domain->arch.pv.mapcache;
> + d = v->domain;
> + dcache = &d->arch.pv.mapcache;
> vcache = &v->arch.pv.mapcache;
> - if ( !dcache->inuse )
> + cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> + : &d->arch.pv.mapcache.cache;
> + if ( !cache->inuse )
> return mfn_to_virt(mfn_x(mfn));
>
> perfc_incr(map_domain_page_count);
> @@ -98,17 +103,18 @@ void *map_domain_page(mfn_t mfn)
> if ( hashent->mfn == mfn_x(mfn) )
> {
> idx = hashent->idx;
> - ASSERT(idx < dcache->entries);
> + ASSERT(idx < cache->entries);
> hashent->refcnt++;
> ASSERT(hashent->refcnt);
> ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
> goto out;
> }
>
> - spin_lock(&dcache->lock);
> + if ( !d->arch.vcpu_pt )
> + spin_lock(&dcache->lock);
Hmmm. I wonder whether we might not want a nospec here...
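i.e. something along the lines of the sketch below, so the path that skips the lock still gets a speculation barrier before entering the critical section. Just a sketch: I'm assuming block_lock_speculation() is the right primitive here, though evaluate_nospec() on d->arch.vcpu_pt might be the better fit:

    if ( !d->arch.vcpu_pt )
        spin_lock(&dcache->lock);
    else
        /* No lock taken: still block speculation into the critical region. */
        block_lock_speculation();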
>
> /* Has some other CPU caused a wrap? We must flush if so. */
> - if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
> + if ( unlikely(!d->arch.vcpu_pt && dcache->epoch != vcache->shadow_epoch) )
> {
> vcache->shadow_epoch = dcache->epoch;
> if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
> @@ -118,21 +124,21 @@ void *map_domain_page(mfn_t mfn)
> }
> }
>
> - idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
> - if ( unlikely(idx >= dcache->entries) )
> + idx = find_next_zero_bit(cache->inuse, cache->entries, cache->cursor);
> + if ( unlikely(idx >= cache->entries) )
> {
> unsigned long accum = 0, prev = 0;
>
> /* /First/, clean the garbage map and update the inuse list. */
> - for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
> + for ( i = 0; i < BITS_TO_LONGS(cache->entries); i++ )
> {
> accum |= prev;
> - dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
> - prev = ~dcache->inuse[i];
> + cache->inuse[i] &= ~xchg(&cache->garbage[i], 0);
> + prev = ~cache->inuse[i];
> }
>
> - if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
> - idx = find_first_zero_bit(dcache->inuse, dcache->entries);
> + if ( accum | (prev & BITMAP_LAST_WORD_MASK(cache->entries)) )
> + idx = find_first_zero_bit(cache->inuse, cache->entries);
> else
> {
> /* Replace a hash entry instead. */
> @@ -152,19 +158,23 @@ void *map_domain_page(mfn_t mfn)
> i = 0;
> } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
> }
> - BUG_ON(idx >= dcache->entries);
> + BUG_ON(idx >= cache->entries);
>
> /* /Second/, flush TLBs. */
> perfc_incr(domain_page_tlb_flush);
> flush_tlb_local();
> - vcache->shadow_epoch = ++dcache->epoch;
> - dcache->tlbflush_timestamp = tlbflush_current_time();
> + if ( !d->arch.vcpu_pt )
> + {
> + vcache->shadow_epoch = ++dcache->epoch;
> + dcache->tlbflush_timestamp = tlbflush_current_time();
> + }
> }
>
> - set_bit(idx, dcache->inuse);
> - dcache->cursor = idx + 1;
> + set_bit(idx, cache->inuse);
> + cache->cursor = idx + 1;
>
> - spin_unlock(&dcache->lock);
> + if ( !d->arch.vcpu_pt )
> + spin_unlock(&dcache->lock);
... and here.
>
> l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
>
> @@ -178,6 +188,7 @@ void unmap_domain_page(const void *ptr)
> unsigned int idx;
> struct vcpu *v;
> struct mapcache_domain *dcache;
> + struct mapcache *cache;
> unsigned long va = (unsigned long)ptr, mfn, flags;
> struct vcpu_maphash_entry *hashent;
>
> @@ -190,7 +201,9 @@ void unmap_domain_page(const void *ptr)
> ASSERT(v && is_pv_vcpu(v));
>
> dcache = &v->domain->arch.pv.mapcache;
> - ASSERT(dcache->inuse);
> + cache = v->domain->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> + : &v->domain->arch.pv.mapcache.cache;
> + ASSERT(cache->inuse);
>
> idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
> mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
> @@ -213,7 +226,7 @@ void unmap_domain_page(const void *ptr)
> hashent->mfn);
> l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
> /* /Second/, mark as garbage. */
> - set_bit(hashent->idx, dcache->garbage);
> + set_bit(hashent->idx, cache->garbage);
> }
>
> /* Add newly-freed mapping to the maphash. */
> @@ -225,7 +238,7 @@ void unmap_domain_page(const void *ptr)
> /* /First/, zap the PTE. */
> l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
> /* /Second/, mark as garbage. */
> - set_bit(idx, dcache->garbage);
> + set_bit(idx, cache->garbage);
> }
>
> local_irq_restore(flags);
> @@ -234,7 +247,6 @@ void unmap_domain_page(const void *ptr)
> void mapcache_domain_init(struct domain *d)
> {
> struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> - unsigned int bitmap_pages;
>
> ASSERT(is_pv_domain(d));
>
> @@ -243,13 +255,12 @@ void mapcache_domain_init(struct domain *d)
> return;
> #endif
>
> + if ( d->arch.vcpu_pt )
> + return;
> +
> BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
> 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)))
> >
> MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
> - bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> - dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> - dcache->garbage = dcache->inuse +
> - (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
>
> spin_lock_init(&dcache->lock);
> }
> @@ -258,30 +269,45 @@ int mapcache_vcpu_init(struct vcpu *v)
> {
> struct domain *d = v->domain;
> struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> + struct mapcache *cache;
> unsigned long i;
> - unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
> + unsigned int ents = (d->arch.vcpu_pt ? 1 : d->max_vcpus) *
> + MAPCACHE_VCPU_ENTRIES;
> unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
>
> - if ( !is_pv_vcpu(v) || !dcache->inuse )
> + if ( !is_pv_vcpu(v) )
> return 0;
>
> - if ( ents > dcache->entries )
> + cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> + : &dcache->cache;
> +
> + if ( !cache->inuse )
> + return 0;
> +
> + if ( ents > cache->entries )
> {
> /* Populate page tables. */
> int rc = create_perdomain_mapping(v, MAPCACHE_VIRT_START, ents,
> false);
> + const unsigned int bitmap_pages =
> + PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> +
> + cache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> + cache->garbage = cache->inuse +
> + (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
> +
>
> /* Populate bit maps. */
> if ( !rc )
> - rc = create_perdomain_mapping(v, (unsigned long)dcache->inuse,
> + rc = create_perdomain_mapping(v, (unsigned long)cache->inuse,
> nr, true);
> if ( !rc )
> - rc = create_perdomain_mapping(v, (unsigned long)dcache->garbage,
> + rc = create_perdomain_mapping(v, (unsigned long)cache->garbage,
> nr, true);
>
> if ( rc )
> return rc;
>
> - dcache->entries = ents;
> + cache->entries = ents;
> }
>
> /* Mark all maphash entries as not in use. */
Cheers,
Alejandro