[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush



On Fri, Feb 14, 2020 at 12:34:30PM +0000, Wei Liu wrote:
> Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage
> of several hypercalls:
> 
>  * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST
>  * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
>  * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE
>  * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX
> 
> Pick the most efficient hypercalls available.
> 
> Signed-off-by: Wei Liu <liuwe@xxxxxxxxxxxxx>

Thanks! LGTM, I've just got a couple of comments below.

> ---
> v2:
> 1. Address Roger and Jan's comments re types etc.
> 2. Fix pointer arithmetic.
> 3. Misc improvement to code.
> ---
>  xen/arch/x86/guest/hyperv/Makefile  |   1 +
>  xen/arch/x86/guest/hyperv/private.h |   9 ++
>  xen/arch/x86/guest/hyperv/tlb.c     | 172 +++++++++++++++++++++++++++-
>  xen/arch/x86/guest/hyperv/util.c    |  74 ++++++++++++
>  4 files changed, 255 insertions(+), 1 deletion(-)
>  create mode 100644 xen/arch/x86/guest/hyperv/util.c
> 
> diff --git a/xen/arch/x86/guest/hyperv/Makefile 
> b/xen/arch/x86/guest/hyperv/Makefile
> index 18902c33e9..0e39410968 100644
> --- a/xen/arch/x86/guest/hyperv/Makefile
> +++ b/xen/arch/x86/guest/hyperv/Makefile
> @@ -1,2 +1,3 @@
>  obj-y += hyperv.o
>  obj-y += tlb.o
> +obj-y += util.o
> diff --git a/xen/arch/x86/guest/hyperv/private.h 
> b/xen/arch/x86/guest/hyperv/private.h
> index 509bedaafa..79a77930a0 100644
> --- a/xen/arch/x86/guest/hyperv/private.h
> +++ b/xen/arch/x86/guest/hyperv/private.h
> @@ -24,12 +24,21 @@
>  
>  #include <xen/cpumask.h>
>  #include <xen/percpu.h>
> +#include <xen/types.h>
>  
>  DECLARE_PER_CPU(void *, hv_input_page);
>  DECLARE_PER_CPU(void *, hv_vp_assist);
>  DECLARE_PER_CPU(unsigned int, hv_vp_index);
>  
> +static inline unsigned int hv_vp_index(unsigned int cpu)
> +{
> +    return per_cpu(hv_vp_index, cpu);
> +}
> +
>  int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
>                       unsigned int flags);
>  
> +/* Returns number of banks, -ev if error */
> +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask);
> +
>  #endif /* __XEN_HYPERV_PRIVIATE_H__  */
> diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
> index 48f527229e..f68e14f151 100644
> --- a/xen/arch/x86/guest/hyperv/tlb.c
> +++ b/xen/arch/x86/guest/hyperv/tlb.c
> @@ -19,15 +19,185 @@
>   * Copyright (c) 2020 Microsoft.
>   */
>  
> +#include <xen/cpu.h>
>  #include <xen/cpumask.h>
>  #include <xen/errno.h>
>  
> +#include <asm/guest/hyperv.h>
> +#include <asm/guest/hyperv-hcall.h>
> +#include <asm/guest/hyperv-tlfs.h>
> +
>  #include "private.h"
>  
> +/*
> + * It is possible to encode up to 4096 pages using the lower 12 bits
> + * in an element of gva_list
> + */
> +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
> +
> +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
> +                                  unsigned int order)
> +{
> +    unsigned long start = (unsigned long)va;
> +    unsigned long end = start + (PAGE_SIZE << order) - 1;
> +    unsigned int n = 0;
> +
> +    do {
> +        unsigned long remain = end - start;
> +
> +        gva_list[n] = start & PAGE_MASK;
> +
> +        /*
> +         * Use lower 12 bits to encode the number of additional pages
> +         * to flush
> +         */
> +        if ( remain >= HV_TLB_FLUSH_UNIT )
> +        {
> +            gva_list[n] |= ~PAGE_MASK;
> +            start += HV_TLB_FLUSH_UNIT;
> +        }
> +        else if ( remain )

remain is always going to be > 0, since the loop condition is end >
start, and hence this can be a plain else.

> +        {
> +            gva_list[n] |= (remain - 1) >> PAGE_SHIFT;
> +            start = end;
> +        }
> +
> +        n++;
> +    } while ( start < end );
> +
> +    return n;
> +}
> +
> +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
> +                             unsigned int flags)
> +{
> +    struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
> +    int nr_banks;
> +    unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
> +    uint64_t ret;
> +
> +    if ( !flush || local_irq_is_enabled() )
> +    {
> +        ASSERT_UNREACHABLE();
> +        return ~0ULL;
> +    }
> +
> +    if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
> +        return ~0ULL;
> +
> +    flush->address_space = 0;
> +    flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> +    if ( !(flags & FLUSH_TLB_GLOBAL) )
> +        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> +
> +    nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
> +    if ( nr_banks < 0 )
> +        return ~0ULL;

It would be nice to propagate the error code from cpumask_to_vpset,
but since the function can also return HyperV error codes this doesn't
make much sense.

> +
> +    max_gvas =
> +        (PAGE_SIZE - sizeof(*flush) - nr_banks *
> +         sizeof(flush->hv_vp_set.bank_contents[0])) /
> +        sizeof(uint64_t);       /* gva is represented as uint64_t */
> +
> +    /*
> +     * Flush the entire address space if va is NULL or if there is not
> +     * enough space for gva_list.
> +     */
> +    if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
> +        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
> +                                  nr_banks, virt_to_maddr(flush), 0);

You could just return hv_do_rep_hypercall(...); here, which will avoid
the else branch below and the indentation.

> +    else
> +    {
> +        uint64_t *gva_list =
> +            (uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + nr_banks;
> +        unsigned int gvas = fill_gva_list(gva_list, va, order);
> +
> +        BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t));
> +
> +        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
> +                                  gvas, nr_banks, virt_to_maddr(flush), 0);
> +    }
> +
> +    return ret;
> +}
> +
>  int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
>                       unsigned int flags)
>  {
> -    return -EOPNOTSUPP;
> +    unsigned long irq_flags;
> +    struct hv_tlb_flush *flush = this_cpu(hv_input_page);
> +    unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
> +    uint64_t ret;
> +
> +    ASSERT(flush);
> +    ASSERT(!cpumask_empty(mask));

I would also turn this into an if ( ... ) { ASSERT; return -EFOO; }

> +
> +    local_irq_save(irq_flags);
> +
> +    flush->address_space = 0;
> +    flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> +    flush->processor_mask = 0;
> +    if ( !(flags & FLUSH_TLB_GLOBAL) )
> +        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> +
> +    if ( cpumask_equal(mask, &cpu_online_map) )
> +        flush->flags |= HV_FLUSH_ALL_PROCESSORS;
> +    else
> +    {
> +        unsigned int cpu;
> +
> +        /*
> +         * Normally VP indices are in ascending order and match Xen's
> +         * idea of CPU ids. Check the last index to see if VP index is
> +         * >= 64. If so, we can skip setting up parameters for
> +         * non-applicable hypercalls without looking further.
> +         */
> +        if ( hv_vp_index(cpumask_last(mask)) >= 64 )
> +            goto do_ex_hypercall;
> +
> +        for_each_cpu ( cpu, mask )
> +        {
> +            uint32_t vpid = hv_vp_index(cpu);

This should be unsigned int now.

> +
> +            if ( vpid > ms_hyperv.max_vp_index )
> +            {
> +                local_irq_restore(irq_flags);
> +                return -ENXIO;
> +            }
> +
> +            if ( vpid >= 64 )
> +                goto do_ex_hypercall;
> +
> +            __set_bit(vpid, &flush->processor_mask);
> +        }

Would it make sense to abstract this as cpumask_to_processor_mask,
since you are adding cpumask_to_vpset below.

> +    }
> +
> +    max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

You could init this at declaration, and make it const static since the
value can be calculated at compile time AFAICT. Or create a define
with it (HV_TLB_FLUSH_MAX_GVAS?). There's no need to store it on the
stack.

> +
> +    /*
> +     * Flush the entire address space if va is NULL or if there is not
> +     * enough space for gva_list.
> +     */
> +    if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
> +        ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
> +                              virt_to_maddr(flush), 0);
> +    else
> +    {
> +        unsigned int gvas = fill_gva_list(flush->gva_list, va, order);

No need for the gvas variable, you can just call fill_gva_list at
hv_do_rep_hypercall.

> +
> +        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0,
> +                                  virt_to_maddr(flush), 0);
> +    }
> +
> +    goto done;
> +
> + do_ex_hypercall:
> +    ret = flush_tlb_ex(mask, va, flags);
> +
> + done:
> +    local_irq_restore(irq_flags);
> +
> +    return ret & HV_HYPERCALL_RESULT_MASK ? -ENXIO : 0;
>  }
>  
>  /*
> diff --git a/xen/arch/x86/guest/hyperv/util.c 
> b/xen/arch/x86/guest/hyperv/util.c
> new file mode 100644
> index 0000000000..e092593746
> --- /dev/null
> +++ b/xen/arch/x86/guest/hyperv/util.c
> @@ -0,0 +1,74 @@
> +/******************************************************************************
> + * arch/x86/guest/hyperv/util.c
> + *
> + * Hyper-V utility functions
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Copyright (c) 2020 Microsoft.
> + */
> +
> +#include <xen/cpu.h>
> +#include <xen/cpumask.h>
> +#include <xen/errno.h>
> +
> +#include <asm/guest/hyperv.h>
> +#include <asm/guest/hyperv-tlfs.h>
> +
> +#include "private.h"
> +
> +int cpumask_to_vpset(struct hv_vpset *vpset,
> +                     const cpumask_t *mask)
> +{
> +    int nr = 1;
> +    unsigned int cpu, vcpu_bank, vcpu_offset;
> +    unsigned int max_banks = ms_hyperv.max_vp_index / 64;
> +
> +    /* Up to 64 banks can be represented by valid_bank_mask */
> +    if ( max_banks >= 64 )
> +        return -E2BIG;
> +
> +    /* Clear all banks to avoid flushing unwanted CPUs */
> +    for ( vcpu_bank = 0; vcpu_bank <= max_banks; vcpu_bank++ )

I think this is off by one and should be vcpu_bank < max_banks? Or
else you are clearing one extra bank.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.