[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 4/5] xen/arm64: Add skeleton to harden the branch predictor aliasing attacks



On Tue, 16 Jan 2018, Julien Grall wrote:
> Aliasing attacks against CPU branch predictors can allow an attacker to
> redirect speculative control flow on some CPUs and potentially divulge
> information from one context to another.
> 
> This patch adds initial skeleton code behind a new Kconfig option to
> enable implementation-specific mitigations against these attacks for
> CPUs that are affected.
> 
> Most of the mitigations will have to be applied when entering the
> hypervisor from the guest context. For safety, it is applied at every
> exception entry. So there is potential for optimizing when receiving
> an exception at the same level.
> 
> Because the attack is against branch predictor, it is not possible to
> safely use branch instruction before the mitigation is applied.
> Therefore, this has to be done in the vector entry before jump to the
> helper handling a given exception.
> 
> On Arm64, each vector can hold 32 instructions. This leaves us 31
> instructions for the mitigation. The last one is the branch instruction
> to the helper.
> 
> Because a platform may have CPUs with different micro-architectures,
> a per-CPU vector table needs to be provided. Realistically, only a few
> different mitigations will be necessary. So provide a small set of
> vector tables. They will be re-used and patched with the mitigations
> on-demand.
> 
> This is based on the work done in Linux (see [1]).
> 
> This is part of XSA-254.
> 
> [1] git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
> branch kpti

If mentioning the original commit/branch is sufficient, then OK.
Otherwise, if we need to explicitly mention all the copyright holders,
then please add all the required signed-off-bys.


> Signed-off-by: Julien Grall <julien.grall@xxxxxxxxxx>
> ---
>  xen/arch/arm/Kconfig             |  20 ++++++
>  xen/arch/arm/arm64/Makefile      |   1 +
>  xen/arch/arm/arm64/bpi.S         |  64 ++++++++++++++++++
>  xen/arch/arm/cpuerrata.c         | 142 
> +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/traps.c             |   5 +-
>  xen/include/asm-arm/cpuerrata.h  |   1 +
>  xen/include/asm-arm/cpufeature.h |   3 +-
>  xen/include/asm-arm/processor.h  |   5 +-
>  8 files changed, 237 insertions(+), 4 deletions(-)
>  create mode 100644 xen/arch/arm/arm64/bpi.S
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index f58019d6ed..06fd85cc77 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -171,6 +171,26 @@ config ARM64_ERRATUM_834220
>  
>  endmenu
>  
> +config HARDEN_BRANCH_PREDICTOR
> +     bool "Harden the branch predictor against aliasing attacks" if EXPERT
> +     default y
> +     help
> +       Speculation attacks against some high-performance processors rely on
> +       being able to manipulate the branch predictor for a victim context by
> +       executing aliasing branches in the attacker context.  Such attacks
> +       can be partially mitigated against by clearing internal branch
> +       predictor state and limiting the prediction logic in some situations.
> +
> +       This config option will take CPU-specific actions to harden the
> +       branch predictor against aliasing attacks and may rely on specific
> +       instruction sequences or control bits being set by the system
> +       firmware.
> +
> +       If unsure, say Y.
> +
> +config ARM64_HARDEN_BRANCH_PREDICTOR
> +    def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR
> +
>  source "common/Kconfig"
>  
>  source "drivers/Kconfig"
> diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
> index 718fe44455..bb5c610b2a 100644
> --- a/xen/arch/arm/arm64/Makefile
> +++ b/xen/arch/arm/arm64/Makefile
> @@ -1,6 +1,7 @@
>  subdir-y += lib
>  
>  obj-y += cache.o
> +obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
>  obj-$(EARLY_PRINTK) += debug.o
>  obj-y += domctl.o
>  obj-y += domain.o
> diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
> new file mode 100644
> index 0000000000..6cc2f17529
> --- /dev/null
> +++ b/xen/arch/arm/arm64/bpi.S
> @@ -0,0 +1,64 @@
> +/*
> + * Contains CPU specific branch predictor invalidation sequences
> + *
> + * Copyright (C) 2018 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +.macro ventry target
> +    .rept 31
> +    nop
> +    .endr
> +    b        \target
> +.endm
> +
> +.macro vectors target
> +    ventry \target + 0x000
> +    ventry \target + 0x080
> +    ventry \target + 0x100
> +    ventry \target + 0x180
> +
> +    ventry \target + 0x200
> +    ventry \target + 0x280
> +    ventry \target + 0x300
> +    ventry \target + 0x380
> +
> +    ventry \target + 0x400
> +    ventry \target + 0x480
> +    ventry \target + 0x500
> +    ventry \target + 0x580
> +
> +    ventry \target + 0x600
> +    ventry \target + 0x680
> +    ventry \target + 0x700
> +    ventry \target + 0x780
> +.endm
> +
> +/*
> + * Populate 4 vector tables. This will cover up to 4 different
> + * micro-architectures in a system.
> + */
> +    .align   11
> +ENTRY(__bp_harden_hyp_vecs_start)
> +    .rept 4
> +    vectors hyp_traps_vector
> +    .endr
> +ENTRY(__bp_harden_hyp_vecs_end)
> +
> +/*
> + * Local variables:
> + * mode: ASM
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index c50d3331f2..76d98e771d 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -1,6 +1,148 @@
> +#include <xen/cpumask.h>
> +#include <xen/mm.h>
> +#include <xen/sizes.h>
> +#include <xen/smp.h>
> +#include <xen/spinlock.h>
> +#include <xen/vmap.h>
>  #include <asm/cpufeature.h>
>  #include <asm/cpuerrata.h>
>  
> +/* Override macros from asm/page.h to make them work with mfn_t */
> +#undef virt_to_mfn
> +#define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
> +
> +/* Hardening Branch predictor code for Arm64 */
> +#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
> +
> +#define VECTOR_TABLE_SIZE SZ_2K
> +
> +/*
> + * Number of available table vectors (this should be in-sync with
> + * arch/arm64/bpi.S
> + */
> +#define NR_BPI_HYP_VECS 4
> +
> +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
> +
> +/*
> + * Key for each slot. This is used to find whether a specific workaround
> + * had a slot assigned.
> + *
> + * The key is virtual address of the vector workaround
> + */
> +static uintptr_t bp_harden_slot_key[NR_BPI_HYP_VECS];
> +
> +/*
> + * [hyp_vec_start, hyp_vec_end[ corresponds to the first 31 instructions
> + * of each vector. The last (i.e 32th) instruction is used to branch to
> + * the original entry.
> + *
> + * Those instructions will be copied on each vector to harden them.
> + */
> +static bool copy_hyp_vect_bpi(unsigned int slot, const char *hyp_vec_start,
> +                              const char *hyp_vec_end)
> +{
> +    void *dst_remapped;
> +    const void *dst = __bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE;
> +    unsigned int i;
> +    mfn_t dst_mfn = virt_to_mfn(dst);
> +
> +    BUG_ON(((hyp_vec_end - hyp_vec_start) / 4) > 31);
> +
> +    /*
> +     * Vectors are part of the text that are mapped read-only. So re-map
> +     * the vector table to be able to update vectors.
> +     */
> +    dst_remapped = __vmap(&dst_mfn,
> +                          1UL << get_order_from_bytes(VECTOR_TABLE_SIZE),
> +                          1, 1, PAGE_HYPERVISOR, VMAP_DEFAULT);
> +    if ( !dst_remapped )
> +        return false;
> +
> +    dst_remapped += (vaddr_t)dst & ~PAGE_MASK;
> +
> +    for ( i = 0; i < VECTOR_TABLE_SIZE; i += 0x80 )
> +    {
> +        memcpy(dst_remapped + i, hyp_vec_start, hyp_vec_end - hyp_vec_start);
> +    }
> +
> +    clean_dcache_va_range(dst_remapped, VECTOR_TABLE_SIZE);
> +    invalidate_icache();
> +
> +    vunmap(dst_remapped);
> +
> +    return true;
> +}
> +
> +static bool __maybe_unused
> +install_bp_hardening_vec(const struct arm_cpu_capabilities *entry,
> +                         const char *hyp_vec_start,
> +                         const char *hyp_vec_end)
> +{
> +    static int last_slot = -1;
> +    static DEFINE_SPINLOCK(bp_lock);
> +    unsigned int i, slot = -1;
> +    bool ret = true;
> +
> +    /*
> +     * Enable callbacks are called on every CPU based on the
> +     * capabilities. So double-check whether the CPU matches the
> +     * entry.
> +     */
> +    if ( !entry->matches(entry) )
> +        return true;
> +    /*
> +     * No need to install hardened vector when the processor has
> +     * ID_AA64PRF0_EL1.CSV2 set.
> +     */
> +    if ( cpu_data[smp_processor_id()].pfr64.csv2 )
> +        return true;
> +
> +    spin_lock(&bp_lock);
> +    /*
> +     * Look up whether the hardening vector had a slot already
> +     * assigned.
> +     */
> +    for ( i = 0; i < 4; i++ )
> +    {
> +        if ( bp_harden_slot_key[i] == (uintptr_t)hyp_vec_start )
> +        {
> +            slot = i;
> +            break;
> +        }
> +    }
> +
> +    if ( slot == -1 )
> +    {
> +        last_slot++;
> +        /* Check we don't overrun the number of slots available. */
> +        BUG_ON(NR_BPI_HYP_VECS <= last_slot);
> +
> +        slot = last_slot;
> +        ret = copy_hyp_vect_bpi(slot, hyp_vec_start, hyp_vec_end);
> +
> +        /* Only update the slot if the copy succeeded. */
> +        if ( ret )
> +            bp_harden_slot_key[slot] = (uintptr_t)hyp_vec_start;
> +    }
> +
> +    if ( ret )
> +    {
> +        /* Install the new vector table. */
> +        WRITE_SYSREG((vaddr_t)(__bp_harden_hyp_vecs_start + slot * 
> VECTOR_TABLE_SIZE),
> +                     VBAR_EL2);
> +        isb();
> +    }
> +
> +    spin_unlock(&bp_lock);
> +
> +    return ret;
> +}
> +
> +#endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */
> +
>  #define MIDR_RANGE(model, min, max)     \
>      .matches = is_affected_midr_range,  \
>      .midr_model = model,                \
> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index 013c1600ec..a3e4919751 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -160,7 +160,10 @@ __initcall(update_serrors_cpu_caps);
>  
>  void init_traps(void)
>  {
> -    /* Setup Hyp vector base */
> +    /*
> +     * Setup Hyp vector base. Note they might get updated with the
> +     * branch predictor hardening.
> +     */
>      WRITE_SYSREG((vaddr_t)hyp_traps_vector, VBAR_EL2);
>  
>      /* Trap Debug and Performance Monitor accesses */
> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
> index 7de68361ff..23ebf367ea 100644
> --- a/xen/include/asm-arm/cpuerrata.h
> +++ b/xen/include/asm-arm/cpuerrata.h
> @@ -1,6 +1,7 @@
>  #ifndef __ARM_CPUERRATA_H__
>  #define __ARM_CPUERRATA_H__
>  
> +#include <xen/percpu.h>

This doesn't seem to be necessary?

In any case:

Reviewed-by: Stefano Stabellini <sstabellini@xxxxxxxxxx>


>  #include <asm/cpufeature.h>
>  #include <asm/alternative.h>
>  
> diff --git a/xen/include/asm-arm/cpufeature.h 
> b/xen/include/asm-arm/cpufeature.h
> index 21c65e198c..e557a095af 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -42,8 +42,9 @@
>  #define LIVEPATCH_FEATURE   4
>  #define SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT 5
>  #define SKIP_CTXT_SWITCH_SERROR_SYNC 6
> +#define ARM_HARDEN_BRANCH_PREDICTOR 7
>  
> -#define ARM_NCAPS           7
> +#define ARM_NCAPS           8
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index 3edab1b893..466da5da86 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -385,8 +385,9 @@ struct cpuinfo_arm {
>              unsigned long fp:4;   /* Floating Point */
>              unsigned long simd:4; /* Advanced SIMD */
>              unsigned long gic:4;  /* GIC support */
> -            unsigned long __res0:4;
> -            unsigned long __res1;
> +            unsigned long __res0:28;
> +            unsigned long csv2:4;
> +            unsigned long __res1:4;
>          };
>      } pfr64;
>  

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.