
Re: [Xen-devel] [RFC PATCH] xen/arm: Add 4-level page table for stage 2 translation



On Wed, 2014-04-30 at 17:45 +0530, vijay.kilari@xxxxxxxxx wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxxxxxxxxxx>
> 
> To support 48-bit Physical Address support, add 4-level
> page tables for stage 2 translation.By default stage 1 and
> stage 2 translation at EL2 are with 4-levels

Not just "by default" but "now"; "by default" implies there is a config
option.

> Configure TCR_EL2.IPS and VTCR_EL2.PS based on platform
> supported PA range at runtime
> 
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxxxxxxxxxx>
> ---
>  xen/arch/arm/arm64/head.S       |    7 ++-
>  xen/arch/arm/mm.c               |   11 +++-
>  xen/arch/arm/p2m.c              |  132 ++++++++++++++++++++++++++++++++++-----
>  xen/include/asm-arm/p2m.h       |    5 +-
>  xen/include/asm-arm/page.h      |  117 ++++++++++++++++++++++++++++++----
>  xen/include/asm-arm/processor.h |   14 ++++-
>  6 files changed, 249 insertions(+), 37 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index d151724..c0e0362 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -224,13 +224,16 @@ skip_bss:
>          ldr   x0, =MAIRVAL
>          msr   mair_el2, x0
>  
> +        mrs x1, ID_AA64MMFR0_EL1

Please pay attention to the indentation and alignment of the rest of
this file.

> +
>          /* Set up the HTCR:
> -         * PASize -- 40 bits / 1TB
> +         * PASize -- based on ID_AA64MMFR0_EL1.PARange value
>           * Top byte is used
>           * PT walks use Outer-Shareable accesses,
>           * PT walks are write-back, write-allocate in both cache levels,
>           * Full 64-bit address space goes through this table. */
> -        ldr   x0, =0x80822500
> +        ldr x0, =TCR_VAL_40PA
> +        bfi x0, x1, #16, #3

Notice that the rest of this file is extensively commented. This should
say something like /* Or in the PASize from x1 */

>          msr   tcr_el2, x0
>  
>          /* Set up the SCTLR_EL2:
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 305879f..d577b23 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -382,13 +382,18 @@ void __cpuinit setup_virt_paging(void)
>      /* SH0=00, ORGN0=IRGN0=01
>       * SL0=01 (Level-1)
>       * ARVv7: T0SZ=(1)1000 = -8 (32-(-8) = 40 bit physical addresses)
>       * ARMv8: T0SZ=01 1000 = 24 (64-24   = 40 bit physical addresses)

This comment is no longer correct.

>       *        PS=010 == 40 bits
>       */
>  #ifdef CONFIG_ARM_32
> -    WRITE_SYSREG32(0x80002558, VTCR_EL2);
> +    WRITE_SYSREG32(VTCR_VAL, VTCR_EL2);
>  #else
> -    WRITE_SYSREG32(0x80022558, VTCR_EL2);
> +    /* Change PS to 48 and T0SZ = 16  SL0 - 2 to take VA 48 bit */

SL0 should not be variable, it should just be unconditionally configured
for 4 level page tables. Otherwise all sorts of other places need to
know dynamically how many levels the page tables have.

TBH, I'm not sure why we don't just use 48bit IPAs unconditionally.
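
Something like the following, perhaps (an untested sketch, assuming a
hypothetical VTCR_VAL_BASE carrying the common bits, as suggested
further down, with T0SZ and SL0 fixed for a 48-bit IPA, 4-level walk):

    void __cpuinit setup_virt_paging(void)
    {
        /* Common bits, with T0SZ/SL0 fixed for a 4-level, 48-bit walk. */
        uint32_t vtcr = VTCR_VAL_BASE;

        /* Only the output PA range needs to vary with the platform. */
        if ( current_cpu_data.mm64.pa_range == VTCR_PS_48BIT )
            vtcr |= VTCR_PS_48BIT << VTCR_PS_SHIFT;
        else
            vtcr |= VTCR_PS_40BIT << VTCR_PS_SHIFT;

        WRITE_SYSREG32(vtcr, VTCR_EL2);
        isb();
    }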

> +    if ( current_cpu_data.mm64.pa_range == VTCR_PS_48BIT )
> +        WRITE_SYSREG32(VTCR_VAL_48PA, VTCR_EL2);
> +    else
> +        /* Consider by default 40 PA support for ARM64 */
> +        WRITE_SYSREG32(VTCR_VAL_40PA, VTCR_EL2);
>  #endif
>      isb();
>  }
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index d00c882..bdaab46 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -10,29 +10,38 @@
>  #include <asm/hardirq.h>
>  #include <asm/page.h>
>  
> +#ifdef CONFIG_ARM_64
> +/* Zeroeth level is of 1 page size */
> +#define P2M_FIRST_ORDER 0
> +#else
>  /* First level P2M is 2 consecutive pages */
>  #define P2M_FIRST_ORDER 1
> +#endif
>  #define P2M_FIRST_ENTRIES (LPAE_ENTRIES<<P2M_FIRST_ORDER)

I think P2M_FIRST_ENTRIES is now only used under ifdef CONFIG_ARM_32
(or !ARM64). If I'm right please move it inside the #else here.
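
i.e. something like (sketch):

    #ifdef CONFIG_ARM_64
    /* Zeroeth level is of 1 page size */
    #define P2M_FIRST_ORDER 0
    #else
    /* First level P2M is 2 consecutive pages */
    #define P2M_FIRST_ORDER 1
    #define P2M_FIRST_ENTRIES (LPAE_ENTRIES<<P2M_FIRST_ORDER)
    #endif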

>  
>  void dump_p2m_lookup(struct domain *d, paddr_t addr)
>  {
>      struct p2m_domain *p2m = &d->arch.p2m;
> -    lpae_t *first;
> +    lpae_t *lookup;
>  
>      printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr);
>  
> +#ifdef CONFIG_ARM_64
> +    if ( zeroeth_linear_offset(addr) > LPAE_ENTRIES )
> +#else
>      if ( first_linear_offset(addr) > LPAE_ENTRIES )
> +#endif
>      {
> -        printk("Cannot dump addresses in second of first level pages...\n");
> +        printk("Cannot dump addresses in second of 
> first(ARM32)/zeroeth(ARM64) level pages...\n");

"Cannot dump addresses in second page of a concatenated initial paging
level" or something perhaps.

Is concatenation possible at level 0? Table D5-9 in the ARMv8 ARM
suggests not, which would simplify this I think, by putting the bulk of
the above in an arm32 only ifdef.
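
If level 0 indeed cannot be concatenated then the bounds check is only
meaningful on arm32 and could reduce to something like (untested):

    #ifdef CONFIG_ARM_32
    /* Only the arm32 first level is concatenated (2 pages). */
    if ( first_linear_offset(addr) > LPAE_ENTRIES )
    {
        printk("Cannot dump addresses in second page of a concatenated"
               " initial paging level...\n");
        return;
    }
    #endif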

>          return;
>      }
>  
>      printk("P2M @ %p mfn:0x%lx\n",
> -           p2m->first_level, page_to_mfn(p2m->first_level));
> +           p2m->lookup_level, page_to_mfn(p2m->lookup_level));
>  
> -    first = __map_domain_page(p2m->first_level);
> -    dump_pt_walk(first, addr);
> -    unmap_domain_page(first);
> +    lookup = __map_domain_page(p2m->lookup_level);
> +    dump_pt_walk(lookup, addr);
> +    unmap_domain_page(lookup);
>  }
>  
>  void p2m_load_VTTBR(struct domain *d)
> @@ -44,6 +53,20 @@ void p2m_load_VTTBR(struct domain *d)
>      isb(); /* Ensure update is visible */
>  }
>  
> +#ifdef CONFIG_ARM_64
> +/*
> + * Map zeroeth level page that addr contains.
> + */
> +static lpae_t *p2m_map_zeroeth(struct p2m_domain *p2m, paddr_t addr)
> +{
> +    if ( zeroeth_linear_offset(addr) >= LPAE_ENTRIES )
> +        return NULL;
> +
> +    return __map_domain_page(p2m->lookup_level);
> +}
> +
> +#else
> +
>  static int p2m_first_level_index(paddr_t addr)
>  {
>      /*
> @@ -64,10 +87,11 @@ static lpae_t *p2m_map_first(struct p2m_domain *p2m, paddr_t addr)
>      if ( first_linear_offset(addr) >= P2M_FIRST_ENTRIES )
>          return NULL;
>  
> -    page = p2m->first_level + p2m_first_level_index(addr);
> +    page = p2m->lookup_level + p2m_first_level_index(addr);
>  
>      return __map_domain_page(page);
>  }
> +#endif
>  
>  /*
>   * Lookup the MFN corresponding to a domain's PFN.
> @@ -79,6 +103,9 @@ paddr_t p2m_lookup(struct domain *d, paddr_t paddr, p2m_type_t *t)
>  {
>      struct p2m_domain *p2m = &d->arch.p2m;
>      lpae_t pte, *first = NULL, *second = NULL, *third = NULL;
> +#ifdef CONFIG_ARM_64
> +    lpae_t *zeroeth = NULL;
> +#endif
>      paddr_t maddr = INVALID_PADDR;
>      p2m_type_t _t;
>  
> @@ -89,9 +116,29 @@ paddr_t p2m_lookup(struct domain *d, paddr_t paddr, p2m_type_t *t)
>  
>      spin_lock(&p2m->lock);
>  
> +#ifdef CONFIG_ARM_64
> +    zeroeth = p2m_map_zeroeth(p2m, paddr);
> +    if ( !zeroeth )
> +        goto err;
> +
> +    pte = zeroeth[zeroeth_table_offset(paddr)];
> +    /* Zeroeth level does not support block translation
> +     * so pte.p2m.table should be always set.
> +     * Just check for valid bit
> +     */
> +    if ( !pte.p2m.valid )
> +        goto done;
> +
> +#else
>      first = p2m_map_first(p2m, paddr);
>      if ( !first )
>          goto err;
> +#endif
> +
> +#ifdef CONFIG_ARM_64
> +    /* Map first level table */
> +    first = map_domain_page(pte.p2m.base);
> +#endif

You should merge this into the existing ifdef just above.
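
i.e. something like:

    #ifdef CONFIG_ARM_64
        zeroeth = p2m_map_zeroeth(p2m, paddr);
        if ( !zeroeth )
            goto err;

        pte = zeroeth[zeroeth_table_offset(paddr)];
        /* No block mappings at the zeroeth level, only check valid. */
        if ( !pte.p2m.valid )
            goto done;

        /* Map the first level table. */
        first = map_domain_page(pte.p2m.base);
    #else
        first = p2m_map_first(p2m, paddr);
        if ( !first )
            goto err;
    #endif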

>  
>      pte = first[first_table_offset(paddr)];
>      if ( !pte.p2m.valid || !pte.p2m.table )
> @@ -120,6 +167,9 @@ done:
>      if (third) unmap_domain_page(third);
>      if (second) unmap_domain_page(second);
>      if (first) unmap_domain_page(first);
> +#ifdef CONFIG_ARM_64
> +    if (zeroeth) unmap_domain_page(zeroeth);
> +#endif
>  
>  err:
>      spin_unlock(&p2m->lock);
> @@ -244,8 +294,14 @@ static int apply_p2m_changes(struct domain *d,
>      struct p2m_domain *p2m = &d->arch.p2m;
>      lpae_t *first = NULL, *second = NULL, *third = NULL;
>      paddr_t addr;
> -    unsigned long cur_first_page = ~0,
> -                  cur_first_offset = ~0,
> +#ifdef CONFIG_ARM_64
> +    lpae_t *zeroeth = NULL;
> +    unsigned long cur_zeroeth_page = ~0,
> +                  cur_zeroeth_offset = ~0;
> +#else
> +    unsigned long cur_first_page = ~0;
> +#endif
> +    unsigned long cur_first_offset = ~0,
>                    cur_second_offset = ~0;
>      unsigned long count = 0;
>      unsigned int flush = 0;
> @@ -260,6 +316,37 @@ static int apply_p2m_changes(struct domain *d,
>      addr = start_gpaddr;
>      while ( addr < end_gpaddr )
>      {
> +#ifdef CONFIG_ARM_64
> +        /* Find zeoeth offset and Map zeroeth page */
                     ^r

> +        if ( cur_zeroeth_page != zeroeth_table_offset(addr) )
> +        {
> +            if ( zeroeth ) unmap_domain_page(zeroeth);
> +            zeroeth = p2m_map_zeroeth(p2m, addr);
> +            if ( !zeroeth )
> +            {
> +                rc = -EINVAL;
> +                goto out;
> +            }
> +            cur_zeroeth_page = zeroeth_table_offset(addr);
> +        }
> +
> +        if ( !zeroeth[zeroeth_table_offset(addr)].p2m.valid )
> +        {
> +            if ( !populate )
> +            {
> +                addr = (addr + ZEROETH_SIZE) & ZEROETH_MASK;
> +                continue;
> +            }
> +            rc = p2m_create_table(d, &zeroeth[zeroeth_table_offset(addr)]);
> +            if ( rc < 0 )
> +            {
> +                printk("p2m_populate_ram: L0 failed\n");
> +                goto out;
> +            }
> +        } 
> +
> +        BUG_ON(!zeroeth[zeroeth_table_offset(addr)].p2m.valid);
> +#else
>          if ( cur_first_page != p2m_first_level_index(addr) )
>          {
>              if ( first ) unmap_domain_page(first);
> @@ -271,7 +358,16 @@ static int apply_p2m_changes(struct domain *d,
>              }
>              cur_first_page = p2m_first_level_index(addr);
>          }
> +#endif
>  
> +#ifdef CONFIG_ARM_64
> +        if ( cur_zeroeth_offset != zeroeth_table_offset(addr) )
> +        {
> +            if ( first ) unmap_domain_page(first);
> +            first = map_domain_page(zeroeth[zeroeth_table_offset(addr)].p2m.base);
> +            cur_zeroeth_offset = zeroeth_table_offset(addr);
> +        }
> +#endif

Again, I think you can do this in the previous ifdef, or possibly use a
simpler ifdef within what is currently the #else clause above, depending
on how much ends up looking the same.
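
e.g. mapping the first level once the zeroeth entry is known to be
valid, inside the same ifdef (rough sketch):

    #ifdef CONFIG_ARM_64
        /* ... zeroeth map/populate as above ... */
        BUG_ON(!zeroeth[zeroeth_table_offset(addr)].p2m.valid);

        if ( cur_zeroeth_offset != zeroeth_table_offset(addr) )
        {
            if ( first ) unmap_domain_page(first);
            first = map_domain_page(zeroeth[zeroeth_table_offset(addr)].p2m.base);
            cur_zeroeth_offset = zeroeth_table_offset(addr);
        }
    #else
        /* ... existing first level handling ... */
    #endif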

>          if ( !first[first_table_offset(addr)].p2m.valid )
>          {
>              if ( !populate )
> @@ -279,7 +375,6 @@ static int apply_p2m_changes(struct domain *d,
>                  addr = (addr + FIRST_SIZE) & FIRST_MASK;
>                  continue;
>              }
> -

Please drop these spurious changes.

(two more of the same follow later in the patch)
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index 3b39c45..3aa3623 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -13,8 +13,9 @@ struct p2m_domain {
>      /* Pages used to construct the p2m */
>      struct page_list_head pages;
>  
> -    /* Root of p2m page tables, 2 contiguous pages */
> -    struct page_info *first_level;
> +    /* ARMv7: Root of p2m page tables, 2 contiguous pages */
> +    /* ARMv8: Look up table is zeroeth level */
> +    struct page_info *lookup_level;

"root" might be a better name.


> diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> index 905beb8..8477206 100644
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -6,12 +6,13 @@
>  #include <public/xen.h>
>  #include <asm/processor.h>
>  
> +#ifdef CONFIG_ARM_64
> +#define PADDR_BITS              48
> +#else
>  #define PADDR_BITS              40
> +#endif

Move these to arm32/page.h and arm64/page.h I think; you'll need to move
the #include above mfn_to_xen_entry.
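
i.e. something like:

    /* xen/include/asm-arm/arm64/page.h */
    #define PADDR_BITS              48

    /* xen/include/asm-arm/arm32/page.h */
    #define PADDR_BITS              40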

> @@ -40,6 +41,94 @@
>  #define MAIR1VAL 0xff000004
>  #define MAIRVAL (MAIR0VAL|MAIR1VAL<<32)
>  
> +/* 
> + * VTCR register configuration for stage 2 translation
> + */
> +#define VTCR_TOSZ_40BIT  24
> +#define VTCR_TOSZ_48BIT  16

Please use (0xN << M) form for all of these. Also processor.h is the
right place for these sorts of defines.
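
e.g. the shape I mean (a sketch, in processor.h):

    #define VTCR_T0SZ(x)     ((x) << 0)
    #define VTCR_SL0(x)      ((x) << 6)
    #define VTCR_IRGN0_WBWA  (0x1 << 8)
    #define VTCR_ORGN0_WBWA  (0x1 << 10)
    #define VTCR_SH0_OS      (0x2 << 12)
    #define VTCR_TG0_4K      (0x0 << 14)
    #define VTCR_PS_40BIT    (0x2 << 16)
    #define VTCR_PS_48BIT    (0x5 << 16)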

> +#define VTCR_SL0_0       0x2
> +#define VTCR_SL0_1       0x1
> +#define VTCR_SL0_2       0x0
> +#define VTCR_SL0_SHIFT   6
> +#define VTCR_IRGN0_NC    0x0
> +#define VTCR_IRGN0_WBWA  0x1
> +#define VTCR_IRGN0_WT    0x2
> +#define VTCR_IRGN0_WB    0x3
> +#define VTCR_IRGN0_SHIFT 8
> +#define VTCR_ORGN0_NC    0x0
> +#define VTCR_ORGN0_WBWA  0x1
> +#define VTCR_ORGN0_WT    0x2
> +#define VTCR_ORGN0_WB    0x3
> +#define VTCR_ORGN0_SHIFT 10
> +#define VTCR_SH0_NS      0x0
> +#define VTCR_SH0_OS      0x2
> +#define VTCR_SH0_IS      0x3
> +#define VTCR_SH0_SHIFT   12
> +#define VTCR_TG0_4K      0x0
> +#define VTCR_TG0_64K     0x1
> +#define VTCR_TG0_SHIFT   14
> +#define VTCR_PS_32BIT    0x0
> +#define VTCR_PS_40BIT    0x2
> +#define VTCR_PS_48BIT    0x5
> +#define VTCR_PS_SHIFT    16
> +
> +#ifdef CONFIG_ARM_64
> +/* 48 bit VA to 48 bit PA */
> +#define VTCR_VAL_48PA  ((VTCR_TOSZ_48BIT)                    | \
> +                        (VTCR_SL0_0      << VTCR_SL0_SHIFT)   | \
> +                        (VTCR_IRGN0_WBWA << VTCR_IRGN0_SHIFT) | \
> +                        (VTCR_ORGN0_WBWA << VTCR_ORGN0_SHIFT) | \
> +                        (VTCR_SH0_OS     << VTCR_SH0_SHIFT)   | \
> +                        (VTCR_TG0_4K     << VTCR_TG0_SHIFT)   | \
> +                        (VTCR_PS_48BIT   << VTCR_PS_SHIFT))
> +
> +/* 40 bit VA to 40 bit PA */
> +#define VTCR_VAL_40PA  ((VTCR_TOSZ_40BIT)                     | \
> +                        (VTCR_SL0_1      << VTCR_SL0_SHIFT)   | \
> +                        (VTCR_IRGN0_WBWA << VTCR_IRGN0_SHIFT) | \
> +                        (VTCR_ORGN0_WBWA << VTCR_ORGN0_SHIFT) | \
> +                        (VTCR_SH0_OS     << VTCR_SH0_SHIFT)   | \
> +                        (VTCR_TG0_4K     << VTCR_TG0_SHIFT)   | \
> +                        (VTCR_PS_40BIT   << VTCR_PS_SHIFT))
> +#else
> +/* 40 bit VA to 32 bit PA */
> +#define VTCR_VAL  ((VTCR_TOSZ_40BIT)                     | \
> +                   (VTCR_SL0_1      << VTCR_SL0_SHIFT)   | \
> +                   (VTCR_IRGN0_WBWA << VTCR_IRGN0_SHIFT) | \
> +                   (VTCR_ORGN0_WBWA << VTCR_ORGN0_SHIFT) | \
> +                   (VTCR_SH0_OS     << VTCR_SH0_SHIFT)   | \
> +                   (VTCR_TG0_4K     << VTCR_TG0_SHIFT)   | \
> +                   (VTCR_PS_32BIT   << VTCR_PS_SHIFT))
> +#endif

IMHO this was fine as a suitably commented literal number in the
corresponding function and would continue to be if suitably commented.

If you feel you must go this route then please define a VTCR_VAL_BASE
with all the common bits. Also please consider or-ing in the variable
bits explicitly during setup_virt_paging(). Whatever you do, the
comment and the corresponding #defines should be in the same place (if
indeed the comment isn't then redundant).
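
e.g., keeping the names from this patch (sketch, common bits only):

    #define VTCR_VAL_BASE  ((VTCR_IRGN0_WBWA << VTCR_IRGN0_SHIFT) | \
                            (VTCR_ORGN0_WBWA << VTCR_ORGN0_SHIFT) | \
                            (VTCR_SH0_OS     << VTCR_SH0_SHIFT)   | \
                            (VTCR_TG0_4K     << VTCR_TG0_SHIFT))

with setup_virt_paging() then or-ing in T0SZ, SL0 and PS, along the
lines sketched earlier.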

> +
> +/* TCR register configuration for Xen Stage 1 translation*/
> +
> +#define TCR_TBI_USE_TBYTE  0x0
> +#define TCR_TBI_SHIFT      20
> +
> +#ifdef CONFIG_ARM_64
> +/*
> + * 48 bit VA to 40 bit PA
> + * if platform supports 48 bit PA update runtime in head.S

IMHO this was also fine as a (commented) 0xXXXX in head.S. It's no
clearer here like this.

> + */
> +#define TCR_VAL_40PA   ((VTCR_TOSZ_48BIT)                     | \
> +                        (VTCR_IRGN0_WBWA << VTCR_IRGN0_SHIFT) | \
> +                        (VTCR_ORGN0_WBWA << VTCR_ORGN0_SHIFT) | \
> +                        (VTCR_SH0_OS     << VTCR_SH0_SHIFT)   | \
> +                        (VTCR_TG0_4K     << VTCR_TG0_SHIFT)   | \
> +                        (VTCR_PS_40BIT   << VTCR_PS_SHIFT)    | \
> +                        (TCR_TBI_USE_TBYTE << TCR_TBI_SHIFT))
> +#endif
> +
>  /*
>   * Attribute Indexes.
>   *
> @@ -109,9 +198,10 @@ typedef struct __packed {
>      unsigned long af:1;         /* Access Flag */
>      unsigned long ng:1;         /* Not-Global */
>  
> -    /* The base address must be appropriately aligned for Block entries */
> -    unsigned long base:28;      /* Base address of block or next table */
> -    unsigned long sbz:12;       /* Must be zero */
> +    /* The base address must be appropriately aligned for Block entries.
> +     * base now can hold upto 36 bits to support 48 PA */

"up to" is two words, although I think the sentence you added is
unnecessary.

> +    unsigned long base:36;      /* Base address of block or next table */
> +    unsigned long sbz:4;       /* Must be zero */

This comment is no longer lined up.

>  
>      /* These seven bits are only used in Block entries and are ignored
>       * in Table entries. */
> @@ -144,9 +234,10 @@ typedef struct __packed {
>      unsigned long af:1;         /* Access Flag */
>      unsigned long sbz4:1;
>  
> -    /* The base address must be appropriately aligned for Block entries */
> -    unsigned long base:28;      /* Base address of block or next table */
> -    unsigned long sbz3:12;
> +    /* The base address must be appropriately aligned for Block entries.
> +     * base now can hold upto 36 bits to support 48 PA */

As above.

> +    unsigned long base:36;      /* Base address of block or next table */
> +    unsigned long sbz3:4;
>  
>      /* These seven bits are only used in Block entries and are ignored
>       * in Table entries. */
> @@ -169,10 +260,12 @@ typedef struct __packed {
>  
>      unsigned long pad2:10;
>  
> -    /* The base address must be appropriately aligned for Block entries */
> -    unsigned long base:28;      /* Base address of block or next table */
> +    /* The base address must be appropriately aligned for Block entries.
> +     * base now can hold upto 36 bits to support 48 PA */

As above.

> +    unsigned long base:36;      /* Base address of block or next table */
>  
> -    unsigned long pad1:24;
> +    //unsigned long pad1:24;

Please remove.

> +    unsigned long pad1:16;
>  } lpae_walk_t;
>  
>  typedef union {

Thanks,
Ian.


