[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 6/8] device tree, arm: supply a flat device tree to dom0



On Tue, 2012-02-28 at 16:54 +0000, David Vrabel wrote:
> From: David Vrabel <david.vrabel@xxxxxxxxxx>
> 
> Build a flat device tree for dom0 based on the one supplied to Xen.
> The following changes are made:
> 
>   * In the /chosen node, the xen,dom0-bootargs parameter is renamed to
>     bootargs.
> 
>   * In all memory nodes, the reg parameters are adjusted to reflect
>     the amount of memory dom0 can use.  The p2m is updated using this
>     info.

Do you also do
    * Hide the xen console UART from dom0

? (it seems not)

> Support for passing ATAGS to dom0 is removed.

> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  xen/arch/arm/Makefile         |    2 +
>  xen/arch/arm/domain_build.c   |  238 
> ++++++++++++++++++++++++++++++++++-------
>  xen/arch/arm/kernel.c         |    4 +-
>  xen/arch/arm/kernel.h         |    8 +-
>  xen/common/device_tree.c      |   44 +++++---
>  xen/include/xen/device_tree.h |    8 ++
>  6 files changed, 246 insertions(+), 58 deletions(-)
> 
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index da5096a..4c9517c 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -25,6 +25,8 @@ obj-y += vtimer.o
> 
>  #obj-bin-y += ....o
> 
> +CFLAGS += -I../../common/libfdt

This suggests that something ought to be moved to xen/include/xen.

> +
>  ifdef CONFIG_DTB_FILE
>  obj-y += dtb.o
>  AFLAGS += -DCONFIG_DTB_FILE=\"$(CONFIG_DTB_FILE)\"
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 9240209..ca8c706 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -6,6 +6,11 @@
>  #include <xen/sched.h>
>  #include <asm/irq.h>
>  #include <asm/regs.h>
> +#include <xen/errno.h>
> +#include <xen/device_tree.h>
> +#include <xen/guest_access.h>
> +
> +#include <libfdt.h>
> 
>  #include "gic.h"
>  #include "kernel.h"
> @@ -27,43 +32,190 @@ struct vcpu *__init alloc_dom0_vcpu0(void)
>      return alloc_vcpu(dom0, 0, 0);
>  }
> 
> -extern void guest_mode_entry(void);
> +static void set_memory_reg(struct domain *d, struct kernel_info *dom0,
> +                           const void *fdt,
> +                           const u32 *cell, int address_cells, int 
> size_cells,
> +                           u32 *new_cell, int *len)
> +{
> +    int reg_size = (address_cells + size_cells) * sizeof(*cell);
> +    int l;
> +    u64 start;
> +    u64 size;
> +
> +    l = *len;
> +
> +    while (dom0->unassigned_mem > 0 && l > reg_size
> +           && dom0->mem.nr_banks < NR_MEM_BANKS)
> +    {
> +        device_tree_get_reg(&cell, address_cells, size_cells, &start, &size);
> +        if (size > dom0->unassigned_mem)
> +            size = dom0->unassigned_mem;

I somehow can't find any struct members named unassigned_mem in struct
domain (or in my whole tree). I'm obviously missing something...

... which is that there is a local "struct kernel_info *dom0" shadowing
the global "struct domain *dom0". I think another name for the local var
would be useful ;-)

> +
> +        device_tree_set_reg(&new_cell, address_cells, size_cells, start, 
> size);
> +
> +        printk("Populate P2M %#llx->%#llx\n", start, start + size);
> +        p2m_populate_ram(d, start, start + size);
> +        dom0->mem.bank[dom0->mem.nr_banks].start = start;
> +        dom0->mem.bank[dom0->mem.nr_banks].size = size;
> +        dom0->unassigned_mem -= size;
> +
> +        l -= reg_size;
> +    }
> +
> +    *len -= l;
> +}
> +
> +static int write_properties(struct domain *d, struct kernel_info *dom0,
> +                            const void *fdt,
> +                            int node, const char *name, int depth,
> +                            u32 address_cells, u32 size_cells)
> +{
> +    int prop;
> +
> +    for (prop = fdt_first_property_offset(fdt, node);
> +         prop >= 0;
> +         prop = fdt_next_property_offset(fdt, prop))
> +    {
> +        const struct fdt_property *p;
> +        const char *prop_name;
> +        const char *prop_data;
> +        int prop_len;
> +        char *new_data = NULL;
> +
> +        p = fdt_get_property_by_offset(fdt, prop, NULL);
> +        prop_name = fdt_string(fdt, fdt32_to_cpu(p->nameoff));
> +        prop_data = p->data;
> +        prop_len  = fdt32_to_cpu(p->len);
> +
> +        /*
> +         * In chosen node: replace bootargs with value from
> +         * xen,dom0-bootargs.
> +         */
> +        if (device_tree_node_matches(fdt, node, "chosen")) {
> +            if (strcmp(prop_name, "bootargs") == 0)
> +                continue;
> +            if (strcmp(prop_name, "xen,dom0-bootargs") == 0)
> +                prop_name = "bootargs";
> +        }
> +
> +        /*
> +         * In a memory node: adjust reg property.
> +         */
> +        if (device_tree_node_matches(fdt, node, "memory")) {
> +            if (strcmp(prop_name, "reg") == 0) {
> +                new_data = xzalloc_bytes(prop_len);
> +                if (new_data  == NULL)
> +                    return -ENOMEM;
> +
> +                set_memory_reg(d, dom0, fdt,
> +                               (u32 *)prop_data, address_cells, size_cells,
> +                               (u32 *)new_data, &prop_len);
> +                prop_data = new_data;

Need to free the previous value of prop_data?

> +            }
> +        }
> +
> +        /*
> +         * TODO: Should call map_mmio_regions() for all devices in the
> +         * tree that have a "reg" parameter (except cpus).  This
> +         * requires that adjacent regions are coalesced and rounded up
> +         * to whole pages.

Does map_mmio_regions currently require you to coalesce? If it was smart
enough to use larger mappings for contiguous areas (which it isn't, but
probably should?) then I suppose coalescing would be useful.

> +         */
> +
> +        fdt_property(dom0->fdt, prop_name, prop_data, prop_len);
> +
> +        xfree(new_data);

Does fdt_property duplicate prop_data? (answer: yes)

> +    }
> +
> +    if (prop == -FDT_ERR_NOTFOUND)
> +        return 0;
> +    return prop;
> +}
> +
> +static int write_nodes(struct domain *d, struct kernel_info *dom0,
> +                       const void *fdt)
> +{
> +    int node;
> +    int depth = 0, last_depth = -1;
> +    u32 address_cells[DEVICE_TREE_MAX_DEPTH];
> +    u32 size_cells[DEVICE_TREE_MAX_DEPTH];

In a previous patch you had a local MAX_DEPTH. Shouldn't this be used
instead.

(comes back later: oh, I see it is. good ;-))

> +    int ret;
> +
> +    for (node = 0, depth = 0;
> +         node >= 0 && depth >= 0;
> +         node = fdt_next_node(fdt, node, &depth))
> +    {
> +        const char *name;
> +
> +        name = fdt_get_name(fdt, node, NULL);
> +
> +        if (depth >= DEVICE_TREE_MAX_DEPTH) {
> +            printk("warning: node `%s' is nested too deep\n", name);
> +            continue;
> +        }
> +
> +        while (last_depth-- >= depth)
> +            fdt_end_node(dom0->fdt);
> +
> +        address_cells[depth] = device_tree_get_u32(fdt, node, 
> "#address-cells");
> +        size_cells[depth] = device_tree_get_u32(fdt, node, "#size-cells");
> +
> +        fdt_begin_node(dom0->fdt, name);
> 
> -static void setup_linux_atag(paddr_t tags, paddr_t ram_s, paddr_t ram_e)
> +        ret = write_properties(d, dom0, fdt, node, name, depth,
> +                               address_cells[depth-1], size_cells[depth-1]);
> +        if (ret < 0)
> +            return ret;
> +
> +        last_depth = depth;
> +    }
> +
> +    while (last_depth-- >= 0)
> +        fdt_end_node(dom0->fdt);
> +
> +    return 0;
> +}
> +
> +static int prepare_dtb(struct domain *d, struct kernel_info *dom0)
>  {
> -    paddr_t ma = gvirt_to_maddr(tags);
> -    void *map = map_domain_page(ma>>PAGE_SHIFT);
> -    void *p = map + (tags & (PAGE_SIZE - 1));
> -    char cmdline[] = "earlyprintk=xenboot console=ttyAMA1 root=/dev/mmcblk0 
> debug rw";
> +    void *fdt;
> +    int new_size;
> +    int ret;
> 
> -    /* not enough room on this page for all the tags */
> -    BUG_ON(PAGE_SIZE - (tags & (PAGE_SIZE - 1)) < 8 * sizeof(uint32_t));
> +    fdt = device_tree_flattened;
> 
> -#define TAG(type, val) *(type*)p = val; p+= sizeof(type)
> +    new_size = fdt_totalsize(fdt) + 8192;

2*PAGE_SIZE? Or is this number magic in some other way?

> +    dom0->fdt = xmalloc_bytes(new_size);
> +    if (dom0->fdt == NULL)
> +        return -ENOMEM;
> 
> -    /* ATAG_CORE */
> -    TAG(uint32_t, 2);
> -    TAG(uint32_t, 0x54410001);
> +    ret = fdt_create(dom0->fdt, new_size);
> +    if (ret < 0)
> +        goto err;
> 
> -    /* ATAG_MEM */
> -    TAG(uint32_t, 4);
> -    TAG(uint32_t, 0x54410002);
> -    TAG(uint32_t, (ram_e - ram_s) & 0xFFFFFFFF);
> -    TAG(uint32_t, ram_s & 0xFFFFFFFF);
> +    fdt_finish_reservemap(dom0->fdt);
> 
> -    /* ATAG_CMDLINE */
> -    TAG(uint32_t, 2 + ((strlen(cmdline) + 4) >> 2));
> -    TAG(uint32_t, 0x54410009);
> -    memcpy(p, cmdline, strlen(cmdline) + 1);
> -    p += ((strlen(cmdline) + 4) >> 2) << 2;
> +    ret = write_nodes(d, dom0, fdt);
> +    if (ret < 0)
> +        goto err;
> 
> -    /* ATAG_NONE */
> -    TAG(uint32_t, 0);
> -    TAG(uint32_t, 0);
> +    fdt_finish(dom0->fdt);
> 
> -#undef TAG
> +    device_tree_dump(dom0->fdt);
> +
> +    dom0->fdt = dom0->fdt;
> +    return 0;
> +
> +  err:
> +    xfree(dom0->fdt);
> +    return ret;
> +}
> +
> +static void dtb_load(struct kernel_info *dom0)
> +{
> +    void *dtb_virt = (void *)(u32)dom0->dtb_paddr;

This is part of the 1:1 map? Shuldn't this be either a variant of __va
or some sort of explicit map create function?

> 
> -    unmap_domain_page(map);
> +    raw_copy_to_guest(dtb_virt, dom0->fdt, fdt_totalsize(dom0->fdt));

Oh, so dtb_virt is a _dom0_ virtual address, which because we are
building the domain with the stage 1 MMU disabled == the phys addr.
Might be worthy of a comment? Our use of __user for such functions seems
a little inconsistent or I would suggest
        void * __user dtb_virt = ...
(maybe that's a good idea anyway?)

Do we arrange somewhere that dtb_paddr < 4G ? I suppose we must...

> +    xfree(dom0->fdt);
>  }
> 
>  int construct_dom0(struct domain *d)
> @@ -81,22 +233,27 @@ int construct_dom0(struct domain *d)
> 
>      printk("*** LOADING DOMAIN 0 ***\n");
> 
> -    /* 128M at 2G physical */
> -    /* TODO size and location from DT. */
> -    kinfo.ram_start = 0x80000000;
> -    kinfo.ram_end   = 0x88000000;
> +    d->max_pages = ~0U;
> 
> -    rc = kernel_prepare(&kinfo);
> -    if (rc < 0)
> +    if ( (rc = p2m_alloc_table(d)) != 0 )
>          return rc;
> 
> -    d->max_pages = ~0U;
> +    kinfo.unassigned_mem = 0x08000000; /* XXX */
> 
> -    if ( (rc = p2m_alloc_table(d)) != 0 )
> +    rc = prepare_dtb(d, &kinfo);
> +    if (rc < 0)
>          return rc;
> 
> -    printk("Populate P2M %#llx->%#llx\n", kinfo.ram_start, kinfo.ram_end);
> -    p2m_populate_ram(d, kinfo.ram_start, kinfo.ram_end);
> +    /* First RAM bank must be below 4 GiB or a 32-bit dom0 kernel
> +       cannot be loaded. */

In principal we could relocate a bank to a hole under 4G but I expect
that all machines will meet this constraint, or they'd have trouble
booting native.

BTW, does this DTB stuff mean that we no longer use the 40 bit physical
address alias to access RAM form the hypervisor? Not a big problem, we
just did that to help shake out 32 bitness assumptions.

> +    if (kinfo.mem.bank[0].start >= 1ull << 32) {
> +        printk("No memory accessible to 32 bit dom0.");
> +        return -EINVAL;
> +    }
> +
> +    rc = kernel_prepare(&kinfo);
> +    if (rc < 0)
> +        return rc;
> 
>      printk("Map CS2 MMIO regions 1:1 in the P2M %#llx->%#llx\n", 
> 0x18000000ULL, 0x1BFFFFFFULL);
>      map_mmio_regions(d, 0x18000000, 0x1BFFFFFF, 0x18000000);
> @@ -124,13 +281,12 @@ int construct_dom0(struct domain *d)
>      /* Enable second stage translation */
>      WRITE_CP32(READ_CP32(HCR) | HCR_VM, HCR); isb();
> 
> -    /* The following load uses domain's p2m */
> +    /* The following loads use the domain's p2m */
>      p2m_load_VTTBR(d);
> 
> +    dtb_load(&kinfo);
>      kernel_load(&kinfo);
> 
> -    setup_linux_atag(kinfo.ram_start + 0x100, kinfo.ram_start, 
> kinfo.ram_end);
> -
>      clear_bit(_VPF_down, &v->pause_flags);
> 
>      memset(regs, 0, sizeof(*regs));
> @@ -152,7 +308,7 @@ int construct_dom0(struct domain *d)
> 
>      regs->r0 = 0; /* SBZ */
>      regs->r1 = 2272; /* Machine NR: Versatile Express */
> -    regs->r2 = kinfo.ram_start + 0x100; /* ATAGS */
> +    regs->r2 = kinfo.dtb_paddr;
> 
>      WRITE_CP32(SCTLR_BASE, SCTLR);
> 
> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
> index dd757e5..b27b20d 100644
> --- a/xen/arch/arm/kernel.c
> +++ b/xen/arch/arm/kernel.c
> @@ -121,7 +121,7 @@ static int kernel_try_zimage_prepare(struct kernel_info 
> *info)
>       * at 32k from start of RAM.
>       */
>      if (start == 0)
> -        info->zimage.load_addr = info->ram_start + 0x8000;
> +        info->zimage.load_addr = info->mem.bank[0].start + 0x8000;
>      else
>          info->zimage.load_addr = start;
>      info->zimage.len = end - start;
> @@ -176,6 +176,8 @@ int kernel_prepare(struct kernel_info *info)
>  {
>      int rc;
> 
> +    info->dtb_paddr = info->mem.bank[0].start + 0x100;

assert(info->dtb_paddr < 0x100000000ULL); ???

I suppose it migth also be useful to check dtb_paddr + dtb_len < 4G too?

> +
>      rc = kernel_try_zimage_prepare(info);
>      if (rc < 0)
>          rc = kernel_try_elf_prepare(info);
> diff --git a/xen/arch/arm/kernel.h b/xen/arch/arm/kernel.h
> index 5caebe5..4533568 100644
> --- a/xen/arch/arm/kernel.h
> +++ b/xen/arch/arm/kernel.h
> @@ -7,11 +7,15 @@
>  #define __ARCH_ARM_KERNEL_H__
> 
>  #include <xen/libelf.h>
> +#include <xen/device_tree.h>
> 
>  struct kernel_info {
> +    void *fdt; /* flat device tree */
> +    paddr_t unassigned_mem; /* RAM not (yet) assigned to a bank */
> +    struct dt_mem_info mem;
> +
> +    paddr_t dtb_paddr;
>      paddr_t entry;
> -    paddr_t ram_start;
> -    paddr_t ram_end;
> 
>      void *kernel_img;
>      unsigned kernel_order;
> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
> index dbc4f61..2422fba 100644
> --- a/xen/common/device_tree.c
> +++ b/xen/common/device_tree.c
> @@ -24,7 +24,7 @@
>  struct dt_early_info __initdata early_info;
>  void *device_tree_flattened;
> 
> -static bool_t __init node_matches(const void *fdt, int node, const char 
> *match)
> +bool_t device_tree_node_matches(const void *fdt, int node, const char *match)
>  {
>      const char *name;
>      size_t len;
> @@ -48,14 +48,32 @@ static void __init get_val(const u32 **cell, u32 cells, 
> u64 *val)
>      }
>  }
> 
> -static void __init get_register(const u32 **cell, u32 address_cells, u32 
> size_cells,
> -                                u64 *start, u64 *size)
> +void device_tree_get_reg(const u32 **cell, u32 address_cells, u32 size_cells,
> +                         u64 *start, u64 *size)
>  {
>      get_val(cell, address_cells, start);
>      get_val(cell, size_cells, size);
>  }
> 
> -static u32 __init prop_by_name_u32(const void *fdt, int node, const char 
> *prop_name)
> +void set_val(u32 **cell, u32 cells, u64 val)
> +{
> +    u32 c = cells;
> +
> +    while (c--) {
> +        (*cell)[c] = cpu_to_fdt32(val);
> +        val >>= 32;
> +    }
> +    (*cell) += cells;
> +}
> +
> +void device_tree_set_reg(u32 **cell, u32 address_cells, u32 size_cells,
> +                         u64 start, u64 size)
> +{
> +    set_val(cell, address_cells, start);
> +    set_val(cell, size_cells, size);
> +}
> +
> +u32 device_tree_get_u32(const void *fdt, int node, const char *prop_name)
>  {
>      const struct fdt_property *prop;
> 
> @@ -66,8 +84,6 @@ static u32 __init prop_by_name_u32(const void *fdt, int 
> node, const char *prop_n
>      return fdt32_to_cpu(*(uint32_t*)prop->data);
>  }
> 
> -#define MAX_DEPTH 16
> -
>  /**
>   * device_tree_for_each_node - iterate over all device tree nodes
>   * @fdt: flat device tree.
> @@ -79,19 +95,19 @@ int device_tree_for_each_node(const void *fdt,
>  {
>      int node;
>      int depth;
> -    u32 address_cells[MAX_DEPTH];
> -    u32 size_cells[MAX_DEPTH];
> +    u32 address_cells[DEVICE_TREE_MAX_DEPTH];
> +    u32 size_cells[DEVICE_TREE_MAX_DEPTH];
>      int ret;
> 
>      for (node = 0, depth = 0;
>           node >=0 && depth >= 0;
>           node = fdt_next_node(fdt, node, &depth))
>      {
> -        if (depth >= MAX_DEPTH)
> +        if (depth >= DEVICE_TREE_MAX_DEPTH)
>              continue;
> 
> -        address_cells[depth] = prop_by_name_u32(fdt, node, "#address-cells");
> -        size_cells[depth] = prop_by_name_u32(fdt, node, "#size-cells");
> +        address_cells[depth] = device_tree_get_u32(fdt, node, 
> "#address-cells");
> +        size_cells[depth] = device_tree_get_u32(fdt, node, "#size-cells");
> 
>          ret = func(fdt, node, fdt_get_name(fdt, node, NULL), depth,
>                     address_cells[depth-1], size_cells[depth-1], data);
> @@ -104,7 +120,7 @@ int device_tree_for_each_node(const void *fdt,
>  static int dump_node(const void *fdt, int node, const char *name, int depth,
>                       u32 address_cells, u32 size_cells, void *data)
>  {
> -    char prefix[2*MAX_DEPTH + 1] = "";
> +    char prefix[2*DEVICE_TREE_MAX_DEPTH + 1] = "";
>      int i;
>      int prop;
> 
> @@ -165,7 +181,7 @@ static void __init process_memory_node(const void *fdt, 
> int node, const char *na
>      banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof(u32));
> 
>      for (i = 0; i < banks && early_info.mem.nr_banks < NR_MEM_BANKS; i++) {
> -        get_register(&cell, address_cells, size_cells, &start, &size);
> +        device_tree_get_reg(&cell, address_cells, size_cells, &start, &size);
>          early_info.mem.bank[early_info.mem.nr_banks].start = start;
>          early_info.mem.bank[early_info.mem.nr_banks].size = size;
>          early_info.mem.nr_banks++;
> @@ -176,7 +192,7 @@ static int __init early_scan_node(const void *fdt,
>                                    int node, const char *name, int depth,
>                                    u32 address_cells, u32 size_cells, void 
> *data)
>  {
> -    if (node_matches(fdt, node, "memory"))
> +    if (device_tree_node_matches(fdt, node, "memory"))
>          process_memory_node(fdt, node, name, address_cells, size_cells);
> 
>      return 0;
> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
> index b91b39f..510b5b4 100644
> --- a/xen/include/xen/device_tree.h
> +++ b/xen/include/xen/device_tree.h
> @@ -12,6 +12,8 @@
> 
>  #include <xen/types.h>
> 
> +#define DEVICE_TREE_MAX_DEPTH 16
> +
>  #define NR_MEM_BANKS 8
> 
>  struct membank {
> @@ -39,6 +41,12 @@ extern void *device_tree_flattened;
>  size_t device_tree_early_init(const void *fdt);
>  paddr_t device_tree_get_xen_paddr(void);
> 
> +void device_tree_get_reg(const u32 **cell, u32 address_cells, u32 size_cells,
> +                         u64 *start, u64 *size);
> +void device_tree_set_reg(u32 **cell, u32 address_cells, u32 size_cells,
> +                         u64 start, u64 size);
> +u32 device_tree_get_u32(const void *fdt, int node, const char *prop_name);
> +bool_t device_tree_node_matches(const void *fdt, int node, const char 
> *match);
>  int device_tree_for_each_node(const void *fdt,
>                                device_tree_node_func func, void *data);
>  void device_tree_dump(const void *fdt);
> --
> 1.7.2.5
> 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.