
Re: [Xen-devel] [v3][PATCH 13/16] tools/libxl: detect and avoid conflicts with RDM



> From: Chen, Tiejun
> Sent: Thursday, June 11, 2015 9:15 AM
> 
> While building a VM, the HVM domain builder provides struct hvm_info_table{}
> to help hvmloader. Currently it includes two fields used by hvmloader to
> construct the guest e820 table, low_mem_pgend and high_mem_pgend. So we
> should check them to fix any conflict with RAM.
> 
> RMRR can theoretically reside in address space beyond 4G, but we never
> see this in the real world. So in order to avoid breaking the highmem
> layout we don't resolve highmem conflicts. Note this means a highmem RMRR
> can still be supported if there is no conflict.
> 
> But in the lowmem case, RMRRs may be scattered across the whole RAM space,
> and multiple RMRR entries would make the memory layout even more
> complicated, so it is hard to extend hvm_info_table{} in a way hvmloader
> could handle. So here we're trying to find a simple solution that avoids
> breaking the existing layout. When a conflict occurs,
> 
>     #1. Above a predefined boundary (default 2G)
>         - move lowmem_end below the reserved region to resolve the conflict;
> 
>     #2. Below a predefined boundary (default 2G)
>         - check the strict/relaxed policy.
>         The "strict" policy makes libxl fail. Note when both policies
>         are specified on a given region, 'strict' is always preferred.
>         The "relaxed" policy issues a warning message and also marks this
>         entry INVALID to indicate we shouldn't expose it to hvmloader.
> 
> Note this predefined boundary can be changed with the "rdm_mem_boundary"
> parameter in the .cfg file.
> 
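(For illustration only: a hypothetical guest config fragment using the new
option could look like the lines below. Only rdm_mem_boundary is introduced
by this patch; the other lines are ordinary xl options.)

builder = "hvm"
memory = 4096
pci = [ '01:00.0' ]
# Check RDM conflicts against a 3G boundary instead of the default 2G.
rdm_mem_boundary = 3072

The value is given in megabytes, matching the rdm_mem_boundary parsing added
to xl_cmdimpl.c below.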
> Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx>

Reviewed-by: Kevin Tian <kevint.tian@xxxxxxxxx>

One comment though: could you use RDM consistently in the code?
RMRR is just one example of RDM...
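To make the two boundary rules described in the changelog above concrete,
here is a minimal standalone sketch of the same logic (this is not the libxl
code; struct rdm_region, handle_rdm_conflicts() and the sample values are
invented for illustration, and the error/warning handling is reduced to a
return code):

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct rdm_region {
    uint64_t start, size;
    bool strict;   /* strict policy? (otherwise relaxed) */
    bool valid;    /* still exposed to the guest? */
};

/* True if [start, start+memsize) overlaps [rdm_start, rdm_start+rdm_size). */
static bool overlaps(uint64_t start, uint64_t memsize,
                     uint64_t rdm_start, uint64_t rdm_size)
{
    return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
}

/* Returns 0 on success, -1 on a "strict" conflict below the boundary. */
static int handle_rdm_conflicts(struct rdm_region *rdms, int nr,
                                uint64_t *lowmem_end, uint64_t *highmem_end,
                                uint64_t boundary)
{
    int i;

    /* Rule #1: conflict above the boundary - shrink lowmem, grow highmem. */
    for (i = 0; i < nr; i++) {
        if (!overlaps(0, *lowmem_end, rdms[i].start, rdms[i].size))
            continue;
        if (rdms[i].start > boundary) {
            *highmem_end += *lowmem_end - rdms[i].start;
            *lowmem_end = rdms[i].start;
        }
    }

    /* Rule #2: remaining conflicts - apply the strict/relaxed policy. */
    for (i = 0; i < nr; i++) {
        bool conflict =
            overlaps(0, *lowmem_end, rdms[i].start, rdms[i].size) ||
            overlaps(1ULL << 32, *highmem_end - (1ULL << 32),
                     rdms[i].start, rdms[i].size);

        if (!conflict)
            continue;
        if (rdms[i].strict) {
            fprintf(stderr, "RDM conflict at 0x%" PRIx64 "\n", rdms[i].start);
            return -1;
        }
        /* Relaxed: warn and hide the region from hvmloader. */
        rdms[i].valid = false;
    }
    return 0;
}

int main(void)
{
    /* One relaxed RDM at 2.5G; guest with 3G of lowmem and 1G of highmem. */
    struct rdm_region rdms[] = {
        { .start = 0xA0000000ULL, .size = 0x100000ULL,
          .strict = false, .valid = true },
    };
    uint64_t lowmem_end = 0xC0000000ULL;                 /* 3G */
    uint64_t highmem_end = (1ULL << 32) + (1ULL << 30);  /* 4G + 1G */
    uint64_t boundary = 0x80000000ULL;                   /* 2G */

    if (handle_rdm_conflicts(rdms, 1, &lowmem_end, &highmem_end, boundary))
        return 1;
    /* Prints lowmem_end=0xa0000000, i.e. lowmem now ends below the RDM. */
    printf("lowmem_end=0x%" PRIx64 " highmem_end=0x%" PRIx64 "\n",
           lowmem_end, highmem_end);
    return 0;
}

In the patch itself the equivalent checks operate on d_config->rdms[] and
struct xc_hvm_build_args in libxl__domain_device_construct_rdm() below.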


> ---
>  docs/man/xl.cfg.pod.5          |  21 ++++
>  tools/libxc/xc_hvm_build_x86.c |   5 +-
>  tools/libxl/libxl.h            |   6 +
>  tools/libxl/libxl_create.c     |   6 +-
>  tools/libxl/libxl_dm.c         | 255 +++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_dom.c        |  11 +-
>  tools/libxl/libxl_internal.h   |  11 +-
>  tools/libxl/libxl_types.idl    |   8 ++
>  tools/libxl/xl_cmdimpl.c       |   3 +
>  9 files changed, 322 insertions(+), 4 deletions(-)
> 
> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
> index 638b350..6fd2370 100644
> --- a/docs/man/xl.cfg.pod.5
> +++ b/docs/man/xl.cfg.pod.5
> @@ -767,6 +767,27 @@ to a given device, and "strict" is default here.
> 
>  Note this would override global B<rdm> option.
> 
> +=item B<rdm_mem_boundary=MBYTES>
> +
> +Number of megabytes to set a boundary for checking RDM conflicts.
> +
> +When RDM conflicts with RAM, RDM regions may be scattered over the whole
> +RAM space, and multiple RMRR entries would make the memory layout even
> +more complicated. So here we're trying to find a simple solution that
> +avoids breaking the existing layout. When a conflict occurs,
> +
> +    #1. Above a predefined boundary
> +        - move lowmem_end below the reserved region to resolve the conflict;
> +
> +    #2. Below a predefined boundary
> +        - check the strict/relaxed policy.
> +        The "strict" policy makes libxl fail. Note when both policies
> +        are specified on a given region, 'strict' is always preferred.
> +        The "relaxed" policy issues a warning message and also marks this
> +        entry INVALID to indicate we shouldn't expose it to hvmloader.
> +
> +The default is 2G.
> +
>  =back
> 
>  =back
> diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
> index 0e98c84..5142578 100644
> --- a/tools/libxc/xc_hvm_build_x86.c
> +++ b/tools/libxc/xc_hvm_build_x86.c
> @@ -21,6 +21,7 @@
>  #include <stdlib.h>
>  #include <unistd.h>
>  #include <zlib.h>
> +#include <assert.h>
> 
>  #include "xg_private.h"
>  #include "xc_private.h"
> @@ -270,7 +271,7 @@ static int setup_guest(xc_interface *xch,
> 
>      elf_parse_binary(&elf);
>      v_start = 0;
> -    v_end = args->mem_size;
> +    v_end = args->lowmem_end;
> 
>      if ( nr_pages > target_pages )
>          memflags |= XENMEMF_populate_on_demand;
> @@ -754,6 +755,8 @@ int xc_hvm_build_target_mem(xc_interface *xch,
>      args.mem_size = (uint64_t)memsize << 20;
>      args.mem_target = (uint64_t)target << 20;
>      args.image_file_name = image_name;
> +    if ( args.mmio_size == 0 )
> +        args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
> 
>      return xc_hvm_build(xch, domid, &args);
>  }
> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index 0a7913b..a6212fb 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h
> @@ -858,6 +858,12 @@ const char *libxl_defbool_to_string(libxl_defbool b);
>  #define LIBXL_TIMER_MODE_DEFAULT -1
>  #define LIBXL_MEMKB_DEFAULT ~0ULL
> 
> +/*
> + * We'd like to set a memory boundary to determine if we need to check
> + * any overlap with reserved device memory.
> + */
> +#define LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT (2048 * 1024)
> +
>  #define LIBXL_MS_VM_GENID_LEN 16
>  typedef struct {
>      uint8_t bytes[LIBXL_MS_VM_GENID_LEN];
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index 6c8ec63..0438731 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -109,6 +109,10 @@ void libxl__rdm_setdefault(libxl__gc *gc, libxl_domain_build_info *b_info)
>  {
>      if (b_info->rdm.reserve == LIBXL_RDM_RESERVE_FLAG_INVALID)
>          b_info->rdm.reserve = LIBXL_RDM_RESERVE_FLAG_RELAXED;
> +
> +    if (b_info->rdm_mem_boundary_memkb == LIBXL_MEMKB_DEFAULT)
> +        b_info->rdm_mem_boundary_memkb =
> +                            LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT;
>  }
> 
>  int libxl__domain_build_info_setdefault(libxl__gc *gc,
> @@ -460,7 +464,7 @@ int libxl__domain_build(libxl__gc *gc,
> 
>      switch (info->type) {
>      case LIBXL_DOMAIN_TYPE_HVM:
> -        ret = libxl__build_hvm(gc, domid, info, state);
> +        ret = libxl__build_hvm(gc, domid, d_config, state);
>          if (ret)
>              goto out;
> 
> diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
> index 33f9ce6..d908350 100644
> --- a/tools/libxl/libxl_dm.c
> +++ b/tools/libxl/libxl_dm.c
> @@ -90,6 +90,261 @@ const char *libxl__domain_device_model(libxl__gc *gc,
>      return dm;
>  }
> 
> +static struct xen_reserved_device_memory
> +*xc_device_get_rdm(libxl__gc *gc,
> +                   uint32_t flag,
> +                   uint16_t seg,
> +                   uint8_t bus,
> +                   uint8_t devfn,
> +                   unsigned int *nr_entries)
> +{
> +    struct xen_reserved_device_memory *xrdm;
> +    int rc;
> +
> +    rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
> +                                       NULL, nr_entries);
> +    assert(rc <= 0);
> +    /* "0" means we have no any rdm entry. */
> +    if (!rc)
> +        goto out;
> +
> +    if (errno == ENOBUFS) {
> +        xrdm = malloc(*nr_entries * sizeof(xen_reserved_device_memory_t));
> +        if (!xrdm) {
> +            LOG(ERROR, "Could not allocate RDM buffer!\n");
> +            goto out;
> +        }
> +        rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
> +                                           xrdm, nr_entries);
> +        if (rc) {
> +            LOG(ERROR, "Could not get reserved device memory maps.\n");
> +            *nr_entries = 0;
> +            free(xrdm);
> +            xrdm = NULL;
> +        }
> +    } else
> +        LOG(ERROR, "Could not get reserved device memory maps.\n");
> +
> + out:
> +    return xrdm;
> +}
> +
> +/*
> + * Check whether there exists rdm hole in the specified memory range.
> + * Returns true if exists, else returns false.
> + */
> +static bool overlaps_rdm(uint64_t start, uint64_t memsize,
> +                         uint64_t rdm_start, uint64_t rdm_size)
> +{
> +    return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
> +}
> +
> +/*
> + * Check reported RDM regions and handle potential gfn conflicts according
> + * to user preferred policy.
> + *
> + * RMRR can reside in address space beyond 4G theoretically, but we never
> + * see this in real world. So in order to avoid breaking highmem layout
> + * we don't solve highmem conflict. Note this means highmem rmrr could still
> + * be supported if no conflict.
> + *
> + * But in the case of lowmem, RMRR probably scatter the whole RAM space.
> + * Especially multiple RMRR entries would worsen this to lead a complicated
> + * memory layout. And then its hard to extend hvm_info_table{} to work
> + * hvmloader out. So here we're trying to figure out a simple solution to
> + * avoid breaking existing layout. So when a conflict occurs,
> + *
> + * #1. Above a predefined boundary (default 2G)
> + * - Move lowmem_end below reserved region to solve conflict;
> + *
> + * #2. Below a predefined boundary (default 2G)
> + * - Check strict/relaxed policy.
> + * "strict" policy leads to fail libxl. Note when both policies
> + * are specified on a given region, 'strict' is always preferred.
> + * "relaxed" policy issue a warning message and also mask this entry
> + * INVALID to indicate we shouldn't expose this entry to hvmloader.
> + */
> +int libxl__domain_device_construct_rdm(libxl__gc *gc,
> +                                       libxl_domain_config *d_config,
> +                                       uint64_t rdm_mem_boundary,
> +                                       struct xc_hvm_build_args *args)
> +{
> +    int i, j, conflict;
> +    struct xen_reserved_device_memory *xrdm = NULL;
> +    uint32_t type = d_config->b_info.rdm.type;
> +    uint16_t seg;
> +    uint8_t bus, devfn;
> +    uint64_t rdm_start, rdm_size;
> +    uint64_t highmem_end = args->highmem_end ? args->highmem_end : (1ull<<32);
> +
> +    /* Might not expose rdm. */
> +    if (type == LIBXL_RDM_RESERVE_TYPE_NONE && !d_config->num_pcidevs)
> +        return 0;
> +
> +    /* Query all RDM entries in this platform */
> +    if (type == LIBXL_RDM_RESERVE_TYPE_HOST) {
> +        unsigned int nr_entries;
> +
> +        /* Collect all rdm info if exist. */
> +        xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
> +                                 0, 0, 0, &nr_entries);
> +        if (!nr_entries)
> +            return 0;
> +
> +        assert(xrdm);
> +
> +        d_config->num_rdms = nr_entries;
> +        d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
> +                                d_config->num_rdms * sizeof(libxl_device_rdm));
> +
> +        for (i = 0; i < d_config->num_rdms; i++) {
> +            d_config->rdms[i].start =
> +                                (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT;
> +            d_config->rdms[i].size =
> +                                (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT;
> +            d_config->rdms[i].flag = d_config->b_info.rdm.reserve;
> +        }
> +
> +        free(xrdm);
> +    } else
> +        d_config->num_rdms = 0;
> +
> +    /* Query RDM entries per-device */
> +    for (i = 0; i < d_config->num_pcidevs; i++) {
> +        unsigned int nr_entries;
> +        bool new = true;
> +
> +        seg = d_config->pcidevs[i].domain;
> +        bus = d_config->pcidevs[i].bus;
> +        devfn = PCI_DEVFN(d_config->pcidevs[i].dev, d_config->pcidevs[i].func);
> +        nr_entries = 0;
> +        xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
> +                                 seg, bus, devfn, &nr_entries);
> +        /* No RDM to associated with this device. */
> +        if (!nr_entries)
> +            continue;
> +
> +        assert(xrdm);
> +
> +        /*
> +         * Need to check whether this entry is already saved in the array.
> +         * This could come from two cases:
> +         *
> +         *   - user may configure to get all RMRRs in this platform, which
> +         *   is already queried before this point
> +         *   - or two assigned devices may share one RMRR entry
> +         *
> +         * different policies may be configured on the same RMRR due to above
> +         * two cases. We choose a simple policy to always favor stricter policy
> +         */
> +        for (j = 0; j < d_config->num_rdms; j++) {
> +            if (d_config->rdms[j].start ==
> +                                (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT)
> +             {
> +                if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT)
> +                    d_config->rdms[j].flag = d_config->pcidevs[i].rdm_reserve;
> +                new = false;
> +                break;
> +            }
> +        }
> +
> +        if (new) {
> +            d_config->num_rdms++;
> +            d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
> +                                d_config->num_rdms * sizeof(libxl_device_rdm));
> +
> +            d_config->rdms[d_config->num_rdms - 1].start =
> +                                (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT;
> +            d_config->rdms[d_config->num_rdms - 1].size =
> +                                (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT;
> +            d_config->rdms[d_config->num_rdms - 1].flag =
> +                                d_config->pcidevs[i].rdm_reserve;
> +        }
> +        free(xrdm);
> +    }
> +
> +    /*
> +     * Next step is to check and avoid potential conflict between RDM entries
> +     * and guest RAM. To avoid intrusive impact to existing memory layout
> +     * {lowmem, mmio, highmem} which is passed around various function blocks,
> +     * below conflicts are not handled which are rare and handling them would
> +     * lead to a more scattered layout:
> +     *  - RMRR in highmem area (>4G)
> +     *  - RMRR lower than a defined memory boundary (e.g. 2G)
> +     * Otherwise for conflicts between boundary and 4G, we'll simply move lowmem
> +     * end below reserved region to solve conflict.
> +     *
> +     * If a conflict is detected on a given RMRR entry, an error will be
> +     * returned if 'strict' policy is specified. Instead, if 'relaxed' policy
> +     * specified, this conflict is treated just as a warning, but we mark this
> +     * RMRR entry as INVALID to indicate that this entry shouldn't be exposed
> +     * to hvmloader.
> +     *
> +     * Firstly we should check the case of rdm < 4G because we may need to
> +     * expand highmem_end.
> +     */
> +    for (i = 0; i < d_config->num_rdms; i++) {
> +        rdm_start = d_config->rdms[i].start;
> +        rdm_size = d_config->rdms[i].size;
> +        conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size);
> +
> +        if (!conflict)
> +            continue;
> +
> +        /* Just check if RDM > our memory boundary. */
> +        if (rdm_start > rdm_mem_boundary) {
> +            /*
> +             * We will move downwards lowmem_end so we have to expand
> +             * highmem_end.
> +             */
> +            highmem_end += (args->lowmem_end - rdm_start);
> +            /* Now move downwards lowmem_end. */
> +            args->lowmem_end = rdm_start;
> +        }
> +    }
> +
> +    /* Sync highmem_end. */
> +    args->highmem_end = highmem_end;
> +
> +    /*
> +     * Finally we can take same policy to check lowmem(< 2G) and
> +     * highmem adjusted above.
> +     */
> +    for (i = 0; i < d_config->num_rdms; i++) {
> +        rdm_start = d_config->rdms[i].start;
> +        rdm_size = d_config->rdms[i].size;
> +        /* Does this entry conflict with lowmem? */
> +        conflict = overlaps_rdm(0, args->lowmem_end,
> +                                rdm_start, rdm_size);
> +        /* Does this entry conflict with highmem? */
> +        conflict |= overlaps_rdm((1ULL<<32),
> +                                 args->highmem_end - (1ULL<<32),
> +                                 rdm_start, rdm_size);
> +
> +        if (!conflict)
> +            continue;
> +
> +        if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) {
> +            LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start);
> +            goto out;
> +        } else {
> +            LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n",
> +                      d_config->rdms[i].start);
> +
> +            /*
> +             * Then mask this INVALID to indicate we shouldn't expose this
> +             * to hvmloader.
> +             */
> +            d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID;
> +        }
> +    }
> +
> +    return 0;
> +
> + out:
> +    return ERROR_FAIL;
> +}
> +
>  const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
>  {
>      const libxl_vnc_info *vnc = NULL;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index 867172a..1777b32 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -914,13 +914,14 @@ out:
>  }
> 
>  int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
> -              libxl_domain_build_info *info,
> +              libxl_domain_config *d_config,
>                libxl__domain_build_state *state)
>  {
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      struct xc_hvm_build_args args = {};
>      int ret, rc = ERROR_FAIL;
>      uint64_t mmio_start, lowmem_end, highmem_end;
> +    libxl_domain_build_info *const info = &d_config->b_info;
> 
>      memset(&args, 0, sizeof(struct xc_hvm_build_args));
>      /* The params from the configuration file are in Mb, which are then
> @@ -958,6 +959,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
>      args.highmem_end = highmem_end;
>      args.mmio_start = mmio_start;
> 
> +    ret = libxl__domain_device_construct_rdm(gc, d_config,
> +                                             info->rdm_mem_boundary_memkb*1024,
> +                                             &args);
> +    if (ret) {
> +        LOG(ERROR, "checking reserved device memory failed");
> +        goto out;
> +    }
> +
>      if (info->num_vnuma_nodes != 0) {
>          int i;
> 
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index e9ac886..52f3831 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -1011,7 +1011,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t domid,
>  _hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid,
>               libxl_domain_build_info *info, libxl__domain_build_state *state);
>  _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
> -              libxl_domain_build_info *info,
> +              libxl_domain_config *d_config,
>                libxl__domain_build_state *state);
> 
>  _hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
> @@ -1519,6 +1519,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
>          int nr_channels, libxl_device_channel *channels);
> 
>  /*
> + * This function will fix reserved device memory conflict
> + * according to user's configuration.
> + */
> +_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc,
> +                                   libxl_domain_config *d_config,
> +                                   uint64_t rdm_mem_guard,
> +                                   struct xc_hvm_build_args *args);
> +
> +/*
>   * This function will cause the whole libxl process to hang
>   * if the device model does not respond.  It is deprecated.
>   *
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index 4dfcaf7..b4282a0 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -395,6 +395,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
>      ("target_memkb",    MemKB),
>      ("video_memkb",     MemKB),
>      ("shadow_memkb",    MemKB),
> +    ("rdm_mem_boundary_memkb",    MemKB),
>      ("rtc_timeoffset",  uint32),
>      ("exec_ssidref",    uint32),
>      ("exec_ssid_label", string),
> @@ -559,6 +560,12 @@ libxl_device_pci = Struct("device_pci", [
>      ("rdm_reserve",   libxl_rdm_reserve_flag),
>      ])
> 
> +libxl_device_rdm = Struct("device_rdm", [
> +    ("start", uint64),
> +    ("size", uint64),
> +    ("flag", libxl_rdm_reserve_flag),
> +    ])
> +
>  libxl_device_dtdev = Struct("device_dtdev", [
>      ("path", string),
>      ])
> @@ -589,6 +596,7 @@ libxl_domain_config = Struct("domain_config", [
>      ("disks", Array(libxl_device_disk, "num_disks")),
>      ("nics", Array(libxl_device_nic, "num_nics")),
>      ("pcidevs", Array(libxl_device_pci, "num_pcidevs")),
> +    ("rdms", Array(libxl_device_rdm, "num_rdms")),
>      ("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")),
>      ("vfbs", Array(libxl_device_vfb, "num_vfbs")),
>      ("vkbs", Array(libxl_device_vkb, "num_vkbs")),
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 4364ba4..85d74fd 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -1374,6 +1374,9 @@ static void parse_config_data(const char *config_source,
>      if (!xlu_cfg_get_long (config, "videoram", &l, 0))
>          b_info->video_memkb = l * 1024;
> 
> +    if (!xlu_cfg_get_long (config, "rdm_mem_boundary", &l, 0))
> +        b_info->rdm_mem_boundary_memkb = l * 1024;
> +
>      if (!xlu_cfg_get_long(config, "max_event_channels", &l, 0))
>          b_info->event_channels = l;
> 
> --
> 1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel