[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v6 05/10] libxl: vnuma topology configuration parser and doc



On Fri, Jul 18, 2014 at 6:53 AM, Wei Liu <wei.liu2@xxxxxxxxxx> wrote:
> On Fri, Jul 18, 2014 at 01:50:04AM -0400, Elena Ufimtseva wrote:
>> Parses vnuma topoplogy number of nodes and memory
>> ranges. If not defined, initializes vnuma with
>> only one node and default topology. This one node covers
>> all domain memory and all vcpus assigned to it.
>>
>> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
>> ---
>>  docs/man/xl.cfg.pod.5       |   77 ++++++++
>>  tools/libxl/libxl_types.idl |    6 +-
>>  tools/libxl/libxl_vnuma.h   |    8 +
>>  tools/libxl/xl_cmdimpl.c    |  425 
>> +++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 515 insertions(+), 1 deletion(-)
>>  create mode 100644 tools/libxl/libxl_vnuma.h
>>
>> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
>> index ff9ea77..0c7fbf8 100644
>> --- a/docs/man/xl.cfg.pod.5
>> +++ b/docs/man/xl.cfg.pod.5
>> @@ -242,6 +242,83 @@ if the values of B<memory=> and B<maxmem=> differ.
>>  A "pre-ballooned" HVM guest needs a balloon driver, without a balloon driver
>>  it will crash.
>>
>> +=item B<vnuma_nodes=N>
>> +
>> +Number of vNUMA nodes the guest will be initialized with on boot.
>> +PV guest by default will have one vnuma node.
>> +
>
> In the future, all these config options will be used for HVM / PVH
> guests as well. But I'm fine with leaving it as is for the moment.

Sure!
>
> [...]
>>
>>  =head3 Event Actions
>> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
>> index de25f42..5876822 100644
>> --- a/tools/libxl/libxl_types.idl
>> +++ b/tools/libxl/libxl_types.idl
>> @@ -318,7 +318,11 @@ libxl_domain_build_info = Struct("domain_build_info",[
>>      ("disable_migrate", libxl_defbool),
>>      ("cpuid",           libxl_cpuid_policy_list),
>>      ("blkdev_start",    string),
>> -
>> +    ("vnuma_mem",     Array(uint64, "nr_nodes")),
>> +    ("vnuma_vcpumap",     Array(uint32, "nr_nodemap")),
>> +    ("vdistance",        Array(uint32, "nr_dist")),
>> +    ("vnuma_vnodemap",  Array(uint32, "nr_node_to_pnode")),
>
> The main problem here is that we need to name counter variables
> num_VARs. See idl.txt, idl.Array section.

Ok, I will rename these.

>
>> +    ("vnuma_autoplacement",  libxl_defbool),
>>      ("device_model_version", libxl_device_model_version),
>>      ("device_model_stubdomain", libxl_defbool),
>>      # if you set device_model you must set device_model_version too
>> diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
>> new file mode 100644
>> index 0000000..4ff4c57
>> --- /dev/null
>> +++ b/tools/libxl/libxl_vnuma.h
>> @@ -0,0 +1,8 @@
>> +#include "libxl_osdeps.h" /* must come before any other headers */
>> +
>> +#define VNUMA_NO_NODE ~((unsigned int)0)
>> +
>> +/* Max vNUMA node size from Linux. */
>
> Should be "Min" I guess.
>
>> +#define MIN_VNODE_SIZE  32U
>> +
>> +#define MAX_VNUMA_NODES (unsigned int)1 << 10
>
> Does this also come from Linux? Or is it just some arbitrary number?
> Worth a comment here.

Yes, this comes from max NODE_SHIFT from arch/x86/Kconfig. I will
comment on this in the code.

>
>> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
>> index 68df548..5d91c2c 100644
>> --- a/tools/libxl/xl_cmdimpl.c
>> +++ b/tools/libxl/xl_cmdimpl.c
>> @@ -40,6 +40,7 @@
>>  #include "libxl_json.h"
>>  #include "libxlutil.h"
>>  #include "xl.h"
>> +#include "libxl_vnuma.h"
>>
>>  /* For calls which return an errno on failure */
>>  #define CHK_ERRNOVAL( call ) ({                                         \
>> @@ -690,6 +691,423 @@ static void parse_top_level_sdl_options(XLU_Config 
>> *config,
>>      xlu_cfg_replace_string (config, "xauthority", &sdl->xauthority, 0);
>>  }
>>
>> +
>> +static unsigned int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
>> +{
>> +    const char *buf;
>> +    char *ep;
>> +    unsigned long ul;
>> +    int rc = -EINVAL;
>> +
>> +    buf = xlu_cfg_get_listitem(list, i);
>> +    if (!buf)
>> +        return rc;
>> +    ul = strtoul(buf, &ep, 10);
>> +    if (ep == buf)
>> +        return rc;
>> +    if (ul >= UINT16_MAX)
>> +        return rc;
>> +    return (unsigned int)ul;
>> +}
>> +
>> +static void vdistance_set(unsigned int *vdistance,
>> +                                unsigned int nr_vnodes,
>> +                                unsigned int samenode,
>> +                                unsigned int othernode)
>> +{
>> +    unsigned int idx, slot;
>> +    for (idx = 0; idx < nr_vnodes; idx++)
>> +        for (slot = 0; slot < nr_vnodes; slot++)
>> +            *(vdistance + slot * nr_vnodes + idx) =
>> +                idx == slot ? samenode : othernode;
>> +}
>> +
>> +static void vcputovnode_default(unsigned int *cpu_to_node,
>> +                                unsigned int nr_vnodes,
>> +                                unsigned int max_vcpus)
>> +{
>> +    unsigned int cpu;
>> +    for (cpu = 0; cpu < max_vcpus; cpu++)
>> +        cpu_to_node[cpu] = cpu % nr_vnodes;
>> +}
>> +
>> +/* Split domain memory between vNUMA nodes equally. */
>> +static int split_vnumamem(libxl_domain_build_info *b_info)
>> +{
>> +    unsigned long long vnodemem = 0;
>> +    unsigned long n;
>> +    unsigned int i;
>> +
>> +    if (b_info->nr_nodes == 0)
>> +        return -1;
>> +
>> +    vnodemem = (b_info->max_memkb >> 10) / b_info->nr_nodes;
>> +    if (vnodemem < MIN_VNODE_SIZE)
>> +        return -1;
>> +    /* reminder in MBytes. */
>> +    n = (b_info->max_memkb >> 10) % b_info->nr_nodes;
>> +    /* get final sizes in MBytes. */
>> +    for (i = 0; i < (b_info->nr_nodes - 1); i++)
>> +        b_info->vnuma_mem[i] = vnodemem;
>> +    /* add the reminder to the last node. */
>> +    b_info->vnuma_mem[i] = vnodemem + n;
>> +    return 0;
>> +}
>> +
>> +static void vnuma_vnodemap_default(unsigned int *vnuma_vnodemap,
>> +                                   unsigned int nr_vnodes)
>> +{
>> +    unsigned int i;
>> +    for (i = 0; i < nr_vnodes; i++)
>> +        vnuma_vnodemap[i] = VNUMA_NO_NODE;
>> +}
>> +
>> +/*
>> + * init vNUMA to "zero config" with one node and all other
>> + * topology parameters set to default.
>> + */
>> +static int vnuma_zero_config(libxl_domain_build_info *b_info)
>> +{
>
> Haven't looked into details of this function, but I think this should be
> renamed to vnuma_default_config, from the reading of comment.
>
>> +    b_info->nr_nodes = 1;
>> +    /* all memory goes to this one vnode, as well as vcpus. */
>> +    if (!(b_info->vnuma_mem = (uint64_t *)calloc(b_info->nr_nodes,
>> +                                sizeof(*b_info->vnuma_mem))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    if (!(b_info->vnuma_vcpumap = (unsigned int *)calloc(b_info->max_vcpus,
>> +                                sizeof(*b_info->vnuma_vcpumap))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    if (!(b_info->vdistance = (unsigned int *)calloc(b_info->nr_nodes *
>> +                                b_info->nr_nodes, 
>> sizeof(*b_info->vdistance))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    if (!(b_info->vnuma_vnodemap = (unsigned int *)calloc(b_info->nr_nodes,
>> +                                sizeof(*b_info->vnuma_vnodemap))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    b_info->vnuma_mem[0] = b_info->max_memkb >> 10;
>> +
>> +    /* all vcpus assigned to this vnode. */
>> +    vcputovnode_default(b_info->vnuma_vcpumap, b_info->nr_nodes,
>> +                        b_info->max_vcpus);
>> +
>> +    /* default vdistance is 10. */
>> +    vdistance_set(b_info->vdistance, b_info->nr_nodes, 10, 10);
>> +
>> +    /* VNUMA_NO_NODE for vnode_to_pnode. */
>> +    vnuma_vnodemap_default(b_info->vnuma_vnodemap, b_info->nr_nodes);
>> +
>> +    /*
>> +     * will be placed to some physical nodes defined by automatic
>> +     * numa placement or VNUMA_NO_NODE will not request exact node.
>> +     */
>> +    libxl_defbool_set(&b_info->vnuma_autoplacement, true);
>> +    return 0;
>> +
>> + bad_vnumazerocfg:
>> +    return -1;
>> +}
>> +
>> +static void free_vnuma_info(libxl_domain_build_info *b_info)
>> +{
>> +    free(b_info->vnuma_mem);
>> +    free(b_info->vdistance);
>> +    free(b_info->vnuma_vcpumap);
>> +    free(b_info->vnuma_vnodemap);
>> +    b_info->nr_nodes = 0;
>> +}
>> +
>> +static int parse_vnuma_mem(XLU_Config *config,
>> +                            libxl_domain_build_info **b_info)
>> +{
>> +    libxl_domain_build_info *dst;
>> +    XLU_ConfigList *vnumamemcfg;
>> +    int nr_vnuma_regions, i;
>> +    unsigned long long vnuma_memparsed = 0;
>> +    unsigned long ul;
>> +    const char *buf;
>> +
>> +    dst = *b_info;
>> +    if (!xlu_cfg_get_list(config, "vnuma_mem",
>> +                          &vnumamemcfg, &nr_vnuma_regions, 0)) {
>> +
>> +        if (nr_vnuma_regions != dst->nr_nodes) {
>> +            fprintf(stderr, "Number of numa regions (vnumamem = %d) is \
>> +                    incorrect (should be %d).\n", nr_vnuma_regions,
>> +                    dst->nr_nodes);
>> +            goto bad_vnuma_mem;
>> +        }
>> +
>> +        dst->vnuma_mem = calloc(dst->nr_nodes,
>> +                                 sizeof(*dst->vnuma_mem));
>> +        if (dst->vnuma_mem == NULL) {
>> +            fprintf(stderr, "Unable to allocate memory for vnuma 
>> ranges.\n");
>> +            goto bad_vnuma_mem;
>> +        }
>> +
>> +        char *ep;
>> +        /*
>> +         * Will parse only nr_vnodes times, even if we have more/less 
>> regions.
>> +         * Take care of it later if less or discard if too many regions.
>> +         */
>> +        for (i = 0; i < dst->nr_nodes; i++) {
>> +            buf = xlu_cfg_get_listitem(vnumamemcfg, i);
>> +            if (!buf) {
>> +                fprintf(stderr,
>> +                        "xl: Unable to get element %d in vnuma memory 
>> list.\n", i);
>> +                if (vnuma_zero_config(dst))
>> +                    goto bad_vnuma_mem;
>> +
>> +            }
>
> I think we should fail here instead of creating "default" config. See
> the reasoning I made on hypervisor side.

Yes, I see your point. I replied there as well that we just can have
both to drop to default/zero node config.

>
>> +            ul = strtoul(buf, &ep, 10);
>> +            if (ep == buf) {
>> +                fprintf(stderr, "xl: Invalid argument parsing vnumamem: 
>> %s.\n", buf);
>> +                if (vnuma_zero_config(dst))
>> +                    goto bad_vnuma_mem;
>> +            }
>> +
>
> Ditto.
>
>> +            /* 32Mb is a min size for a node, taken from Linux */
>> +            if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
>> +                fprintf(stderr, "xl: vnuma memory %lu is not within %u - %u 
>> range.\n",
>> +                        ul, MIN_VNODE_SIZE, UINT32_MAX);
>> +                if (vnuma_zero_config(dst))
>> +                    goto bad_vnuma_mem;
>> +            }
>> +
>
> Ditto.
>
> Wei.



-- 
Elena

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.