[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v12 9/9] tools: CMDs and APIs for Platform QoS Monitoring



On Fri, 2014-07-04 at 16:34 +0800, Dongxiao Xu wrote:
> Introduced some new xl commands to handle the platform QoS monitoring
> related services.
> 
> The following two commands is to attach/detach the QoS monitoring
> service to/from a certain domain.

Is there a literal "service" here to attach to? Or are these operations
more like enable/disable?

> diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
> index b8b28c1..91e65c4 100644
> --- a/tools/libxc/Makefile
> +++ b/tools/libxc/Makefile
> @@ -35,6 +35,7 @@ CTRL_SRCS-y       += xc_kexec.c
>  CTRL_SRCS-y       += xtl_core.c
>  CTRL_SRCS-y       += xtl_logger_stdio.c
>  CTRL_SRCS-y       += xc_resource.c
> +CTRL_SRCS-y       += xc_pqos.c

CONFIG_X86?

>  CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c
>  CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c xc_linux_osdep.c
>  CTRL_SRCS-$(CONFIG_FreeBSD) += xc_freebsd.c xc_freebsd_osdep.c

> +int xc_pqos_monitor_get_domain_rmid(xc_interface *xch, uint32_t domid,
> +                                    uint32_t *rmid)
> +{
> +    int rc;
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_pqos_monitor_op;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.pqos_monitor_op.cmd = XEN_DOMCTL_PQOS_MONITOR_OP_QUERY_RMID;
> +
> +    rc = do_domctl(xch, &domctl);
> +
> +    *rmid = domctl.u.pqos_monitor_op.data;

Only if rc indicates success.

> +
> +    return rc;
> +}
> +
> +int xc_pqos_monitor_get_total_rmid(xc_interface *xch, uint32_t *total_rmid)
> +{
> +    static int val = 0;
> +    int rc;
> +    DECLARE_SYSCTL;
> +
> +    if ( val )
> +    {
> +        *total_rmid = val;

This can never change dynamically?

Likewise for several more similar things further down.

> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index 69ceac8..f510ade 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h
> @@ -533,6 +533,13 @@ typedef uint8_t libxl_mac[6];
>  #define LIBXL_MAC_FMTLEN ((2*6)+5) /* 6 hex bytes plus 5 colons */
>  #define LIBXL_MAC_BYTES(mac) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]
>  
> +/*
> + * LIBXL_HAVE_PQOS_MONITOR_CQM
> + *
> + * If this is defined, the cache QoS monitoring feature is suported.

"supported"

> +int libxl_pqos_monitor_get_l3_cache_size(libxl_ctx *ctx,
> +    uint32_t *l3_cache_size);
> +int libxl_pqos_monitor_get_cache_occupancy(libxl_ctx *ctx, uint32_t
> domid,
> +    uint32_t socketid, uint64_t *l3_cache_occupancy);

What are the units of the outputs of these functions?

Why is cache size 32 bits but occupancy is 64 ?

> diff --git a/tools/libxl/libxl_pqos.c b/tools/libxl/libxl_pqos.c
> new file mode 100644
> index 0000000..8c56e67
> --- /dev/null
> +++ b/tools/libxl/libxl_pqos.c
> @@ -0,0 +1,171 @@
> +/*
> + * Copyright (C) 2014      Intel Corporation
> + * Author Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +#include "libxl_internal.h"
> +
> +
> +#define IA32_QM_CTR_ERROR_MASK         (0x3ul << 62)
> +
> +static const char * const msg[] = {

Could this be local to the error printing function?

> +    [ENOSYS] = "unsupported operation",
> +    [ENODEV] = "Platform QoS Monitoring is not supported in this system",
> +    [EEXIST] = "Platform QoS Monitoring is already attached to this domain",
> +    [ENOENT] = "Platform QoS Monitoring is not attached to this domain",
> +    [EUSERS] = "there is no free RMID available",
> +    [ESRCH]  = "is this Domain ID valid?",
> +    [EFAULT] = "cannot find a valid CPU belonging to this socket",
> +};
> +
> +static void libxl_pqos_monitor_err_msg(libxl_ctx *ctx, int err)
> +{
> +    GC_INIT(ctx);
> +
> +    switch (err) {
> +    case EINVAL:
> +    case ENODEV:
> +    case EEXIST:
> +    case EUSERS:
> +    case ESRCH:
> +    case ENOENT:

This enumeration of the valid errnos is bound to get out of sync I
think.

You can compare err to ARRAY_SIZE(msg) and if it is within bounds check
it for NULL.

or  perhaps
case EINVAL: msg = "unsupported operation"; break;
etc

> +
> +int libxl_pqos_monitor_domain_attached(libxl_ctx *ctx, uint32_t domid)
> +{
> +    int rc;
> +    uint32_t rmid;
> +
> +    rc = xc_pqos_monitor_get_domain_rmid(ctx->xch, domid, &rmid);
> +    if (rc < 0)
> +        return 0;

don't you mean return rc here?

> +int libxl_pqos_monitor_get_cache_occupancy(libxl_ctx *ctx, uint32_t domid,
> +    uint32_t socketid, uint64_t *l3_cache_occupancy)
> +{
> +    GC_INIT(ctx);
> +
> +    unsigned int rmid;
> +    uint32_t upscaling_factor;
> +    uint64_t monitor_data;
> +    int cpu, rc;
> +    xc_pqos_monitor_type type;
> +
> +    rc = xc_pqos_monitor_get_domain_rmid(ctx->xch, domid, &rmid);
> +    if (rc < 0 || rmid == 0) {
> +        LOGE(ERROR, "fail to get the domain rmid, "
> +            "or domain is not attached with platform QoS monitoring 
> service");
> +        return ERROR_FAIL;

All of these error paths fail to GC_FREE. You should follow the
    if (error thing) {
        rc = ERROR_FAIL;
        goto out;
    }

    ...

    rc = 0
out:
    GC_FREE;
    return 0
idiom.

> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index 1018142..2e7668b 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -604,3 +604,7 @@ libxl_event = Struct("event",[
>                                   ])),
>             ("domain_create_console_available", None),
>             ]))])
> +
> +libxl_pqos_monitor_type = Enumeration("pqos_monitor_type", [
> +    (1, "CACHE_OCCUPANCY"),
> +    ])

This is used purely for the libxl_pqos_monitor_type_from_string helper
in xl and never in the libxl API, is that correct?

I'm not sure we want to expose this in the libxl api just for stat.
maybe just strcmp for now and if this list grows unwieldy we can
revisit.

> +    print_header = 1;
> +    for (domid = first_domain; domid < first_domain + nr_domains; domid++) {
> +        if (!libxl_pqos_monitor_domain_attached(ctx, domid)) {
> +            if (!ignore_domain_err)
> +                printf("No CQM info for Domain %d.\n", domid);

There might be an argument for printing a line anyway with "Unknown".

> +            continue;
> +        }
> +        if (print_header) {
> +            printf("Name                                        ID");

printf("%-40s %5s", "Name", "ID") would be easier to relate to the list
below.

> +            for (socketid = 0; socketid < nr_sockets; socketid++)
> +                printf("\tSocketID\tL3C_Usage");
> +            printf("\n");
> +            print_header = 0;
> +        }

So the output on a two socket system would be:
Name                                      ID    SocketID        L3CUsage        
SocketID        L3CUsage
Blahblah                                   1           0            123KB       
       1              456KB

?

The socketid column seems weird to me. Why not:

Name                                      ID        Socket 0    Socket 1
Blahblah                                   1           123KB       456KB


Or

Name                                      ID        Socket      L3C Usage
Blahblah                                   1             0      456KB
                                                         1      123KB

...

"xl list" takes a bunch of flags to include extra info. Maybe you want
to think about listing this stuff there instead?

> +        domain_name = libxl_domid_to_name(ctx, domid);
> +        printf("%-40s %5d", domain_name, domid);
> +        free(domain_name);
> +        for (socketid = 0; socketid < nr_sockets; socketid++) {
> +            rc = libxl_pqos_monitor_get_cache_occupancy(ctx, domid, socketid,
> +                                                        &l3_cache_occupancy);
> +            printf("%10u %13lu KB     ", socketid, l3_cache_occupancy/1024);
> +        }
> +        printf("\n");
> +    }
> +
> +    return 0;
> +}
> +
> +
> +int main_pqos_monitor_show(int argc, char **argv)
> +{
> +    int opt, ret = 0;
> +    uint32_t first_domain, nr_domains;
> +    libxl_pqos_monitor_type type;
> +
> +    SWITCH_FOREACH_OPT(opt, "", NULL, "pqos-monitor-show", 1) {
> +        /* No options */
> +    }
> +
> +    libxl_pqos_monitor_type_from_string(argv[optind], &type);
> +
> +    if (optind + 1 >= argc) {
> +        first_domain = 0;
> +        nr_domains = 1024;


When an explicit domain is not given please use libxl_list_domain() and
print the status for all of them instead of hardcoding the first 1024
domains.


> +    } else if (optind + 1 == argc - 1){
> +        first_domain = find_domain(argv[optind + 1]);
> +        nr_domains = 1;

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.