[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v11] tools, docs: add total/local memory bandwidth monitoring



Add Memory Bandwidth Monitoring (MBM) for VMs. Two types of monitoring
are supported: total and local memory bandwidth monitoring. To use it,
CMT should be enabled in the hypervisor.

Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
---
Changes in v11:
1. term change: *_MEM_BANDWIDTH => *_MEM_COUNT.
2. Add assert(nr <= ARRAY_SIZE(entries)).
Changes in v10:
1. Move refactoring code into standalone patch.
2. Create generic interface libxl_psr_cmt_get_sample for both
   cache_occupancy and memory bandwidth.
Changes in v9:
1. Refactor code in xc_psr_cmt_get_data.
2. Move bandwidth calculation(sleep) from libxl to xl.
3. Broadcast feature with LIBXL_HAVE_PSR_MBM.
4. Check event mask with libxl_psr_cmt_type_supported.
5. Coding style/Document fix.
Changes in v6:
1. Remove DISABLE_IRQ flag as the hypervisor disables IRQs for MSR_IA32_TSC
   implicitly.
Changes in v5:
1. Add MBM description in xen command line.
2. Use the tsc from hypervisor directly which is already ns.
3. Call resource_op with DISABLE_IRQ flag.
Changes in v4:
1. Get timestamp from hypervisor and use that for bandwidth calculation.
2. Minor document and coding style fix.
---
 docs/man/xl.pod.1                   | 11 ++++-
 docs/misc/xen-command-line.markdown |  3 ++
 tools/libxc/include/xenctrl.h       |  6 ++-
 tools/libxc/xc_msr_x86.h            |  1 +
 tools/libxc/xc_psr.c                | 47 ++++++++++++++++++-
 tools/libxl/libxl.h                 | 17 +++++++
 tools/libxl/libxl_psr.c             | 56 ++++++++++++++++++-----
 tools/libxl/libxl_types.idl         |  2 +
 tools/libxl/xl_cmdimpl.c            | 90 ++++++++++++++++++++++++++++++-------
 tools/libxl/xl_cmdtable.c           |  4 +-
 10 files changed, 207 insertions(+), 30 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 6b89ba8..cd80ffc 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -1461,6 +1461,13 @@ is domain level. To monitor a specific domain, just 
attach the domain id with
 the monitoring service. When the domain doesn't need to be monitored any more,
 detach the domain id from the monitoring service.
 
+Intel Broadwell and later server platforms also offer total/local memory
+bandwidth monitoring. Xen supports per-domain monitoring for these two
+additional monitoring types. Both memory bandwidth monitoring and L3 cache
+occupancy monitoring share the same underlying monitoring service. Once
+a domain is attached to the monitoring service, monitoring data can be shown
+for any of these monitoring types.
+
 =over 4
 
 =item B<psr-cmt-attach> [I<domain-id>]
@@ -1475,7 +1482,9 @@ detach: Detach the platform shared resource monitoring 
service from a domain.
 
 Show monitoring data for a certain domain or all domains. Current supported
 monitor types are:
- - "cache-occupancy": showing the L3 cache occupancy.
+ - "cache-occupancy": showing the L3 cache occupancy(KB).
+ - "total-mem-bandwidth": showing the total memory bandwidth(KB/s).
+ - "local-mem-bandwidth": showing the local memory bandwidth(KB/s).
 
 =back
 
diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index bc316be..a09ec01 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1097,6 +1097,9 @@ The following resources are available:
   L3 cache occupancy.
   * `cmt` instructs Xen to enable/disable Cache Monitoring Technology.
   * `rmid_max` indicates the max value for rmid.
+* Memory Bandwidth Monitoring (Broadwell and later). Information regarding the
+  total/local memory bandwidth. Uses the same options as Cache Monitoring
+  Technology.
 
 ### reboot
 > `= t[riple] | k[bd] | a[cpi] | p[ci] | n[o] [, [w]arm | [c]old]`
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 09d819f..df18292 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2688,6 +2688,8 @@ int xc_resource_op(xc_interface *xch, uint32_t nr_ops, 
xc_resource_op_t *ops);
 #if defined(__i386__) || defined(__x86_64__)
 enum xc_psr_cmt_type {
     XC_PSR_CMT_L3_OCCUPANCY,
+    XC_PSR_CMT_TOTAL_MEM_COUNT,
+    XC_PSR_CMT_LOCAL_MEM_COUNT,
 };
 typedef enum xc_psr_cmt_type xc_psr_cmt_type;
 int xc_psr_cmt_attach(xc_interface *xch, uint32_t domid);
@@ -2697,10 +2699,12 @@ int xc_psr_cmt_get_domain_rmid(xc_interface *xch, 
uint32_t domid,
 int xc_psr_cmt_get_total_rmid(xc_interface *xch, uint32_t *total_rmid);
 int xc_psr_cmt_get_l3_upscaling_factor(xc_interface *xch,
                                        uint32_t *upscaling_factor);
+int xc_psr_cmt_get_l3_event_mask(xc_interface *xch, uint32_t *event_mask);
 int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, uint32_t cpu,
                                  uint32_t *l3_cache_size);
 int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, uint32_t cpu,
-                        uint32_t psr_cmt_type, uint64_t *monitor_data);
+                        uint32_t psr_cmt_type, uint64_t *monitor_data,
+                        uint64_t *tsc);
 int xc_psr_cmt_enabled(xc_interface *xch);
 #endif
 
diff --git a/tools/libxc/xc_msr_x86.h b/tools/libxc/xc_msr_x86.h
index 7c3e1a3..7f100e7 100644
--- a/tools/libxc/xc_msr_x86.h
+++ b/tools/libxc/xc_msr_x86.h
@@ -20,6 +20,7 @@
 #ifndef XC_MSR_X86_H
 #define XC_MSR_X86_H
 
+#define MSR_IA32_TSC            0x00000010
 #define MSR_IA32_CMT_EVTSEL     0x00000c8d
 #define MSR_IA32_CMT_CTR        0x00000c8e
 
diff --git a/tools/libxc/xc_psr.c b/tools/libxc/xc_psr.c
index 70d9067..e367a80 100644
--- a/tools/libxc/xc_psr.c
+++ b/tools/libxc/xc_psr.c
@@ -17,12 +17,15 @@
  * GNU Lesser General Public License for more details.
  */
 
+#include <assert.h>
 #include "xc_private.h"
 #include "xc_msr_x86.h"
 
 #define IA32_CMT_CTR_ERROR_MASK         (0x3ull << 62)
 
 #define EVTID_L3_OCCUPANCY             0x1
+#define EVTID_TOTAL_MEM_COUNT          0x2
+#define EVTID_LOCAL_MEM_COUNT          0x3
 
 int xc_psr_cmt_attach(xc_interface *xch, uint32_t domid)
 {
@@ -112,6 +115,23 @@ int xc_psr_cmt_get_l3_upscaling_factor(xc_interface *xch,
     return rc;
 }
 
+int xc_psr_cmt_get_l3_event_mask(xc_interface *xch, uint32_t *event_mask)
+{
+    int rc;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_psr_cmt_op;
+    sysctl.u.psr_cmt_op.cmd =
+        XEN_SYSCTL_PSR_CMT_get_l3_event_mask;
+    sysctl.u.psr_cmt_op.flags = 0;
+
+    rc = xc_sysctl(xch, &sysctl);
+    if ( !rc )
+        *event_mask = sysctl.u.psr_cmt_op.u.data;
+
+    return rc;
+}
+
 int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, uint32_t cpu,
                                  uint32_t *l3_cache_size)
 {
@@ -139,10 +159,12 @@ int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, 
uint32_t cpu,
 }
 
 int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, uint32_t cpu,
-                        xc_psr_cmt_type type, uint64_t *monitor_data)
+                        xc_psr_cmt_type type, uint64_t *monitor_data,
+                        uint64_t *tsc)
 {
     xc_resource_op_t op;
-    xc_resource_entry_t entries[2];
+    xc_resource_entry_t entries[3];
+    xc_resource_entry_t *tsc_entry = NULL;
     uint32_t evtid, nr = 0;
     int rc;
 
@@ -151,6 +173,12 @@ int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, 
uint32_t cpu,
     case XC_PSR_CMT_L3_OCCUPANCY:
         evtid = EVTID_L3_OCCUPANCY;
         break;
+    case XC_PSR_CMT_TOTAL_MEM_COUNT:
+        evtid = EVTID_TOTAL_MEM_COUNT;
+        break;
+    case XC_PSR_CMT_LOCAL_MEM_COUNT:
+        evtid = EVTID_LOCAL_MEM_COUNT;
+        break;
     default:
         return -1;
     }
@@ -167,6 +195,18 @@ int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, 
uint32_t cpu,
     entries[nr].rsvd = 0;
     nr++;
 
+    if ( tsc != NULL )
+    {
+        tsc_entry = &entries[nr];
+        entries[nr].u.cmd = XEN_RESOURCE_OP_MSR_READ;
+        entries[nr].idx = MSR_IA32_TSC;
+        entries[nr].val = 0;
+        entries[nr].rsvd = 0;
+        nr++;
+    }
+
+    assert(nr <= ARRAY_SIZE(entries));
+
     op.cpu = cpu;
     op.nr_entries = nr;
     op.entries = entries;
@@ -180,6 +220,9 @@ int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, 
uint32_t cpu,
 
     *monitor_data = entries[1].val;
 
+    if ( tsc_entry != NULL )
+        *tsc = tsc_entry->val;
+
     return 0;
 }
 
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index f784df5..a48431c 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -712,6 +712,13 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, 
libxl_mac *src);
  * If this is defined, the Cache Monitoring Technology feature is supported.
  */
 #define LIBXL_HAVE_PSR_CMT 1
+
+/*
+ * LIBXL_HAVE_PSR_MBM
+ *
+ * If this is defined, the Memory Bandwidth Monitoring feature is supported.
+ */
+#define LIBXL_HAVE_PSR_MBM 1
 #endif
 
 typedef char **libxl_string_list;
@@ -1485,6 +1492,16 @@ int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
                                       uint32_t *l3_cache_occupancy);
 #endif
 
+#ifdef LIBXL_HAVE_PSR_MBM
+int libxl_psr_cmt_type_supported(libxl_ctx *ctx, libxl_psr_cmt_type type);
+int libxl_psr_cmt_get_sample(libxl_ctx *ctx,
+                             uint32_t domid,
+                             libxl_psr_cmt_type type,
+                             uint64_t scope,
+                             uint64_t *sample_r,
+                             uint64_t *tsc_r);
+#endif
+
 /* misc */
 
 /* Each of these sets or clears the flag according to whether the
diff --git a/tools/libxl/libxl_psr.c b/tools/libxl/libxl_psr.c
index ec3b6e9..3e1c792 100644
--- a/tools/libxl/libxl_psr.c
+++ b/tools/libxl/libxl_psr.c
@@ -161,18 +161,36 @@ out:
     return rc;
 }
 
-int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
-                                      uint32_t domid,
-                                      uint32_t socketid,
-                                      uint32_t *l3_cache_occupancy)
+int libxl_psr_cmt_type_supported(libxl_ctx *ctx, libxl_psr_cmt_type type)
 {
     GC_INIT(ctx);
+    uint32_t event_mask;
+    int rc;
 
+    rc = xc_psr_cmt_get_l3_event_mask(ctx->xch, &event_mask);
+    if (rc < 0) {
+        libxl__psr_cmt_log_err_msg(gc, errno);
+        rc = 0;
+    } else {
+        rc = event_mask & (1 << (type - 1));
+    }
+
+    GC_FREE;
+    return rc;
+}
+
+int libxl_psr_cmt_get_sample(libxl_ctx *ctx,
+                             uint32_t domid,
+                             libxl_psr_cmt_type type,
+                             uint64_t scope,
+                             uint64_t *sample_r,
+                             uint64_t *tsc_r)
+{
+    GC_INIT(ctx);
     unsigned int rmid;
     uint32_t upscaling_factor;
     uint64_t monitor_data;
     int cpu, rc;
-    xc_psr_cmt_type type;
 
     rc = xc_psr_cmt_get_domain_rmid(ctx->xch, domid, &rmid);
     if (rc < 0 || rmid == 0) {
@@ -182,15 +200,15 @@ int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
         goto out;
     }
 
-    cpu = libxl__pick_socket_cpu(gc, socketid);
+    cpu = libxl__pick_socket_cpu(gc, scope);
     if (cpu < 0) {
         LOGE(ERROR, "failed to get socket cpu");
         rc = ERROR_FAIL;
         goto out;
     }
 
-    type = XC_PSR_CMT_L3_OCCUPANCY;
-    rc = xc_psr_cmt_get_data(ctx->xch, rmid, cpu, type, &monitor_data);
+    rc = xc_psr_cmt_get_data(ctx->xch, rmid, cpu, type - 1,
+                             &monitor_data, tsc_r);
     if (rc < 0) {
         LOGE(ERROR, "failed to get monitoring data");
         rc = ERROR_FAIL;
@@ -204,13 +222,31 @@ int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
         goto out;
     }
 
-    *l3_cache_occupancy = upscaling_factor * monitor_data / 1024;
-    rc = 0;
+    *sample_r = monitor_data * upscaling_factor;
 out:
     GC_FREE;
     return rc;
 }
 
+int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
+                                      uint32_t domid,
+                                      uint32_t socketid,
+                                      uint32_t *l3_cache_occupancy)
+{
+    uint64_t data;
+    int rc;
+
+    rc = libxl_psr_cmt_get_sample(ctx, domid,
+                                  LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY,
+                                  socketid, &data, NULL);
+    if (rc < 0)
+        goto out;
+
+    *l3_cache_occupancy = data / 1024;
+out:
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 02be466..47af340 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -696,4 +696,6 @@ libxl_event = Struct("event",[
 
 libxl_psr_cmt_type = Enumeration("psr_cmt_type", [
     (1, "CACHE_OCCUPANCY"),
+    (2, "TOTAL_MEM_COUNT"),
+    (3, "LOCAL_MEM_COUNT"),
     ])
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 846a4b2..c1de4de 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -7822,13 +7822,61 @@ out:
 }
 
 #ifdef LIBXL_HAVE_PSR_CMT
+
+#define MBM_SAMPLE_RETRY_MAX 4
+static int psr_cmt_get_mem_bandwidth(uint32_t domid,
+                                     libxl_psr_cmt_type type,
+                                     uint32_t socketid,
+                                     uint64_t *bandwidth_r)
+{
+    uint64_t sample1, sample2;
+    uint64_t tsc1, tsc2;
+    int retry_attempts = 0;
+    int rc;
+
+    while (1) {
+        rc = libxl_psr_cmt_get_sample(ctx, domid, type, socketid,
+                                      &sample1, &tsc1);
+        if (rc < 0)
+            return rc;
+
+        usleep(10000);
+
+        rc = libxl_psr_cmt_get_sample(ctx, domid, type, socketid,
+                                      &sample2, &tsc2);
+        if (rc < 0)
+            return rc;
+
+        if (tsc2 <= tsc1)
+            return -1;
+
+        /*
+         * Hardware guarantees at most 1 overflow can happen if the duration
+         * between two samples is less than 1 second. Note that tsc returned
+         * from hypervisor is already-scaled time(ns).
+         */
+        if (tsc2 - tsc1 < 1000000000 && sample2 >= sample1)
+            break;
+
+        if (retry_attempts < MBM_SAMPLE_RETRY_MAX) {
+            retry_attempts++;
+        } else {
+            fprintf(stderr, "event counter overflowed\n");
+            return -1;
+        }
+    }
+
+    *bandwidth_r = (sample2 - sample1) * 1000000000 / (tsc2 - tsc1) / 1024;
+    return 0;
+}
+
 static void psr_cmt_print_domain_info(libxl_dominfo *dominfo,
                                       libxl_psr_cmt_type type,
                                       uint32_t nr_sockets)
 {
     char *domain_name;
     uint32_t socketid;
-    uint32_t l3_cache_occupancy;
+    uint64_t monitor_data;
 
     if (!libxl_psr_cmt_domain_attached(ctx, dominfo->domid))
         return;
@@ -7840,11 +7888,15 @@ static void psr_cmt_print_domain_info(libxl_dominfo 
*dominfo,
     for (socketid = 0; socketid < nr_sockets; socketid++) {
         switch (type) {
         case LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY:
-            if (!libxl_psr_cmt_get_cache_occupancy(ctx,
-                                                   dominfo->domid,
-                                                   socketid,
-                                                   &l3_cache_occupancy))
-                printf("%13u KB", l3_cache_occupancy);
+            if (!libxl_psr_cmt_get_sample(ctx, dominfo->domid, type, socketid,
+                                          &monitor_data, NULL))
+                printf("%13"PRIu64" KB", monitor_data / 1024);
+            break;
+        case LIBXL_PSR_CMT_TYPE_TOTAL_MEM_COUNT:
+        case LIBXL_PSR_CMT_TYPE_LOCAL_MEM_COUNT:
+            if (!psr_cmt_get_mem_bandwidth(dominfo->domid, type, socketid,
+                                           &monitor_data))
+                printf("%11"PRIu64" KB/s", monitor_data);
             break;
         default:
             return;
@@ -7866,6 +7918,12 @@ static int psr_cmt_show(libxl_psr_cmt_type type, 
uint32_t domid)
         return -1;
     }
 
+    if (!libxl_psr_cmt_type_supported(ctx, type)) {
+        fprintf(stderr, "Monitor type '%s' is not supported in the system\n",
+                libxl_psr_cmt_type_to_string(type));
+        return -1;
+    }
+
     libxl_physinfo_init(&info);
     rc = libxl_get_physinfo(ctx, &info);
     if (rc < 0) {
@@ -7970,7 +8028,16 @@ int main_psr_cmt_show(int argc, char **argv)
         /* No options */
     }
 
-    libxl_psr_cmt_type_from_string(argv[optind], &type);
+    if (!strcmp(argv[optind], "cache_occupancy"))
+        type = LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY;
+    else if (!strcmp(argv[optind], "total_mem_bandwidth"))
+        type = LIBXL_PSR_CMT_TYPE_TOTAL_MEM_COUNT;
+    else if (!strcmp(argv[optind], "local_mem_bandwidth"))
+        type = LIBXL_PSR_CMT_TYPE_LOCAL_MEM_COUNT;
+    else {
+        help("psr-cmt-show");
+        return 2;
+    }
 
     if (optind + 1 >= argc)
         domid = INVALID_DOMID;
@@ -7981,14 +8048,7 @@ int main_psr_cmt_show(int argc, char **argv)
         return 2;
     }
 
-    switch (type) {
-    case LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY:
-        ret = psr_cmt_show(type, domid);
-        break;
-    default:
-        help("psr-cmt-show");
-        return 2;
-    }
+    ret = psr_cmt_show(type, domid);
 
     return ret;
 }
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
index 4b30d3d..22ab63b 100644
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -538,7 +538,9 @@ struct cmd_spec cmd_table[] = {
       "Show Cache Monitoring Technology information",
       "<PSR-CMT-Type> <Domain>",
       "Available monitor types:\n"
-      "\"cache_occupancy\":         Show L3 cache occupancy\n",
+      "\"cache_occupancy\":         Show L3 cache occupancy(KB)\n"
+      "\"total_mem_bandwidth\":     Show total memory bandwidth(KB/s)\n"
+      "\"local_mem_bandwidth\":     Show local memory bandwidth(KB/s)\n",
     },
 #endif
 };
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.