[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Add migration_cost option to scheduler



The idea is borrowed from the Linux kernel: if a vCPU has just been scheduled out and put on the run-queue, it is likely still cache-hot on its current pCPU, and it may be scheduled in again within a short period of time. However, if the vCPU is migrated to another pCPU, it needs to re-warm the cache there - that is the meaning of migration cost.

The patch introduces an option, migration_cost, to suppress overly aggressive vCPU migration (we actually see that the migration frequency is very high most of the time), while still letting load balancing work to a certain degree.

The Linux kernel uses 0.5ms by default. Considering that the cost may be higher under virtualization than on native hardware (e.g. because of the VMCS impact), migration_cost=1ms was chosen for our tests, which were performed on a 4x 6-core Dunnington platform. In the 24-VM case, there is a stable ~2% performance gain for enterprise workloads like SPECjbb and sysbench. If the HVM guests use a stubdom, the gain is larger: 4% for the same workloads.

The best value may vary across platforms, depending on the cache hierarchy and on the workload. Due to resource limits, we haven't tested many combinations, and we plan to try more in the future. You are welcome to evaluate the patch and give feedback on what is suitable or not suitable for you.

Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx>


Thanks,
Xiaowei

Don't migrate cache-hot vCPU

diff -r 633debd7b831 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Tue Mar 03 03:17:57 2009 +0800
+++ b/tools/libxc/xc_misc.c     Tue Mar 03 03:17:59 2009 +0800
@@ -92,6 +92,21 @@ int xc_sched_id(int xc_handle,
         return ret;
 
     *sched_id = sysctl.u.sched_id.sched_id;
+
+    return 0;
+}
+
+int xc_migration_cost(int xc_handle,
+                      uint64_t cost)
+{
+    int ret;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_migration_cost;
+    sysctl.u.migration_cost.cost = cost;
+
+    if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 )
+        return ret;
 
     return 0;
 }
diff -r 633debd7b831 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Tue Mar 03 03:17:57 2009 +0800
+++ b/tools/libxc/xenctrl.h     Tue Mar 03 03:17:59 2009 +0800
@@ -611,6 +611,9 @@ int xc_sched_id(int xc_handle,
 int xc_sched_id(int xc_handle,
                 int *sched_id);
 
+int xc_migration_cost(int xc_handle,
+                      uint64_t cost);
+
 typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t;
 int xc_getcpuinfo(int xc_handle, int max_cpus,
                   xc_cpuinfo_t *info, int *nr_cpus); 
diff -r 633debd7b831 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile    Tue Mar 03 03:17:57 2009 +0800
+++ b/tools/xcutils/Makefile    Tue Mar 03 03:17:59 2009 +0800
@@ -14,7 +14,7 @@ CFLAGS += -Werror
 CFLAGS += -Werror
 CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore)
 
-PROGRAMS = xc_restore xc_save readnotes lsevtchn
+PROGRAMS = xc_restore xc_save readnotes lsevtchn migration_cost
 
 LDLIBS   = $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore)
 
diff -r 633debd7b831 tools/xcutils/migration_cost.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xcutils/migration_cost.c    Tue Mar 03 03:17:59 2009 +0800
@@ -0,0 +1,43 @@
+/*
+ * Set the hypervisor scheduler's vCPU migration cost.
+ *
+ * Usage: migration_cost cost_in_ns
+ */
+#include <err.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include <xenctrl.h>
+
+int main(int argc, char **argv)
+{
+    int xc_fd, rc;
+    long long cost;
+    char *end;
+
+    if ( argc != 2 )
+        errx(1, "usage: %s cost_in_ns", argv[0]);
+
+    /*
+     * Reject empty, non-numeric, out-of-range and negative input: a negative
+     * value would otherwise be converted to a huge uint64_t cost.
+     */
+    errno = 0;
+    cost = strtoll(argv[1], &end, 0);
+    if ( end == argv[1] || *end != '\0' || errno == ERANGE || cost < 0 )
+        errx(1, "invalid cost '%s'", argv[1]);
+
+    xc_fd = xc_interface_open();
+    if ( xc_fd < 0 )
+        errx(1, "failed to open control interface");
+
+    rc = xc_migration_cost(xc_fd, cost);
+
+    xc_interface_close(xc_fd);
+
+    /* Report failure with a conventional exit status instead of a raw rc. */
+    if ( rc != 0 )
+        errx(1, "failed to set migration cost");
+
+    return 0;
+}
diff -r 633debd7b831 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue Mar 03 03:17:57 2009 +0800
+++ b/xen/common/sched_credit.c Wed Mar 04 02:09:47 2009 +0800
@@ -123,7 +123,8 @@
     _MACRO(dom_init)                        \
     _MACRO(dom_destroy)                     \
     _MACRO(vcpu_init)                       \
-    _MACRO(vcpu_destroy)
+    _MACRO(vcpu_destroy)                    \
+    _MACRO(vcpu_hot)
 
 #ifndef NDEBUG
 #define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
@@ -404,14 +405,29 @@ __csched_vcpu_check(struct vcpu *vc)
 #define CSCHED_VCPU_CHECK(_vc)
 #endif
 
+uint64_t migration_cost;
+
+/* Cache-hot: entered current runstate less than migration_cost ns ago. */
+static inline int
+__csched_vcpu_is_cache_hot(struct vcpu *v)
+{
+    int hot = (NOW() - v->runstate.state_entry_time) < migration_cost;
+
+    if ( hot )
+        CSCHED_STAT_CRANK(vcpu_hot);
+    return hot;
+}
+
 static inline int
 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
 {
     /*
-     * Don't pick up work that's in the peer's scheduling tail. Also only pick
-     * up work that's allowed to run on our CPU.
-     */
-    return !vc->is_running && cpu_isset(dest_cpu, vc->cpu_affinity);
+     * Don't pick up work that's in the peer's scheduling tail or hot on
+     * peer PCPU. Only pick up work that's allowed to run on our CPU.
+     */
+    return !vc->is_running &&
+           !__csched_vcpu_is_cache_hot(vc) &&
+           cpu_isset(dest_cpu, vc->cpu_affinity);
 }
 
 static int
@@ -1306,7 +1322,8 @@ csched_dump(void)
            "\tmsecs per tick     = %dms\n"
            "\tcredits per tick   = %d\n"
            "\tticks per tslice   = %d\n"
-           "\tticks per acct     = %d\n",
+           "\tticks per acct     = %d\n"
+           "\tmigration cost     = %"PRIu64"ns\n",
            csched_priv.ncpus,
            csched_priv.master,
            csched_priv.credit,
@@ -1317,7 +1334,8 @@ csched_dump(void)
            CSCHED_MSECS_PER_TICK,
            CSCHED_CREDITS_PER_TICK,
            CSCHED_TICKS_PER_TSLICE,
-           CSCHED_TICKS_PER_ACCT);
+           CSCHED_TICKS_PER_ACCT,
+           migration_cost);
 
     cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
     printk("idlers: %s\n", idlers_buf);
diff -r 633debd7b831 xen/common/sysctl.c
--- a/xen/common/sysctl.c       Tue Mar 03 03:17:57 2009 +0800
+++ b/xen/common/sysctl.c       Tue Mar 03 03:17:59 2009 +0800
@@ -88,6 +88,19 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
     }
     break;
 
+    case XEN_SYSCTL_migration_cost:
+    {
+        extern uint64_t migration_cost;
+        ret = xsm_migration_cost();
+        if ( ret )
+            break;
+
+        migration_cost = op->u.migration_cost.cost;
+
+        ret = 0;
+    }
+    break;
+
     case XEN_SYSCTL_getdomaininfolist:
     { 
         struct domain *d;
diff -r 633debd7b831 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Tue Mar 03 03:17:57 2009 +0800
+++ b/xen/include/public/sysctl.h       Tue Mar 03 03:17:59 2009 +0800
@@ -359,6 +359,15 @@ struct xen_sysctl_pm_op {
     };
 };
 
+/* Adjust vCPU migration_cost. */
+#define XEN_SYSCTL_migration_cost      13
+struct xen_sysctl_migration_cost {
+    /* IN variables. */
+    uint64_t cost;
+};
+typedef struct xen_sysctl_migration_cost xen_sysctl_migration_cost_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_migration_cost_t);
+
 struct xen_sysctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -375,6 +384,7 @@ struct xen_sysctl {
         struct xen_sysctl_get_pmstat        get_pmstat;
         struct xen_sysctl_cpu_hotplug       cpu_hotplug;
         struct xen_sysctl_pm_op             pm_op;
+        struct xen_sysctl_migration_cost    migration_cost;
         uint8_t                             pad[128];
     } u;
 };
diff -r 633debd7b831 xen/include/xsm/xsm.h
--- a/xen/include/xsm/xsm.h     Tue Mar 03 03:17:57 2009 +0800
+++ b/xen/include/xsm/xsm.h     Tue Mar 03 03:17:59 2009 +0800
@@ -68,6 +68,7 @@ struct xsm_operations {
     int (*tbufcontrol) (void);
     int (*readconsole) (uint32_t clear);
     int (*sched_id) (void);
+    int (*migration_cost) (void);
     int (*setdomainmaxmem) (struct domain *d);
     int (*setdomainhandle) (struct domain *d);
     int (*setdebugging) (struct domain *d);
@@ -247,6 +248,11 @@ static inline int xsm_sched_id (void)
     return xsm_call(sched_id());
 }
 
+static inline int xsm_migration_cost (void)
+{
+    return xsm_call(migration_cost());
+}
+
 static inline int xsm_setdomainmaxmem (struct domain *d)
 {
     return xsm_call(setdomainmaxmem(d));
diff -r 633debd7b831 xen/xsm/dummy.c
--- a/xen/xsm/dummy.c   Tue Mar 03 03:17:57 2009 +0800
+++ b/xen/xsm/dummy.c   Tue Mar 03 03:17:59 2009 +0800
@@ -100,6 +100,11 @@ static int dummy_readconsole (uint32_t c
 }
 
 static int dummy_sched_id (void)
+{
+    return 0;
+}
+
+static int dummy_migration_cost (void)
 {
     return 0;
 }
@@ -486,6 +491,7 @@ void xsm_fixup_ops (struct xsm_operation
     set_to_dummy_if_null(ops, tbufcontrol);
     set_to_dummy_if_null(ops, readconsole);
     set_to_dummy_if_null(ops, sched_id);
+    set_to_dummy_if_null(ops, migration_cost);
     set_to_dummy_if_null(ops, setdomainmaxmem);
     set_to_dummy_if_null(ops, setdomainhandle);
     set_to_dummy_if_null(ops, setdebugging);
diff -r 633debd7b831 xen/xsm/flask/hooks.c
--- a/xen/xsm/flask/hooks.c     Tue Mar 03 03:17:57 2009 +0800
+++ b/xen/xsm/flask/hooks.c     Tue Mar 03 03:17:59 2009 +0800
@@ -597,6 +597,11 @@ static int flask_readconsole(uint32_t cl
 }
 
 static int flask_sched_id(void)
+{
+    return domain_has_xen(current->domain, XEN__SCHEDULER);
+}
+
+static int flask_migration_cost(void)
 {
     return domain_has_xen(current->domain, XEN__SCHEDULER);
 }
@@ -1235,6 +1240,7 @@ static struct xsm_operations flask_ops =
     .tbufcontrol = flask_tbufcontrol,
     .readconsole = flask_readconsole,
     .sched_id = flask_sched_id,
+    .migration_cost = flask_migration_cost,
     .setdomainmaxmem = flask_setdomainmaxmem,
     .setdomainhandle = flask_setdomainhandle,
     .setdebugging = flask_setdebugging,
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.