diff -r 0526644ad2a6 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Thu Oct 27 16:07:18 2011 +0100
+++ b/xen/common/sched_credit.c	Thu Nov 03 03:28:24 2011 +0000
@@ -109,6 +109,11 @@ boolean_param("sched_credit_default_yiel
 boolean_param("sched_credit_default_yield", sched_credit_default_yield);
 static int __read_mostly sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS;
 integer_param("sched_credit_tslice_ms", sched_credit_tslice_ms);
+
+/*
+ * Scheduler generic parameters
+ */
+extern int sched_ratelimit_us;
 
 /*
  * Physical CPU
@@ -1297,9 +1302,14 @@ csched_schedule(
     struct csched_private *prv = CSCHED_PRIV(ops);
     struct csched_vcpu *snext;
     struct task_slice ret;
+    s_time_t runtime, tslice;
 
     CSCHED_STAT_CRANK(schedule);
     CSCHED_VCPU_CHECK(current);
+
+    runtime = now - current->runstate.state_entry_time;
+    if ( runtime < 0 ) /* Does this ever happen? */
+        runtime = 0;
 
     if ( !is_idle_vcpu(scurr->vcpu) )
     {
@@ -1314,14 +1324,46 @@ csched_schedule(
     }
 
     /*
-     * Select next runnable local VCPU (ie top of local runq)
+     * Choices, choices:
+     * - If we have a tasklet, we need to run the idle vcpu no matter what.
+     * - If sched rate limiting is in effect, and the current vcpu has
+     *   run for less than that amount of time, continue the current one,
+     *   but with a shorter timeslice.
+     * - Otherwise, choose the one with the highest priority (which may
+     *   be the one currently running)
+     * - If the currently running one is TS_OVER, see if there
+     *   is a higher priority one waiting on the runqueue of another
+     *   cpu and steal it.
+     *
+     * Current invariant is that we always put the currently running vcpu
+     * on the runqueue, because we always take him off again below.
      */
+
     if ( vcpu_runnable(current) )
         __runq_insert(cpu, scurr);
     else
         BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 
-    snext = __runq_elem(runq->next);
+    /* If we have schedule rate limiting enabled, check to see
+     * how long we've run for. */
+    if ( sched_ratelimit_us
+         && vcpu_runnable(current)
+         && !is_idle_vcpu(current)
+         && runtime < MICROSECS(sched_ratelimit_us) )
+    {
+        snext = scurr;
+        /* FIXME: Use prv->tslice_ms if we're also the head of the queue */
+        tslice = MICROSECS(sched_ratelimit_us);
+    }
+    else
+    {
+        /*
+         * Select next runnable local VCPU (ie top of local runq)
+         */
+        snext = __runq_elem(runq->next);
+        tslice = MILLISECS(prv->tslice_ms);
+    }
+
     ret.migrated = 0;
 
     /* Tasklet work (which runs in idle VCPU context) overrides all else. */
@@ -1371,7 +1413,7 @@ csched_schedule(
      * Return task to run next...
      */
     ret.time = (is_idle_vcpu(snext->vcpu) ?
-        -1 : MILLISECS(prv->tslice_ms));
+        -1 : tslice);
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
diff -r 0526644ad2a6 xen/common/schedule.c
--- a/xen/common/schedule.c	Thu Oct 27 16:07:18 2011 +0100
+++ b/xen/common/schedule.c	Thu Nov 03 03:28:24 2011 +0000
@@ -46,6 +46,9 @@ string_param("sched", opt_sched);
  */
 bool_t sched_smt_power_savings = 0;
 boolean_param("sched_smt_power_savings", sched_smt_power_savings);
+/* Default scheduling rate limit: 1ms */
+int sched_ratelimit_us = 1000;
+integer_param("sched_ratelimit_us", sched_ratelimit_us);
 
 /* Various timer handlers. */
 static void s_timer_fn(void *unused);