[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 6/7] xen: credit1: treat pCPUs more evenly during balancing.

To: xen-devel@xxxxxxxxxxxxxxxxxxxx
From: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Date: Thu, 06 Apr 2017 10:16:54 +0200
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>, Anshul Makkar <anshul.makkar@xxxxxxxxxx>
Delivery-date: Thu, 06 Apr 2017 08:16:58 +0000
List-id: Xen developer discussion <xen-devel.lists.xen.org>

Right now, we use cpumask_first() for going through
the bus pCPUs in csched_load_balance(). This means
not all pCPUs have equal chances of seeing their
pending work stolen. It also means there is more
runqueue lock pressure on lower ID pCPUs.

To avoid all this, let's record and remember, for
each NUMA node, from what pCPU we have stolen for
last, and start from that the following time.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Anshul Makkar <anshul.makkar@xxxxxxxxxx>
---
 xen/common/sched_credit.c |   37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 09e3192..c37a65c 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -229,6 +229,7 @@ struct csched_private {
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
+    uint32_t *balance_bias;
     unsigned ratelimit_us;
     /* Period of master and tick in milliseconds */
     unsigned tslice_ms, tick_period_us, ticks_per_tslice;
@@ -547,6 +548,7 @@ csched_deinit_pdata(const struct scheduler *ops, void 
*pcpu, int cpu)
 {
     struct csched_private *prv = CSCHED_PRIV(ops);
     struct csched_pcpu *spc = pcpu;
+    unsigned int node = cpu_to_node(cpu);
     unsigned long flags;
 
     /*
@@ -570,6 +572,12 @@ csched_deinit_pdata(const struct scheduler *ops, void 
*pcpu, int cpu)
         prv->master = cpumask_first(prv->cpus);
         migrate_timer(&prv->master_ticker, prv->master);
     }
+    if ( prv->balance_bias[node] == cpu )
+    {
+        cpumask_and(cpumask_scratch, prv->cpus, &node_to_cpumask(node));
+        if ( !cpumask_empty(cpumask_scratch) )
+            prv->balance_bias[node] =  cpumask_first(cpumask_scratch);
+    }
     kill_timer(&spc->ticker);
     if ( prv->ncpus == 0 )
         kill_timer(&prv->master_ticker);
@@ -609,6 +617,10 @@ init_pdata(struct csched_private *prv, struct csched_pcpu 
*spc, int cpu)
                   NOW() + MILLISECS(prv->tslice_ms));
     }
 
+    cpumask_and(cpumask_scratch, prv->cpus, 
&node_to_cpumask(cpu_to_node(cpu)));
+    if ( cpumask_weight(cpumask_scratch) == 1 )
+        prv->balance_bias[cpu_to_node(cpu)] = cpu;
+
     init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
     set_timer(&spc->ticker, NOW() + MICROSECS(prv->tick_period_us) );
 
@@ -1710,7 +1722,7 @@ csched_load_balance(struct csched_private *prv, int cpu,
     struct csched_vcpu *speer;
     cpumask_t workers;
     cpumask_t *online;
-    int peer_cpu, peer_node, bstep;
+    int peer_cpu, first_cpu, peer_node, bstep;
     int node = cpu_to_node(cpu);
 
     BUG_ON( cpu != snext->vcpu->processor );
@@ -1754,9 +1766,10 @@ csched_load_balance(struct csched_private *prv, int cpu,
             cpumask_and(&workers, &workers, &node_to_cpumask(peer_node));
             __cpumask_clear_cpu(cpu, &workers);
 
-            peer_cpu = cpumask_first(&workers);
-            if ( peer_cpu >= nr_cpu_ids )
+            first_cpu = cpumask_cycle(prv->balance_bias[peer_node], &workers);
+            if ( first_cpu >= nr_cpu_ids )
                 goto next_node;
+            peer_cpu = first_cpu;
             do
             {
                 spinlock_t *lock;
@@ -1820,13 +1833,19 @@ csched_load_balance(struct csched_private *prv, int cpu,
                 if ( speer != NULL )
                 {
                     *stolen = 1;
+                    /*
+                     * Next time we'll look for work to steal on this node, we
+                     * will start from the next pCPU, with respect to this one,
+                     * so we don't risk stealing always from the same ones.
+                     */
+                    prv->balance_bias[peer_node] = peer_cpu;
                     return speer;
                 }
 
  next_cpu:
                 peer_cpu = cpumask_cycle(peer_cpu, &workers);
 
-            } while( peer_cpu != cpumask_first(&workers) );
+            } while( peer_cpu != first_cpu );
 
  next_node:
             peer_node = cycle_node(peer_node, node_online_map);
@@ -2178,10 +2197,19 @@ csched_init(struct scheduler *ops)
     prv = xzalloc(struct csched_private);
     if ( prv == NULL )
         return -ENOMEM;
+
+    prv->balance_bias = xzalloc_array(uint32_t, MAX_NUMNODES);
+    if ( prv->balance_bias == NULL )
+    {
+        xfree(prv);
+        return -ENOMEM;
+    }
+
     if ( !zalloc_cpumask_var(&prv->cpus) ||
          !zalloc_cpumask_var(&prv->idlers) )
     {
         free_cpumask_var(prv->cpus);
+        xfree(prv->balance_bias);
         xfree(prv);
         return -ENOMEM;
     }
@@ -2227,6 +2255,7 @@ csched_deinit(struct scheduler *ops)
         ops->sched_data = NULL;
         free_cpumask_var(prv->cpus);
         free_cpumask_var(prv->idlers);
+        xfree(prv->balance_bias);
         xfree(prv);
     }
 }


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

Follow-Ups:
- Re: [Xen-devel] [PATCH v2 6/7] xen: credit1: treat pCPUs more evenly during balancing.
  - From: George Dunlap

References:
- [Xen-devel] [PATCH v2 0/7] xen: sched: improve scalability of Credit1, and optimize a bit both Credit1 and Credit2
  - From: Dario Faggioli

Prev by Date: [Xen-devel] [PATCH v2 5/7] xen: credit1: increase efficiency and scalability of load balancing.
Next by Date: [Xen-devel] [PATCH v2 7/7] xen: credit2: avoid cpumask_any() in pick_cpu().
Previous by thread: Re: [Xen-devel] [PATCH v2 5/7] xen: credit1: increase efficiency and scalability of load balancing.
Next by thread: Re: [Xen-devel] [PATCH v2 6/7] xen: credit1: treat pCPUs more evenly during balancing.
Index(es):
- Date
- Thread

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.