[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2] xen: merge temporary vcpu pinning scenarios



On 23.07.19 14:26, Andrew Cooper wrote:
On 23/07/2019 10:20, Juergen Gross wrote:
Today there are three scenarios which are pinning vcpus temporarily to
a single physical cpu:

- NMI/MCE injection into PV domains
- wait_event() handling
- vcpu_pin_override() handling

Each of those cases are handled independently today using their own
temporary cpumask to save the old affinity settings.

The three cases can be combined, as the latter two cases will only pin
a vcpu to the physical cpu it is already running on, while
vcpu_pin_override() is allowed to fail.

So merge the three temporary pinning scenarios by only using one
cpumask and a per-vcpu bitmask for specifying which of the three
scenarios is currently active (they are allowed to nest).

Note that we don't need to call domain_update_node_affinity() as we
are only pinning for a brief period of time.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
  xen/arch/x86/pv/traps.c | 20 +-------------------
  xen/arch/x86/traps.c    |  8 ++------
  xen/common/domain.c     |  4 +---
  xen/common/schedule.c   | 35 +++++++++++++++++++++++------------
  xen/common/wait.c       | 26 ++++++++------------------
  xen/include/xen/sched.h |  8 +++++---
  6 files changed, 40 insertions(+), 61 deletions(-)

diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c
index 1740784ff2..37dac300ba 100644
--- a/xen/arch/x86/pv/traps.c
+++ b/xen/arch/x86/pv/traps.c
@@ -151,25 +151,7 @@ static void nmi_mce_softirq(void)
      BUG_ON(st->vcpu == NULL);
 
 -    /*
-     * Set the tmp value unconditionally, so that the check in the iret
-     * hypercall works.
-     */
-    cpumask_copy(st->vcpu->cpu_hard_affinity_tmp,
-                 st->vcpu->cpu_hard_affinity);
-
-    if ( (cpu != st->processor) ||
-         (st->processor != st->vcpu->processor) )
-    {
-
-        /*
-         * We are on a different physical cpu.  Make sure to wakeup the vcpu on
-         * the specified processor.
-         */
-        vcpu_set_hard_affinity(st->vcpu, cpumask_of(st->processor));
-
-        /* Affinity is restored in the iret hypercall. */
-    }
+    vcpu_set_tmp_affinity(st->vcpu, st->processor, VCPU_AFFINITY_NMI);

Please can we keep the comment explaining where the affinity is
restored, which is a disguised explanation of why it is PV-only.

Okay.


diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 89bc259ae4..d4de74f9c8 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1106,47 +1106,58 @@ void watchdog_domain_destroy(struct domain *d)
          kill_timer(&d->watchdog_timer[i]);
  }
-int vcpu_pin_override(struct vcpu *v, int cpu)
+int vcpu_set_tmp_affinity(struct vcpu *v, int cpu, uint8_t reason)
  {
      spinlock_t *lock;
      int ret = -EINVAL;
+    bool migrate;
      lock = vcpu_schedule_lock_irq(v);
 
      if ( cpu < 0 )
      {
-        if ( v->affinity_broken )
+        if ( v->affinity_broken & reason )
          {
-            sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
-            v->affinity_broken = 0;
              ret = 0;
+            v->affinity_broken &= ~reason;
          }
+        if ( !ret && !v->affinity_broken )
+            sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
      }
      else if ( cpu < nr_cpu_ids )
      {
-        if ( v->affinity_broken )
+        if ( (v->affinity_broken & reason) ||
+             (v->affinity_broken && v->processor != cpu) )
              ret = -EBUSY;
          else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
          {
-            cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
-            v->affinity_broken = 1;
-            sched_set_affinity(v, cpumask_of(cpu), NULL);
+            if ( !v->affinity_broken )
+            {
+                cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
+                sched_set_affinity(v, cpumask_of(cpu), NULL);
+            }
+            v->affinity_broken |= reason;
              ret = 0;
          }
      }
 
-    if ( ret == 0 )
+    migrate = !ret && !cpumask_test_cpu(v->processor, v->cpu_hard_affinity);
+    if ( migrate )
          vcpu_migrate_start(v);
      vcpu_schedule_unlock_irq(lock, v);
 
 -    domain_update_node_affinity(v->domain);
-
-    vcpu_migrate_finish(v);
+    if ( migrate )
+        vcpu_migrate_finish(v);
return ret;
  }
+int vcpu_pin_override(struct vcpu *v, int cpu)

There are exactly two callers of vcpu_pin_override().  I'd take the
opportunity to make vcpu_set_tmp_affinity() the single API call for
adjusting affinity.

Fine with me.


+{
+    return vcpu_set_tmp_affinity(v, cpu, VCPU_AFFINITY_OVERRIDE);
+}
+
  typedef long ret_t;
#endif /* !COMPAT */
diff --git a/xen/common/wait.c b/xen/common/wait.c
index 4f830a14e8..9f9ad033b3 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -182,30 +178,24 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
  static void __finish_wait(struct waitqueue_vcpu *wqv)
  {
      wqv->esp = NULL;
-    (void)vcpu_set_hard_affinity(current, &wqv->saved_affinity);
+    vcpu_set_tmp_affinity(current, -1, VCPU_AFFINITY_WAIT);
  }
void check_wakeup_from_wait(void)
  {
-    struct waitqueue_vcpu *wqv = current->waitqueue_vcpu;
+    struct vcpu *curr = current;
+    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
      ASSERT(list_empty(&wqv->list));
 
      if ( likely(wqv->esp == NULL) )
          return;
 
-    /* Check if we woke up on the wrong CPU. */
-    if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+    /* Check if we are still pinned. */
+    if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) )
      {
-        /* Re-set VCPU affinity and re-enter the scheduler. */
-        struct vcpu *curr = current;
-        cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
-        if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
-        {
-            gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
-            domain_crash(current->domain);
-        }
-        wait(); /* takes us back into the scheduler */
+        gdprintk(XENLOG_ERR, "vcpu affinity lost\n");
+        domain_crash(current->domain);

curr

Okay.


      }
/*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index b40c8fd138..721c429454 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -200,7 +200,10 @@ struct vcpu
      /* VCPU is paused following shutdown request (d->is_shutting_down)? */
      bool             paused_for_shutdown;
      /* VCPU need affinity restored */
-    bool             affinity_broken;
+    uint8_t          affinity_broken;
+#define VCPU_AFFINITY_OVERRIDE    0x01
+#define VCPU_AFFINITY_NMI         0x02

VCPU_AFFINITY_NMI_MCE ?  It is used for more than just NMIs.

Okay.

BTW: The MCE case is never triggered (there is no caller of
pv_raise_interrupt() with TRAP_machine_check). Is this code left in
place on purpose or could it be removed?


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.