
[Xen-devel] [PATCH v2 4/4] VMX: fixup PI descriptor when cpu is offline



When a cpu goes offline, we need to move all the vcpus on its
blocking list to another online cpu; this patch handles that. We
also need to carefully handle the case where a call to
vmx_vcpu_block() races with the cpu going offline.

Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vmcs.c        |  1 +
 xen/arch/x86/hvm/vmx/vmx.c         | 90 +++++++++++++++++++++++++++++++++++++-
 xen/common/schedule.c              |  8 +---
 xen/include/asm-x86/hvm/hvm.h      |  4 +-
 xen/include/asm-x86/hvm/vmx/vmcs.h |  2 +-
 xen/include/asm-x86/hvm/vmx/vmx.h  |  1 +
 6 files changed, 96 insertions(+), 10 deletions(-)
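
As a note for reviewers: vmx_vcpu_block() and vmx_pi_desc_fixup()
effectively follow a small protocol -- a per-cpu lock, a 'down' flag
checked under that lock, and a retry when the fallback cpu turns out
to be dying as well. Below is a standalone C sketch of that protocol,
using pthread mutexes in place of Xen's spinlocks. It is an
illustration only (all names in it are hypothetical), not code from
this patch:

/*
 * Standalone illustration only -- NOT part of this patch.  pthread
 * mutexes stand in for Xen's spinlocks, and 'ndst' stands in for the
 * PI descriptor's NDST field; all names here are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

#define NR_CPUS 4

typedef struct vcpu {
    struct vcpu *next;          /* link on a per-cpu blocking list */
    int ndst;                   /* wakeup destination (simulated NDST) */
} vcpu_t;

typedef struct {
    pthread_mutex_t lock;       /* protects 'list' and 'down' */
    vcpu_t *list;               /* blocked vcpus targeting this cpu */
    bool down;                  /* set once this cpu starts going offline */
    bool online;
} pcpu_t;

static pcpu_t pcpus[NR_CPUS];

/* Call once at startup: initialize the locks and mark every cpu online. */
static void pcpus_init(void)
{
    for ( int i = 0; i < NR_CPUS; i++ )
    {
        pthread_mutex_init(&pcpus[i].lock, NULL);
        pcpus[i].online = true;
    }
}

/*
 * The vmx_vcpu_block() side: a vcpu may only be queued on a cpu whose
 * list is still live.  Returns false if the caller must not block --
 * this is what the new bool_t return value propagates to vcpu_block().
 */
static bool try_block(vcpu_t *v, int cpu)
{
    pcpu_t *p = &pcpus[cpu];
    bool ok;

    pthread_mutex_lock(&p->lock);
    ok = !p->down;
    if ( ok )
    {
        v->next = p->list;
        p->list = v;
        v->ndst = cpu;
    }
    pthread_mutex_unlock(&p->lock);
    return ok;
}

/*
 * The vmx_pi_desc_fixup() side: mark the dying cpu's list dead, then
 * move every queued vcpu to an online cpu, retargeting its wakeup
 * destination.  Xen serializes cpu offlining, so at most one of these
 * runs at a time and the old->new lock nesting cannot deadlock.
 */
static void fixup_offline(int cpu)
{
    pcpu_t *p = &pcpus[cpu];
    int new_cpu = cpu;

    pthread_mutex_lock(&p->lock);
    p->down = true;             /* new blockers now fail in try_block() */
    p->online = false;

    while ( p->list != NULL )
    {
        vcpu_t *v = p->list;

        do
            new_cpu = (new_cpu + 1) % NR_CPUS;
        while ( !pcpus[new_cpu].online );

        pthread_mutex_lock(&pcpus[new_cpu].lock);
        if ( pcpus[new_cpu].down )
        {
            /* 'new_cpu' is dying too; go back and pick another one. */
            pthread_mutex_unlock(&pcpus[new_cpu].lock);
            continue;
        }
        p->list = v->next;
        v->ndst = new_cpu;      /* retarget the notification destination */
        v->next = pcpus[new_cpu].list;
        pcpus[new_cpu].list = v;
        pthread_mutex_unlock(&pcpus[new_cpu].lock);
    }
    pthread_mutex_unlock(&p->lock);
}

The key invariant the sketch shows: once 'down' is observed under the
list lock, no new vcpu can land on the dying cpu's list, so the fixup
side drains a list that can only shrink.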

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 8284281..aa25788 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -590,6 +590,7 @@ void vmx_cpu_dead(unsigned int cpu)
     vmx_free_vmcs(per_cpu(vmxon_region, cpu));
     per_cpu(vmxon_region, cpu) = 0;
     nvmx_cpu_dead(cpu);
+    vmx_pi_desc_fixup(cpu);
 }
 
 int vmx_cpu_up(void)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 662b38d..b56082e 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -87,6 +87,7 @@ static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
 struct vmx_pi_blocking_vcpu {
     struct list_head     list;
     spinlock_t           lock;
+    bool_t               down;
 };
 
 /*
@@ -102,9 +103,10 @@ void vmx_pi_per_cpu_init(unsigned int cpu)
 {
     INIT_LIST_HEAD(&per_cpu(vmx_pi_blocking, cpu).list);
     spin_lock_init(&per_cpu(vmx_pi_blocking, cpu).lock);
+    per_cpu(vmx_pi_blocking, cpu).down = 0;
 }
 
-static void vmx_vcpu_block(struct vcpu *v)
+static bool_t vmx_vcpu_block(struct vcpu *v)
 {
     unsigned long flags;
     unsigned int dest;
@@ -122,10 +124,25 @@ static void vmx_vcpu_block(struct vcpu *v)
          * new vCPU to the list.
          */
         spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_hotplug_lock, flags);
-        return;
+        return 1;
     }
 
     spin_lock(pi_blocking_list_lock);
+    if ( unlikely(per_cpu(vmx_pi_blocking, v->processor).down) )
+    {
+        /*
+         * Getting here means that v->processor is going away, and all
+         * the vcpus on its blocking list have already been removed from
+         * it, so we cannot add a new vcpu to the list. Besides that, we
+         * return 0 to prevent the vcpu from actually blocking. This is
+         * needed because if the vCPU continued to block without being
+         * put on a per-cpu blocking list here, it might never be woken
+         * up by the notification event.
+         */
+        spin_unlock(pi_blocking_list_lock);
+        spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_hotplug_lock, flags);
+        return 0;
+    }
     old_lock = cmpxchg(&v->arch.hvm_vmx.pi_blocking.lock, NULL,
                        pi_blocking_list_lock);
 
@@ -161,6 +178,8 @@ static void vmx_vcpu_block(struct vcpu *v)
                  x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
 
     write_atomic(&pi_desc->nv, pi_wakeup_vector);
+
+    return 1;
 }
 
 static void vmx_pi_switch_from(struct vcpu *v)
@@ -253,6 +272,73 @@ static void vmx_pi_blocking_cleanup(struct vcpu *v)
     spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_hotplug_lock, flags);
 }
 
+void vmx_pi_desc_fixup(unsigned int cpu)
+{
+    unsigned int new_cpu, dest;
+    unsigned long flags;
+    struct arch_vmx_struct *vmx, *tmp;
+    spinlock_t *new_lock, *old_lock = &per_cpu(vmx_pi_blocking, cpu).lock;
+    struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list;
+
+    if ( !iommu_intpost )
+        return;
+
+    spin_lock_irqsave(old_lock, flags);
+    per_cpu(vmx_pi_blocking, cpu).down = 1;
+
+    list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
+    {
+        /*
+         * We need to find an online cpu to use as the NDST of the PI
+         * descriptor; it doesn't matter whether it is within the cpupool
+         * of the domain or not. As long as it is online, the vCPU will
+         * be woken up once the notification event arrives.
+         */
+        new_cpu = cpu;
+restart:
+        while ( 1 )
+        {
+            new_cpu = (new_cpu + 1) % nr_cpu_ids;
+            if ( cpu_online(new_cpu) )
+                break;
+        }
+        new_lock = &per_cpu(vmx_pi_blocking, new_cpu).lock;
+
+        spin_lock(new_lock);
+        /*
+         * After acquiring the blocking list lock for the new cpu, we need
+         * to check whether new_cpu is still online.
+         *
+         * If '.down' is true, it means 'new_cpu' is also going offline,
+         * so just go back and find another one; otherwise, there are two
+         * possibilities:
+         *   case 1 - 'new_cpu' is online.
+         *   case 2 - 'new_cpu' is about to go offline, but hasn't yet
+         *            reached the point where '.down' is set.
+         * In either case, we can just set the 'NDST' field to 'new_cpu'.
+         * For case 2, the 'NDST' field will be set to another online cpu
+         * when this function gets run for 'new_cpu' some time later.
+         */
+        if ( per_cpu(vmx_pi_blocking, new_cpu).down )
+        {
+            spin_unlock(new_lock);
+            goto restart;
+        }
+
+        ASSERT(vmx->pi_blocking.lock == old_lock);
+
+        dest = cpu_physical_id(new_cpu);
+        write_atomic(&vmx->pi_desc.ndst,
+                     x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
+
+        list_move(&vmx->pi_blocking.list,
+                  &per_cpu(vmx_pi_blocking, new_cpu).list);
+        vmx->pi_blocking.lock = new_lock;
+        spin_unlock(new_lock);
+    }
+    spin_unlock_irqrestore(old_lock, flags);
+}
+
 /* This function is called when pcidevs_lock is held */
 void vmx_pi_hooks_assign(struct domain *d)
 {
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 5546999..b60491d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -833,10 +833,8 @@ void vcpu_block(void)
 
     set_bit(_VPF_blocked, &v->pause_flags);
 
-    arch_vcpu_block(v);
-
     /* Check for events /after/ blocking: avoids wakeup waiting race. */
-    if ( local_events_need_delivery() )
+    if ( arch_vcpu_block(v) || local_events_need_delivery() )
     {
         clear_bit(_VPF_blocked, &v->pause_flags);
     }
@@ -872,8 +870,6 @@ static long do_poll(struct sched_poll *sched_poll)
     v->poll_evtchn = -1;
     set_bit(v->vcpu_id, d->poll_mask);
 
-    arch_vcpu_block(v);
-
 #ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
     /* Check for events /after/ setting flags: avoids wakeup waiting race. */
     smp_mb();
@@ -891,7 +887,7 @@ static long do_poll(struct sched_poll *sched_poll)
 #endif
 
     rc = 0;
-    if ( local_events_need_delivery() )
+    if ( arch_vcpu_block(v) || local_events_need_delivery() )
         goto out;
 
     for ( i = 0; i < sched_poll->nr_ports; i++ )
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 7b7ff3f..725dce7 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -608,11 +608,13 @@ unsigned long hvm_cr4_guest_reserved_bits(const struct vcpu *v, bool_t restore);
  * not been defined yet.
  */
 #define arch_vcpu_block(v) ({                                   \
+    bool_t rc = 0;                                              \
     struct vcpu *v_ = (v);                                      \
     struct domain *d_ = v_->domain;                             \
     if ( has_hvm_container_domain(d_) &&                        \
          d_->arch.hvm_domain.vmx.vcpu_block )                   \
-        d_->arch.hvm_domain.vmx.vcpu_block(v_);                 \
+        rc = d_->arch.hvm_domain.vmx.vcpu_block(v_);            \
+    rc;                                                         \
 })
 
 #endif /* __ASM_X86_HVM_HVM_H__ */
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 3834f49..e1834f7 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -132,7 +132,7 @@ struct vmx_domain {
      * has a physical device assigned to it, so we set and clear the callbacks
      * as appropriate when device assignment changes.
      */
-    void (*vcpu_block) (struct vcpu *);
+    bool_t (*vcpu_block) (struct vcpu *);
     void (*pi_switch_from) (struct vcpu *v);
     void (*pi_switch_to) (struct vcpu *v);
     void (*pi_do_resume) (struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index a85d488..b6ba123 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -565,6 +565,7 @@ void free_p2m_hap_data(struct p2m_domain *p2m);
 void p2m_init_hap_data(struct p2m_domain *p2m);
 
 void vmx_pi_per_cpu_init(unsigned int cpu);
+void vmx_pi_desc_fixup(unsigned int cpu);
 
 void vmx_pi_hooks_assign(struct domain *d);
 void vmx_pi_hooks_deassign(struct domain *d);
-- 
2.1.0

