[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH][3rd try] don't schedule unplugged vcpus



* Ryan Harper <ryanh@xxxxxxxxxx> [2005-06-08 16:29]:
> * Ryan Harper <ryanh@xxxxxxxxxx> [2005-06-06 16:04]:
> > This patch extends the CONFIG_HOTPLUG_CPU behavior down into the
> > hypervisor.  Currently when a CPU in Linux is moved offline,
> > 
> > echo 0 > /sys/devices/system/cpu/cpuX/online
> > 
> > the offline cpu yields its slice back to the hypervisor.  This patch
> > adds two SCHEDOPS (vcpu_down/vcpu_up) which set/clear a new VCPU flag,
> > VCPU_down.  The domain_runnable() check now looks at this flag and
> > subsequently the vcpu is not scheduled when VCPU_down is set.
> > 
> > The patch was built and tested against the 20050606 nightly snapshot.
> > Testing requires a DOMU built with CONFIG_SMP and CONFIG_HOTPLUG_CPU.
> > Please apply.
> 
> I've added in changes to DOM0_GETDOMINFO and DOM0_GETVCPUCONTEXT
> hypercalls.  dominfo now creates a vcpu_online_map bitmap which marks
> whether vcpus are up or down.  I didn't want to clobber either the
> total number of vcpus allocated to a domain (n_vcpu) or the vcpu_to_cpu
> mapping since both are still valid whether the vcpu is being scheduled
> or not.

This version drops the separate online map: down VCPUs are now accounted
for in the vcpu_to_cpu map itself (a down VCPU is not mapped to a CPU).

> 
> I modified vcpucontext to give the context for the first vcpu not down.
> If the requested vcpu is down, it will return the context of the next
> vcpu that is up, or -ESRCH if no vcpu past the requested vcpu is valid.

This has been kept.

> I modified xm list -v to display an ONLINE column which indicates
> the online status of each vcpu in a domain.

I've removed the ONLINE column; xm list -v now shows -1 in the CPU column
to indicate an offline VCPU.

Built and tested on the 20050614 nightly unstable snapshot.

Please apply.

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c       |    7 +
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c       |    4 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h |   31 +++++++
 tools/python/xen/xm/main.py                                  |    7 +
 xen/common/dom0_ops.c                                        |   19 +++-
 xen/common/schedule.c                                        |   48 +++++++++++
 xen/include/public/xen.h                                     |    3 
 xen/include/xen/sched.h                                      |    5 -
 8 files changed, 117 insertions(+), 7 deletions(-)

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c 
vcpu_down/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    2005-06-12 
22:13:37.000000000 -0500
+++ vcpu_down/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    
2005-06-13 10:41:56.000000000 -0500
@@ -154,8 +154,13 @@
                                cpu_clear(cpu, cpu_idle_map);
                        rmb();
 
-                       if (cpu_is_offline(cpu))
+                       if (cpu_is_offline(cpu)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
+            /* Tell hypervisor not to schedule dead vcpus */
+            HYPERVISOR_vcpu_down(cpu);
+#endif
                                play_dead();
+         }
 
                        irq_stat[cpu].idle_timestamp = jiffies;
                        xen_idle();
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c 
vcpu_down/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c    2005-06-12 
22:13:44.000000000 -0500
+++ vcpu_down/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c    
2005-06-13 10:41:56.000000000 -0500
@@ -1380,6 +1380,10 @@
        }
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_XEN
+   /* Tell hypervisor to bring vcpu up */
+   HYPERVISOR_vcpu_up(cpu);
+#endif
        /* Already up, and in cpu_quiescent now? */
        if (cpu_isset(cpu, smp_commenced_mask)) {
                cpu_enable(cpu);
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 
vcpu_down/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h      
2005-06-12 22:13:45.000000000 -0500
+++ vcpu_down/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h      
2005-06-13 10:41:56.000000000 -0500
@@ -517,4 +517,35 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_vcpu_down(
+    int vcpu)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
+        : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+    int vcpu)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+        : "memory" );
+
+    return ret;
+}
 #endif /* __HYPERCALL_H__ */
diff -urN b/tools/python/xen/xm/main.py vcpu_down/tools/python/xen/xm/main.py
--- b/tools/python/xen/xm/main.py       2005-06-12 22:13:41.000000000 -0500
+++ vcpu_down/tools/python/xen/xm/main.py       2005-06-13 14:46:16.788968002 
-0500
@@ -406,7 +406,7 @@
         print 'Name              Id  VCPU  CPU  CPUMAP'
         for dom in doms:
             info = server.xend_domain(dom)
-            vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', 
'?').replace('-','')
+            vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', 
'-1').replace('-1','#')
             cpumap = sxp.child_value(info, 'cpumap', [])
             mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1
             count = 0
@@ -415,7 +415,10 @@
                 d['name']   = sxp.child_value(info, 'name', '??')
                 d['dom']    = int(sxp.child_value(info, 'id', '-1'))
                 d['vcpu']   = int(count)
-                d['cpu']    = int(cpu)
+                if cpu == "#":
+                    d['cpu']    = int("-1")
+                else:
+                    d['cpu']    = int(cpu)
                 d['cpumap'] = int(cpumap[count])&mask
                 count = count + 1
                 print ("%(name)-16s %(dom)3d  %(vcpu)4d  %(cpu)3d  
0x%(cpumap)x" % d)
diff -urN b/xen/common/dom0_ops.c vcpu_down/xen/common/dom0_ops.c
--- b/xen/common/dom0_ops.c     2005-06-12 22:13:43.000000000 -0500
+++ vcpu_down/xen/common/dom0_ops.c     2005-06-13 11:09:35.000000000 -0500
@@ -334,9 +334,14 @@
          * - domain is marked as paused or blocked only if all its vcpus 
          *   are paused or blocked 
          * - domain is marked as running if any of its vcpus is running
+         * - only map vcpus that aren't down.  Note, at some point we may
+         *   wish to demux the -1 value to indicate down vs. not-ever-booted
+         *   
          */
         for_each_vcpu ( d, v ) {
-            op->u.getdomaininfo.vcpu_to_cpu[v->vcpu_id] = v->processor;
+            /* only map vcpus that are up */
+            if ( !(test_bit(_VCPUF_down, &v->vcpu_flags)) )
+                op->u.getdomaininfo.vcpu_to_cpu[v->vcpu_id] = v->processor;
             op->u.getdomaininfo.cpumap[v->vcpu_id]      = v->cpumap;
             if ( !(v->vcpu_flags & VCPUF_ctrl_pause) )
                 flags &= ~DOMFLAGS_PAUSED;
@@ -373,7 +378,8 @@
     { 
         struct vcpu_guest_context *c;
         struct domain             *d;
-        struct vcpu               *v;
+        struct vcpu               *v=NULL;
+        int i;
 
         d = find_domain_by_id(op->u.getvcpucontext.domain);
         if ( d == NULL )
@@ -388,8 +394,15 @@
             put_domain(d);
             break;
         }
+
+        /* find first valid vcpu starting from request. */
+        for ( i=op->u.getvcpucontext.vcpu; i<MAX_VIRT_CPUS; i++ )
+        {
+            v = d->vcpu[i];
+            if ( v != NULL && !(test_bit(_VCPUF_down, &v->vcpu_flags)) )
+                break;
+        }
         
-        v = d->vcpu[op->u.getvcpucontext.vcpu];
         if ( v == NULL )
         {
             ret = -ESRCH;
diff -urN b/xen/common/schedule.c vcpu_down/xen/common/schedule.c
--- b/xen/common/schedule.c     2005-06-12 22:13:45.000000000 -0500
+++ vcpu_down/xen/common/schedule.c     2005-06-13 10:41:57.000000000 -0500
@@ -261,6 +261,44 @@
     return 0;
 }
 
+/* Mark target vcpu as non-runnable so it is not scheduled */
+static long do_vcpu_down(int vcpu)
+{
+    struct vcpu *target;
+    
+    if (vcpu > MAX_VIRT_CPUS)
+        return -EINVAL;
+
+    target = current->domain->vcpu[vcpu];
+    /* DEBUG
+     * printk("DOM%d VCPU%d going down\n",
+     *     target->domain->domain_id, target->vcpu_id);
+     */
+    set_bit(_VCPUF_down, &target->vcpu_flags);
+
+    return 0;
+}
+
+/* Mark target vcpu as runnable and wake it */
+static long do_vcpu_up(int vcpu)
+{
+    struct vcpu *target;
+   
+    if (vcpu > MAX_VIRT_CPUS)
+        return -EINVAL;
+
+    target = current->domain->vcpu[vcpu];
+    /* DEBUG
+     * printk("DOM%d VCPU%d coming up\n", 
+     *     target->domain->domain_id, target->vcpu_id);
+     */
+    clear_bit(_VCPUF_down, &target->vcpu_flags);
+    /* wake vcpu */
+    domain_wake(target);
+
+    return 0;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
@@ -290,6 +328,16 @@
         domain_shutdown((u8)(op >> SCHEDOP_reasonshift));
         break;
     }
+    case SCHEDOP_vcpu_down:
+    {
+        ret = do_vcpu_down((int)(op >> SCHEDOP_vcpushift));
+        break;
+    }
+    case SCHEDOP_vcpu_up:
+    {
+        ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
+        break;
+    }
 
     default:
         ret = -ENOSYS;
diff -urN b/xen/include/public/xen.h vcpu_down/xen/include/public/xen.h
--- b/xen/include/public/xen.h  2005-06-12 22:13:44.000000000 -0500
+++ vcpu_down/xen/include/public/xen.h  2005-06-13 10:41:57.000000000 -0500
@@ -200,8 +200,11 @@
 #define SCHEDOP_yield           0   /* Give up the CPU voluntarily.       */
 #define SCHEDOP_block           1   /* Block until an event is received.  */
 #define SCHEDOP_shutdown        2   /* Stop executing this domain.        */
+#define SCHEDOP_vcpu_down       3   /* make target VCPU not-runnable.     */
+#define SCHEDOP_vcpu_up         4   /* make target VCPU runnable.         */
 #define SCHEDOP_cmdmask       255   /* 8-bit command. */
 #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
+#define SCHEDOP_vcpushift       8   /* 8-bit VCPU target. (SCHEDOP_up|down) */
 
 /*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control 
diff -urN b/xen/include/xen/sched.h vcpu_down/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h   2005-06-12 22:13:38.000000000 -0500
+++ vcpu_down/xen/include/xen/sched.h   2005-06-13 10:41:57.000000000 -0500
@@ -346,6 +346,9 @@
  /* Initialization completed. */
 #define _VCPUF_initialised     8
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
+ /* VCPU is not-runnable */
+#define _VCPUF_down            9
+#define VCPUF_down             (1UL<<_VCPUF_down)
 
 /*
  * Per-domain flags (domain_flags).
@@ -375,7 +378,7 @@
 static inline int domain_runnable(struct vcpu *v)
 {
     return ( (atomic_read(&v->pausecnt) == 0) &&
-             !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause)) &&
+             !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause|VCPUF_down)) &&
              !(v->domain->domain_flags & (DOMF_shutdown|DOMF_shuttingdown)) );
 }
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.