[Xen-changelog] Actually make suspending SMP domUs work: the pre

# HG changeset patch
# User sos22@xxxxxxxxxxxxxxxxxxxx
# Node ID 7f9b024a509e1a3838fa226cf18d512fc161d370
# Parent  6e6cedc1763db80ac68fefbe6062594416c75aa1
Actually make suspending SMP domUs work: the previous commit didn't
bring the other vcpus up correctly.

Signed-off-by: Steven Smith, sos22@xxxxxxxxx

diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Wed Aug 17 
14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Thu Aug 18 
15:27:55 2005
@@ -115,20 +115,12 @@
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
-       /* Ack it */
-       __get_cpu_var(cpu_state) = CPU_DEAD;
-
-       /* We shouldn't have to disable interrupts while dead, but
-        * some interrupts just don't seem to go away, and this makes
-        * it "work" for testing purposes. */
        /* Death loop */
        while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
                HYPERVISOR_yield();
 
-       local_irq_disable();
        __flush_tlb_all();
        cpu_set(smp_processor_id(), cpu_online_map);
-       local_irq_enable();
 }
 #else
 static inline void play_dead(void)
@@ -156,12 +148,23 @@
                        rmb();
 
                        if (cpu_is_offline(cpu)) {
+                               printk("<0>Cpu %d going offline.\n",
+                                      cpu);
+                               local_irq_disable();
+                               /* Ack it.  From this point on until
+                                  we get woken up, we're not allowed
+                                  to take any locks.  In particular,
+                                  don't printk. */
+                               __get_cpu_var(cpu_state) = CPU_DEAD;
 #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
                                /* Tell hypervisor to take vcpu down. */
                                HYPERVISOR_vcpu_down(cpu);
 #endif
                                play_dead();
-         }
+                               local_irq_enable();
+                               printk("<0>Cpu %d back online.\n",
+                                      cpu);
+                       }
 
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        xen_idle();
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c   Wed Aug 17 14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c   Thu Aug 18 15:27:55 2005
@@ -129,9 +129,12 @@
 
 DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
 
+unsigned uber_debug;
+
 static inline void __send_IPI_one(unsigned int cpu, int vector)
 {
        unsigned int evtchn;
+       int r;
 
        evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
        // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, 
vector, evtchn);
@@ -142,7 +145,11 @@
                       synch_test_bit(evtchn, &s->evtchn_mask[0]))
                        ;
 #endif
-               notify_via_evtchn(evtchn);
+               if (uber_debug)
+                       printk("<0>Notifying on evtchn %d.\n", evtchn);
+               if ((r = notify_via_evtchn(evtchn)) != 0)
+                       printk("<0>Hypervisor stopped us sending an IPI: %d.\n",
+                              r);
        } else
                printk("send_IPI to unbound port %d/%d",
                       cpu, vector);
@@ -161,6 +168,8 @@
                        if (cpu == smp_processor_id())
                                continue;
                        if (cpu_isset(cpu, cpu_online_map)) {
+                               if (uber_debug)
+                                       printk("<0>Sending ipi to %d.\n", cpu);
                                __send_IPI_one(cpu, vector);
                        }
                }
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Wed Aug 17 
14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Thu Aug 18 
15:27:55 2005
@@ -1616,3 +1616,19 @@
        smp_intr_init();
        local_setup_timer_irq();
 }
+
+DECLARE_PER_CPU(int, timer_irq);
+
+void _restore_vcpu(void)
+{
+       int cpu = smp_processor_id();
+       /* First code a vcpu runs after a restore: restore_vcpu_context()
+          redirects its saved eip here.  Rebind this vcpu's timer and
+          debug VIRQs and its reschedule/call-function IPIs to their
+          existing irqs; returning resumes the vcpu's idle loop. */
+       printk("<0>_restore_vcpu %d.\n", cpu);
+       _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
+       _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
+       _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
+       _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) 
);
+}
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Wed Aug 17 14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Thu Aug 18 15:27:55 2005
@@ -745,7 +745,7 @@
 #endif
 
 /* Dynamically-mapped IRQ. */
-static DEFINE_PER_CPU(int, timer_irq);
+DEFINE_PER_CPU(int, timer_irq);
 
 static struct irqaction irq_timer = {
        timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Wed Aug 17 14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Thu Aug 18 15:27:55 2005
@@ -512,6 +512,7 @@
        printk("%08lx\n", regs->eip);
        dump_fault_path(address);
        die("Oops", regs, error_code);
+       while(1);
        bust_spinlocks(0);
        do_exit(SIGKILL);
 
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Wed Aug 17 14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Thu Aug 18 15:27:55 2005
@@ -134,6 +134,8 @@
     (void)HYPERVISOR_xen_version(0);
 }
 
+extern unsigned uber_debug;
+
 /* NB. Interrupts are disabled on entry. */
 asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
 {
@@ -145,6 +147,8 @@
 
     vcpu_info->evtchn_upcall_pending = 0;
     
+    if (uber_debug && cpu != 0)
+       printk("<0>evtchn_do_upcall on %d.\n", cpu);
     /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
     l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
     while ( l1 != 0 )
@@ -158,9 +162,13 @@
             l2 &= ~(1 << l2i);
             
             port = (l1i << 5) + l2i;
-            if ( (irq = evtchn_to_irq[port]) != -1 )
+           if (uber_debug && cpu != 0)
+               printk("<0>Port %d.\n", port);
+            if ( (irq = evtchn_to_irq[port]) != -1 ) {
+               if (uber_debug && cpu != 0)
+                   printk("<0>irq %d.\n", irq);
                 do_IRQ(irq, regs);
-            else
+           } else
                 evtchn_device_upcall(port);
         }
     }
@@ -243,6 +251,71 @@
     }
 
     spin_unlock(&irq_mapping_update_lock);
+}
+
+/* This is only used when a vcpu is restored after an xm save.  The ipi
+   is expected to have been bound before we suspended, so all of the
+   xenolinux state is already set up; we only need to restore the Xen
+   side of things.  The irq number has to stay the same, but the evtchn
+   number can change. */
+void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
+{
+    evtchn_op_t op;
+    int evtchn;
+
+    spin_lock(&irq_mapping_update_lock);
+
+    op.cmd = EVTCHNOP_bind_ipi;
+    if ( HYPERVISOR_event_channel_op(&op) != 0 )
+       panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
+    evtchn = op.u.bind_ipi.port;
+
+    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
+          ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
+          evtchn);
+
+    evtchn_to_irq[irq_to_evtchn[irq]] = -1;   /* forget the old port */
+    irq_to_evtchn[irq] = -1;
+
+    evtchn_to_irq[evtchn] = irq;
+    irq_to_evtchn[irq]    = evtchn;
+
+    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
+
+    bind_evtchn_to_cpu(evtchn, vcpu);
+
+    spin_unlock(&irq_mapping_update_lock);
+
+    clear_bit(evtchn, HYPERVISOR_shared_info->evtchn_mask); /* unmask */
+}
+
+/* VIRQ counterpart of _bind_ipi_to_irq: bind virq with
+   EVTCHNOP_bind_virq and map the returned port onto the given irq for
+   cpu.  Nothing is returned; the caller already knows irq. */
+void _bind_virq_to_irq(int virq, int cpu, int irq)
+{
+    evtchn_op_t op;
+    int evtchn;
+
+    spin_lock(&irq_mapping_update_lock);
+
+    op.cmd              = EVTCHNOP_bind_virq;
+    op.u.bind_virq.virq = virq;
+    if ( HYPERVISOR_event_channel_op(&op) != 0 )
+            panic("Failed to bind virtual IRQ %d\n", virq);
+    evtchn = op.u.bind_virq.port;
+
+    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+    irq_to_evtchn[irq] = -1;
+
+    evtchn_to_irq[evtchn] = irq;
+    irq_to_evtchn[irq]    = evtchn;
+
+    per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+    bind_evtchn_to_cpu(evtchn, cpu);
+
+    spin_unlock(&irq_mapping_update_lock);
 }
 
 int bind_ipi_to_irq(int ipi)
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Wed Aug 17 14:37:22 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Thu Aug 18 15:27:55 2005
@@ -65,10 +65,56 @@
 #define cpu_up(x) (-EOPNOTSUPP)
 #endif
 
+static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int rc, frame, nr_gdt_frames;
+
+    rc = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+    BUG_ON(rc != 0);
+    /* restore_vcpu_context() applies phys_to_machine()/pfn_to_mfn(). */
+    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+    nr_gdt_frames = (ctxt->gdt_ents + 511) / 512;
+    for (frame = 0; frame < nr_gdt_frames; frame++)
+       ctxt->gdt_frames[frame] = mfn_to_pfn(ctxt->gdt_frames[frame]);
+}
+
+void _restore_vcpu(void);   /* defined in smpboot.c; takes no arguments */
+
+/* Reboot vcpu from ctxt as filled in by save_vcpu_context(), arranging
+   for it to run _restore_vcpu() before resuming where it stopped. */
+static void restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int r;
+    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+    /* This is kind of a hack, and implicitly relies on the fact that
+       the vcpu stops in a place where all of the call clobbered
+       registers are already dead. */
+    printk("<0>regs.esp %x.\n", ctxt->user_regs.esp);
+    /* Push the old eip as a return address and enter _restore_vcpu. */
+    ctxt->user_regs.esp -= 4;
+    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
+
+    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+       ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+    if (r != 0)
+       printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+}
+
+/* printk can call into the scheduler, so write literals straight to the
+   Xen console; sizeof(x) - 1 leaves out the literal's trailing NUL. */
+#define msg(x) HYPERVISOR_console_io(CONSOLEIO_write, sizeof(x) - 1, x)
+
+extern unsigned uber_debug;
+
 static int __do_suspend(void *ignore)
 {
     int i, j;
     suspend_record_t *suspend_record;
+    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
 
     /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
        /* XXX SMH: yes it would :-( */ 
@@ -158,6 +204,10 @@
 
     __cli();
 
+    for (i = 0; i < NR_CPUS; i++)
+       if (cpu_isset(i, feasible_cpus))
+           save_vcpu_context(i, &suspended_cpu_records[i]);
+
 #ifdef __i386__
     mm_pin_all();
     kmem_cache_shrink(pgd_cache);
@@ -173,7 +223,9 @@
     smp_suspend();
 #endif
 
+    msg("xenbus going down.\n");
     xenbus_suspend();
+    msg("xenbus gone down.\n");
 
     ctrl_if_suspend();
 
@@ -187,10 +239,11 @@
     memcpy(&suspend_record->resume_info, &xen_start_info,
            sizeof(xen_start_info));
 
+    msg("Suspending...\n");
     /* We'll stop somewhere inside this hypercall.  When it returns,
        we'll start resuming after the restore. */
     HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
-
+    msg("Back from suspension\n");
 
     shutting_down = SHUTDOWN_INVALID; 
 
@@ -217,7 +270,9 @@
 
     ctrl_if_resume();
 
+    msg("Here comes the xenbus...\n");
     xenbus_resume();
+    msg("xenbus resumed.\n");
 
 #ifdef CONFIG_SMP
     smp_resume();
@@ -231,21 +286,32 @@
 
     usbif_resume();
 
-    preempt_enable();
+    msg("Restoring cpu contexts...\n");
+    for (i = 0; i < NR_CPUS; i++)
+       if (cpu_isset(i, feasible_cpus))
+           restore_vcpu_context(i, &suspended_cpu_records[i]);
+    msg("All vcpus rebooted.\n");
 
     __sti();
 
  out_reenable_cpus:
+    msg("Reenabling cpus.\n");
     while (!cpus_empty(feasible_cpus)) {
        i = first_cpu(feasible_cpus);
+       printk("<0>Bring up %d/%d.\n", i, num_online_cpus());
+       printk("<0>17 preempt_count %x.\n", preempt_count());
        j = cpu_up(i);
+       printk("<0>18 preempt_count %x.\n", preempt_count());
        if (j != 0) {
            printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
                   i, j);
            err = j;
        }
+       printk("<0>%d up.\n", i);
        cpu_clear(i, feasible_cpus);
     }
+    msg("Reenabled cpus.\n");
+    uber_debug = 0;
 
  out:
     if ( suspend_record != NULL )
diff -r 6e6cedc1763d -r 7f9b024a509e 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Wed Aug 17 
14:37:22 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Aug 18 
15:27:55 2005
@@ -163,7 +163,7 @@
         TRAP_INSTR
         : "=a" (ret), "=b" (ign)
        : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
-       : "memory" );
+       : "memory", "ecx" );
 
     return ret;
 }
@@ -178,7 +178,7 @@
         TRAP_INSTR
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
-       : "memory" );
+       : "memory", "ecx" );
 
     return ret;
 }
@@ -194,7 +194,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
-        : "memory" );
+        : "memory", "ecx" );
 
     return ret;
 }
@@ -210,7 +210,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
-        : "memory" );
+        : "memory", "ecx" );
 
     return ret;
 }
@@ -228,7 +228,7 @@
         : "=a" (ret), "=b" (ign1), "=S" (ign2)
        : "0" (__HYPERVISOR_sched_op),
         "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
-        "S" (srec) : "memory");
+        "S" (srec) : "memory", "ecx");
 
     return ret;
 }
@@ -244,7 +244,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
-        : "memory" );
+        : "memory", "ecx" );
 
     return ret;
 }
@@ -534,7 +534,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
-        : "memory" );
+        : "memory", "ecx", "edx" );
 
     return ret;
 }
@@ -550,8 +550,26 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+        : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int  /* SCHEDOP_vcpu_pickle: copy vcpu's context to *ctxt */
+HYPERVISOR_vcpu_pickle(
+    int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+    unsigned long ign1, ign2;  /* dummy outputs: gcc sees ebx/ecx as written */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+         "2" (ctxt)                /* ecx: buffer the hypervisor copies into */
         : "memory" );
 
     return ret;
 }
+
 #endif /* __HYPERCALL_H__ */
diff -r 6e6cedc1763d -r 7f9b024a509e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Aug 17 14:37:22 2005
+++ b/xen/arch/x86/domain.c     Thu Aug 18 15:27:55 2005
@@ -217,8 +217,16 @@
     return xmalloc(struct vcpu);
 }
 
+/* Unlink v from its predecessor in the domain's vcpu list, then free it.
+   No list head is updated here, so we assume vcpu 0 is always the last
+   one freed, i.e. if v->vcpu_id == 0 the domain is single-processor. */
 void arch_free_vcpu_struct(struct vcpu *v)
 {
+    struct vcpu *p;
+    for_each_vcpu(v->domain, p) {
+        if (p->next_in_list == v)
+            p->next_in_list = v->next_in_list;
+    }
     xfree(v);
 }
 
@@ -402,8 +410,10 @@
     if ( !(c->flags & VGCF_VMX_GUEST) )
     {
         if ( ((c->user_regs.cs & 3) == 0) ||
-             ((c->user_regs.ss & 3) == 0) )
-                return -EINVAL;
+             ((c->user_regs.ss & 3) == 0) ) {
+            printf("User regs.cs %x, ss %x.\n", c->user_regs.cs, 
c->user_regs.ss);
+            return -EINVAL;
+        }
     }
 
     clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
@@ -448,8 +458,10 @@
 
     if ( shadow_mode_refcounts(d) )
     {
-        if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
+        if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) ) {
+            printf("Bad phys_basetab %lx.\n", phys_basetab);
             return -EINVAL;
+        }
     }
     else
     {
@@ -457,13 +469,16 @@
         if ( !(c->flags & VGCF_VMX_GUEST) )
 #endif
             if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
-                  PGT_base_page_table) )
+                                    PGT_base_page_table) ) {
+                printf("Bad phys_basetab2 %lx.\n", phys_basetab);
                 return -EINVAL;
+            }
     }
 
     if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
     {
         put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
+        printf("Failed to set gdt, %d.\n", rc);
         return rc;
     }
 
@@ -484,6 +499,8 @@
 
     /* Don't redo final setup */
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
+
+    printf("Arch set_info_guest succeeded.\n");
 
     return 0;
 }
diff -r 6e6cedc1763d -r 7f9b024a509e xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Aug 17 14:37:22 2005
+++ b/xen/arch/x86/mm.c Thu Aug 18 15:27:55 2005
@@ -2631,16 +2631,25 @@
     int i, nr_pages = (entries + 511) / 512;
     unsigned long pfn;
 
-    if ( entries > FIRST_RESERVED_GDT_ENTRY )
+    if ( entries > FIRST_RESERVED_GDT_ENTRY ) {
+        printf("Too many entries in gdt (%d).\n", entries);
         return -EINVAL;
+    }
     
     shadow_sync_all(d);
 
     /* Check the pages in the new GDT. */
-    for ( i = 0; i < nr_pages; i++ )
-        if ( ((pfn = frames[i]) >= max_page) ||
-             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
+    for ( i = 0; i < nr_pages; i++ ) {
+        pfn = frames[i];
+        if (pfn >= max_page) {
+            printf("GDT bad as %ld >= %ld.\n", pfn, max_page);
             goto fail;
+        }
+        if (!get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) ) {
+            printf("Frame %ld looks bad.\n", pfn);
+            goto fail;
+        }
+    }
 
     /* Tear down the old GDT. */
     destroy_gdt(v);
diff -r 6e6cedc1763d -r 7f9b024a509e xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Wed Aug 17 14:37:22 2005
+++ b/xen/arch/x86/time.c       Thu Aug 18 15:27:55 2005
@@ -831,7 +831,7 @@
     tsc_elapsed32 = (u32)tsc_elapsed64;
 
     /* tsc_elapsed > stime_elapsed */
-    ASSERT(tsc_elapsed32 != 0);
+    //    ASSERT(tsc_elapsed32 != 0);
     while ( tsc_elapsed32 <= stime_elapsed32 )
     {
         tsc_elapsed32 <<= 1;
diff -r 6e6cedc1763d -r 7f9b024a509e xen/common/domain.c
--- a/xen/common/domain.c       Wed Aug 17 14:37:22 2005
+++ b/xen/common/domain.c       Thu Aug 18 15:27:55 2005
@@ -178,6 +178,7 @@
 {
     struct domain *d = current->domain;
     struct vcpu *v;
+    int cntr;
 
     if ( d->domain_id == 0 )
     {
@@ -208,8 +209,17 @@
     }
 
     /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
-    for_each_vcpu ( d, v )
+    cntr = 0;
+    printf("Putting %d to sleep.\n", d->domain_id);
+    for_each_vcpu ( d, v ) {
+        if (test_bit(_VCPUF_down, &v->vcpu_flags)) {
+            printf("vcpu %d is down.\n", v->vcpu_id);
+        } else {
+            printf("vcpu %d is up.\n", v->vcpu_id);
+        }
         domain_sleep_nosync(v);
+    }
+    printf("Put %d vcpus to sleep for domain shutdown.\n", cntr);
 }
 
 
diff -r 6e6cedc1763d -r 7f9b024a509e xen/common/event_channel.c
--- a/xen/common/event_channel.c        Wed Aug 17 14:37:22 2005
+++ b/xen/common/event_channel.c        Thu Aug 18 15:27:55 2005
@@ -292,6 +292,8 @@
         chn = evtchn_from_port(d, port);
         chn->state          = ECS_IPI;
         chn->notify_vcpu_id = current->vcpu_id;
+        printf("Bound ipi on vcpu %d to port %d.\n", current->vcpu_id,
+               port);
     }
 
     spin_unlock(&d->evtchn_lock);
@@ -497,9 +499,24 @@
         evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
         break;
     case ECS_IPI:
+        if (current->domain->domain_id != 0) {
+            printf("Set %d pending on %d.\n", lport,
+                   lchn->notify_vcpu_id);
+            if (lport == 7) {
+                struct vcpu *v = ld->vcpu[lchn->notify_vcpu_id];
+                struct domain *d = v->domain;
+                shared_info_t *s = d->shared_info;
+                printf("pending %x, mask %x, pending_sel %x, upcall_pending 
%x.\n",
+                       s->evtchn_pending[0],
+                       s->evtchn_mask[0],
+                       v->vcpu_info->evtchn_pending_sel,
+                       v->vcpu_info->evtchn_upcall_pending);
+            }
+        }
         evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
         break;
     default:
+        printf("Failed to set %d pending: state %d.\n", lport, lchn->state);
         ret = -EINVAL;
     }
 
diff -r 6e6cedc1763d -r 7f9b024a509e xen/common/schedule.c
--- a/xen/common/schedule.c     Wed Aug 17 14:37:22 2005
+++ b/xen/common/schedule.c     Thu Aug 18 15:27:55 2005
@@ -38,6 +38,8 @@
 #include <xen/mm.h>
 #include <public/sched_ctl.h>
 
+extern void arch_getdomaininfo_ctxt(struct vcpu *,
+                                    struct vcpu_guest_context *);
 /* opt_sched: scheduler - default to SEDF */
 static char opt_sched[10] = "sedf";
 string_param("sched", opt_sched);
@@ -82,7 +84,8 @@
     int i;
 
     SCHED_OP(free_task, d);
-    for (i = 0; i < MAX_VIRT_CPUS; i++)
+    /* vcpu 0 has to be the last one destructed. */
+    for (i = MAX_VIRT_CPUS-1; i >= 0; i--)
         if ( d->vcpu[i] )
             arch_free_vcpu_struct(d->vcpu[i]);
 
@@ -295,10 +298,37 @@
     return 0;
 }
 
+/* Copy a downed vcpu's context to the guest buffer at arg; 0 or -errno. */
+static long do_vcpu_pickle(int vcpu, unsigned long arg)
+{
+    struct vcpu *v;
+    vcpu_guest_context_t *c;
+    int ret = 0;
+
+    if (vcpu < 0 || vcpu >= MAX_VIRT_CPUS)  /* guest-supplied array index */
+        return -EINVAL;
+    v = current->domain->vcpu[vcpu];
+    if (!v)
+        return -ESRCH;
+    /* Don't pickle vcpus which are currently running */
+    if (!test_bit(_VCPUF_down, &v->vcpu_flags)) {
+        printf("Pickling a live vcpu?\n");
+        return -EBUSY;
+    }
+    c = xmalloc(vcpu_guest_context_t);
+    if (!c)
+        return -ENOMEM;
+    arch_getdomaininfo_ctxt(v, c);
+    if (copy_to_user((vcpu_guest_context_t *)arg, c, sizeof(*c)))
+        ret = -EFAULT;
+    xfree(c);
+    return ret;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
-long do_sched_op(unsigned long op)
+long do_sched_op(unsigned long op, unsigned long arg)
 {
     long ret = 0;
 
@@ -332,6 +362,12 @@
     case SCHEDOP_vcpu_up:
     {
         ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
+        break;
+    }
+    case SCHEDOP_vcpu_pickle:
+    {
+        ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg);
+        printf("Pickle result %ld.\n", ret);
         break;
     }
 
diff -r 6e6cedc1763d -r 7f9b024a509e xen/include/public/xen.h
--- a/xen/include/public/xen.h  Wed Aug 17 14:37:22 2005
+++ b/xen/include/public/xen.h  Thu Aug 18 15:27:55 2005
@@ -203,6 +203,7 @@
 #define SCHEDOP_shutdown        2   /* Stop executing this domain.        */
 #define SCHEDOP_vcpu_down       3   /* make target VCPU not-runnable.     */
 #define SCHEDOP_vcpu_up         4   /* make target VCPU runnable.         */
+#define SCHEDOP_vcpu_pickle     5   /* save a vcpu's context to memory.   */
 #define SCHEDOP_cmdmask       255   /* 8-bit command. */
 #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
 #define SCHEDOP_vcpushift       8   /* 8-bit VCPU target. (SCHEDOP_up|down) */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
WARNING - OLD ARCHIVES

xen-changelog

[Xen-changelog] Actually make suspending SMP domUs work: the previous co