WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] AP boot support

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] AP boot support
From: Kip Macy <kmacy@xxxxxxxxxx>
Date: Sat, 7 May 2005 21:36:21 -0700 (PDT)
Delivery-date: Sun, 08 May 2005 04:36:06 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/05/07 21:32:49-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx 
#   get AP booting working 
#   currently crashing in init_secondary - will fix after adding SMP debug support
#   Signed-off-by: Kip Macy <kmacy@xxxxxxxxxxx>
# 
# freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +4 -0
#   add declaration for per-cpu clock init
# 
# freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +1 -0
#   make pmap_lazyfix_action global
# 
# freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +6 -1
#   add IPI fields
# 
# freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +16 -0
#   add boot_vcpu call
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +0 -2
#   make PANIC_IF declaration global
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +0 -1
#   make pmap_lazyfix_action global
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +229 -55
#   add support for booting APs
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +78 -46
#   do per-cpu GDT initialization up-front
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +15 -8
#   special case AST IPI
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
#   2005/05/07 21:32:46-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +60 -14
#   add per-cpu clock support
# 
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c  2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c  2005-05-06 21:37:31 -07:00
@@ -87,6 +87,12 @@
 
 /* XEN specific defines */
 #include <machine/xen_intr.h>
+#include <vm/vm.h>   /* needed by machine/pmap.h */
+#include <vm/pmap.h> /* needed by machine/pmap.h */
+#include <machine/pmap.h> /* needed by xen-os.h */
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h> /* needed by xenfunc.h */
+#include <machine/xenfunc.h>
 
 /*
  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -129,7 +135,15 @@
 static uint32_t shadow_time_version;
 static struct timeval shadow_tv;
 
+#define DEFINE_PER_CPU(type, name) \
+    __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu)           (*((void)cpu, &per_cpu__##var))
+
+
 static uint64_t processed_system_time;/* System time (ns) at last processing. */
+static DEFINE_PER_CPU(uint64_t, processed_system_time);
+
 
 #define NS_PER_TICK (1000000000ULL/hz)
 
@@ -202,18 +216,19 @@
 static void 
 clkintr(struct clockframe *frame)
 {
-    int64_t delta;
+    int64_t cpu_delta, delta;
+    int cpu = smp_processor_id();
     long ticks = 0;
 
-
     do {
        __get_time_values_from_xen();
-       delta = (int64_t)(shadow_system_time + 
-                         xen_get_offset() * 1000 - 
-                         processed_system_time);
+       delta = cpu_delta = (int64_t)shadow_system_time + 
+               (int64_t)xen_get_offset() * 1000;
+       delta -= processed_system_time;
+       cpu_delta -= per_cpu(processed_system_time, cpu);
     } while (!TIME_VALUES_UP_TO_DATE);
 
-    if (unlikely(delta < 0)) {
+    if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
         printk("Timer ISR: Time went backwards: %lld\n", delta);
         return;
     }
@@ -225,15 +240,28 @@
         delta -= NS_PER_TICK;
         processed_system_time += NS_PER_TICK;
     }
-
-    if (ticks > 0) {
-       if (frame)
-               timer_func(frame);
-#ifdef SMP
-       if (timer_func == hardclock && frame)
-               forward_hardclock();
+    /* Local CPU jiffy work. */
+    while (cpu_delta >= NS_PER_TICK) {
+           cpu_delta -= NS_PER_TICK;
+           per_cpu(processed_system_time, cpu) += NS_PER_TICK;
+#if 0
+           update_process_times(user_mode(regs));
+           profile_tick(CPU_PROFILING, regs);
 #endif
     }
+    if (ticks > 0) {
+       if (frame) timer_func(frame);
+    }
+    
+    if (cpu != 0)
+           return;
+    /*
+     * Take synchronised time from Xen once a minute if we're not
+     * synchronised ourselves, and we haven't chosen to keep an independent
+     * time base.
+     */
+    
+    /* XXX TODO */
 }
 
 #include "opt_ddb.h"
@@ -429,7 +457,7 @@
  * Start clocks running.
  */
 void
-cpu_initclocks()
+cpu_initclocks(void)
 {
        int diag;
        int time_irq = bind_virq_to_irq(VIRQ_TIMER);
@@ -445,7 +473,25 @@
        /* initialize xen values */
        __get_time_values_from_xen();
        processed_system_time = shadow_system_time;
+       per_cpu(processed_system_time, 0) = processed_system_time;
+
+}
+
+#ifdef SMP 
+void
+ap_cpu_initclocks(void)
+{
+       int irq;
+       int cpu = smp_processor_id();
+
+       per_cpu(processed_system_time, cpu) = shadow_system_time;
+       
+       irq = bind_virq_to_irq(VIRQ_TIMER);
+       PCPU_SET(time_irq, irq);
+       PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, 
+                                 NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
 }
+#endif
 
 void
 cpu_startprofclock(void)
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c 2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c 2005-05-06 21:37:31 -07:00
@@ -79,9 +79,14 @@
                 l2 &= ~(1 << l2i);
             
                 port = (l1i << 5) + l2i;
+               irq = evtchn_to_irq[port];
+#ifdef SMP             
+               if (irq == PCPU_GET(cpuast)) 
+                       continue;
+#endif
                 if ( (owned = mtx_owned(&sched_lock)) != 0 )
                     mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
-                if ( (irq = evtchn_to_irq[port]) != -1 ) {
+                if ( irq != -1 ) {
                    struct intsrc *isrc = intr_lookup_source(irq);
                    intr_execute_handlers(isrc, frame);
                } else {
@@ -584,6 +589,7 @@
         PCPU_GET(virq_to_irq)[i] = -1;
 }
 
+
 static void 
 evtchn_init(void *dummy __unused)
 {
@@ -591,13 +597,6 @@
     struct xenpic *xp;
     struct xenpic_intsrc *pin;
 
-    /*
-     * xenpic_lock: in order to allow an interrupt to occur in a critical
-     *                 section, to set pcpu->ipending (etc...) properly, we
-     *         must be able to get the icu lock, so it can't be
-     *         under witness.
-     */
-    mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
 
     /* XXX -- expedience hack */
     PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
@@ -657,3 +656,11 @@
 }
 
 SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
+    /*
+     * xenpic_lock: in order to allow an interrupt to occur in a critical
+     *                 section, to set pcpu->ipending (etc...) properly, we
+     *         must be able to get the icu lock, so it can't be
+     *         under witness.
+     */
+
+MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS);
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c        2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c        2005-05-06 21:37:31 -07:00
@@ -78,6 +78,7 @@
 #include <sys/sched.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
+#include <sys/smp.h>
 #include <sys/ucontext.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
@@ -883,14 +884,6 @@
 static void
 cpu_idle_default(void)
 {
-#if 0
-       /*
-        * we must absolutely guarentee that hlt is the
-        * absolute next instruction after sti or we
-        * introduce a timing window.
-        */
-       __asm __volatile("sti; hlt");
-#endif
        idle_block();
        enable_intr();
 }
@@ -1376,6 +1369,7 @@
 unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
 int preemptable;
 int gdt_set;
+static int ncpus;
 
 /* Linux infection */
 #define PAGE_OFFSET  KERNBASE
@@ -1387,6 +1381,10 @@
     int i;
     vm_paddr_t pdir_shadow_ma, KPTphys;
     vm_offset_t *pdir_shadow;
+#ifdef SMP
+    int j;
+#endif
+
 #ifdef WRITABLE_PAGETABLES
     printk("using writable pagetables\n");
     HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
@@ -1447,18 +1445,19 @@
 
 
 #ifdef SMP
+#if 0
     /* allocate cpu0 private page */
     cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
     tmpindex++; 
-
+#endif
     /* allocate SMP page table */
     SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
-
+#if 0
     /* Map the private page into the SMP page table */
     SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
-
+#endif
     /* map SMP page table RO */
-    PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW);
+    PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
 
     /* put the page table into the page directory */
     xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 
@@ -1496,44 +1495,61 @@
     tmpindex++;
 
     HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
+    ncpus = HYPERVISOR_shared_info->n_vcpu; 
+#ifdef SMP
+    for (i = 0; i < ncpus; i++) {
+           int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
+           for (j = 0; j < npages; j++) {
+                   vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
+                   tmpindex++;
+                   PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE);
+           }
+    }
+    xen_flush_queue();
+#endif
     
     init_first = tmpindex;
     
 }
 
+
+trap_info_t trap_table[] = {
+       { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
+       { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
+       { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
+       { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
+       /* This is UPL on Linux and KPL on BSD */
+       { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
+       { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
+       { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
+       /*
+        * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
+        *   no handler for double fault
+        */
+       { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
+       {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
+       {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
+       {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
+       {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
+       {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
+       {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
+       {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
+       {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
+       {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
+       {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
+       {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
+       {  0, 0,           0, 0 }
+};
+
 void
 init386(void)
 {
        int gsel_tss, metadata_missing, off, x, error;
        struct pcpu *pc;
        unsigned long gdtmachpfn;
-       trap_info_t trap_table[] = {
-           { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
-           { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
-           { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
-           { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
-           /* This is UPL on Linux and KPL on BSD */
-           { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
-           { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
-           { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
-           /*
-            * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
-            *   no handler for double fault
-            */
-           { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
-           {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
-           {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
-           {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
-           {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
-           {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
-           {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
-           {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
-           {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
-           {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
-           {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
-           {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
-           {  0, 0,           0, 0 }
-        };
+#ifdef SMP
+       int i;
+#endif
        proc0.p_uarea = proc0uarea;
        thread0.td_kstack = proc0kstack;
        thread0.td_pcb = (struct pcb *)
@@ -1583,26 +1599,42 @@
        gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); 
 #endif
 #ifdef SMP
-       /* this correspond to the cpu private page as mapped into the SMP page 
-        * table in initvalues
+       /* XXX this will blow up if there are more than 512/NGDT vcpus - will never 
+        * be an issue in the real world but should add an assert on general principles
+        * we'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at which point we
+        * would need to start allocating more pages for the GDT
         */
        pc = &SMP_prvspace[0].pcpu;
-       gdt_segs[GPRIV_SEL].ssd_limit =
-               atop(sizeof(struct privatespace) - 1);
+       for (i = 0; i < ncpus; i++) {
+               cpu_add(i, (i == 0));
+
+               gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
+               gdt_segs[GPRIV_SEL].ssd_limit =
+                       atop(sizeof(struct privatespace) - 1);
+               gdt_segs[GPROC0_SEL].ssd_base =
+                       (int) &SMP_prvspace[i].pcpu.pc_common_tss;
+               SMP_prvspace[i].pcpu.pc_prvspace =
+                       &SMP_prvspace[i].pcpu;
+               
+               for (x = 0; x < NGDT; x++) {
+                       ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
+               }
+       }
 #else
        pc = &__pcpu;
        gdt_segs[GPRIV_SEL].ssd_limit =
                atop(sizeof(struct pcpu) - 1);
-#endif
        gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
        gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
        for (x = 0; x < NGDT; x++)
            ssdtosd(&gdt_segs[x], &gdt[x].sd);
+#endif
+
 
        PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
        gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
-       if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1))) 
-           panic("set_gdt failed");
+       PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0);
+
        
        lgdt_finish();
        gdt_set = 1;
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c     2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c     2005-05-06 21:37:31 -07:00
@@ -83,7 +83,16 @@
 #include <machine/specialreg.h>
 #include <machine/privatespace.h>
 
+
+/* XEN includes */
 #include <machine/xenfunc.h>
+#include <machine/xen_intr.h>
+
+void Xhypervisor_callback(void);
+void failsafe_callback(void);
+
+/***************/
+
 
 #define WARMBOOT_TARGET                0
 #define WARMBOOT_OFF           (KERNBASE + 0x0467)
@@ -94,6 +103,10 @@
 #define BIOS_RESET             (0x0f)
 #define BIOS_WARM              (0x0a)
 
+
+#undef POSTCODE
+#define POSTCODE(x)
+
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -175,6 +188,8 @@
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
+extern trap_info_t trap_table[];
+
 struct pcb stoppcbs[MAXCPU];
 
 /* Variables needed for SMP tlb shootdown. */
@@ -208,7 +223,9 @@
 
 static void    set_logical_apic_ids(void);
 static int     start_all_aps(void);
+#if 0
 static void    install_ap_tramp(void);
+#endif
 static int     start_ap(int apic_id);
 static void    release_aps(void *dummy);
 
@@ -314,6 +331,7 @@
 cpu_mp_probe(void)
 {
 
+       mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
        /*
         * Always record BSP in CPU map so that the mbuf init code works
         * correctly.
@@ -342,20 +360,24 @@
        return (1);
 }
 
-/*
- * Initialize the IPI handlers and start up the AP's.
- */
-void
-cpu_mp_start(void)
+static void
+cpu_mp_ipi_init(void)
 {
-       int i;
-
-       POSTCODE(MP_START_POST);
-
-       /* Initialize the logical ID to APIC ID table. */
-       for (i = 0; i < MAXCPU; i++)
-               cpu_apic_ids[i] = -1;
-
+       int irq;
+       int cpu = smp_processor_id();
+       /* 
+        * these are not needed by XenFreeBSD - from Keir:
+        * For TLB-flush related IPIs, Xen has hypercalls 
+        * you should use instead. You can pass a pointer 
+        * to a vcpu bitmap to update_va_mapping(), and to
+        * MMUEXT_flush_tlb_multi and MMUEXT_invlpg_multi. 
+        * Xen will then make sure that those vcpus get 
+        * flushed appropriately before returning to the
+        * caller.
+        * There is also no indication that we need to forward
+        * clock interrupts.
+        */
+#if 0 
        /* Install an inter-CPU IPI for TLB invalidation */
        setidt(IPI_INVLTLB, IDTVEC(invltlb),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -371,22 +393,69 @@
        /* Install an inter-CPU IPI for forwarding statclock() */
        setidt(IPI_STATCLOCK, IDTVEC(statclock),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-       
+#endif
+
+       /* 
+        * These can all be consolidated. For now leaving 
+        * as individual IPIs.
+        *
+        */
+#if 0
        /* Install an inter-CPU IPI for lazy pmap release */
        setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+       irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
+       PCPU_SET(lazypmap, irq);
+       PANIC_IF(intr_add_handler("pmap_lazyfix", irq, 
+                                 (driver_intr_t *)pmap_lazyfix_action, 
+                                 NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+#endif
 
+#if 0
        /* Install an inter-CPU IPI for all-CPU rendezvous */
        setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else 
+       irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
+       PCPU_SET(rendezvous, irq);
+       PANIC_IF(intr_add_handler("smp_rendezvous", irq, 
+                                 (driver_intr_t *)smp_rendezvous_action, 
+                                 NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+#endif
 
+#if 0
        /* Install an inter-CPU IPI for forcing an additional software trap */
        setidt(IPI_AST, IDTVEC(cpuast),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
+#else
+       irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
+       PCPU_SET(cpuast, irq);
+#endif
+       /* XXX ignore for now */
+#if 0 
        /* Install an inter-CPU IPI for CPU stop/restart */
        setidt(IPI_STOP, IDTVEC(cpustop),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+
+}
+
+SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
+
+/*
+ * Initialize the IPI handlers and start up the AP's.
+ */
+void
+cpu_mp_start(void) /* --- Start here --- */
+{
+       int i;
+
+       POSTCODE(MP_START_POST);
+
+       /* Initialize the logical ID to APIC ID table. */
+       for (i = 0; i < MAXCPU; i++)
+               cpu_apic_ids[i] = -1;
 
 
        /* Set boot_cpu_id if needed. */
@@ -437,35 +506,44 @@
 void
 init_secondary(void)
 {
-       int     gsel_tss;
-       int     x, myid;
+       int     myid;
+       unsigned long gdtmachpfn;
+       printk("MADE IT!!");
+
 #if 0
        u_int   cr0;
 #endif
+       /* Steps to booting SMP on xen as gleaned from XenLinux:
+        * - cpu_init() - processor specific initialization
+        * - smp_callin() 
+        *    - wait 2s for BP to finish its startup sequence
+        *    - map_cpu_to_logical_apicid()
+        *    - save cpuid info
+        *    - set bit in callin map to let master (BP?) continue
+        * - local setup timer() - per cpu timer initialization
+        * - ldebug_setup() - bind debug IRQ to local CPU.
+        * - smp_intr_init() - IPI setup that we do in cpu_mp_start
+        * - local_irq_enable() - enable interrupts locally
+        * - cpu_set(id, map) - announce that we're up
+        * - cpu_idle() - make us schedulable
+        */
+
+
        /* bootAP is set in start_ap() to our ID. */
        myid = bootAP;
-       gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
-       gdt_segs[GPROC0_SEL].ssd_base =
-               (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
-       SMP_prvspace[myid].pcpu.pc_prvspace =
-               &SMP_prvspace[myid].pcpu;
 
-       for (x = 0; x < NGDT; x++) {
-               ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
-       }
+       gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+       PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); 
 
-#if 0
-       r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
-       r_gdt.rd_base = (int) &gdt[myid * NGDT];
-       lgdt(&r_gdt);                   /* does magic intra-segment return */
+       
+       lgdt_finish();
 
-       lidt(&r_idt);
-       lldt(_default_ldt);
-#endif
+       PCPU_SET(cpuid, myid);
+
+
+       set_user_ldt((struct mdproc *)_default_ldt);
        PCPU_SET(currentldt, _default_ldt);
 
-       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-       gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
        PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
        PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
        PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
@@ -557,6 +635,13 @@
        while (smp_started == 0)
                ia32_pause();
 
+       /* need to wait until now to setup the IPIs as SI_SUB_CPU is
+        * much earlier than SI_SUB_INTR
+        */  
+       ap_evtchn_init(myid);
+       ap_cpu_initclocks();
+       cpu_mp_ipi_init();
+
        /* ok, now grab sched_lock and enter the scheduler */
        mtx_lock_spin(&sched_lock);
 
@@ -610,28 +695,35 @@
 static int
 start_all_aps(void)
 {
-#ifndef PC98
-       u_char mpbiosreason;
-#endif
-       u_long mpbioswarmvec;
        struct pcpu *pc;
        char *stack;
-       uintptr_t kptbase;
-       int i, pg, apic_id, cpu;
+       int i, apic_id, cpu;
+
+       /* 
+        * This function corresponds most closely to 
+        * smp_boot_cpus in XenLinux - the sequence there 
+        * is:
+        * - check if SMP config is found - if not:
+        *     - clear the I/O APIC IRQs
+        *     - map cpu to logical apicid
+        *     - exit
+        * - smp_intr_init - IPI initialization
+        * - map cpu to logical apicid
+        * - boot each of the vcpus
+        * - clear and then construct the cpu sibling [logical CPUs] map.
+        *
+        */
 
        POSTCODE(START_ALL_APS_POST);
 
        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-
+#if 0
        /* install the AP 1st level boot code */
        install_ap_tramp();
 
        /* save the current value of the warm-start vector */
        mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
-#ifndef PC98
-       outb(CMOS_REG, BIOS_RESET);
-       mpbiosreason = inb(CMOS_DATA);
-#endif
+
 
        /* set up temporary P==V mapping for AP boot */
        /* XXX this is a hack, we should boot the AP on its own stack/PTD */
@@ -640,7 +732,7 @@
                PTD[i] = (pd_entry_t)(PG_V | PG_RW |
                    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
        invltlb();
-
+#endif
        /* start each AP */
        for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
                if (!cpu_info[apic_id].cpu_present ||
@@ -650,7 +742,7 @@
 
                /* save APIC ID for this logical ID */
                cpu_apic_ids[cpu] = apic_id;
-
+#if 0
                /* first page of AP's private space */
                pg = cpu * i386_btop(sizeof(struct privatespace));
 
@@ -665,11 +757,14 @@
                for (i = 0; i < KSTACK_PAGES; i++)
                        SMPpt[pg + 1 + i] = (pt_entry_t)
                            (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
+#endif
+               pc = &SMP_prvspace[cpu].pcpu;
 
                /* prime data page for it to use */
                pcpu_init(pc, cpu, sizeof(struct pcpu));
                pc->pc_apic_id = apic_id;
 
+#if 0
                /* setup a vector to our boot code */
                *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
                *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
@@ -677,7 +772,7 @@
                outb(CMOS_REG, BIOS_RESET);
                outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
 #endif
-
+#endif
                bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
                    PAGE_SIZE];
                bootAP = cpu;
@@ -700,13 +795,10 @@
        /* build our map of 'other' CPUs */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
 
+#if 0
        /* restore the warmstart vector */
        *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
-#ifndef PC98
-       outb(CMOS_REG, BIOS_RESET);
-       outb(CMOS_DATA, mpbiosreason);
 #endif
-
        /*
         * Set up the idle context for the BSP.  Similar to above except
         * that some was done by locore, some by pmap.c and some is implicit
@@ -739,7 +831,7 @@
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
-
+#if 0
 static void
 install_ap_tramp(void)
 {
@@ -791,6 +883,21 @@
        *dst16 = (u_int) boot_address & 0xffff;
        *dst8 = ((u_int) boot_address >> 16) & 0xff;
 }
+#endif
+
+static int 
+cpu_mp_trap_init(trap_info_t *trap_ctxt)
+{
+
+        trap_info_t *t = trap_table;
+
+        for (t = trap_table; t->address; t++) {
+                trap_ctxt[t->vector].flags = t->flags;
+                trap_ctxt[t->vector].cs = t->cs;
+                trap_ctxt[t->vector].address = t->address;
+        }
+        return 0x80 /*SYSCALL_VECTOR*/;
+}
 
 /*
  * This function starts the AP (application processor) identified
@@ -802,8 +909,25 @@
 static int
 start_ap(int apic_id)
 {
-       int vector, ms;
-       int cpus;
+       int vector, ms, i;
+       int cpus, boot_error;
+       vcpu_guest_context_t ctxt;
+
+       /* 
+        * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
+        * smpboot.c. 
+        * its initialization sequence consists of:
+        * - fork_idle(cpu) to create separate idle context
+        * - initialization of idle's context to start_secondary
+        * - initialization of cpu ctxt to start in startup_32_smp
+        * - then we call HYPERVISOR_boot_vcpu with the cpu index and
+        *   a pointer to the context.
+        * - on boot success we:
+        *   - set ourselves in the callout_map
+        *   - wait up to 5 seconds for us to be set in the callin map
+        * - set x86_cpu_to_apicid[cpu] = apicid;
+        *
+        */
 
        POSTCODE(START_AP_POST);
 
@@ -813,6 +937,55 @@
        /* used as a watchpoint to signal AP startup */
        cpus = mp_naps;
 
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
+       ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS;
+       ctxt.user_regs.cs = __KERNEL_CS;
+       ctxt.user_regs.eip = (unsigned long)init_secondary;
+       ctxt.user_regs.esp = (unsigned long)bootSTK;
+#ifdef notyet
+       ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+#else
+       ctxt.user_regs.eflags = (1<<9) | (1<<2);
+#endif
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = (unsigned long)bootSTK;
+
+       /* Callback handlers. */
+       ctxt.event_callback_cs     = __KERNEL_CS;
+       ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
+       ctxt.failsafe_callback_cs  = __KERNEL_CS;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+       ctxt.pt_base = (vm_paddr_t)IdlePTD;
+
+       boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
+
+       
+       if (boot_error) 
+               printk("Houston we have a problem\n");
+       else
+               printk("boot_vcpu succeeded\n");
+#if 0
        /*
         * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
         * and running the target CPU. OR this INIT IPI might be latched (P5
@@ -862,6 +1035,7 @@
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
            vector, apic_id);
        lapic_ipi_wait(-1);
+#endif
        DELAY(200);             /* wait ~200uS */
 
        /* Wait up to 5 seconds for it to start. */
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c   2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c   2005-05-06 21:37:31 -07:00
@@ -1374,7 +1374,6 @@
 static u_int lazyptd;
 static volatile u_int lazywait;
 
-void pmap_lazyfix_action(void);
 
 void
 pmap_lazyfix_action(void)
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c    2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c    2005-05-06 21:37:31 -07:00
@@ -380,8 +380,6 @@
         (void)HYPERVISOR_console_write(buf, ret);
 }
 
-#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
-
 
 #define XPQUEUE_SIZE 128
 #ifdef SMP
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h      2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h      2005-05-06 21:37:31 -07:00
@@ -441,4 +441,20 @@
     return ret;
 }
 
+static inline int   /* Issues the __HYPERVISOR_boot_vcpu hypercall and returns the value left in the "a" register. */
+HYPERVISOR_boot_vcpu(
+    unsigned long vcpu, vcpu_guest_context_t *ctxt)   /* vcpu is passed in the "b" register, the ctxt pointer in "c" */
+{
+    int ret;                    /* hypercall result, read back from the "a" register */
+    unsigned long ign1, ign2;   /* dummy outputs: tell the compiler that "b" and "c" are overwritten */
+
+    __asm__ __volatile__ (
+        TRAP_INSTR   /* macro defined elsewhere; presumably the trap into the hypervisor -- confirm */
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)   /* "0"/"1"/"2" place each input in the register of the matching output */
+       : "memory");   /* memory clobber: the compiler must not keep memory values cached across the call */
+
+    return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h    2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h    2005-05-06 21:37:31 -07:00
@@ -53,7 +53,12 @@
         int     *pc_ipi_to_evtchn;                                      \
         int     *pc_virq_to_irq;                                        \
         u_int   pc_cr2;                                                 \
-        u_int   pc_pdir                                        
+        u_int   pc_pdir;                                                \
+        u_int   pc_lazypmap;                                            \
+        u_int   pc_rendezvous;                                          \
+        u_int   pc_cpuast;                                              \
+        u_int   pc_time_irq;                                              \
+        uint64_t pc_processed_system_time;  
 
 #if defined(lint)
  
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h    2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h    2005-05-06 21:37:31 -07:00
@@ -343,6 +343,7 @@
 void   pmap_invalidate_page(pmap_t, vm_offset_t);
 void   pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
 void   pmap_invalidate_all(pmap_t);
+void    pmap_lazyfix_action(void);
 
 void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
 void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h 2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h 2005-05-06 21:37:31 -07:00
@@ -61,6 +61,9 @@
 void xen_machphys_update(unsigned long, unsigned long);
 void xen_update_descriptor(union descriptor *, union descriptor *);
 void lldt(u_short sel);
+void ap_cpu_initclocks(void);
+
+
 /*
  * Invalidate a patricular VA on all cpus
  *
@@ -79,5 +82,6 @@
        
 }
 
+#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
 
 #endif /* _XEN_XENFUNC_H_ */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] AP boot support, Kip Macy <=