
Re: [Xen-devel] apci issue



> On Thu, Apr 21, 2005 at 03:26:53PM +0100, Keir Fraser wrote:
> > 
> > On 21 Apr 2005, at 15:03, Gerd Knorr wrote:
> > 
> > >On Thu, Apr 21, 2005 at 02:15:06PM +0100, Ian Pratt wrote:
> > >>
> > >>>Updated to current bk today, and my machine stopped booting ...
> > >>>I've tracked it down to the apic changes from end of last week.
> > >>>cset 1.1307 boots fine, 1.1308 doesn't.
> > 
> > Try changing the following line in xen/arch/x86/apic.c (in 
> > setup_local_APIC):
> >   if (!smp_processor_id()  && (pic_mode || !value)) {
> > To:
> >   if (!smp_processor_id()) {
> 
> Yep, that fixes it.
> 
> > My guess is that your IRQs are routed through the legacy PIC
> 
> It's indeed an old machine with a classic PIC, no IO-APIC yet.

Excellent. I think there may be a better patch though. Please can you
try replacing arch/x86/apic.c and arch/x86/setup.c with the versions
attached to this email?

With luck this will both fix the problem and bring us a little closer
to the Linux codebase (probably not a bad thing with all this fragile
bootstrap code).

 Cheers,
 Keir

/*
 *      based on linux-2.6.10/arch/i386/kernel/apic.c
 *
 *  Local APIC handling, local APIC timers
 *
 *  (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
 *
 *  Fixes
 *    Maciej W. Rozycki   :   Bits for genuine 82489DX APICs;
 *                            thanks to Eric Gilmore and Rolf G. Tews
 *                            for testing these extensively.
 *    Maciej W. Rozycki   :   Various updates and fixes.
 *    Mikael Pettersson   :   Power Management for UP-APIC.
 *    Pavel Machek and
 *    Mikael Pettersson   :   PM converted to driver model.
 */

#include <xen/config.h>
#include <xen/perfc.h>
#include <xen/errno.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/smp.h>
#include <xen/softirq.h>
#include <asm/mc146818rtc.h>
#include <asm/msr.h>
#include <asm/atomic.h>
#include <asm/mpspec.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mach_apic.h>
#include <asm/io_ports.h>

/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;

static int enabled_via_apicbase;

int get_maxlvt(void)
{
    unsigned int v, ver, maxlvt;

    v = apic_read(APIC_LVR);
    ver = GET_APIC_VERSION(v);
    /* 82489DXs do not report # of LVT entries. */
    maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
    return maxlvt;
}

void clear_local_APIC(void)
{
    int maxlvt;
    unsigned long v;

    maxlvt = get_maxlvt();

    /*
     * Masking an LVT entry on a P6 can trigger a local APIC error
     * if the vector is zero. Mask LVTERR first to prevent this.
     */
    if (maxlvt >= 3) {
        v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
        apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
    }
    /*
     * Careful: we must first set only the mask bits (preserving the rest
     * of each LVT) in order to deassert any level-triggered sources.
     */
    v = apic_read(APIC_LVTT);
    apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT0);
    apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT1);
    apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
    if (maxlvt >= 4) {
        v = apic_read(APIC_LVTPC);
        apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
    }

    /*
     * Clean APIC state for other OSs:
     */
    apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
    apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
    apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
    if (maxlvt >= 3)
        apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
    if (maxlvt >= 4)
        apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);

    v = GET_APIC_VERSION(apic_read(APIC_LVR));
    if (APIC_INTEGRATED(v)) {   /* !82489DX */
        if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
            apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
    }
}

void __init connect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Do not trust the local APIC being empty at bootup.
         */
        clear_local_APIC();
        /*
         * PIC mode, enable APIC mode in the IMCR, i.e.
         * connect BSP's local APIC to INT and NMI lines.
         */
        printk("leaving PIC mode, enabling APIC mode.\n");
        outb(0x70, 0x22);
        outb(0x01, 0x23);
    }
}

void disconnect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Put the board back into PIC mode (has an effect
         * only on certain older boards).  Note that APIC
         * interrupts, including IPIs, won't work beyond
         * this point!  The only exception are INIT IPIs.
         */
        printk("disabling APIC mode, entering PIC mode.\n");
        outb(0x70, 0x22);
        outb(0x00, 0x23);
    }
}

void disable_local_APIC(void)
{
    unsigned long value;

    clear_local_APIC();

    /*
     * Disable APIC (implies clearing of registers
     * for 82489DX!).
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_SPIV_APIC_ENABLED;
    apic_write_around(APIC_SPIV, value);

    if (enabled_via_apicbase) {
        unsigned int l, h;
        rdmsr(MSR_IA32_APICBASE, l, h);
        l &= ~MSR_IA32_APICBASE_ENABLE;
        wrmsr(MSR_IA32_APICBASE, l, h);
    }
}

/*
 * This is to verify that we're looking at a real local APIC.
 * Check these against your board if the CPUs aren't getting
 * started for no apparent reason.
 */
int __init verify_local_APIC(void)
{
    unsigned int reg0, reg1;

    /*
     * The version register is read-only in a real APIC.
     */
    reg0 = apic_read(APIC_LVR);
    Dprintk("Getting VERSION: %x\n", reg0);
    apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
    reg1 = apic_read(APIC_LVR);
    Dprintk("Getting VERSION: %x\n", reg1);

    /*
     * The two version reads above should print the same
     * numbers.  If the second one is different, then we
     * poke at a non-APIC.
     */
    if (reg1 != reg0)
        return 0;

    /*
     * Check if the version looks reasonable.
     */
    reg1 = GET_APIC_VERSION(reg0);
    if (reg1 == 0x00 || reg1 == 0xff)
        return 0;
    reg1 = get_maxlvt();
    if (reg1 < 0x02 || reg1 == 0xff)
        return 0;

    /*
     * The ID register is read/write in a real APIC.
     */
    reg0 = apic_read(APIC_ID);
    Dprintk("Getting ID: %x\n", reg0);

    /*
     * The next two are just to see if we have sane values.
     * They're only really relevant if we're in Virtual Wire
     * compatibility mode, but most boxes are these days.
     */
    reg0 = apic_read(APIC_LVT0);
    Dprintk("Getting LVT0: %x\n", reg0);
    reg1 = apic_read(APIC_LVT1);
    Dprintk("Getting LVT1: %x\n", reg1);

    return 1;
}

void __init sync_Arb_IDs(void)
{
    /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
    unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
    if (ver >= 0x14)    /* P4 or higher */
        return;
    /*
     * Wait for idle.
     */
    apic_wait_icr_idle();

    Dprintk("Synchronizing Arb IDs.\n");
    apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
                      | APIC_DM_INIT);
}

extern void __error_in_apic_c (void);

void __init init_bsp_APIC(void)
{
    unsigned long value, ver;

    /*
     * Don't do the setup now if we have an SMP BIOS as the through-I/O-APIC
     * virtual wire mode might be active.
     */
    if (smp_found_config || !cpu_has_apic)
        return;

    value = apic_read(APIC_LVR);
    ver = GET_APIC_VERSION(value);
    
    /*
     * Do not trust the local APIC being empty at bootup.
     */
    clear_local_APIC();
    
    /*
     * Enable APIC.
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    value |= APIC_SPIV_APIC_ENABLED;
    
    /* This bit is reserved on P4/Xeon and should be cleared */
    if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
        (boot_cpu_data.x86 == 15))
        value &= ~APIC_SPIV_FOCUS_DISABLED;
    else
        value |= APIC_SPIV_FOCUS_DISABLED;
    value |= SPURIOUS_APIC_VECTOR;
    apic_write_around(APIC_SPIV, value);

    /*
     * Set up the virtual wire mode.
     */
    apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
    value = APIC_DM_NMI;
    if (!APIC_INTEGRATED(ver))              /* 82489DX */
        value |= APIC_LVT_LEVEL_TRIGGER;
    apic_write_around(APIC_LVT1, value);
}

void __init setup_local_APIC (void)
{
    unsigned long oldvalue, value, ver, maxlvt;

    /* Pound the ESR really hard over the head with a big hammer - mbligh */
    if (esr_disable) {
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
    }

    value = apic_read(APIC_LVR);
    ver = GET_APIC_VERSION(value);

    if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
        __error_in_apic_c();

    /*
     * Double-check whether this APIC is really registered.
     */
    if (!apic_id_registered())
        BUG();

    /*
     * Intel recommends setting DFR, LDR and TPR before enabling
     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116).  So here it goes...
     */
    init_apic_ldr();

    /*
     * Set Task Priority to 'accept all'. We never change this
     * later on.
     */
    value = apic_read(APIC_TASKPRI);
    value &= ~APIC_TPRI_MASK;
    apic_write_around(APIC_TASKPRI, value);

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distribution model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     * See also the comment in end_level_ioapic_irq().  --macro
     */
#if 1
    /* Enable focus processor (bit==0) */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
    /* Disable focus processor (bit==1) */
    value |= APIC_SPIV_FOCUS_DISABLED;
#endif
    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;
    apic_write_around(APIC_SPIV, value);

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the BP's LINT0. This is not
     * strictly necessary in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
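    /*
     * Note: 'value' below is non-zero iff LVT0 is currently masked, so the
     * BSP enables ExtINT delivery on LINT0 when in PIC mode or when the
     * firmware left LVT0 unmasked; all other CPUs (and all other cases)
     * keep it masked.
     */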
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (!smp_processor_id() && (pic_mode || !value)) {
        value = APIC_DM_EXTINT;
        printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        printk("masked ExtINT on CPU#%d\n", smp_processor_id());
    }
    apic_write_around(APIC_LVT0, value);

    /*
     * only the BP should see the LINT1 NMI signal, obviously.
     */
    if (!smp_processor_id())
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;
    if (!APIC_INTEGRATED(ver))      /* 82489DX */
        value |= APIC_LVT_LEVEL_TRIGGER;
    apic_write_around(APIC_LVT1, value);

    if (APIC_INTEGRATED(ver) && !esr_disable) {        /* !82489DX */
        maxlvt = get_maxlvt();
        if (maxlvt > 3)     /* Due to the Pentium erratum 3AP. */
            apic_write(APIC_ESR, 0);
        oldvalue = apic_read(APIC_ESR);

        value = ERROR_APIC_VECTOR;      /* enables sending errors */
        apic_write_around(APIC_LVTERR, value);
        /*
         * spec says clear errors after enabling vector.
         */
        if (maxlvt > 3)
            apic_write(APIC_ESR, 0);
        value = apic_read(APIC_ESR);
        if (value != oldvalue)
            printk("ESR value before enabling vector: 0x%08lx "
                "after: 0x%08lx\n", oldvalue, value);
    } else {
        if (esr_disable)
            /*
             * Something untraceable is creating bad interrupts on
             * secondary quads ... for the moment, just leave the
             * ESR disabled - we can't do anything useful with the
             * errors anyway - mbligh
             */
            printk("Leaving ESR disabled.\n");
        else
            printk("No ESR for 82489DX.\n");
    }

    if (nmi_watchdog == NMI_LOCAL_APIC)
        setup_apic_nmi_watchdog();
}

/*
 * Detect and enable local APICs on non-SMP boards.
 * Original code written by Keir Fraser.
 */

static int __init detect_init_APIC (void)
{
    u32 h, l, features;
    extern void get_cpu_vendor(struct cpuinfo_x86*);

    /* Workaround for us being called before identify_cpu(). */
    get_cpu_vendor(&boot_cpu_data);

    switch (boot_cpu_data.x86_vendor) {
    case X86_VENDOR_AMD:
        if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
            (boot_cpu_data.x86 == 15))        
            break;
        goto no_apic;
    case X86_VENDOR_INTEL:
        if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
            (boot_cpu_data.x86 == 5 && cpu_has_apic))
            break;
        goto no_apic;
    default:
        goto no_apic;
    }

    if (!cpu_has_apic) {
        /*
         * Some BIOSes disable the local APIC in the
         * APIC_BASE MSR. This can only be done in
         * software for Intel P6 or later and AMD K7
         * (Model > 1) or later.
         */
        rdmsr(MSR_IA32_APICBASE, l, h);
        if (!(l & MSR_IA32_APICBASE_ENABLE)) {
            printk("Local APIC disabled by BIOS -- reenabling.\n");
            l &= ~MSR_IA32_APICBASE_BASE;
            l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
            wrmsr(MSR_IA32_APICBASE, l, h);
            enabled_via_apicbase = 1;
        }
    }

    /* The APIC feature bit should now be enabled in `cpuid' */
    features = cpuid_edx(1);
    if (!(features & (1 << X86_FEATURE_APIC))) {
        printk("Could not enable APIC!\n");
        return -1;
    }

    set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

    /* The BIOS may have set up the APIC at some other address */
    rdmsr(MSR_IA32_APICBASE, l, h);
    if (l & MSR_IA32_APICBASE_ENABLE)
        mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;

    if (nmi_watchdog != NMI_NONE)
        nmi_watchdog = NMI_LOCAL_APIC;

    printk("Found and enabled local APIC!\n");

    return 0;

no_apic:
    printk("No local APIC present or hardware disabled\n");
    return -1;
}

void __init init_apic_mappings(void)
{
    unsigned long apic_phys;

    /*
     * If no local APIC can be found then set up a fake all
     * zeroes page to simulate the local APIC and another
     * one for the IO-APIC.
     */
    if (!smp_found_config && detect_init_APIC()) {
        apic_phys = alloc_xenheap_page();
        apic_phys = __pa(apic_phys);
    } else
        apic_phys = mp_lapic_addr;

    set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
    Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);

    /*
     * Fetch the APIC ID of the BSP in case we have a
     * default configuration (or the MP table is broken).
     */
    if (boot_cpu_physical_apicid == -1U)
        boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));

#ifdef CONFIG_X86_IO_APIC
    {
        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
        int i;

        for (i = 0; i < nr_ioapics; i++) {
            if (smp_found_config) {
                ioapic_phys = mp_ioapics[i].mpc_apicaddr;
                if (!ioapic_phys) {
                    printk(KERN_ERR
                           "WARNING: bogus zero IO-APIC "
                           "address found in MPTABLE, "
                           "disabling IO/APIC support!\n");
                    smp_found_config = 0;
                    skip_ioapic_setup = 1;
                    goto fake_ioapic_page;
                }
            } else {
fake_ioapic_page:
                ioapic_phys = alloc_xenheap_page();
                ioapic_phys = __pa(ioapic_phys);
            }
            set_fixmap_nocache(idx, ioapic_phys);
            Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
                    fix_to_virt(idx), ioapic_phys);
            idx++;
        }
    }
#endif
}

/*****************************************************************************
 * APIC calibration
 * 
 * The APIC is programmed in bus cycles.
 * Timeout values should be specified in real time units.
 * The "cheapest" time source is the cycle counter.
 *
 * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
 *
 * The calibration is currently a bit shoddy since it requires the external
 * timer chip to generate periodic timer interrupts.
 *****************************************************************************/

/* used for system time scaling */
static unsigned long bus_freq;    /* KAF: pointer-size avoids compile warns. */
static u32           bus_cycle;   /* length of one bus cycle in pico-seconds */
static u32           bus_scale;   /* scaling factor convert ns to bus cycles */
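
/*
 * How the scaling works (worked example with illustrative numbers):
 * calibrate_APIC_clock() sets bus_cycle to the length of one bus cycle in
 * picoseconds and bus_scale to (1000 * 2^18) / bus_cycle, so that
 * reprogram_ac_timer() can convert a timeout in nanoseconds to bus cycles
 * with
 *
 *     apic_tmict = (bus_scale * expire_ns) >> 18
 *                ~ expire_ns * 1000 / bus_cycle_ps
 *
 * E.g. a 100MHz bus gives bus_cycle = 10000ps and bus_scale = 26214, so a
 * 1ms timeout (1000000ns) maps to (26214 * 1000000) >> 18 = 99998 bus
 * cycles (the ideal value is 100000; the small error comes from the
 * integer truncation of bus_scale).
 */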

/*
 * The timer chip is already set up at HZ interrupts per second here,
 * but we do not accept timer interrupts yet. We only allow the BP
 * to calibrate.
 */
static unsigned int __init get_8254_timer_count(void)
{
    /*extern spinlock_t i8253_lock;*/
    /*unsigned long flags;*/

    unsigned int count;

    /*spin_lock_irqsave(&i8253_lock, flags);*/

    outb_p(0x00, PIT_MODE);
    count = inb_p(PIT_CH0);
    count |= inb_p(PIT_CH0) << 8;

    /*spin_unlock_irqrestore(&i8253_lock, flags);*/

    return count;
}

/* The next 8254 tick can be caught by waiting for the counter to wrap around. */
static void __init wait_8254_wraparound(void)
{
    unsigned int curr_count, prev_count=~0;
    int delta;

    curr_count = get_8254_timer_count();

    do {
        prev_count = curr_count;
        curr_count = get_8254_timer_count();
        delta = curr_count-prev_count;

        /*
         * This limit for delta seems arbitrary, but it isn't: it's slightly
         * above the level of error a buggy Mercury/Neptune chipset timer can
         * cause.
         */
    } while (delta < 300);
}

/*
 * Default initialization for 8254 timers. If we use other timers like HPET,
 * we override this later
 */
void (*wait_timer_tick)(void) = wait_8254_wraparound;

/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clocks. During calibration we actually call
 * this function with a very large value and read the current time after
 * a well-defined period of time has expired.
 *
 * Calibration is only performed once, for CPU0!
 *
 * We do reads before writes even if unnecessary, to get around the
 * P5 APIC double write bug.
 */

#define APIC_DIVISOR 1

static void __setup_APIC_LVTT(unsigned int clocks)
{
    unsigned int lvtt_value, tmp_value, ver;

    ver = GET_APIC_VERSION(apic_read(APIC_LVR));
    /* NB. Xen uses local APIC timer in one-shot mode. */
    lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR;
    if (!APIC_INTEGRATED(ver))
        lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
    apic_write_around(APIC_LVTT, lvtt_value);

    tmp_value = apic_read(APIC_TDCR);
    apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));

    apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}

/*
 * This is done for every CPU from setup_APIC_clocks() below.
 * We set up each local APIC with a zero timeout value for now.
 * Unlike Linux, we don't have to wait for slices etc.
 */
void setup_APIC_timer(void * data)
{
    unsigned long flags;
    __save_flags(flags);
    __sti();
    __setup_APIC_LVTT(0);
    __restore_flags(flags);
}

/*
 * In this function we calibrate APIC bus clocks to the external timer.
 *
 * As a result we have the Bus Speed and CPU speed in Hz.
 * 
 * We want to do the calibration only once (for CPU0).  CPUs connected by the
 * same APIC bus have the very same bus frequency.
 *
 * This is a bit shoddy since we use the very same periodic timer interrupt
 * that we are trying to eliminate in order to calibrate the APIC.
 */

int __init calibrate_APIC_clock(void)
{
    unsigned long long t1 = 0, t2 = 0;
    long tt1, tt2;
    long result;
    int i;
    const int LOOPS = HZ/10;

    printk("Calibrating APIC timer for CPU%d...\n",  smp_processor_id());

    /*
     * Put whatever arbitrary (but long enough) timeout
     * value into the APIC clock, we just want to get the
     * counter running for calibration.
     */
    __setup_APIC_LVTT(1000000000);

    /*
     * The timer chip counts down to zero. Let's wait
     * for a wraparound to start exact measurement:
     * (the current tick might have been already half done)
     */
    wait_timer_tick();

    /*
     * We wrapped around just now. Let's start:
     */
    if (cpu_has_tsc)
        rdtscll(t1);
    tt1 = apic_read(APIC_TMCCT);

    /*
     * Let's wait for LOOPS wraparounds:
     */
    for (i = 0; i < LOOPS; i++)
        wait_timer_tick();

    tt2 = apic_read(APIC_TMCCT);
    if (cpu_has_tsc)
        rdtscll(t2);

    /*
     * The APIC bus clock counter is 32 bits only, it
     * might have overflown, but note that we use signed
     * longs, thus no extra care needed.
     * [underflown to be exact, as the timer counts down ;)]
     */

    result = (tt1-tt2)*APIC_DIVISOR/LOOPS;

    if (cpu_has_tsc)
        printk("..... CPU clock speed is %ld.%04ld MHz.\n",
            ((long)(t2-t1)/LOOPS)/(1000000/HZ),
            ((long)(t2-t1)/LOOPS)%(1000000/HZ));

    printk("..... host bus clock speed is %ld.%04ld MHz.\n",
        result/(1000000/HZ),
        result%(1000000/HZ));

    /* set up multipliers for accurate timer code */
    bus_freq   = result*HZ;
    bus_cycle  = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
    bus_scale  = (1000*262144)/bus_cycle;

    printk("..... bus_scale = 0x%08X\n", bus_scale);
    /* reset APIC to zero timeout value */
    __setup_APIC_LVTT(0);

    return result;
}

/*
 * Initialise the APIC timers for all CPUs.
 * We start with the first CPU to find out the processor frequency and bus speed.
 */
void __init setup_APIC_clocks (void)
{
    printk("Using local APIC timer interrupts.\n");
    using_apic_timer = 1;
    __cli();
    /* calibrate CPU0 for CPU speed and BUS speed */
    bus_freq = calibrate_APIC_clock();
    /* Now set up the timer for real. */
    setup_APIC_timer((void *)bus_freq);
    __sti();
    /* and update all other cpus */
    smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
}

#undef APIC_DIVISOR

/*
 * Reprogram the APIC timer. The timeout value is in ns from start of boot.
 * Returns 1 on success.
 * Returns 0 if the timeout value is too small or in the past.
 */
int reprogram_ac_timer(s_time_t timeout)
{
    s_time_t    now;
    s_time_t    expire;
    u64         apic_tmict;

    /*
     * We use this value because we don't trust zero (we think it may just
     * cause an immediate interrupt). At least this is guaranteed to hold it
     * off for ages (esp. since the clock ticks on bus clock, not cpu clock!).
     */
    if ( timeout == 0 )
    {
        apic_tmict = 0xffffffff;
        goto reprogram;
    }

    now = NOW();
    expire = timeout - now; /* value from now */

    if ( expire <= 0 )
    {
        Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n", 
                smp_processor_id(), (u32)(now>>32), 
                (u32)now, (u32)(timeout>>32),(u32)timeout);
        return 0;
    }

    /*
     * If we don't have a local APIC then we just poll the timer list off the
     * PIT interrupt. Cheesy but good enough to work on e.g. VMware :-)
     */
    if ( !cpu_has_apic )
        return 1;

    /* conversion to bus units */
    apic_tmict = (((u64)bus_scale) * expire)>>18;

    if ( apic_tmict >= 0xffffffff )
    {
        Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id());
        apic_tmict = 0xffffffff;
    }

    if ( apic_tmict == 0 )
    {
        Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id());
        return 0;
    }

 reprogram:
    /* Program the timer. */
    apic_write(APIC_TMICT, (unsigned long)apic_tmict);

    return 1;
}

void smp_apic_timer_interrupt(struct xen_regs * regs)
{
    ack_APIC_irq();
    perfc_incrc(apic_timer);
    raise_softirq(AC_TIMER_SOFTIRQ);
}

/*
 * This interrupt should _never_ happen with our APIC/SMP architecture
 */
asmlinkage void smp_spurious_interrupt(struct xen_regs *regs)
{
    unsigned long v;

    /*
     * Check if this really is a spurious interrupt and ACK it
     * if it is a vectored one.  Just in case...
     * Spurious interrupts should not be ACKed.
     */
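    /*
     * The in-service bits live in eight 32-bit ISR registers spaced 0x10
     * apart, with vector N in register N/32, bit N%32; hence the offset
     * arithmetic below: (vector & ~0x1f) >> 1 == (vector / 32) * 0x10.
     */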
    v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
    if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
        ack_APIC_irq();

    /* see sw-dev-man vol 3, chapter 7.4.13.5 */
    printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
           smp_processor_id());
}

/*
 * This interrupt should never happen with our APIC/SMP architecture
 */

asmlinkage void smp_error_interrupt(struct xen_regs *regs)
{
    unsigned long v, v1;

    /* First tickle the hardware, only then report what went on. -- REW */
    v = apic_read(APIC_ESR);
    apic_write(APIC_ESR, 0);
    v1 = apic_read(APIC_ESR);
    ack_APIC_irq();
    atomic_inc(&irq_err_count);

    /* Here is what the APIC error bits mean:
       0: Send CS error
       1: Receive CS error
       2: Send accept error
       3: Receive accept error
       4: Reserved
       5: Send illegal vector
       6: Received illegal vector
       7: Illegal register address
    */
    printk("APIC error on CPU%d: %02lx(%02lx)\n",
            smp_processor_id(), v, v1);
}

/*
 * This initializes the IO-APIC and APIC hardware if this is
 * a UP kernel.
 */
int __init APIC_init_uniprocessor (void)
{
    if (!smp_found_config && !cpu_has_apic)
        return -1;

    /*
     * Complain if the BIOS pretends there is one.
     */
    if (!cpu_has_apic &&
        APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
        printk("BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        return -1;
    }

    verify_local_APIC();

    connect_bsp_APIC();

#ifdef CONFIG_SMP
    cpu_online_map = 1;
#endif
    phys_cpu_present_map = 1;
    apic_write_around(APIC_ID, boot_cpu_physical_apicid);

    setup_local_APIC();

#ifdef CONFIG_X86_IO_APIC
    if (smp_found_config)
        if (!skip_ioapic_setup && nr_ioapics)
            setup_IO_APIC();
#endif
    setup_APIC_clocks();

    return 0;
}

/*
 * arch/x86/setup.c (second attached file)
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/pci.h>
#include <xen/serial.h>
#include <xen/softirq.h>
#include <xen/acpi.h>
#include <xen/console.h>
#include <xen/serial.h>
#include <xen/trace.h>
#include <xen/multiboot.h>
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/domain_page.h>
#include <asm/shadow.h>
#include <asm/e820.h>

/*
 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
 * pfn_info table and allocation bitmap.
 */
static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
#if defined(__x86_64__)
integer_param("xenheap_megabytes", opt_xenheap_megabytes);
#endif

/* opt_noht: If true, Hyperthreading is ignored. */
int opt_noht = 0;
boolean_param("noht", opt_noht);

/* opt_noacpi: If true, ACPI tables are not parsed. */
static int opt_noacpi = 0;
boolean_param("noacpi", opt_noacpi);

/* opt_nosmp: If true, secondary processors are ignored. */
static int opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);

/* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */
/* NB. This flag implies 'nosmp' and 'noacpi'. */
static int opt_ignorebiostables = 0;
boolean_param("ignorebiostables", opt_ignorebiostables);

/* opt_watchdog: If true, run a watchdog NMI on each processor. */
static int opt_watchdog = 0;
boolean_param("watchdog", opt_watchdog);

int early_boot = 1;

unsigned long xenheap_phys_end;

extern void arch_init_memory(void);
extern void init_IRQ(void);
extern void trap_init(void);
extern void time_init(void);
extern void ac_timer_init(void);
extern void initialize_keytable();
extern int do_timer_lists_from_pit;

char ignore_irq13; /* set if exception 16 works */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };

#if defined(__x86_64__)
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
#endif
EXPORT_SYMBOL(mmu_cr4_features);

unsigned long wait_init_idle;

struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain };

#ifdef CONFIG_ACPI_INTERPRETER
int acpi_disabled = 0;
#else
int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

int phys_proc_id[NR_CPUS];
int logical_proc_id[NR_CPUS];

/* Standard test to see if a specific EFLAGS bit is changeable. */
static inline int flag_is_changeable_p(unsigned long flag)
{
    unsigned long f1, f2;
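    /*
     * Try to flip the requested EFLAGS bit: save EFLAGS, toggle the bit,
     * write it back, then read EFLAGS again. If the change sticks, the
     * bit is software-controllable. have_cpuid_p() below uses this on
     * EFLAGS.ID (bit 21), which is toggleable iff CPUID is supported.
     */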

    asm("pushf\n\t"
        "pushf\n\t"
        "pop %0\n\t"
        "mov %0,%1\n\t"
        "xor %2,%0\n\t"
        "push %0\n\t"
        "popf\n\t"
        "pushf\n\t"
        "pop %0\n\t"
        "popf\n\t"
        : "=&r" (f1), "=&r" (f2)
        : "ir" (flag));

    return ((f1^f2) & flag) != 0;
}

/* Probe for the CPUID instruction */
static int __init have_cpuid_p(void)
{
    return flag_is_changeable_p(X86_EFLAGS_ID);
}

void __init get_cpu_vendor(struct cpuinfo_x86 *c)
{
    char *v = c->x86_vendor_id;

    if (!strcmp(v, "GenuineIntel"))
        c->x86_vendor = X86_VENDOR_INTEL;
    else if (!strcmp(v, "AuthenticAMD"))
        c->x86_vendor = X86_VENDOR_AMD;
    else if (!strcmp(v, "CyrixInstead"))
        c->x86_vendor = X86_VENDOR_CYRIX;
    else if (!strcmp(v, "UMC UMC UMC "))
        c->x86_vendor = X86_VENDOR_UMC;
    else if (!strcmp(v, "CentaurHauls"))
        c->x86_vendor = X86_VENDOR_CENTAUR;
    else if (!strcmp(v, "NexGenDriven"))
        c->x86_vendor = X86_VENDOR_NEXGEN;
    else if (!strcmp(v, "RiseRiseRise"))
        c->x86_vendor = X86_VENDOR_RISE;
    else if (!strcmp(v, "GenuineTMx86") ||
             !strcmp(v, "TransmetaCPU"))
        c->x86_vendor = X86_VENDOR_TRANSMETA;
    else
        c->x86_vendor = X86_VENDOR_UNKNOWN;
}

static void __init init_intel(struct cpuinfo_x86 *c)
{
    /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
    if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
        clear_bit(X86_FEATURE_SEP, &c->x86_capability);

#ifdef CONFIG_SMP
    if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
    {
        u32     eax, ebx, ecx, edx;
        int     initial_apic_id, siblings, cpu = smp_processor_id();
        
        cpuid(1, &eax, &ebx, &ecx, &edx);
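        /*
         * CPUID.1:EBX bits 23:16 report the number of logical processors
         * per physical package (meaningful when the HT feature bit is set).
         */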
        ht_per_core = siblings = (ebx & 0xff0000) >> 16;

        if ( opt_noht )
            clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);

        if ( siblings <= 1 )
        {
            printk(KERN_INFO  "CPU#%d: Hyper-Threading is disabled\n", cpu);
        } 
        else if ( siblings > 2 )
        {
            panic("We don't support more than two logical CPUs per package!");
        }
        else
        {
            initial_apic_id = ebx >> 24 & 0xff;
            phys_proc_id[cpu]    = initial_apic_id >> 1;
            logical_proc_id[cpu] = initial_apic_id & 1;
            printk(KERN_INFO  "CPU#%d: Physical ID: %d, Logical ID: %d\n",
                   cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
        }
    }
#endif

#ifdef CONFIG_VMX
    start_vmx();
#endif

}

static void __init init_amd(struct cpuinfo_x86 *c)
{
    /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
       3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
    clear_bit(0*32+31, &c->x86_capability);
        
    switch(c->x86)
    {
    case 5:
        panic("AMD K6 is not supported.\n");
    case 6:     /* An Athlon/Duron. We can trust the BIOS probably */
        break;          
    }
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void __init identify_cpu(struct cpuinfo_x86 *c)
{
    int i, cpu = smp_processor_id();
    u32 xlvl, tfms, junk;

    phys_proc_id[cpu]    = cpu;
    logical_proc_id[cpu] = 0;

    c->x86_vendor = X86_VENDOR_UNKNOWN;
    c->cpuid_level = -1;        /* CPUID not detected */
    c->x86_model = c->x86_mask = 0;     /* So far unknown... */
    c->x86_vendor_id[0] = '\0'; /* Unset */
    memset(&c->x86_capability, 0, sizeof c->x86_capability);

    if ( !have_cpuid_p() )
        panic("Ancient processors not supported\n");

    /* Get vendor name */
    cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
          (unsigned int *)&c->x86_vendor_id[0],
          (unsigned int *)&c->x86_vendor_id[8],
          (unsigned int *)&c->x86_vendor_id[4]);

    get_cpu_vendor(c);
                
    if ( c->cpuid_level == 0 )
        panic("Decrepit CPUID not supported\n");

    cpuid(0x00000001, &tfms, &junk, &junk,
          &c->x86_capability[0]);
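    /*
     * CPUID.1:EAX packs family/model/stepping as: bits 3:0 stepping,
     * bits 7:4 model, bits 11:8 family (the extended family/model bits
     * are ignored here).
     */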
    c->x86 = (tfms >> 8) & 15;
    c->x86_model = (tfms >> 4) & 15;
    c->x86_mask = tfms & 15;

    /* AMD-defined flags: level 0x80000001 */
    xlvl = cpuid_eax(0x80000000);
    if ( (xlvl & 0xffff0000) == 0x80000000 ) {
        if ( xlvl >= 0x80000001 )
            c->x86_capability[1] = cpuid_edx(0x80000001);
    }

    /* Transmeta-defined flags: level 0x80860001 */
    xlvl = cpuid_eax(0x80860000);
    if ( (xlvl & 0xffff0000) == 0x80860000 ) {
        if (  xlvl >= 0x80860001 )
            c->x86_capability[2] = cpuid_edx(0x80860001);
    }

    printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
           smp_processor_id(),
           c->x86_capability[0],
           c->x86_capability[1],
           c->x86_capability[2],
           c->x86_vendor);

    switch ( c->x86_vendor ) {
    case X86_VENDOR_INTEL:
        init_intel(c);
        break;
    case X86_VENDOR_AMD:
        init_amd(c);
        break;
    case X86_VENDOR_UNKNOWN:  /* Connectix Virtual PC reports this */
        break;
    case X86_VENDOR_CENTAUR:
        break;
    default:
        printk("Unknown CPU identifier (%d): continuing anyway, "
               "but might fail.\n", c->x86_vendor);
    }
        
    printk("CPU caps: %08x %08x %08x %08x\n",
           c->x86_capability[0],
           c->x86_capability[1],
           c->x86_capability[2],
           c->x86_capability[3]);

    /*
     * On SMP, boot_cpu_data holds the common feature set between
     * all CPUs; so make sure that we indicate which features are
     * common between the CPUs.  The first time this routine gets
     * executed, c == &boot_cpu_data.
     */
    if ( c != &boot_cpu_data ) {
        /* AND the already accumulated flags with these */
        for ( i = 0 ; i < NCAPINTS ; i++ )
            boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
    }
}


unsigned long cpu_initialized;
void __init cpu_init(void)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    if ( test_and_set_bit(nr, &cpu_initialized) )
        panic("CPU#%d already initialized!!!\n", nr);
    printk("Initializing CPU#%d\n", nr);

    SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
    SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
    __asm__ __volatile__ ( "lgdt %0" : "=m" (*current->arch.gdt) );

    /* No nested task. */
    __asm__ __volatile__ ( "pushf ; andw $0xbfff,(%"__OP"sp) ; popf" );

    /* Ensure FPU gets initialised for each domain. */
    stts();

    /* Set up and load the per-CPU TSS and LDT. */
    t->bitmap = IOBMP_INVALID_OFFSET;
#if defined(__i386__)
    t->ss0  = __HYPERVISOR_DS;
    t->esp0 = get_stack_bottom();
#elif defined(__x86_64__)
    t->rsp0 = get_stack_bottom();
#endif
    set_tss_desc(nr,t);
    load_TR(nr);
    __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );

    /* Clear all 6 debug registers. */
#define CD(register) __asm__ ( "mov %0,%%db" #register : : "r" (0UL) );
    CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
#undef CD

    /* Install correct page table. */
    write_ptbase(current);

    init_idle_task();
}

static void __init do_initcalls(void)
{
    initcall_t *call;
    for ( call = &__initcall_start; call < &__initcall_end; call++ )
        (*call)();
}

unsigned long pci_mem_start = 0x10000000;

static void __init start_of_day(void)
{
    unsigned long low_mem_size;
    
#ifdef MEMORY_GUARD
    /* Unmap the first page of CPU0's stack. */
    extern unsigned long cpu0_stack[];
    memguard_guard_stack(cpu0_stack);
#endif

    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);

    if ( opt_watchdog ) 
        nmi_watchdog = NMI_LOCAL_APIC;

    sort_exception_tables();

    arch_do_createdomain(current);

    /* Tell the PCI layer not to allocate too close to the RAM area.. */
    low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
    if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
    
    identify_cpu(&boot_cpu_data); /* get CPU type info */
    if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
    if ( cpu_has_xmm )  set_in_cr4(X86_CR4_OSXMMEXCPT);
#ifdef CONFIG_SMP
    if ( opt_ignorebiostables )
    {
        opt_nosmp  = 1;           /* No SMP without configuration          */
        opt_noacpi = 1;           /* ACPI will just confuse matters also   */
    }
    else
    {
        find_smp_config();
        smp_alloc_memory();       /* trampoline which other CPUs jump at   */
    }
#endif
    paging_init();                /* not much here now, but sets up fixmap */
    if ( !opt_noacpi )
        acpi_boot_init();
#ifdef CONFIG_SMP
    if ( smp_found_config ) 
        get_smp_config();
#endif
    init_apic_mappings(); /* make APICs addressable in our pagetables. */
    scheduler_init();   
    init_IRQ();  /* installs simple interrupt wrappers. Starts HZ clock. */
    trap_init();
    time_init(); /* installs software handler for HZ clock. */

    arch_init_memory();

#ifndef CONFIG_SMP    
    APIC_init_uniprocessor();
#else
    if ( opt_nosmp )
        APIC_init_uniprocessor();
    else
        smp_boot_cpus(); 
    /*
     * The above does loads of stuff, including kicking the local APIC
     * and, once the other CPUs are booted, the IO-APIC. Each IRQ is
     * preferably handled by the IO-APIC, but we fall back to the 8259A
     * if we have to (slower).
     */
#endif

    __sti();

    initialize_keytable(); /* call back handling for key codes */

    serial_init_stage2();

    if ( !cpu_has_apic )
    {
        do_timer_lists_from_pit = 1;
        if ( smp_num_cpus != 1 )
            panic("We need local APICs on SMP machines!");
    }

    ac_timer_init();    /* init accurate timers */
    init_xen_time();    /* initialise the time */
    schedulers_start(); /* start scheduler for each CPU */

    check_nmi_watchdog();

#ifdef CONFIG_PCI
    pci_init();
#endif
    do_initcalls();

#ifdef CONFIG_SMP
    wait_init_idle = cpu_online_map;
    clear_bit(smp_processor_id(), &wait_init_idle);
    smp_threads_ready = 1;
    smp_commence(); /* Tell other CPUs that state of the world is stable. */
    while ( wait_init_idle != 0 )
        cpu_relax();
#endif

    watchdog_on = 1;
#ifdef __x86_64__ /* x86_32 uses low mappings when building DOM0. */
    zap_low_mappings();
#endif
}

void __init __start_xen(multiboot_info_t *mbi)
{
    char *cmdline;
    module_t *mod = (module_t *)__va(mbi->mods_addr);
    void *heap_start;
    unsigned long firsthole_start, nr_pages;
    unsigned long initial_images_start, initial_images_end;
    struct e820entry e820_raw[E820MAX];
    int i, e820_raw_nr = 0, bytes = 0;

    /* Parse the command-line options. */
    if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
        cmdline_parse(__va(mbi->cmdline));

    /* Must do this early -- e.g., spinlocks rely on get_current(). */
    set_current(&idle0_exec_domain);

    /* We initialise the serial devices very early so we can get debugging. */
    serial_init_stage1();

    init_console();

    /* Check that we have at least one Multiboot module. */
    if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
    {
        printk("FATAL ERROR: Require at least one Multiboot module.\n");
        for ( ; ; ) ;
    }

    xenheap_phys_end = opt_xenheap_megabytes << 20;

    if ( mbi->flags & MBI_MEMMAP )
    {
        while ( bytes < mbi->mmap_length )
        {
            memory_map_t *map = __va(mbi->mmap_addr + bytes);
            e820_raw[e820_raw_nr].addr = 
                ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
            e820_raw[e820_raw_nr].size = 
                ((u64)map->length_high << 32) | (u64)map->length_low;
            e820_raw[e820_raw_nr].type = 
                (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
            e820_raw_nr++;
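            /*
             * The Multiboot mmap 'size' field does not include the size
             * field itself, hence the extra 4 bytes when advancing.
             */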
            bytes += map->size + 4;
        }
    }
    else if ( mbi->flags & MBI_MEMLIMITS )
    {
        e820_raw[0].addr = 0;
        e820_raw[0].size = mbi->mem_lower << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = mbi->mem_upper << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else
    {
        printk("FATAL ERROR: Bootloader provided no memory information.\n");
        for ( ; ; ) ;
    }

    max_page = init_e820(e820_raw, e820_raw_nr);

    /* Find the first high-memory RAM hole. */
    for ( i = 0; i < e820.nr_map; i++ )
        if ( (e820.map[i].type == E820_RAM) &&
             (e820.map[i].addr >= 0x100000) )
            break;
    firsthole_start = e820.map[i].addr + e820.map[i].size;

    /* Relocate the Multiboot modules. */
    initial_images_start = xenheap_phys_end;
    initial_images_end   = initial_images_start + 
        (mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
    if ( initial_images_end > firsthole_start )
    {
        printk("Not enough memory to stash the DOM0 kernel image.\n");
        for ( ; ; ) ;
    }
#if defined(__i386__)
    memmove((void *)initial_images_start,  /* use low mapping */
            (void *)mod[0].mod_start,      /* use low mapping */
            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#elif defined(__x86_64__)
    memmove(__va(initial_images_start),
            __va(mod[0].mod_start),
            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#endif

    /* Initialise boot-time allocator with all RAM situated after modules. */
    heap_start = memguard_init(&_end);
    heap_start = __va(init_boot_allocator(__pa(heap_start)));
    nr_pages   = 0;
    for ( i = 0; i < e820.nr_map; i++ )
    {
        if ( e820.map[i].type != E820_RAM )
            continue;
        nr_pages += e820.map[i].size >> PAGE_SHIFT;
        if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end )
            init_boot_pages((e820.map[i].addr < initial_images_end) ?
                            initial_images_end : e820.map[i].addr,
                            e820.map[i].addr + e820.map[i].size);
    }

    printk("System RAM: %luMB (%lukB)\n", 
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));

    init_frametable();

    end_boot_allocator();

    init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
    printk("Xen heap: %luMB (%lukB)\n",
           (xenheap_phys_end-__pa(heap_start)) >> 20,
           (xenheap_phys_end-__pa(heap_start)) >> 10);

    early_boot = 0;

    start_of_day();

    grant_table_init();

    shadow_mode_init();

    /* Create initial domain 0. */
    dom0 = do_createdomain(0, 0);
    if ( dom0 == NULL )
        panic("Error creating domain 0\n");

    set_bit(DF_PRIVILEGED, &dom0->d_flags);

    /* Grab the DOM0 command line. Skip past the image name. */
    cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
    if ( cmdline != NULL )
    {
        while ( *cmdline == ' ' ) cmdline++;
        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
            while ( *cmdline == ' ' ) cmdline++;
    }

    /*
     * We're going to set up domain0 using the module(s) that we stashed
     * safely above our heap. The second module, if present, is an initrd
     * ramdisk.
     */
    if ( construct_dom0(dom0,
                        initial_images_start, 
                        mod[0].mod_end-mod[0].mod_start,
                        (mbi->mods_count == 1) ? 0 :
                        initial_images_start + 
                        (mod[1].mod_start-mod[0].mod_start),
                        (mbi->mods_count == 1) ? 0 :
                        mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
                        cmdline) != 0)
        panic("Could not set up DOM0 guest OS\n");

    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
    scrub_heap_pages();

    init_trace_bufs();

    /* Give up the VGA console if DOM0 is configured to grab it. */
    console_endboot(cmdline && strstr(cmdline, "tty0"));

    /* Hide UART from DOM0 if we're using it */
    serial_endboot();

    domain_unpause_by_systemcontroller(current->domain);
    domain_unpause_by_systemcontroller(dom0);
    startup_cpu_idle_loop();
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */