WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] apci issue

To: Gerd Knorr <kraxel@xxxxxxxxxxx>
Subject: Re: [Xen-devel] apci issue
From: Keir Fraser <Keir.Fraser@xxxxxxxxxxxx>
Date: Thu, 21 Apr 2005 15:53:41 +0100
Cc: natasha@xxxxxxxxxx, Ian Pratt <m+Ian.Pratt@xxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Thu, 21 Apr 2005 14:53:35 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: Your message of "Thu, 21 Apr 2005 16:46:50 +0200." <20050421144649.GD10671@bytesex>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
> On Thu, Apr 21, 2005 at 03:26:53PM +0100, Keir Fraser wrote:
> > 
> > On 21 Apr 2005, at 15:03, Gerd Knorr wrote:
> > 
> > >On Thu, Apr 21, 2005 at 02:15:06PM +0100, Ian Pratt wrote:
> > >>
> > >>>Updated to current bk today, and my machine stopped booting ...
> > >>>I've tracked it down to the apic changes from end of last week.
> > >>>cset 1.1307 boots fine, 1.1308 doesn't.
> > 
> > Try changing the following line in xen/arch/x86/apic.c (in 
> > setup_local_APIC):
> >   if (!smp_processor_id()  && (pic_mode || !value)) {
> > To:
> >   if (!smp_processor_id()) {
> 
> Yep, that fixes it.
> 
> > My guess is that your IRQs are routed through the legacy PIC
> 
> It's indeed a old machine with a classic PIC, no IO-APIC yet.

Excellent. I think there may be a better patch though. Please can you
try replacing arch/x86/apic.c and arch/x86/setup.c with the versions
attached to this email?

With luck this will both fix the problem and bring us a little closer
to the Linux codebase (probably not a bad thing with all this fragile
bootstrap code).

 Cheers,
 Keir

/*
 *      based on linux-2.6.10/arch/i386/kernel/apic.c
 *
 *  Local APIC handling, local APIC timers
 *
 *  (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
 *
 *  Fixes
 *  Maciej W. Rozycki   :   Bits for genuine 82489DX APICs;
 *                  thanks to Eric Gilmore
 *                  and Rolf G. Tews
 *                  for testing these extensively.
 *      Maciej W. Rozycki       :       Various updates and fixes.
 *      Mikael Pettersson       :       Power Management for UP-APIC.
 *    Pavel Machek and
 *    Mikael Pettersson    :    PM converted to driver model.
 */

#include <xen/config.h>
#include <xen/perfc.h>
#include <xen/errno.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/smp.h>
#include <xen/softirq.h>
#include <asm/mc146818rtc.h>
#include <asm/msr.h>
#include <asm/atomic.h>
#include <asm/mpspec.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mach_apic.h>
#include <asm/io_ports.h>

/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;

static int enabled_via_apicbase;

int get_maxlvt(void)
{
    unsigned int v, ver, maxlvt;

    v = apic_read(APIC_LVR);
    ver = GET_APIC_VERSION(v);
    /* 82489DXs do not report # of LVT entries. */
    maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
    return maxlvt;
}

void clear_local_APIC(void)
{
    int maxlvt;
    unsigned long v;

    maxlvt = get_maxlvt();

    /*
     * Masking an LVT entry on a P6 can trigger a local APIC error
     * if the vector is zero. Mask LVTERR first to prevent this.
     */
    if (maxlvt >= 3) {
        v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
        apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
    }
    /*
     * Careful: we have to set masks only first to deassert
     * any level-triggered sources.
     */
    v = apic_read(APIC_LVTT);
    apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT0);
    apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT1);
    apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
    if (maxlvt >= 4) {
        v = apic_read(APIC_LVTPC);
        apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
    }

    /*
     * Clean APIC state for other OSs:
     */
    apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
    apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
    apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
    if (maxlvt >= 3)
        apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
    if (maxlvt >= 4)
        apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);

    v = GET_APIC_VERSION(apic_read(APIC_LVR));
    if (APIC_INTEGRATED(v)) {   /* !82489DX */
        if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
            apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
    }
}

void __init connect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Do not trust the local APIC being empty at bootup.
         */
        clear_local_APIC();
        /*
         * PIC mode, enable APIC mode in the IMCR, i.e.
         * connect BSP's local APIC to INT and NMI lines.
         */
        printk("leaving PIC mode, enabling APIC mode.\n");
        outb(0x70, 0x22);
        outb(0x01, 0x23);
    }
}

void disconnect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Put the board back into PIC mode (has an effect
         * only on certain older boards).  Note that APIC
         * interrupts, including IPIs, won't work beyond
         * this point!  The only exception are INIT IPIs.
         */
        printk("disabling APIC mode, entering PIC mode.\n");
        outb(0x70, 0x22);
        outb(0x00, 0x23);
    }
}

void disable_local_APIC(void)
{
    unsigned long value;

    clear_local_APIC();

    /*
     * Disable APIC (implies clearing of registers
     * for 82489DX!).
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_SPIV_APIC_ENABLED;
    apic_write_around(APIC_SPIV, value);

    if (enabled_via_apicbase) {
        unsigned int l, h;
        rdmsr(MSR_IA32_APICBASE, l, h);
        l &= ~MSR_IA32_APICBASE_ENABLE;
        wrmsr(MSR_IA32_APICBASE, l, h);
    }
}

/*
 * This is to verify that we're looking at a real local APIC.
 * Check these against your board if the CPUs aren't getting
 * started for no apparent reason.
 */
int __init verify_local_APIC(void)
{
    unsigned int reg0, reg1;

    /*
     * The version register is read-only in a real APIC.
     */
    reg0 = apic_read(APIC_LVR);
    Dprintk("Getting VERSION: %x\n", reg0);
    apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
    reg1 = apic_read(APIC_LVR);
    Dprintk("Getting VERSION: %x\n", reg1);

    /*
     * The two version reads above should print the same
     * numbers.  If the second one is different, then we
     * poke at a non-APIC.
     */
    if (reg1 != reg0)
        return 0;

    /*
     * Check if the version looks reasonably.
     */
    reg1 = GET_APIC_VERSION(reg0);
    if (reg1 == 0x00 || reg1 == 0xff)
        return 0;
    reg1 = get_maxlvt();
    if (reg1 < 0x02 || reg1 == 0xff)
        return 0;

    /*
     * The ID register is read/write in a real APIC.
     */
    reg0 = apic_read(APIC_ID);
    Dprintk("Getting ID: %x\n", reg0);

    /*
     * The next two are just to see if we have sane values.
     * They're only really relevant if we're in Virtual Wire
     * compatibility mode, but most boxes are anymore.
     */
    reg0 = apic_read(APIC_LVT0);
    Dprintk("Getting LVT0: %x\n", reg0);
    reg1 = apic_read(APIC_LVT1);
    Dprintk("Getting LVT1: %x\n", reg1);

    return 1;
}

void __init sync_Arb_IDs(void)
{
    /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
    unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
    if (ver >= 0x14)    /* P4 or higher */
        return;
    /*
     * Wait for idle.
     */
    apic_wait_icr_idle();

    Dprintk("Synchronizing Arb IDs.\n");
    apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
                      | APIC_DM_INIT);
}

extern void __error_in_apic_c (void);

void __init init_bsp_APIC(void)
{
    unsigned long value, ver;

    /*
     * Don't do the setup now if we have a SMP BIOS as the through-I/O-APIC 
     * virtual wire mode might be active.
     */
    if (smp_found_config || !cpu_has_apic)
        return;

    value = apic_read(APIC_LVR);
    ver = GET_APIC_VERSION(value);
    
    /*
     * Do not trust the local APIC being empty at bootup.
     */
    clear_local_APIC();
    
    /*
     * Enable APIC.
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    value |= APIC_SPIV_APIC_ENABLED;
    
    /* This bit is reserved on P4/Xeon and should be cleared */
    if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 
15))
        value &= ~APIC_SPIV_FOCUS_DISABLED;
    else
        value |= APIC_SPIV_FOCUS_DISABLED;
    value |= SPURIOUS_APIC_VECTOR;
    apic_write_around(APIC_SPIV, value);

    /*
     * Set up the virtual wire mode.
     */
    apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
    value = APIC_DM_NMI;
    if (!APIC_INTEGRATED(ver))              /* 82489DX */
        value |= APIC_LVT_LEVEL_TRIGGER;
    apic_write_around(APIC_LVT1, value);
}

void __init setup_local_APIC (void)
{
    unsigned long oldvalue, value, ver, maxlvt;

    /* Pound the ESR really hard over the head with a big hammer - mbligh */
    if (esr_disable) {
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
    }

    value = apic_read(APIC_LVR);
    ver = GET_APIC_VERSION(value);

    if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
        __error_in_apic_c();

    /*
     * Double-check whether this APIC is really registered.
     */
    if (!apic_id_registered())
        BUG();

    /*
     * Intel recommends to set DFR, LDR and TPR before enabling
     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116).  So here it goes...
     */
    init_apic_ldr();

    /*
     * Set Task Priority to 'accept all'. We never change this
     * later on.
     */
    value = apic_read(APIC_TASKPRI);
    value &= ~APIC_TPRI_MASK;
    apic_write_around(APIC_TASKPRI, value);

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distributon model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     * See also the comment in end_level_ioapic_irq().  --macro
     */
#if 1
    /* Enable focus processor (bit==0) */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
    /* Disable focus processor (bit==1) */
    value |= APIC_SPIV_FOCUS_DISABLED;
#endif
    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;
    apic_write_around(APIC_SPIV, value);

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the BP's LINT0. This is not
     * strictly necessery in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (!smp_processor_id() && (pic_mode || !value)) {
        value = APIC_DM_EXTINT;
        printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        printk("masked ExtINT on CPU#%d\n", smp_processor_id());
    }
    apic_write_around(APIC_LVT0, value);

    /*
     * only the BP should see the LINT1 NMI signal, obviously.
     */
    if (!smp_processor_id())
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;
    if (!APIC_INTEGRATED(ver))      /* 82489DX */
        value |= APIC_LVT_LEVEL_TRIGGER;
    apic_write_around(APIC_LVT1, value);

    if (APIC_INTEGRATED(ver) && !esr_disable) {        /* !82489DX */
        maxlvt = get_maxlvt();
        if (maxlvt > 3)     /* Due to the Pentium erratum 3AP. */
            apic_write(APIC_ESR, 0);
        oldvalue = apic_read(APIC_ESR);

        value = ERROR_APIC_VECTOR;      // enables sending errors
        apic_write_around(APIC_LVTERR, value);
        /*
         * spec says clear errors after enabling vector.
         */
        if (maxlvt > 3)
            apic_write(APIC_ESR, 0);
        value = apic_read(APIC_ESR);
        if (value != oldvalue)
            printk("ESR value before enabling vector: 0x%08lx "
                "after: 0x%08lx\n", oldvalue, value);
    } else {
        if (esr_disable)    
            /* 
             * Something untraceble is creating bad interrupts on 
             * secondary quads ... for the moment, just leave the
             * ESR disabled - we can't do anything useful with the
             * errors anyway - mbligh
             */
            printk("Leaving ESR disabled.\n");
        else 
        printk("No ESR for 82489DX.\n");
    }

    if (nmi_watchdog == NMI_LOCAL_APIC)
        setup_apic_nmi_watchdog();
}

/*
 * Detect and enable local APICs on non-SMP boards.
 * Original code written by Keir Fraser.
 */

static int __init detect_init_APIC (void)
{
    u32 h, l, features;
    extern void get_cpu_vendor(struct cpuinfo_x86*);

    /* Workaround for us being called before identify_cpu(). */
    get_cpu_vendor(&boot_cpu_data);

    switch (boot_cpu_data.x86_vendor) {
    case X86_VENDOR_AMD:
        if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
            (boot_cpu_data.x86 == 15))        
            break;
        goto no_apic;
    case X86_VENDOR_INTEL:
        if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
            (boot_cpu_data.x86 == 5 && cpu_has_apic))
            break;
        goto no_apic;
    default:
        goto no_apic;
    }

    if (!cpu_has_apic) {
        /*
         * Some BIOSes disable the local APIC in the
         * APIC_BASE MSR. This can only be done in
         * software for Intel P6 or later and AMD K7
         * (Model > 1) or later.
         */
        rdmsr(MSR_IA32_APICBASE, l, h);
        if (!(l & MSR_IA32_APICBASE_ENABLE)) {
            printk("Local APIC disabled by BIOS -- reenabling.\n");
            l &= ~MSR_IA32_APICBASE_BASE;
            l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
            wrmsr(MSR_IA32_APICBASE, l, h);
            enabled_via_apicbase = 1;
        }
    }

    /* The APIC feature bit should now be enabled in `cpuid' */
    features = cpuid_edx(1);
    if (!(features & (1 << X86_FEATURE_APIC))) {
        printk("Could not enable APIC!\n");
        return -1;
    }

    set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

    /* The BIOS may have set up the APIC at some other address */
    rdmsr(MSR_IA32_APICBASE, l, h);
    if (l & MSR_IA32_APICBASE_ENABLE)
        mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;

    if (nmi_watchdog != NMI_NONE)
        nmi_watchdog = NMI_LOCAL_APIC;

    printk("Found and enabled local APIC!\n");

    return 0;

no_apic:
    printk("No local APIC present or hardware disabled\n");
    return -1;
}

void __init init_apic_mappings(void)
{
    unsigned long apic_phys;

    /*
     * If no local APIC can be found then set up a fake all
     * zeroes page to simulate the local APIC and another
     * one for the IO-APIC.
     */
    if (!smp_found_config && detect_init_APIC()) {
        apic_phys = alloc_xenheap_page();
        apic_phys = __pa(apic_phys);
    } else
        apic_phys = mp_lapic_addr;

    set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
    Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);

    /*
     * Fetch the APIC ID of the BSP in case we have a
     * default configuration (or the MP table is broken).
     */
    if (boot_cpu_physical_apicid == -1U)
        boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));

#ifdef CONFIG_X86_IO_APIC
    {
        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
        int i;

        for (i = 0; i < nr_ioapics; i++) {
            if (smp_found_config) {
                ioapic_phys = mp_ioapics[i].mpc_apicaddr;
                if (!ioapic_phys) {
                    printk(KERN_ERR
                           "WARNING: bogus zero IO-APIC "
                           "address found in MPTABLE, "
                           "disabling IO/APIC support!\n");
                    smp_found_config = 0;
                    skip_ioapic_setup = 1;
                    goto fake_ioapic_page;
                }
            } else {
fake_ioapic_page:
                ioapic_phys = alloc_xenheap_page();
                ioapic_phys = __pa(ioapic_phys);
            }
            set_fixmap_nocache(idx, ioapic_phys);
            Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
                    fix_to_virt(idx), ioapic_phys);
            idx++;
        }
    }
#endif
}

/*****************************************************************************
 * APIC calibration
 * 
 * The APIC is programmed in bus cycles.
 * Timeout values should specified in real time units.
 * The "cheapest" time source is the cyclecounter.
 * 
 * Thus, we need a mappings from: bus cycles <- cycle counter <- system time
 * 
 * The calibration is currently a bit shoddy since it requires the external
 * timer chip to generate periodic timer interupts. 
 *****************************************************************************/

/* used for system time scaling */
static unsigned long bus_freq;    /* KAF: pointer-size avoids compile warns. */
static u32           bus_cycle;   /* length of one bus cycle in pico-seconds */
static u32           bus_scale;   /* scaling factor convert ns to bus cycles */

/*
 * The timer chip is already set up at HZ interrupts per second here,
 * but we do not accept timer interrupts yet. We only allow the BP
 * to calibrate.
 */
static unsigned int __init get_8254_timer_count(void)
{
    /*extern spinlock_t i8253_lock;*/
    /*unsigned long flags;*/

    unsigned int count;

    /*spin_lock_irqsave(&i8253_lock, flags);*/

    outb_p(0x00, PIT_MODE);
    count = inb_p(PIT_CH0);
    count |= inb_p(PIT_CH0) << 8;

    /*spin_unlock_irqrestore(&i8253_lock, flags);*/

    return count;
}

/* next tick in 8254 can be caught by catching timer wraparound */
static void __init wait_8254_wraparound(void)
{
    unsigned int curr_count, prev_count=~0;
    int delta;

    curr_count = get_8254_timer_count();

    do {
        prev_count = curr_count;
        curr_count = get_8254_timer_count();
        delta = curr_count-prev_count;

        /*
         * This limit for delta seems arbitrary, but it isn't, it's slightly
         * above the level of error a buggy Mercury/Neptune chipset timer can
         * cause.
         */
    } while (delta < 300);
}

/*
 * Default initialization for 8254 timers. If we use other timers like HPET,
 * we override this later
 */
void (*wait_timer_tick)(void) = wait_8254_wraparound;

/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clock. During calibration we actually call
 * this function with a very large value and read the current time after
 * a well defined period of time as expired.
 *
 * Calibration is only performed once, for CPU0!
 *
 * We do reads before writes even if unnecessary, to get around the
 * P5 APIC double write bug.
 */

#define APIC_DIVISOR 1

static void __setup_APIC_LVTT(unsigned int clocks)
{
    unsigned int lvtt_value, tmp_value, ver;

    ver = GET_APIC_VERSION(apic_read(APIC_LVR));
    /* NB. Xen uses local APIC timer in one-shot mode. */
    lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR;
    if (!APIC_INTEGRATED(ver))
        lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
    apic_write_around(APIC_LVTT, lvtt_value);

    tmp_value = apic_read(APIC_TDCR);
    apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));

    apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}

/*
 * this is done for every CPU from setup_APIC_clocks() below.
 * We setup each local APIC with a zero timeout value for now.
 * Unlike Linux, we don't have to wait for slices etc.
 */
void setup_APIC_timer(void * data)
{
    unsigned long flags;
    __save_flags(flags);
    __sti();
    __setup_APIC_LVTT(0);
    __restore_flags(flags);
}

/*
 * In this function we calibrate APIC bus clocks to the external timer.
 *
 * As a result we have the Bus Speed and CPU speed in Hz.
 * 
 * We want to do the calibration only once (for CPU0).  CPUs connected by the
 * same APIC bus have the very same bus frequency.
 *
 * This bit is a bit shoddy since we use the very same periodic timer interrupt
 * we try to eliminate to calibrate the APIC. 
 */

int __init calibrate_APIC_clock(void)
{
    unsigned long long t1 = 0, t2 = 0;
    long tt1, tt2;
    long result;
    int i;
    const int LOOPS = HZ/10;

    printk("Calibrating APIC timer for CPU%d...\n",  smp_processor_id());

    /*
     * Put whatever arbitrary (but long enough) timeout
     * value into the APIC clock, we just want to get the
     * counter running for calibration.
     */
    __setup_APIC_LVTT(1000000000);

    /*
     * The timer chip counts down to zero. Let's wait
     * for a wraparound to start exact measurement:
     * (the current tick might have been already half done)
     */
    wait_timer_tick();

    /*
     * We wrapped around just now. Let's start:
     */
    if (cpu_has_tsc)
        rdtscll(t1);
    tt1 = apic_read(APIC_TMCCT);

    /*
     * Let's wait LOOPS wraprounds:
     */
    for (i = 0; i < LOOPS; i++)
        wait_timer_tick();

    tt2 = apic_read(APIC_TMCCT);
    if (cpu_has_tsc)
        rdtscll(t2);

    /*
     * The APIC bus clock counter is 32 bits only, it
     * might have overflown, but note that we use signed
     * longs, thus no extra care needed.
     * [underflown to be exact, as the timer counts down ;)]
     */

    result = (tt1-tt2)*APIC_DIVISOR/LOOPS;

    if (cpu_has_tsc)
        printk("..... CPU clock speed is %ld.%04ld MHz.\n",
            ((long)(t2-t1)/LOOPS)/(1000000/HZ),
            ((long)(t2-t1)/LOOPS)%(1000000/HZ));

    printk("..... host bus clock speed is %ld.%04ld MHz.\n",
        result/(1000000/HZ),
        result%(1000000/HZ));

    /* set up multipliers for accurate timer code */
    bus_freq   = result*HZ;
    bus_cycle  = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
    bus_scale  = (1000*262144)/bus_cycle;

    printk("..... bus_scale = 0x%08X\n", bus_scale);
    /* reset APIC to zero timeout value */
    __setup_APIC_LVTT(0);

    return result;
}

/*
 * initialise the APIC timers for all CPUs
 * we start with the first and find out processor frequency and bus speed
 */
void __init setup_APIC_clocks (void)
{
    printk("Using local APIC timer interrupts.\n");
    using_apic_timer = 1;
    __cli();
    /* calibrate CPU0 for CPU speed and BUS speed */
    bus_freq = calibrate_APIC_clock();
    /* Now set up the timer for real. */
    setup_APIC_timer((void *)bus_freq);
    __sti();
    /* and update all other cpus */
    smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
}

#undef APIC_DIVISOR

/*
 * reprogram the APIC timer. Timeoutvalue is in ns from start of boot
 * returns 1 on success
 * returns 0 if the timeout value is too small or in the past.
 */
int reprogram_ac_timer(s_time_t timeout)
{
    s_time_t    now;
    s_time_t    expire;
    u64         apic_tmict;

    /*
     * We use this value because we don't trust zero (we think it may just
     * cause an immediate interrupt). At least this is guaranteed to hold it
     * off for ages (esp. since the clock ticks on bus clock, not cpu clock!).
     */
    if ( timeout == 0 )
    {
        apic_tmict = 0xffffffff;
        goto reprogram;
    }

    now = NOW();
    expire = timeout - now; /* value from now */

    if ( expire <= 0 )
    {
        Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n", 
                smp_processor_id(), (u32)(now>>32), 
                (u32)now, (u32)(timeout>>32),(u32)timeout);
        return 0;
    }

    /*
     * If we don't have local APIC then we just poll the timer list off the
     * PIT interrupt. Cheesy but good enough to work on eg. VMware :-)
     */
    if ( !cpu_has_apic )
        return 1;

    /* conversion to bus units */
    apic_tmict = (((u64)bus_scale) * expire)>>18;

    if ( apic_tmict >= 0xffffffff )
    {
        Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id());
        apic_tmict = 0xffffffff;
    }

    if ( apic_tmict == 0 )
    {
        Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id());
        return 0;
    }

 reprogram:
    /* Program the timer. */
    apic_write(APIC_TMICT, (unsigned long)apic_tmict);

    return 1;
}

void smp_apic_timer_interrupt(struct xen_regs * regs)
{
    ack_APIC_irq();
    perfc_incrc(apic_timer);
    raise_softirq(AC_TIMER_SOFTIRQ);
}

/*
 * This interrupt should _never_ happen with our APIC/SMP architecture
 */
asmlinkage void smp_spurious_interrupt(struct xen_regs *regs)
{
    unsigned long v;

    /*
     * Check if this really is a spurious interrupt and ACK it
     * if it is a vectored one.  Just in case...
     * Spurious interrupts should not be ACKed.
     */
    v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
    if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
        ack_APIC_irq();

    /* see sw-dev-man vol 3, chapter 7.4.13.5 */
    printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
           smp_processor_id());
}

/*
 * This interrupt should never happen with our APIC/SMP architecture
 */

asmlinkage void smp_error_interrupt(struct xen_regs *regs)
{
    unsigned long v, v1;

    /* First tickle the hardware, only then report what went on. -- REW */
    v = apic_read(APIC_ESR);
    apic_write(APIC_ESR, 0);
    v1 = apic_read(APIC_ESR);
    ack_APIC_irq();
    atomic_inc(&irq_err_count);

    /* Here is what the APIC error bits mean:
       0: Send CS error
       1: Receive CS error
       2: Send accept error
       3: Receive accept error
       4: Reserved
       5: Send illegal vector
       6: Received illegal vector
       7: Illegal register address
    */
    printk("APIC error on CPU%d: %02lx(%02lx)\n",
            smp_processor_id(), v, v1);
}

/*
 * This initializes the IO-APIC and APIC hardware if this is
 * a UP kernel.
 */
int __init APIC_init_uniprocessor (void)
{
    if (!smp_found_config && !cpu_has_apic)
        return -1;

    /*
     * Complain if the BIOS pretends there is one.
     */
    if (!cpu_has_apic && 
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
        printk("BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        return -1;
    }

    verify_local_APIC();

    connect_bsp_APIC();

#ifdef CONFIG_SMP
    cpu_online_map = 1;
#endif
    phys_cpu_present_map = 1;
    apic_write_around(APIC_ID, boot_cpu_physical_apicid);

    setup_local_APIC();

#ifdef CONFIG_X86_IO_APIC
    if (smp_found_config)
        if (!skip_ioapic_setup && nr_ioapics)
            setup_IO_APIC();
#endif
    setup_APIC_clocks();

    return 0;
}
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/pci.h>
#include <xen/serial.h>
#include <xen/softirq.h>
#include <xen/acpi.h>
#include <xen/console.h>
#include <xen/serial.h>
#include <xen/trace.h>
#include <xen/multiboot.h>
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/domain_page.h>
#include <asm/shadow.h>
#include <asm/e820.h>

/*
 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
 * pfn_info table and allocation bitmap.
 */
static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
#if defined(__x86_64__)
integer_param("xenheap_megabytes", opt_xenheap_megabytes);
#endif

/* opt_noht: If true, Hyperthreading is ignored. */
int opt_noht = 0;
boolean_param("noht", opt_noht);

/* opt_noacpi: If true, ACPI tables are not parsed. */
static int opt_noacpi = 0;
boolean_param("noacpi", opt_noacpi);

/* opt_nosmp: If true, secondary processors are ignored. */
static int opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);

/* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */
/* NB. This flag implies 'nosmp' and 'noacpi'. */
static int opt_ignorebiostables = 0;
boolean_param("ignorebiostables", opt_ignorebiostables);

/* opt_watchdog: If true, run a watchdog NMI on each processor. */
static int opt_watchdog = 0;
boolean_param("watchdog", opt_watchdog);

int early_boot = 1;

unsigned long xenheap_phys_end;

extern void arch_init_memory(void);
extern void init_IRQ(void);
extern void trap_init(void);
extern void time_init(void);
extern void ac_timer_init(void);
extern void initialize_keytable();
extern int do_timer_lists_from_pit;

char ignore_irq13; /* set if exception 16 works */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };

#if defined(__x86_64__)
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
#endif
EXPORT_SYMBOL(mmu_cr4_features);

unsigned long wait_init_idle;

struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain };

#ifdef CONFIG_ACPI_INTERPRETER
int acpi_disabled = 0;
#else
int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

int phys_proc_id[NR_CPUS];
int logical_proc_id[NR_CPUS];

/* Standard macro to see if a specific flag is changeable. */
static inline int flag_is_changeable_p(unsigned long flag)
{
    unsigned long f1, f2;

    asm("pushf\n\t"
        "pushf\n\t"
        "pop %0\n\t"
        "mov %0,%1\n\t"
        "xor %2,%0\n\t"
        "push %0\n\t"
        "popf\n\t"
        "pushf\n\t"
        "pop %0\n\t"
        "popf\n\t"
        : "=&r" (f1), "=&r" (f2)
        : "ir" (flag));

    return ((f1^f2) & flag) != 0;
}

/* Probe for the CPUID instruction */
static int __init have_cpuid_p(void)
{
    return flag_is_changeable_p(X86_EFLAGS_ID);
}

void __init get_cpu_vendor(struct cpuinfo_x86 *c)
{
    char *v = c->x86_vendor_id;

    if (!strcmp(v, "GenuineIntel"))
        c->x86_vendor = X86_VENDOR_INTEL;
    else if (!strcmp(v, "AuthenticAMD"))
        c->x86_vendor = X86_VENDOR_AMD;
    else if (!strcmp(v, "CyrixInstead"))
        c->x86_vendor = X86_VENDOR_CYRIX;
    else if (!strcmp(v, "UMC UMC UMC "))
        c->x86_vendor = X86_VENDOR_UMC;
    else if (!strcmp(v, "CentaurHauls"))
        c->x86_vendor = X86_VENDOR_CENTAUR;
    else if (!strcmp(v, "NexGenDriven"))
        c->x86_vendor = X86_VENDOR_NEXGEN;
    else if (!strcmp(v, "RiseRiseRise"))
        c->x86_vendor = X86_VENDOR_RISE;
    else if (!strcmp(v, "GenuineTMx86") ||
             !strcmp(v, "TransmetaCPU"))
        c->x86_vendor = X86_VENDOR_TRANSMETA;
    else
        c->x86_vendor = X86_VENDOR_UNKNOWN;
}

static void __init init_intel(struct cpuinfo_x86 *c)
{
    /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
    if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
        clear_bit(X86_FEATURE_SEP, &c->x86_capability);

#ifdef CONFIG_SMP
    if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
    {
        u32     eax, ebx, ecx, edx;
        int     initial_apic_id, siblings, cpu = smp_processor_id();
        
        cpuid(1, &eax, &ebx, &ecx, &edx);
        ht_per_core = siblings = (ebx & 0xff0000) >> 16;

        if ( opt_noht )
            clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);

        if ( siblings <= 1 )
        {
            printk(KERN_INFO  "CPU#%d: Hyper-Threading is disabled\n", cpu);
        } 
        else if ( siblings > 2 )
        {
            panic("We don't support more than two logical CPUs per package!");
        }
        else
        {
            initial_apic_id = ebx >> 24 & 0xff;
            phys_proc_id[cpu]    = initial_apic_id >> 1;
            logical_proc_id[cpu] = initial_apic_id & 1;
            printk(KERN_INFO  "CPU#%d: Physical ID: %d, Logical ID: %d\n",
                   cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
        }
    }
#endif

#ifdef CONFIG_VMX
    start_vmx();
#endif

}

static void __init init_amd(struct cpuinfo_x86 *c)
{
    /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
       3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
    clear_bit(0*32+31, &c->x86_capability);
        
    switch(c->x86)
    {
    case 5:
        panic("AMD K6 is not supported.\n");
    case 6:     /* An Athlon/Duron. We can trust the BIOS probably */
        break;          
    }
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void __init identify_cpu(struct cpuinfo_x86 *c)
{
    int i, cpu = smp_processor_id();
    u32 xlvl, tfms, junk;

    phys_proc_id[cpu]    = cpu;
    logical_proc_id[cpu] = 0;

    c->x86_vendor = X86_VENDOR_UNKNOWN;
    c->cpuid_level = -1;        /* CPUID not detected */
    c->x86_model = c->x86_mask = 0;     /* So far unknown... */
    c->x86_vendor_id[0] = '\0'; /* Unset */
    memset(&c->x86_capability, 0, sizeof c->x86_capability);

    if ( !have_cpuid_p() )
        panic("Ancient processors not supported\n");

    /* Get vendor name */
    cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
          (unsigned int *)&c->x86_vendor_id[0],
          (unsigned int *)&c->x86_vendor_id[8],
          (unsigned int *)&c->x86_vendor_id[4]);

    get_cpu_vendor(c);
                
    if ( c->cpuid_level == 0 )
        panic("Decrepit CPUID not supported\n");

    cpuid(0x00000001, &tfms, &junk, &junk,
          &c->x86_capability[0]);
    c->x86 = (tfms >> 8) & 15;
    c->x86_model = (tfms >> 4) & 15;
    c->x86_mask = tfms & 15;

    /* AMD-defined flags: level 0x80000001 */
    xlvl = cpuid_eax(0x80000000);
    if ( (xlvl & 0xffff0000) == 0x80000000 ) {
        if ( xlvl >= 0x80000001 )
            c->x86_capability[1] = cpuid_edx(0x80000001);
    }

    /* Transmeta-defined flags: level 0x80860001 */
    xlvl = cpuid_eax(0x80860000);
    if ( (xlvl & 0xffff0000) == 0x80860000 ) {
        if (  xlvl >= 0x80860001 )
            c->x86_capability[2] = cpuid_edx(0x80860001);
    }

    printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
           smp_processor_id(),
           c->x86_capability[0],
           c->x86_capability[1],
           c->x86_capability[2],
           c->x86_vendor);

    switch ( c->x86_vendor ) {
    case X86_VENDOR_INTEL:
        init_intel(c);
        break;
    case X86_VENDOR_AMD:
        init_amd(c);
        break;
    case X86_VENDOR_UNKNOWN:  /* Connectix Virtual PC reports this */
        break;
    case X86_VENDOR_CENTAUR:
        break;
    default:
        printk("Unknown CPU identifier (%d): continuing anyway, "
               "but might fail.\n", c->x86_vendor);
    }
        
    printk("CPU caps: %08x %08x %08x %08x\n",
           c->x86_capability[0],
           c->x86_capability[1],
           c->x86_capability[2],
           c->x86_capability[3]);

    /*
     * On SMP, boot_cpu_data holds the common feature set between
     * all CPUs; so make sure that we indicate which features are
     * common between the CPUs.  The first time this routine gets
     * executed, c == &boot_cpu_data.
     */
    if ( c != &boot_cpu_data ) {
        /* AND the already accumulated flags with these */
        for ( i = 0 ; i < NCAPINTS ; i++ )
            boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
    }
}


unsigned long cpu_initialized;
void __init cpu_init(void)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    if ( test_and_set_bit(nr, &cpu_initialized) )
        panic("CPU#%d already initialized!!!\n", nr);
    printk("Initializing CPU#%d\n", nr);

    SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
    SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
    __asm__ __volatile__ ( "lgdt %0" : "=m" (*current->arch.gdt) );

    /* No nested task. */
    __asm__ __volatile__ ( "pushf ; andw $0xbfff,(%"__OP"sp) ; popf" );

    /* Ensure FPU gets initialised for each domain. */
    stts();

    /* Set up and load the per-CPU TSS and LDT. */
    t->bitmap = IOBMP_INVALID_OFFSET;
#if defined(__i386__)
    t->ss0  = __HYPERVISOR_DS;
    t->esp0 = get_stack_bottom();
#elif defined(__x86_64__)
    t->rsp0 = get_stack_bottom();
#endif
    set_tss_desc(nr,t);
    load_TR(nr);
    __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );

    /* Clear all 6 debug registers. */
#define CD(register) __asm__ ( "mov %0,%%db" #register : : "r" (0UL) );
    CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
#undef CD

    /* Install correct page table. */
    write_ptbase(current);

    init_idle_task();
}

static void __init do_initcalls(void)
{
    initcall_t *call;
    for ( call = &__initcall_start; call < &__initcall_end; call++ )
        (*call)();
}

unsigned long pci_mem_start = 0x10000000;

static void __init start_of_day(void)
{
    unsigned long low_mem_size;
    
#ifdef MEMORY_GUARD
    /* Unmap the first page of CPU0's stack. */
    extern unsigned long cpu0_stack[];
    memguard_guard_stack(cpu0_stack);
#endif

    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);

    if ( opt_watchdog ) 
        nmi_watchdog = NMI_LOCAL_APIC;

    sort_exception_tables();

    arch_do_createdomain(current);

    /* Tell the PCI layer not to allocate too close to the RAM area.. */
    low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
    if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
    
    identify_cpu(&boot_cpu_data); /* get CPU type info */
    if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
    if ( cpu_has_xmm )  set_in_cr4(X86_CR4_OSXMMEXCPT);
#ifdef CONFIG_SMP
    if ( opt_ignorebiostables )
    {
        opt_nosmp  = 1;           /* No SMP without configuration          */
        opt_noacpi = 1;           /* ACPI will just confuse matters also   */
    }
    else
    {
        find_smp_config();
        smp_alloc_memory();       /* trampoline which other CPUs jump at   */
    }
#endif
    paging_init();                /* not much here now, but sets up fixmap */
    if ( !opt_noacpi )
        acpi_boot_init();
#ifdef CONFIG_SMP
    if ( smp_found_config ) 
        get_smp_config();
#endif
    init_apic_mappings(); /* make APICs addressable in our pagetables. */
    scheduler_init();   
    init_IRQ();  /* installs simple interrupt wrappers. Starts HZ clock. */
    trap_init();
    time_init(); /* installs software handler for HZ clock. */

    arch_init_memory();

#ifndef CONFIG_SMP    
    APIC_init_uniprocessor();
#else
    if ( opt_nosmp )
        APIC_init_uniprocessor();
    else
        smp_boot_cpus(); 
    /*
     * Does loads of stuff, including kicking the local
     * APIC, and the IO APIC after other CPUs are booted.
     * Each IRQ is preferably handled by IO-APIC, but
     * fall thru to 8259A if we have to (but slower).
     */
#endif

    __sti();

    initialize_keytable(); /* call back handling for key codes */

    serial_init_stage2();

    if ( !cpu_has_apic )
    {
        do_timer_lists_from_pit = 1;
        if ( smp_num_cpus != 1 )
            panic("We need local APICs on SMP machines!");
    }

    ac_timer_init();    /* init accurate timers */
    init_xen_time();    /* initialise the time */
    schedulers_start(); /* start scheduler for each CPU */

    check_nmi_watchdog();

#ifdef CONFIG_PCI
    pci_init();
#endif
    do_initcalls();

#ifdef CONFIG_SMP
    wait_init_idle = cpu_online_map;
    clear_bit(smp_processor_id(), &wait_init_idle);
    smp_threads_ready = 1;
    smp_commence(); /* Tell other CPUs that state of the world is stable. */
    while ( wait_init_idle != 0 )
        cpu_relax();
#endif

    watchdog_on = 1;
#ifdef __x86_64__ /* x86_32 uses low mappings when building DOM0. */
    zap_low_mappings();
#endif
}

void __init __start_xen(multiboot_info_t *mbi)
{
    char *cmdline;
    module_t *mod = (module_t *)__va(mbi->mods_addr);
    void *heap_start;
    unsigned long firsthole_start, nr_pages;
    unsigned long initial_images_start, initial_images_end;
    struct e820entry e820_raw[E820MAX];
    int i, e820_raw_nr = 0, bytes = 0;

    /* Parse the command-line options. */
    if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
        cmdline_parse(__va(mbi->cmdline));

    /* Must do this early -- e.g., spinlocks rely on get_current(). */
    set_current(&idle0_exec_domain);

    /* We initialise the serial devices very early so we can get debugging. */
    serial_init_stage1();

    init_console();

    /* Check that we have at least one Multiboot module. */
    if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
    {
        printk("FATAL ERROR: Require at least one Multiboot module.\n");
        for ( ; ; ) ;
    }

    xenheap_phys_end = opt_xenheap_megabytes << 20;

    if ( mbi->flags & MBI_MEMMAP )
    {
        while ( bytes < mbi->mmap_length )
        {
            memory_map_t *map = __va(mbi->mmap_addr + bytes);
            e820_raw[e820_raw_nr].addr = 
                ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
            e820_raw[e820_raw_nr].size = 
                ((u64)map->length_high << 32) | (u64)map->length_low;
            e820_raw[e820_raw_nr].type = 
                (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
            e820_raw_nr++;
            bytes += map->size + 4;
        }
    }
    else if ( mbi->flags & MBI_MEMLIMITS )
    {
        e820_raw[0].addr = 0;
        e820_raw[0].size = mbi->mem_lower << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = mbi->mem_upper << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else
    {
        printk("FATAL ERROR: Bootloader provided no memory information.\n");
        for ( ; ; ) ;
    }

    max_page = init_e820(e820_raw, e820_raw_nr);

    /* Find the first high-memory RAM hole. */
    for ( i = 0; i < e820.nr_map; i++ )
        if ( (e820.map[i].type == E820_RAM) &&
             (e820.map[i].addr >= 0x100000) )
            break;
    firsthole_start = e820.map[i].addr + e820.map[i].size;

    /* Relocate the Multiboot modules. */
    initial_images_start = xenheap_phys_end;
    initial_images_end   = initial_images_start + 
        (mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
    if ( initial_images_end > firsthole_start )
    {
        printk("Not enough memory to stash the DOM0 kernel image.\n");
        for ( ; ; ) ;
    }
#if defined(__i386__)
    memmove((void *)initial_images_start,  /* use low mapping */
            (void *)mod[0].mod_start,      /* use low mapping */
            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#elif defined(__x86_64__)
    memmove(__va(initial_images_start),
            __va(mod[0].mod_start),
            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#endif

    /* Initialise boot-time allocator with all RAM situated after modules. */
    heap_start = memguard_init(&_end);
    heap_start = __va(init_boot_allocator(__pa(heap_start)));
    nr_pages   = 0;
    for ( i = 0; i < e820.nr_map; i++ )
    {
        if ( e820.map[i].type != E820_RAM )
            continue;
        nr_pages += e820.map[i].size >> PAGE_SHIFT;
        if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end )
            init_boot_pages((e820.map[i].addr < initial_images_end) ?
                            initial_images_end : e820.map[i].addr,
                            e820.map[i].addr + e820.map[i].size);
    }

    printk("System RAM: %luMB (%lukB)\n", 
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));

    init_frametable();

    end_boot_allocator();

    init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
    printk("Xen heap: %luMB (%lukB)\n",
           (xenheap_phys_end-__pa(heap_start)) >> 20,
           (xenheap_phys_end-__pa(heap_start)) >> 10);

    early_boot = 0;

    start_of_day();

    grant_table_init();

    shadow_mode_init();

    /* Create initial domain 0. */
    dom0 = do_createdomain(0, 0);
    if ( dom0 == NULL )
        panic("Error creating domain 0\n");

    set_bit(DF_PRIVILEGED, &dom0->d_flags);

    /* Grab the DOM0 command line. Skip past the image name. */
    cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
    if ( cmdline != NULL )
    {
        while ( *cmdline == ' ' ) cmdline++;
        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
            while ( *cmdline == ' ' ) cmdline++;
    }

    /*
     * We're going to setup domain0 using the module(s) that we stashed safely
     * above our heap. The second module, if present, is an initrd ramdisk.
     */
    if ( construct_dom0(dom0,
                        initial_images_start, 
                        mod[0].mod_end-mod[0].mod_start,
                        (mbi->mods_count == 1) ? 0 :
                        initial_images_start + 
                        (mod[1].mod_start-mod[0].mod_start),
                        (mbi->mods_count == 1) ? 0 :
                        mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
                        cmdline) != 0)
        panic("Could not set up DOM0 guest OS\n");

    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
    scrub_heap_pages();

    init_trace_bufs();

    /* Give up the VGA console if DOM0 is configured to grab it. */
    console_endboot(cmdline && strstr(cmdline, "tty0"));

    /* Hide UART from DOM0 if we're using it */
    serial_endboot();

    domain_unpause_by_systemcontroller(current->domain);
    domain_unpause_by_systemcontroller(dom0);
    startup_cpu_idle_loop();
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>