WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/

xen-changelog

[Xen-changelog] Merge with ia64-unstable.hg

# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID a83ac0806d6b900eafac6e6942352f88fe89d449
# Parent  35b74976f5986786e00cc51f8c2b7cf2c5d0f768
# Parent  8328317e60ab3655d919e94a8a2ffce82edec73f
Merge with ia64-unstable.hg

diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vcpu.c
--- a/xen/arch/ia64/vcpu.c      Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vcpu.c      Fri Jul 15 13:39:50 2005
@@ -53,8 +53,15 @@
 #define        PRIVOP_COUNT_ADDR(x,y) do {} while (0)
 #endif
 
+unsigned long dtlb_translate_count = 0;
+unsigned long tr_translate_count = 0;
+unsigned long phys_translate_count = 0;
+
 unsigned long vcpu_verbose = 0;
 #define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
+
+extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa);
+extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa);
 
 /**************************************************************************
  VCPU general register access routines
@@ -224,6 +231,9 @@
 //else printf("but nothing pending\n");
        }
 #endif
+       if (enabling_interrupts &&
+               vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+                       PSCB(vcpu,pending_interruption) = 1;
        return IA64_NO_FAULT;
 }
 
@@ -267,6 +277,9 @@
                        return IA64_EXTINT_VECTOR;
        }
 #endif
+       if (enabling_interrupts &&
+               vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+                       PSCB(vcpu,pending_interruption) = 1;
        return IA64_NO_FAULT;
 }
 
@@ -531,6 +544,11 @@
 /**************************************************************************
  VCPU interrupt control register access routines
 **************************************************************************/
+
+void vcpu_pend_unspecified_interrupt(VCPU *vcpu)
+{
+       PSCB(vcpu,pending_interruption) = 1;
+}
 
 void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
 {
@@ -1241,28 +1259,117 @@
        return (IA64_ILLOP_FAULT);
 }
 
+#define itir_ps(itir)  ((itir >> 2) & 0x3f)
+#define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1))
+
+unsigned long vhpt_translate_count = 0;
+int in_vcpu_tpa = 0;
+
+IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir)
+{
+       unsigned long pta, pta_mask, iha, pte, ps;
+       TR_ENTRY *trp;
+       ia64_rr rr;
+
+       if (!(address >> 61)) {
+               if (!PSCB(vcpu,metaphysical_mode)) {
+                       REGS *regs = vcpu_regs(vcpu);
+                       unsigned long viip = PSCB(vcpu,iip);
+                       unsigned long vipsr = PSCB(vcpu,ipsr);
+                       unsigned long iip = regs->cr_iip;
+                       unsigned long ipsr = regs->cr_ipsr;
+#if 0
+                       printk("vcpu_translate: bad address %p, viip=%p, 
vipsr=%p, iip=%p, ipsr=%p\n", address, viip, vipsr, iip, ipsr);
+                       if (in_vcpu_tpa) printk("vcpu_translate called from 
vcpu_tpa\n");
+                       while(1);
+                       panic_domain(0,"vcpu_translate: bad address %p\n", 
address);
+#endif
+                       printk("vcpu_translate: bad address %p, viip=%p, 
vipsr=%p, iip=%p, ipsr=%p continuing\n", address, viip, vipsr, iip, ipsr);
+               }
+
+               *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX;
+               *itir = PAGE_SHIFT << 2;
+               phys_translate_count++;
+               return IA64_NO_FAULT;
+       }
+
+       /* check translation registers */
+       if ((trp = match_tr(vcpu,address))) {
+                       tr_translate_count++;
+               *pteval = trp->page_flags;
+               *itir = trp->itir;
+               return IA64_NO_FAULT;
+       }
+
+       /* check 1-entry TLB */
+       if ((trp = match_dtlb(vcpu,address))) {
+               dtlb_translate_count++;
+if (!in_vcpu_tpa) printf("vcpu_translate: found in vdtlb\n");
+               *pteval = trp->page_flags;
+               *itir = trp->itir;
+               return IA64_NO_FAULT;
+       }
+
+       /* check guest VHPT */
+       pta = PSCB(vcpu,pta);
+       rr.rrval = PSCB(vcpu,rrs)[address>>61];
+       if (rr.ve && (pta & IA64_PTA_VE))
+       {
+               if (pta & IA64_PTA_VF)
+               {
+                       /* long format VHPT - not implemented */
+                       return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
+               }
+               else
+               {
+                       /* short format VHPT */
+
+                       /* avoid recursively walking VHPT */
+                       pta_mask = (itir_mask(pta) << 3) >> 3;
+                       if (((address ^ pta) & pta_mask) == 0)
+                               return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
+
+                       vcpu_thash(vcpu, address, &iha);
+                       if (__copy_from_user(&pte, (void *)iha, sizeof(pte)) != 0)
+                               return IA64_VHPT_TRANS_VECTOR;
+
+                       /*
+                        * Optimisation: this VHPT walker aborts on not-present pages
+                        * instead of inserting a not-present translation, this allows
+                        * vectoring directly to the miss handler.
+                        */
+                       if (pte & _PAGE_P)
+                       {
+                               *pteval = pte;
+                               *itir = vcpu_get_itir_on_fault(vcpu,address);
+                               vhpt_translate_count++;
+                               return IA64_NO_FAULT;
+                       }
+                       return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
+               }
+       }
+       return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR);
+}
+
 IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
 {
-       extern TR_ENTRY *match_tr(VCPU *,UINT64);
-       unsigned long match_dtlb(VCPU *, unsigned long, unsigned long *, unsigned long *);
-       TR_ENTRY *trp;
-       UINT64 mask, pteval, mp_pte, ps;
-
-extern unsigned long privop_trace;
-       if (pteval = match_dtlb(vcpu, vadr, &ps, &mp_pte) && (mp_pte != -1UL)) {
-               mask = (1L << ps) - 1;
-               *padr = ((mp_pte & _PAGE_PPN_MASK) & ~mask) | (vadr & mask);
-               verbose("vcpu_tpa: addr=%p @%p, successful, 
padr=%p\n",vadr,PSCB(vcpu,iip),*padr);
+       UINT64 pteval, itir, mask;
+       IA64FAULT fault;
+
+in_vcpu_tpa=1;
+       fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir);
+in_vcpu_tpa=0;
+       if (fault == IA64_NO_FAULT)
+       {
+               mask = itir_mask(itir);
+               *padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
                return (IA64_NO_FAULT);
        }
-       if (trp=match_tr(current,vadr)) {
-               mask = (1L << trp->ps) - 1;
-               *padr = ((trp->ppn << 12) & ~mask) | (vadr & mask);
-               verbose("vcpu_tpa: addr=%p @%p, successful, 
padr=%p\n",vadr,PSCB(vcpu,iip),*padr);
-               return (IA64_NO_FAULT);
-       }
-       verbose("vcpu_tpa addr=%p, @%p, forcing data 
miss\n",vadr,PSCB(vcpu,iip));
-       return vcpu_force_data_miss(vcpu, vadr);
+       else
+       {
+               PSCB(vcpu,tmp[0]) = vadr;       // save ifa in vcpu structure, then specify IA64_FORCED_IFA
+               return (fault | IA64_FORCED_IFA);
+       }
 }
 
 IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
@@ -1392,20 +1499,24 @@
        // if reg > 3
        switch(reg) {
            case 0:
+               memcpy(pval,"Xen/ia64",8);
+               break;
            case 1:
-               memcpy(pval,"Xen/ia64",8);
+               *pval = 0;
                break;
            case 2:
                *pval = 0;
                break;
            case 3:
-               *pval = 0;  //FIXME: See vol1, 3.1.11
+               *pval = ia64_get_cpuid(3);
                break;
            case 4:
-               *pval = 1;  //FIXME: See vol1, 3.1.11
+               *pval = ia64_get_cpuid(4);
                break;
            default:
-               *pval = 0;  //FIXME: See vol1, 3.1.11
+               if (reg > (ia64_get_cpuid(3) & 0xff))
+                       return IA64_RSVDREG_FAULT;
+               *pval = ia64_get_cpuid(reg);
                break;
        }
        return (IA64_NO_FAULT);
@@ -1614,15 +1725,12 @@
 
 // NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check
 // the physical address contained for correctness
-unsigned long match_dtlb(VCPU *vcpu, unsigned long ifa, unsigned long *ps, unsigned long *mp_pte)
+TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa)
 {
        TR_ENTRY *trp;
 
-       if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) {
-               if (ps) *ps = trp->ps;
-               if (mp_pte) *mp_pte = vcpu->arch.dtlb_pte;
-               return (trp->page_flags);
-       }
+       if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1))
+               return (&vcpu->arch.dtlb);
        return 0UL;
 }
 
@@ -1679,11 +1787,12 @@
        // TODO: Only allowed for current vcpu
        UINT64 mpaddr, ps;
        IA64FAULT fault;
-       unsigned long match_dtlb(VCPU *, unsigned long, unsigned long *, unsigned long *);
+       TR_ENTRY *trp;
        unsigned long lookup_domain_mpa(struct domain *,unsigned long);
        unsigned long pteval, dom_imva;
 
-       if (pteval = match_dtlb(vcpu, vadr, NULL, NULL)) {
+       if ((trp = match_dtlb(vcpu,vadr))) {
+               pteval = trp->page_flags;
                dom_imva = __va(pteval & _PFN_MASK);
                ia64_fc(dom_imva);
                return IA64_NO_FAULT;
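
For reference, the itir_ps()/itir_mask() macros introduced above drive the
physical-address computation in the new vcpu_tpa(). A minimal sketch of that
arithmetic (illustrative only, not part of the patch; _PAGE_PPN_MASK is the
pte frame-number mask from the ia64 headers, and ps=14 is just an example
page size):

    #define itir_ps(itir)   ((itir >> 2) & 0x3f)
    #define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1))

    unsigned long tpa_sketch(unsigned long pteval, unsigned long itir,
                             unsigned long vadr)
    {
            unsigned long mask = itir_mask(itir);  /* e.g. ~0x3fff for ps=14 */
            /* frame bits come from the pte, offset bits from the vaddr */
            return (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
    }
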
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/hypercall.c
--- a/xen/arch/ia64/hypercall.c Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/hypercall.c Fri Jul 15 13:39:50 2005
@@ -16,21 +16,26 @@
 #include <asm/dom_fw.h>
 
 extern unsigned long translate_domain_mpaddr(unsigned long);
-extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64);
 extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+
+unsigned long idle_when_pending = 0;
+unsigned long pal_halt_light_count = 0;
 
 int
 ia64_hypercall (struct pt_regs *regs)
 {
        struct vcpu *v = (struct domain *) current;
        struct ia64_sal_retval x;
+       struct ia64_pal_retval y;
        unsigned long *tv, *tc;
+       int pi;
 
        switch (regs->r2) {
            case FW_HYPERCALL_PAL_CALL:
                //printf("*** PAL hypercall: index=%d\n",regs->r28);
                //FIXME: This should call a C routine
-#if 1
+#if 0
                // This is very conservative, but avoids a possible
                // (and deadly) freeze in paravirtualized domains due
                // to a yet-to-be-found bug where pending_interruption
@@ -38,25 +43,34 @@
                // in the idle loop, this should resolve it
                v->vcpu_info->arch.pending_interruption = 1;
 #endif
-               x = pal_emulator_static(regs->r28);
                if (regs->r28 == PAL_HALT_LIGHT) {
-#if 1
 #define SPURIOUS_VECTOR 15
-                       if (vcpu_check_pending_interrupts(v)!=SPURIOUS_VECTOR) {
-//                             int pi = vcpu_check_pending_interrupts(v);
+                       pi = vcpu_check_pending_interrupts(v);
+                       if (pi != SPURIOUS_VECTOR) {
+                               if (!v->vcpu_info->arch.pending_interruption)
+                                       idle_when_pending++;
+                               vcpu_pend_unspecified_interrupt(v);
 //printf("idle w/int#%d pending!\n",pi);
 //this shouldn't happen, but it apparently does quite a bit!  so don't
 //allow it to happen... i.e. if a domain has an interrupt pending and
 //it tries to halt itself because it thinks it is idle, just return here
 //as deliver_pending_interrupt is called on the way out and will deliver it
                        }
-                       else
-#endif
-                       do_sched_op(SCHEDOP_yield);
+                       else {
+                               pal_halt_light_count++;
+                               do_sched_op(SCHEDOP_yield);
+                       }
                        //break;
                }
-               regs->r8 = x.status; regs->r9 = x.v0;
-               regs->r10 = x.v1; regs->r11 = x.v2;
+               else if (regs->r28 >= PAL_COPY_PAL) {   /* FIXME */
+                       printf("stacked PAL hypercalls not supported\n");
+                       regs->r8 = -1;
+                       break;
+               }
+               else y = xen_pal_emulator(regs->r28,regs->r29,
+                                               regs->r30,regs->r31);
+               regs->r8 = y.status; regs->r9 = y.v0;
+               regs->r10 = y.v1; regs->r11 = y.v2;
                break;
            case FW_HYPERCALL_SAL_CALL:
                x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33),
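
The PAL hypercall path above uses a simple register convention: the guest
places the PAL index in r28 and arguments in r29-r31, and the four-word
result comes back in r8-r11. A sketch of the dispatch, assuming the
{status, v0, v1, v2} layout of struct ia64_pal_retval used in this patch:

    struct ia64_pal_retval y;

    y = xen_pal_emulator(regs->r28, regs->r29, regs->r30, regs->r31);
    regs->r8  = y.status;   /* PAL status word */
    regs->r9  = y.v0;
    regs->r10 = y.v1;
    regs->r11 = y.v2;
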
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/hyperprivop.S
--- a/xen/arch/ia64/hyperprivop.S       Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/hyperprivop.S       Fri Jul 15 13:39:50 2005
@@ -14,8 +14,15 @@
 #include <asm/system.h>
 #include <public/arch-ia64.h>
 
+#if 1   // change to 0 to turn off all fast paths
+#define FAST_HYPERPRIVOPS
 #define FAST_HYPERPRIVOP_CNT
 #define FAST_REFLECT_CNT
+#define FAST_TICK
+#define FAST_BREAK
+#define FAST_ACCESS_REFLECT
+#undef RFI_TO_INTERRUPT // not working yet
+#endif
 
 // FIXME: This is defined in include/asm-ia64/hw_irq.h but this
 // doesn't appear to be include'able from assembly?
@@ -45,6 +52,9 @@
 //     r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
 //     r31 == pr
 GLOBAL_ENTRY(fast_hyperprivop)
+#ifndef FAST_HYPERPRIVOPS // see beginning of file
+       br.sptk.many dispatch_break_fault ;;
+#endif
        // HYPERPRIVOP_SSM_I?
        // assumes domain interrupts pending, so just do it
        cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
@@ -124,6 +134,10 @@
        // HYPERPRIVOP_ITC_I?
        cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
 (p7)   br.sptk.many hyper_itc_i;;
+
+       // HYPERPRIVOP_THASH?
+       cmp.eq p7,p6=XEN_HYPER_THASH,r17
+(p7)   br.sptk.many hyper_thash;;
 
        // if not one of the above, give up for now and do it the slow way
        br.sptk.many dispatch_break_fault ;;
@@ -276,17 +290,17 @@
 //     r30 == cr.ivr
 //     r29 == rp
 GLOBAL_ENTRY(fast_tick_reflect)
-#define FAST_TICK
-#ifndef FAST_TICK
+#ifndef FAST_TICK // see beginning of file
        br.cond.sptk.many rp;;
 #endif
        mov r28=IA64_TIMER_VECTOR;;
        cmp.ne p6,p0=r28,r30
 (p6)   br.cond.spnt.few rp;;
        movl r20=(PERCPU_ADDR)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
-       ld8 r21=[r20];;
+       ld8 r26=[r20];;
        mov r27=ar.itc;;
-       cmp.ltu p6,p0=r21,r27
+       adds r27=200,r27;;      // safety margin
+       cmp.ltu p6,p0=r26,r27
 (p6)   br.cond.spnt.few rp;;
        mov r17=cr.ipsr;;
        // slow path if: ipsr.be==1, ipsr.pp==1
@@ -296,14 +310,16 @@
        extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p6,p0=r21,r0
 (p6)   br.cond.spnt.few rp;;
+       // definitely have a domain tick
+       mov cr.eoi=r0;;
+       mov rp=r29;;
+       mov cr.itm=r26;;        // ensure next tick
 #ifdef FAST_REFLECT_CNT
        movl r20=fast_reflect_count+((0x3000>>8)*8);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
 #endif
-       mov cr.eoi=r0;;
-       mov rp=r29;;
        // vcpu_pend_timer(current)
        movl r18=XSI_PSR_IC;;
        adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
@@ -440,15 +456,13 @@
 END(fast_tick_reflect)
 
 // reflect domain breaks directly to domain
-// FIXME: DOES NOT WORK YET
 //     r16 == cr.isr
 //     r17 == cr.iim
 //     r18 == XSI_PSR_IC
 //     r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
 //     r31 == pr
 GLOBAL_ENTRY(fast_break_reflect)
-#define FAST_BREAK
-#ifndef FAST_BREAK
+#ifndef FAST_BREAK // see beginning of file
        br.sptk.many dispatch_break_fault ;;
 #endif
        mov r30=cr.ipsr;;
@@ -471,15 +485,30 @@
        cmp.eq p7,p0=r22,r17;
 (p7)   br.spnt.few dispatch_break_fault ;;
 #endif
-#ifdef FAST_REFLECT_CNT
-       movl r20=fast_reflect_count+((0x2c00>>8)*8);;
-       ld8 r21=[r20];;
-       adds r21=1,r21;;
-       st8 [r20]=r21;;
-#endif
+       movl r20=0x2c00;
        // save iim in shared_info
        adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;
+       // fall through
+
+
+// reflect to domain ivt+r20
+// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
+//     r16 == cr.isr
+//     r18 == XSI_PSR_IC
+//     r20 == offset into ivt
+//     r29 == iip
+//     r30 == ipsr
+//     r31 == pr
+ENTRY(fast_reflect)
+#ifdef FAST_REFLECT_CNT
+       movl r22=fast_reflect_count;
+       shr r23=r20,5;;
+       add r22=r22,r23;;
+       ld8 r21=[r22];;
+       adds r21=1,r21;;
+       st8 [r22]=r21;;
+#endif
        // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
        adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r29;;
@@ -504,9 +533,9 @@
        or r30=r30,r28;;
        and r30=r30,r27;;
        // also set shared_mem ipsr.i and ipsr.ic appropriately
-       ld8 r20=[r18];;
-       extr.u r22=r20,32,32
-       cmp4.eq p6,p7=r20,r0;;
+       ld8 r24=[r18];;
+       extr.u r22=r24,32,32
+       cmp4.eq p6,p7=r24,r0;;
 (p6)   dep r30=0,r30,IA64_PSR_IC_BIT,1
 (p7)   dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
        cmp4.eq p6,p7=r22,r0;;
@@ -520,13 +549,13 @@
        // cover and set shared_mem precover_ifs to cr.ifs
        // set shared_mem ifs and incomplete_regframe to 0
        cover ;;
-       mov r20=cr.ifs;;
+       mov r24=cr.ifs;;
        adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
        st4 [r21]=r0 ;;
        adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r0 ;;
        adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
-       st8 [r21]=r20 ;;
+       st8 [r21]=r24 ;;
        // vpsr.i = vpsr.ic = 0 on delivery of interruption
        st8 [r18]=r0;;
        // FIXME: need to save iipa and isr to be arch-compliant
@@ -534,22 +563,30 @@
        mov r22=IA64_KR(CURRENT);;
        adds r22=IA64_VCPU_IVA_OFFSET,r22;;
        ld8 r23=[r22];;
-       movl r24=0x2c00;;
-       add r24=r24,r23;;
-       mov cr.iip=r24;;
+       add r20=r20,r23;;
+       mov cr.iip=r20;;
        // OK, now all set to go except for switch to virtual bank0
        mov r30=r2; mov r29=r3;;
        adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
        adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
        bsw.1;;
-       st8 [r2]=r16,16; st8 [r3]=r17,16 ;;
-       st8 [r2]=r18,16; st8 [r3]=r19,16 ;;
-       st8 [r2]=r20,16; st8 [r3]=r21,16 ;;
-       st8 [r2]=r22,16; st8 [r3]=r23,16 ;;
-       st8 [r2]=r24,16; st8 [r3]=r25,16 ;;
-       st8 [r2]=r26,16; st8 [r3]=r27,16 ;;
-       st8 [r2]=r28,16; st8 [r3]=r29,16 ;;
-       st8 [r2]=r30,16; st8 [r3]=r31,16 ;;
+       // FIXME: need to handle ar.unat!
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
        movl r31=XSI_IPSR;;
        bsw.0 ;;
        mov r2=r30; mov r3=r29;;
@@ -558,6 +595,44 @@
        mov pr=r31,-1 ;;
        rfi
        ;;
+
+// reflect access faults (0x2400,0x2800,0x5300) directly to domain
+//     r16 == isr
+//     r17 == ifa
+//     r19 == reflect number (only pass-thru to dispatch_reflection)
+//     r20 == offset into ivt
+//     r31 == pr
+GLOBAL_ENTRY(fast_access_reflect)
+#ifndef FAST_ACCESS_REFLECT // see beginning of file
+       br.spnt.few dispatch_reflection ;;
+#endif
+       mov r30=cr.ipsr;;
+       mov r29=cr.iip;;
+       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
+       cmp.eq p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       movl r18=XSI_PSR_IC;;
+       ld8 r21=[r18];;
+       cmp.eq p7,p0=r0,r21
+(p7)   br.spnt.few dispatch_reflection ;;
+       // set shared_mem ifa, FIXME: should we validate it?
+       mov r17=cr.ifa;;
+       adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r17 ;;
+       // get rr[ifa] and save to itir in shared memory (extra bits ignored)
+       shr.u r22=r17,61
+       adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 
+       adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+       shladd r22=r22,3,r21;;
+       ld8 r22=[r22];;
+       st8 [r23]=r22;;
+       br.cond.sptk.many fast_reflect;;
 
 
 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -606,7 +681,7 @@
        cmp.ltu p0,p7=r22,r23 ;;        // if !(iip<low) &&
 (p7)   cmp.geu p0,p7=r22,r24 ;;        //    !(iip>=high)
 (p7)   br.spnt.few dispatch_break_fault ;;
-#ifndef RFI_TO_INTERRUPT
+#ifndef RFI_TO_INTERRUPT // see beginning of file
        cmp.ne p6,p0=r30,r0
 (p6)   br.cond.spnt.few dispatch_break_fault ;;
 #endif
@@ -1312,8 +1387,146 @@
        ;;
 END(hyper_set_rr)
 
+// this routine was derived from optimized assembly output from
+// vcpu_thash so it is dense and difficult to read but it works
+// On entry:
+//     r18 == XSI_PSR_IC
+//     r31 == pr
+GLOBAL_ENTRY(hyper_thash)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       shr.u r20 = r8, 61
+       addl r25 = 1, r0
+       movl r17 = 0xe000000000000000
+       ;;
+       and r21 = r17, r8               // VHPT_Addr1
+       ;;
+       shladd r28 = r20, 3, r18
+       adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
+       ;;
+       adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
+       addl r28 = 32767, r0
+       ld8 r24 = [r19]                 // pta
+       ;;
+       ld8 r23 = [r27]                 // rrs[vadr>>61]
+       extr.u r26 = r24, 2, 6
+       ;;
+       extr.u r22 = r23, 2, 6
+       shl r30 = r25, r26
+       ;;
+       shr.u r19 = r8, r22
+       shr.u r29 = r24, 15
+       ;;
+       adds r17 = -1, r30
+       ;;
+       shladd r27 = r19, 3, r0
+       extr.u r26 = r17, 15, 46
+       ;;
+       andcm r24 = r29, r26
+       and r19 = r28, r27
+       shr.u r25 = r27, 15
+       ;;
+       and r23 = r26, r25
+       ;;
+       or r22 = r24, r23
+       ;;
+       dep.z r20 = r22, 15, 46
+       ;;
+       or r16 = r20, r21
+       ;;
+       or r8 = r19, r16
+       // done, update iip/ipsr to next instruction
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_thash)
+
 ENTRY(hyper_ptc_ga)
-       br.spnt.many dispatch_break_fault ;;
+#ifdef CONFIG_SMP
+FIXME: ptc.ga instruction requires spinlock for SMP
+#endif
+       // FIXME: validate not flushing Xen addresses
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r28=r8
+       extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
+       mov r20=1
+       shr.u r24=r8,61
+       addl r27=56,r0                  // PAGE_SHIFT<<2 (for ptc.ga)
+       movl r26=0x8000000000000000     // INVALID_TI_TAG
+       mov r30=ar.lc
+       ;;
+       shl r19=r20,r19
+       cmp.eq p7,p0=7,r24
+(p7)   br.spnt.many dispatch_break_fault ;;    // slow way for rr7
+       ;;
+       cmp.le p7,p0=r19,r0             // skip flush if size<=0
+(p7)   br.cond.dpnt 2f ;;
+       extr.u r24=r19,0,PAGE_SHIFT
+       shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
+       cmp.ne p7,p0=r24,r0 ;;
+(p7)   adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
+       mov ar.lc=r23
+       movl r29=PAGE_SIZE;;
+1:
+       thash r25=r28 ;;
+       adds r25=16,r25 ;;
+       ld8 r24=[r25] ;;
+       // FIXME: should check if tag matches, not just blow it away
+       or r24=r26,r24 ;;               // vhpt_entry->ti_tag = 1
+       st8 [r25]=r24
+       ptc.ga r28,r27 ;;
+       srlz.i ;;
+       add r28=r29,r28
+       br.cloop.sptk.few 1b
+       ;;
+2:
+       mov ar.lc=r30 ;;
+       mov r29=cr.ipsr
+       mov r30=cr.iip;;
+       mov r27=IA64_KR(CURRENT);;
+       adds r25=IA64_VCPU_DTLB_OFFSET,r27
+       adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+       ld8 r24=[r25]
+       ld8 r27=[r26] ;;
+       and r24=-2,r24
+       and r27=-2,r27 ;;
+       st8 [r25]=r24                   // set 1-entry i/dtlb as not present
+       st8 [r26]=r27 ;;
+       // increment to point to next instruction
+       extr.u r26=r29,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r30=16,r30
+(p7)   adds r26=1,r26
+       ;;
+       dep r29=r26,r29,41,2
+       ;;
+       mov cr.ipsr=r29
+       mov cr.iip=r30
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
 END(hyper_ptc_ga)
 
 ENTRY(hyper_itc_d)
@@ -1323,4 +1536,3 @@
 ENTRY(hyper_itc_i)
        br.spnt.many dispatch_break_fault ;;
 END(hyper_itc_i)
-
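
hyper_thash above is dense because it was derived from optimized compiler
output; the C routine it mirrors computes a short-format VHPT hash address.
Roughly, as a reconstruction from the assembly (not part of the patch, and
omitting the 46-bit truncation of the middle field for clarity):

    unsigned long thash_sketch(unsigned long vadr, unsigned long pta,
                               unsigned long rr)
    {
            unsigned long ps   = (rr >> 2) & 0x3f;   /* region page size    */
            unsigned long size = (pta >> 2) & 0x3f;  /* log2 of VHPT size   */
            unsigned long off  = (vadr >> ps) << 3;  /* 8-byte entry offset */
            unsigned long m    = ((1UL << size) - 1) >> 15;
            unsigned long base = (pta >> 15) & ~m;   /* pta.base, masked    */

            return (vadr & 0xe000000000000000UL)     /* region bits 63..61  */
                   | ((base | ((off >> 15) & m)) << 15)
                   | (off & 0x7fff);                 /* low offset bits     */
    }
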
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_entry.S
--- a/xen/arch/ia64/vmx_entry.S Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_entry.S Fri Jul 15 13:39:50 2005
@@ -217,7 +217,7 @@
     alloc loc0=ar.pfs,0,1,1,0
     adds out0=16,r12
     ;;
-    br.call.sptk.many b0=vmx_deliver_pending_interrupt
+    br.call.sptk.many b0=leave_hypervisor_tail
     mov ar.pfs=loc0
     adds r8=IA64_VPD_BASE_OFFSET,r13
     ;;
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_init.c
--- a/xen/arch/ia64/vmx_init.c  Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_init.c  Fri Jul 15 13:39:50 2005
@@ -40,6 +40,7 @@
 #include <asm/vmmu.h>
 #include <public/arch-ia64.h>
 #include <asm/vmx_phy_mode.h>
+#include <asm/processor.h>
 #include <asm/vmx.h>
 #include <xen/mm.h>
 
@@ -225,6 +226,17 @@
         vmx_purge_double_mapping(dom_rr7, KERNEL_START,
                                 (u64)v->arch.vtlb->ts->vhpt->hash);
 
+       /* Need to save KR when domain switch, though HV itself doesn't
+        * use them.
+        */
+       v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
+       v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
+       v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
+       v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
+       v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
+       v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
+       v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
+       v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
 }
 
 /* Even guest is in physical mode, we still need such double mapping */
@@ -234,6 +246,7 @@
        u64 status, psr;
        u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
        u64 pte_xen, pte_vhpt;
+       int i;
 
        status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0);
        if (status != PAL_STATUS_SUCCESS)
@@ -246,6 +259,14 @@
                                  (u64)v->arch.vtlb->ts->vhpt->hash,
                                  pte_xen, pte_vhpt);
 
+       ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
+       ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
+       ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
+       ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
+       ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
+       ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
+       ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
+       ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
        /* Guest vTLB is not required to be switched explicitly, since
         * anchored in vcpu */
 }
@@ -290,7 +311,7 @@
        vmx_create_vp(v);
 
        /* Set this ed to be vmx */
-       v->arch.arch_vmx.flags = 1;
+       set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags);
 
        /* Other vmx specific initialization work */
 }
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/dom_fw.c
--- a/xen/arch/ia64/dom_fw.c    Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/dom_fw.c    Fri Jul 15 13:39:50 2005
@@ -154,7 +154,7 @@
        return 1;
 }
 
-extern void pal_emulator_static (void);
+extern struct ia64_pal_retval pal_emulator_static (unsigned long);
 
 /* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
 
@@ -220,17 +220,14 @@
         */
        status = 0;
        if (index == SAL_FREQ_BASE) {
-               switch (in1) {
+               if (!running_on_sim)
+                       status = ia64_sal_freq_base(in1,&r9,&r10);
+               else switch (in1) {
                      case SAL_FREQ_BASE_PLATFORM:
                        r9 = 200000000;
                        break;
 
                      case SAL_FREQ_BASE_INTERVAL_TIMER:
-                       /*
-                        * Is this supposed to be the cr.itc frequency
-                        * or something platform specific?  The SAL
-                        * doc ain't exactly clear on this...
-                        */
                        r9 = 700000000;
                        break;
 
@@ -285,6 +282,116 @@
        return ((struct sal_ret_values) {status, r9, r10, r11});
 }
 
+struct ia64_pal_retval
+xen_pal_emulator(unsigned long index, unsigned long in1,
+       unsigned long in2, unsigned long in3)
+{
+       long r9  = 0;
+       long r10 = 0;
+       long r11 = 0;
+       long status = -1;
+
+       if (running_on_sim) return pal_emulator_static(index);
+       if (index >= PAL_COPY_PAL) {
+               printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
+                               index);
+       }
+       else switch (index) {
+           case PAL_MEM_ATTRIB:
+               status = ia64_pal_mem_attrib(&r9);
+               break;
+           case PAL_FREQ_BASE:
+               status = ia64_pal_freq_base(&r9);
+               break;
+           case PAL_PROC_GET_FEATURES:
+               status = ia64_pal_proc_get_features(&r9,&r10,&r11);
+               break;
+           case PAL_BUS_GET_FEATURES:
+               status = ia64_pal_bus_get_features(&r9,&r10,&r11);
+               break;
+           case PAL_FREQ_RATIOS:
+               status = ia64_pal_freq_ratios(&r9,&r10,&r11);
+               break;
+           case PAL_PTCE_INFO:
+               {
+                       ia64_ptce_info_t ptce;
+                       status = ia64_get_ptce(&ptce);
+                       if (status != 0) break;
+                       r9 = ptce.base;
+                       r10 = (ptce.count[0]<<32)|(ptce.count[1]&0xffffffffL);
+                       r11 = (ptce.stride[0]<<32)|(ptce.stride[1]&0xffffffffL);
+               }
+               break;
+           case PAL_VERSION:
+               status = ia64_pal_version(&r9,&r10);
+               break;
+           case PAL_VM_PAGE_SIZE:
+               status = ia64_pal_vm_page_size(&r9,&r10);
+               break;
+           case PAL_DEBUG_INFO:
+               status = ia64_pal_debug_info(&r9,&r10);
+               break;
+           case PAL_CACHE_SUMMARY:
+               status = ia64_pal_cache_summary(&r9,&r10);
+               break;
+           case PAL_VM_SUMMARY:
+               status = ia64_pal_vm_summary(&r9,&r10);
+               break;
+           case PAL_RSE_INFO:
+               status = ia64_pal_rse_info(&r9,&r10);
+               break;
+           case PAL_VM_INFO:
+               status = ia64_pal_vm_info(in1,in2,&r9,&r10);
+               break;
+           case PAL_REGISTER_INFO:
+               status = ia64_pal_register_info(in1,&r9,&r10);
+               break;
+           case PAL_CACHE_FLUSH:
+               /* FIXME */
+               printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n");
+               BUG();
+               break;
+           case PAL_PERF_MON_INFO:
+               {
+                       unsigned long pm_buffer[16];
+                       int i;
+                       status = ia64_pal_perf_mon_info(pm_buffer,&r9);
+                       if (status != 0) {
+                               while(1)
+                               printk("PAL_PERF_MON_INFO fails 
ret=%d\n",status);
+                               break;
+                       }
+                       if (copy_to_user((void __user *)in1,pm_buffer,128)) {
+                               while(1)
+                               printk("xen_pal_emulator: PAL_PERF_MON_INFO "
+                                       "can't copy to user!!!!\n");
+                               status = -1;
+                               break;
+                       }
+               }
+               break;
+           case PAL_CACHE_INFO:
+               {
+                       pal_cache_config_info_t ci;
+                       status = ia64_pal_cache_config_info(in1,in2,&ci);
+                       if (status != 0) break;
+                       r9 = ci.pcci_info_1.pcci1_data;
+                       r10 = ci.pcci_info_2.pcci2_data;
+               }
+               break;
+           case PAL_VM_TR_READ:        /* FIXME: vcpu_get_tr?? */
+               printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n");
+               break;
+           case PAL_HALT_INFO:         /* inappropriate info for guest? */
+               printk("PAL_HALT_INFO NOT IMPLEMENTED, IGNORED!\n");
+               break;
+           default:
+               printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
+                               index);
+               break;
+       }
+       return ((struct ia64_pal_retval) {status, r9, r10, r11});
+}
 
 #define NFUNCPTRS 20
 
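
Note how PAL_PTCE_INFO above packs each two-element array into a single
64-bit return word, e.g. r10 = (count[0]<<32)|(count[1]&0xffffffff). A
caller unpacks symmetrically (a sketch, not in the patch; r10/r11 stand
for the raw return words):

    int count0  = r10 >> 32;            /* ptce.count[0]  */
    int count1  = r10 & 0xffffffffL;    /* ptce.count[1]  */
    int stride0 = r11 >> 32;            /* ptce.stride[0] */
    int stride1 = r11 & 0xffffffffL;    /* ptce.stride[1] */
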
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/vmx_vpd.h
--- a/xen/include/asm-ia64/vmx_vpd.h    Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/vmx_vpd.h    Fri Jul 15 13:39:50 2005
@@ -19,8 +19,8 @@
  *      Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
  */
 
-#ifndef _VPD_H_
-#define _VPD_H_
+#ifndef _ASM_IA64_VMX_VPD_H_
+#define _ASM_IA64_VMX_VPD_H_
 
 #ifndef __ASSEMBLY__
 
@@ -123,6 +123,7 @@
        vpd_t       *vpd;
        vtime_t     vtm;
     unsigned long   vrr[8];
+    unsigned long   vkr[8];
     unsigned long   mrr5;
     unsigned long   mrr6;
     unsigned long   mrr7;
@@ -145,6 +146,7 @@
 #define ARCH_VMX_VMCS_LAUNCH    1       /* Needs VMCS launch */
 #define ARCH_VMX_VMCS_RESUME    2       /* Needs VMCS resume */
 #define ARCH_VMX_IO_WAIT        3       /* Waiting for I/O completion */
+#define ARCH_VMX_INTR_ASSIST   4       /* Need DM's assist to issue intr */
 
 
 #define VMX_DEBUG 1
@@ -191,4 +193,4 @@
 #define VPD_VMM_VAIL_START_OFFSET      31744
 
 
-#endif /* _VPD_H_ */
+#endif /* _ASM_IA64_VMX_VPD_H_ */
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/xenprocessor.h
--- a/xen/include/asm-ia64/xenprocessor.h       Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/xenprocessor.h       Fri Jul 15 13:39:50 2005
@@ -166,6 +166,16 @@
     };
 } ipi_d_t;
 
+typedef union {
+    __u64 val;
+    struct {
+        __u64 ig0 : 4;
+        __u64 mic : 4;
+        __u64 rsv : 8;
+        __u64 mmi : 1;
+        __u64 ig1 : 47;
+    };
+} tpr_t;
 
 #define IA64_ISR_CODE_MASK0     0xf
 #define IA64_UNIMPL_DADDR_FAULT     0x30
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_hypercall.c
--- a/xen/arch/ia64/vmx_hypercall.c     Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_hypercall.c     Fri Jul 15 13:39:50 2005
@@ -141,21 +141,27 @@
 static int do_set_shared_page(VCPU *vcpu, u64 gpa)
 {
     u64 shared_info, o_info;
+    struct domain *d = vcpu->domain;
+    struct vcpu *v;
     if(vcpu->domain!=dom0)
         return -EPERM;
     shared_info = __gpa_to_mpa(vcpu->domain, gpa);
     o_info = (u64)vcpu->domain->shared_info;
-    vcpu->domain->shared_info= (shared_info_t *)__va(shared_info);
+    d->shared_info= (shared_info_t *)__va(shared_info);
 
     /* Copy existing shared info into new page */
-    if (!o_info) {
-       memcpy((void*)vcpu->domain->shared_info, (void*)o_info, PAGE_SIZE);
-       /* If original page belongs to xen heap, then relinguish back
-        * to xen heap. Or else, leave to domain itself to decide.
-        */
-       if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info))))
-               free_xenheap_page(o_info);
-    }
+    if (o_info) {
+       memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE);
+       for_each_vcpu(d, v) {
+               v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
+       }
+       /* If original page belongs to xen heap, then relinquish back
+        * to xen heap. Or else, leave to domain itself to decide.
+        */
+       if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info))))
+               free_xenheap_page(o_info);
+    } else
+        memset(d->shared_info, 0, PAGE_SIZE);
     return 0;
 }
 
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/Makefile    Fri Jul 15 13:39:50 2005
@@ -15,7 +15,7 @@
 ifeq ($(CONFIG_VTI),y)
 OBJS += vmx_init.o vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o \
        vmx_phy_mode.o vmx_utility.o vmx_interrupt.o vmx_entry.o vmmu.o \
-       vtlb.o mmio.o vlsapic.o vmx_hypercall.o mm.o
+       vtlb.o mmio.o vlsapic.o vmx_hypercall.o mm.o vmx_support.o pal_emul.o
 endif
 # perfmon.o
 # unwind.o needed for kernel unwinding (rare)
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/privop.h
--- a/xen/include/asm-ia64/privop.h     Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/privop.h     Fri Jul 15 13:39:50 2005
@@ -138,6 +138,19 @@
     IA64_INST inst;
     struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
 } INST64_M47;
+typedef union U_INST64_M1{
+    IA64_INST inst;
+    struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, x6:6, m:1, major:4; };
+} INST64_M1;
+typedef union U_INST64_M4 {
+    IA64_INST inst;
+    struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, x6:6, m:1, major:4; };
+} INST64_M4;
+typedef union U_INST64_M6 {
+    IA64_INST inst;
+    struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, x6:6, m:1, major:4; };
+} INST64_M6;
+
 #endif // CONFIG_VTI
 
 typedef union U_INST64 {
@@ -151,6 +164,11 @@
     INST64_I26 I26;    // mov register to ar (I unit)
     INST64_I27 I27;    // mov immediate to ar (I unit)
     INST64_I28 I28;    // mov from ar (I unit)
+#ifdef CONFIG_VTI
+    INST64_M1  M1;  // ld integer
+    INST64_M4  M4;  // st integer
+    INST64_M6  M6;  // ldfd floating point
+#endif // CONFIG_VTI
     INST64_M28 M28;    // purge translation cache entry
     INST64_M29 M29;    // mov register to ar (M unit)
     INST64_M30 M30;    // mov immediate to ar (M unit)
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/domain.h     Fri Jul 15 13:39:50 2005
@@ -42,8 +42,6 @@
      * max_pages in domain struct, which indicates maximum memory size
      */
     unsigned long max_pfn;
-    unsigned int section_nr;
-    mm_section_t *sections;    /* Describe memory hole except for Dom0 */
 #endif  //CONFIG_VTI
     u64 xen_vastart;
     u64 xen_vaend;
@@ -88,6 +86,8 @@
     void (*schedule_tail) (struct vcpu *);
     struct trap_bounce trap_bounce;
     thash_cb_t *vtlb;
+    char irq_new_pending;
+    char irq_new_condition;    // vpsr.i/vtpr change, check for pending VHPI
     //for phycial  emulation
     unsigned long old_rsc;
     int mode_flags;
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/mm.c
--- a/xen/arch/ia64/mm.c        Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/mm.c        Fri Jul 15 13:39:50 2005
@@ -95,7 +95,7 @@
 #include <asm/vmx_vcpu.h>
 #include <asm/vmmu.h>
 #include <asm/regionreg.h>
-
+#include <asm/vmx_mm_def.h>
 /*
         uregs->ptr is virtual address
         uregs->val is pte value
@@ -109,8 +109,9 @@
     mmu_update_t req;
     ia64_rr rr;
     thash_cb_t *hcb;
-    thash_data_t entry={0};
+    thash_data_t entry={0},*ovl;
     vcpu = current;
+    search_section_t sections;
     hcb = vmx_vcpu_get_vtlb(vcpu);
     for ( i = 0; i < count; i++ )
     {
@@ -124,8 +125,18 @@
             entry.cl = DSIDE_TLB;
             rr = vmx_vcpu_rr(vcpu, req.ptr);
             entry.ps = rr.ps;
+            entry.key = redistribute_rid(rr.rid);
             entry.rid = rr.rid;
-            vtlb_insert(hcb, &entry, req.ptr);
+            entry.vadr = PAGEALIGN(req.ptr,entry.ps);
+            sections.tr = 1;
+            sections.tc = 0;
+            ovl = thash_find_overlap(hcb, &entry, sections);
+            if (ovl) {
+                  // generate MCA.
+                panic("Tlb conflict!!");
+                return;
+            }
+            thash_purge_and_insert(hcb, &entry);
         }else if(cmd == MMU_MACHPHYS_UPDATE){
             mfn = req.ptr >>PAGE_SHIFT;
             gpfn = req.val;
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_process.c
--- a/xen/arch/ia64/vmx_process.c       Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_process.c       Fri Jul 15 13:39:50 2005
@@ -45,7 +45,9 @@
 #include <asm/dom_fw.h>
 #include <asm/vmx_vcpu.h>
 #include <asm/kregs.h>
+#include <asm/vmx.h>
 #include <asm/vmx_mm_def.h>
+#include <xen/mm.h>
 /* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
 #define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
 
@@ -53,7 +55,7 @@
 extern struct ia64_sal_retval pal_emulator_static(UINT64);
 extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
 extern void rnat_consumption (VCPU *vcpu);
-
+#define DOMN_PAL_REQUEST    0x110000
 IA64FAULT
 vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
 {
@@ -148,7 +150,10 @@
                                regs->r2);
 #endif
                vmx_vcpu_increment_iip(current);
-       } else
+       }else if(iim == DOMN_PAL_REQUEST){
+        pal_emul(current);
+               vmx_vcpu_increment_iip(current);
+    }  else
                vmx_reflect_interruption(ifa,isr,iim,11);
 }
 
@@ -187,26 +192,43 @@
 // ONLY gets called from ia64_leave_kernel
 // ONLY call with interrupts disabled?? (else might miss one?)
 // NEVER successful if already reflecting a trap/fault because psr.i==0
-void vmx_deliver_pending_interrupt(struct pt_regs *regs)
+void leave_hypervisor_tail(struct pt_regs *regs)
 {
        struct domain *d = current->domain;
        struct vcpu *v = current;
        // FIXME: Will this work properly if doing an RFI???
        if (!is_idle_task(d) ) {        // always comes from guest
-               //vcpu_poke_timer(v);
-               //if (vcpu_deliverable_interrupts(v)) {
-               //      unsigned long isr = regs->cr_ipsr & IA64_PSR_RI;
-               //      foodpi();
-               //      reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR);
-               //}
                extern void vmx_dorfirfi(void);
                struct pt_regs *user_regs = vcpu_regs(current);
 
+               if (local_softirq_pending())
+                       do_softirq();
+               local_irq_disable();
+ 
                if (user_regs != regs)
                        printk("WARNING: checking pending interrupt in nested 
interrupt!!!\n");
-               if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi)
-                       return;
-               vmx_check_pending_irq(v);
+
+               /* VMX Domain N has another interrupt source, namely the DM */
+                if (test_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags))
+                      vmx_intr_assist(v);
+
+               /* FIXME: Check event pending indicator, and set
+                * pending bit if necessary to inject back to guest.
+                * Should be careful about window between this check
+                * and above assist, since IOPACKET_PORT shouldn't be
+                * injected into vmx domain.
+                *
+                * Now hardcode the vector as 0x10 temporarily
+                */
+               if (event_pending(v)&&(!((v->arch.arch_vmx.in_service[0])&(1UL<<0x10)))) {
+                       VPD_CR(v, irr[0]) |= 1UL << 0x10;
+                       v->arch.irq_new_pending = 1;
+               }
+ 
+               if ( v->arch.irq_new_pending ) {
+                       v->arch.irq_new_pending = 0;
+                       vmx_check_pending_irq(v);
+               }
        }
 }
 
@@ -244,7 +266,11 @@
         return;
     }
     if((vec==2)&&(!vpsr.dt)){
-        physical_dtlb_miss(vcpu, vadr);
+        if(vcpu->domain!=dom0&&__gpfn_is_io(vcpu->domain,(vadr<<1)>>(PAGE_SHIFT+1))){
+            emulate_io_inst(vcpu,((vadr<<1)>>1),4);   //  UC
+        }else{
+            physical_dtlb_miss(vcpu, vadr);
+        }
         return;
     }
     vrr = vmx_vcpu_rr(vcpu,vadr);
@@ -255,6 +281,11 @@
 //    prepare_if_physical_mode(vcpu);
 
     if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){
+        if(vcpu->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(vcpu->domain, data->ppn>>(PAGE_SHIFT-12))){
+            vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps);
+            emulate_io_inst(vcpu, vadr, data->ma);
+            return IA64_FAULT;
+        }
        if ( data->ps != vrr.ps ) {
                machine_tlb_insert(vcpu, data);
        }
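
A note on the (vadr<<1)>>(PAGE_SHIFT+1) idiom used above: shifting left by
one and right by PAGE_SHIFT+1 clears bit 63 of the metaphysical address
(the "//  UC" comment suggests bit 63 carries the uncacheable attribute)
and converts it to a frame number in one step. Equivalently (a sketch, not
in the patch):

    unsigned long gpfn = (vadr & ~(1UL << 63)) >> PAGE_SHIFT;
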
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_virt.c
--- a/xen/arch/ia64/vmx_virt.c  Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_virt.c  Fri Jul 15 13:39:50 2005
@@ -1276,7 +1276,13 @@
 }
 
 
-
+static void post_emulation_action(VCPU *vcpu)
+{
+    if ( vcpu->arch.irq_new_condition ) {
+        vcpu->arch.irq_new_condition = 0;
+        vhpi_detection(vcpu);
+    }
+}
 
 //#define  BYPASS_VMAL_OPCODE
 extern IA64_SLOT_TYPE  slot_types[0x20][3];
@@ -1336,7 +1342,7 @@
     slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
     if (!slot) inst.inst = bundle.slot0;
     else if (slot == 1)
-        inst.inst = bundle.slot1a + (bundle.slot1b<<23);
+        inst.inst = bundle.slot1a + (bundle.slot1b<<18);
     else if (slot == 2) inst.inst = bundle.slot2;
     else printf("priv_handle_op: illegal slot: %d\n", slot);
     slot_type = slot_types[bundle.template][slot];
@@ -1478,9 +1484,11 @@
        status=IA64_FAULT;
         break;
     default:
-        printf("unknown cause %d:\n", cause);
+        printf("unknown cause %d, iip: %lx, ipsr: %lx\n", 
cause,regs->cr_iip,regs->cr_ipsr);
+        while(1);
        /* For unknown cause, let hardware to re-execute */
        status=IA64_RETRY;
+        break;
 //        panic("unknown cause in virtualization intercept");
     };
 
@@ -1494,6 +1502,7 @@
     }
 
     recover_if_physical_mode(vcpu);
+    post_emulation_action (vcpu);
 //TODO    set_irq_check(v);
     return;
 
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_vcpu.c
--- a/xen/arch/ia64/vmx_vcpu.c  Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_vcpu.c  Fri Jul 15 13:39:50 2005
@@ -22,8 +22,6 @@
  *  Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
  *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
  */
-
-
 
 #include <linux/sched.h>
 #include <public/arch-ia64.h>
@@ -71,8 +69,8 @@
 
 //unsigned long last_guest_rsm = 0x0;
 struct guest_psr_bundle{
-       unsigned long ip;
-       unsigned long psr;
+    unsigned long ip;
+    unsigned long psr;
 };
 
 struct guest_psr_bundle guest_psr_buf[100];
@@ -107,20 +105,24 @@
                 IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
             ));
 
+    if ( !old_psr.i && (value & IA64_PSR_I) ) {
+        // vpsr.i 0->1
+        vcpu->arch.irq_new_condition = 1;
+    }
     new_psr.val=vmx_vcpu_get_psr(vcpu);
     {
-       struct xen_regs *regs = vcpu_regs(vcpu);
-       guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
-       guest_psr_buf[guest_psr_index].psr = new_psr.val;
-       if (++guest_psr_index >= 100)
-           guest_psr_index = 0;
+    struct xen_regs *regs = vcpu_regs(vcpu);
+    guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
+    guest_psr_buf[guest_psr_index].psr = new_psr.val;
+    if (++guest_psr_index >= 100)
+        guest_psr_index = 0;
     }
 #if 0
     if (old_psr.i != new_psr.i) {
-       if (old_psr.i)
-               last_guest_rsm = vcpu_regs(vcpu)->cr_iip;
-       else
-               last_guest_rsm = 0;
+    if (old_psr.i)
+        last_guest_rsm = vcpu_regs(vcpu)->cr_iip;
+    else
+        last_guest_rsm = 0;
     }
 #endif
 
@@ -270,8 +272,8 @@
 {
      va &= ~ (PSIZE(ps)-1);
      if ( va == 0x2000000002908000UL ||
-         va == 0x600000000000C000UL ) {
-       stop();
+      va == 0x600000000000C000UL ) {
+    stop();
      }
      if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps);
 }
@@ -433,4 +435,11 @@
     return IA64_NO_FAULT;
 }
 
-
+IA64FAULT
+vmx_vcpu_set_tpr(VCPU *vcpu, u64 val)
+{
+    VPD_CR(vcpu,tpr)=val;
+    vcpu->arch.irq_new_condition = 1;
+    return IA64_NO_FAULT;
+}
+
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/privop.c
--- a/xen/arch/ia64/privop.c    Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/privop.c    Fri Jul 15 13:39:50 2005
@@ -1033,6 +1033,41 @@
 }
 #endif
 
+extern unsigned long dtlb_translate_count;
+extern unsigned long tr_translate_count;
+extern unsigned long phys_translate_count;
+extern unsigned long vhpt_translate_count;
+extern unsigned long lazy_cover_count;
+extern unsigned long idle_when_pending;
+extern unsigned long pal_halt_light_count;
+extern unsigned long context_switch_count;
+
+int dump_misc_stats(char *buf)
+{
+       char *s = buf;
+       s += sprintf(s,"Virtual TR translations: %d\n",tr_translate_count);
+       s += sprintf(s,"Virtual VHPT translations: %d\n",vhpt_translate_count);
+       s += sprintf(s,"Virtual DTLB translations: %d\n",dtlb_translate_count);
+       s += sprintf(s,"Physical translations: %d\n",phys_translate_count);
+       s += sprintf(s,"Idle when pending: %d\n",idle_when_pending);
+       s += sprintf(s,"PAL_HALT_LIGHT (no pending): 
%d\n",pal_halt_light_count);
+       s += sprintf(s,"context switches: %d\n",context_switch_count);
+       s += sprintf(s,"Lazy covers: %d\n",lazy_cover_count);
+       return s - buf;
+}
+
+void zero_misc_stats(void)
+{
+       dtlb_translate_count = 0;
+       tr_translate_count = 0;
+       phys_translate_count = 0;
+       vhpt_translate_count = 0;
+       lazy_cover_count = 0;
+       pal_halt_light_count = 0;
+       idle_when_pending = 0;
+       context_switch_count = 0;
+}
+
 int dump_hyperprivop_counts(char *buf)
 {
        int i;
@@ -1072,6 +1107,7 @@
 #ifdef PRIVOP_ADDR_COUNT
        n += dump_privop_addrs(buf + n);
 #endif
+       n += dump_misc_stats(buf + n);
        if (len < TMPBUFLEN) return -1;
        if (__copy_to_user(ubuf,buf,n)) return -1;
        return n;
@@ -1086,6 +1122,7 @@
 #ifdef PRIVOP_ADDR_COUNT
        zero_privop_addrs();
 #endif
+       zero_misc_stats();
        zero_reflect_counts();
        if (len < TMPBUFLEN) return -1;
        if (__copy_to_user(ubuf,buf,n)) return -1;
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx_phy_mode.c      Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_phy_mode.c      Fri Jul 15 13:39:50 2005
@@ -119,30 +119,67 @@
     vcpu->arch.old_rsc = 0;
     vcpu->arch.mode_flags = GUEST_IN_PHY;
 
-    psr = ia64_clear_ic();
-
-    ia64_set_rr((VRN0<<VRN_SHIFT), vcpu->domain->arch.emul_phy_rr0.rrval);
-    ia64_srlz_d();
-    ia64_set_rr((VRN4<<VRN_SHIFT), vcpu->domain->arch.emul_phy_rr4.rrval);
-    ia64_srlz_d();
+    return;
+}
+
+extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
 #if 0
-    /* FIXME: temp workaround to support guest physical mode */
-ia64_itr(0x1, IA64_TEMP_PHYSICAL, dom0_start,
-        pte_val(pfn_pte((dom0_start >> PAGE_SHIFT), PAGE_KERNEL)),
-        28);
-ia64_itr(0x2, IA64_TEMP_PHYSICAL, dom0_start,
-        pte_val(pfn_pte((dom0_start >> PAGE_SHIFT), PAGE_KERNEL)),
-        28);
-ia64_srlz_i();
+void
+physical_itlb_miss_domn(VCPU *vcpu, u64 vadr)
+{
+    u64 psr;
+    IA64_PSR vpsr;
+    u64 mppn,gppn,mpp1,gpp1;
+    struct domain *d;
+    static u64 test=0;
+    d=vcpu->domain;
+    if(test)
+        panic("domn physical itlb miss happen\n");
+    else
+        test=1;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    gppn=(vadr<<1)>>13;
+    mppn = get_mfn(DOMID_SELF,gppn,1);
+    mppn=(mppn<<12)|(vpsr.cpl<<7);
+    gpp1=0;
+    mpp1 = get_mfn(DOMID_SELF,gpp1,1);
+    mpp1=(mpp1<<12)|(vpsr.cpl<<7);
+//    if(vadr>>63)
+//        mppn |= PHY_PAGE_UC;
+//    else
+//        mppn |= PHY_PAGE_WB;
+    mpp1 |= PHY_PAGE_WB;
+    psr=ia64_clear_ic();
+    ia64_itr(0x1, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
+    ia64_srlz_i();
+    ia64_itr(0x2, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
+    ia64_stop();
+    ia64_srlz_i();
+    ia64_itr(0x1, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24);
+    ia64_srlz_i();
+    ia64_itr(0x2, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24);
+    ia64_stop();
+    ia64_srlz_i();
+    ia64_itr(0x1, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
+    ia64_srlz_i();
+    ia64_itr(0x2, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
+    ia64_stop();
+    ia64_srlz_i();
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
 #endif
-    ia64_set_psr(psr);
-    ia64_srlz_i();
-    return;
-}
-
-extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
+
 void
 physical_itlb_miss(VCPU *vcpu, u64 vadr)
+{
+        physical_itlb_miss_dom0(vcpu, vadr);
+}
+
+
+void
+physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr)
 {
     u64 psr;
     IA64_PSR vpsr;
@@ -150,7 +187,11 @@
     vpsr.val=vmx_vcpu_get_psr(vcpu);
     gppn=(vadr<<1)>>13;
     mppn = get_mfn(DOMID_SELF,gppn,1);
-    mppn=(mppn<<12)|(vpsr.cpl<<7)|PHY_PAGE_WB;
+    mppn=(mppn<<12)|(vpsr.cpl<<7); 
+//    if(vadr>>63)
+//       mppn |= PHY_PAGE_UC;
+//    else
+    mppn |= PHY_PAGE_WB;
 
     psr=ia64_clear_ic();
     ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
@@ -159,12 +200,15 @@
     return;
 }
 
+
 void
 physical_dtlb_miss(VCPU *vcpu, u64 vadr)
 {
     u64 psr;
     IA64_PSR vpsr;
     u64 mppn,gppn;
+//    if(vcpu->domain!=dom0)
+//        panic("dom n physical dtlb miss happen\n");
     vpsr.val=vmx_vcpu_get_psr(vcpu);
     gppn=(vadr<<1)>>13;
     mppn = get_mfn(DOMID_SELF,gppn,1);
@@ -209,6 +253,8 @@
         * mode in same region
         */
        if (is_physical_mode(vcpu)) {
+               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+                       panic("Unexpected domain switch in phy emul\n");
                ia64_set_rr((VRN0 << VRN_SHIFT),
                             vcpu->domain->arch.emul_phy_rr0.rrval);
                ia64_set_rr((VRN4 << VRN_SHIFT),
@@ -262,15 +308,10 @@
     psr=ia64_clear_ic();
 
     mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT);
-    mrr.rid = VRID_2_MRID(vcpu,mrr.rid);
-//VRID_2_MRID(vcpu,mrr.rid);
-    mrr.ve = 1;
-    ia64_set_rr(VRN0<<VRN_SHIFT, mrr.rrval );
+    ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
     ia64_srlz_d();
     mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT);
-    mrr.rid = VRID_2_MRID(vcpu,mrr.rid);
-    mrr.ve = 1;
-    ia64_set_rr(VRN4<<VRN_SHIFT, mrr.rrval );
+    ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
     ia64_srlz_d();
     ia64_set_psr(psr);
     ia64_srlz_i();
@@ -377,8 +418,10 @@
 void
 prepare_if_physical_mode(VCPU *vcpu)
 {
-    if (is_physical_mode(vcpu))
+    if (is_physical_mode(vcpu)) {
+       vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
         switch_to_virtual_rid(vcpu);
+    }
     return;
 }
 
@@ -386,8 +429,10 @@
 void
 recover_if_physical_mode(VCPU *vcpu)
 {
-    if (is_physical_mode(vcpu))
+    if (is_physical_mode(vcpu)) {
+       vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
         switch_to_physical_rid(vcpu);
-    return;
-}
-
+    }
+    return;
+}
+
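As an aside on the metaphysical-mode miss handlers above: both paths derive the guest physical page number with the same shift pair. A minimal sketch of that computation (the helper name is illustrative, not part of the changeset):

    #include <stdint.h>

    /* (vadr << 1) >> 13 clears bit 63 (the UC bit of a metaphysical
     * address) and then shifts out the 12-bit page offset. */
    static inline uint64_t metaphysical_gppn(uint64_t vadr)
    {
        return (vadr << 1) >> 13;   /* == (vadr & ~(1UL << 63)) >> 12 */
    }

    /* e.g. 0x8000000000012345 (UC) and 0x12345 (WB) both yield gppn
     * 0x12, which get_mfn(DOMID_SELF, gppn, 1) maps to a machine frame. */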
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/domain.c
--- a/xen/arch/ia64/domain.c    Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/domain.c    Fri Jul 15 13:39:50 2005
@@ -37,10 +37,13 @@
 #include <asm/asm-offsets.h>  /* for IA64_THREAD_INFO_SIZE */
 
 #include <asm/vcpu.h>   /* for function declarations */
+#include <public/arch-ia64.h>
 #ifdef CONFIG_VTI
 #include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
+#include <asm/vmx_vpd.h>
 #include <asm/pal.h>
+#include <public/io/ioreq.h>
 #endif // CONFIG_VTI
 
 #define CONFIG_DOMAIN0_CONTIGUOUS
@@ -203,18 +206,20 @@
         * after up.
         */
        d->shared_info = (void *)alloc_xenheap_page();
-
-       /* FIXME: Because full virtual cpu info is placed in this area,
-        * it's unlikely to put it into one shareinfo page. Later
-        * need split vcpu context from vcpu_info and conforms to
-        * normal xen convention.
+       /* For now, assume all vcpu info and event indicators can be
+        * held in one shared page. This will definitely need to be
+        * revisited later.
         */
-       v->vcpu_info = (void *)alloc_xenheap_page();
-       if (!v->vcpu_info) {
-               printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
-               while (1);
-       }
-       memset(v->vcpu_info, 0, PAGE_SIZE);
+
+       memset(d->shared_info, 0, PAGE_SIZE);
+       v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
+       /* Mask all events; each specific port will be unmasked
+        * when a consumer subscribes to it.
+        */
+       if(v == d->vcpu[0]) {
+           memset(&d->shared_info->evtchn_mask[0], 0xff,
+               sizeof(d->shared_info->evtchn_mask));
+       }
 
        /* Allocate per-domain vTLB and vhpt */
        v->arch.vtlb = init_domain_tlb(v);
@@ -291,6 +296,7 @@
        c->shared = v->domain->shared_info->arch;
 }
 
+#ifndef CONFIG_VTI
 int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
 {
        struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
@@ -312,6 +318,79 @@
        v->domain->shared_info->arch = c->shared;
        return 0;
 }
+#else // CONFIG_VTI
+int arch_set_info_guest(
+    struct vcpu *v, struct vcpu_guest_context *c)
+{
+    struct domain *d = v->domain;
+    int i, rc, ret;
+    unsigned long progress = 0;
+
+    if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+        return 0;
+
+    /* Lazy FP not implemented yet */
+    clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
+    if ( c->flags & VGCF_FPU_VALID )
+        set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
+
+    /* Sync d/i cache conservatively, after domain N is loaded */
+    ret = ia64_pal_cache_flush(3, 0, &progress, NULL);
+    if (ret != PAL_STATUS_SUCCESS)
+            panic("PAL CACHE FLUSH failed for dom[%d].\n",
+               v->domain->domain_id);
+    DPRINTK("Sync i/d cache for dom%d image SUCC\n",
+               v->domain->domain_id);
+
+    /* Physical mode emulation initialization, including
+     * emulation ID allocation and related memory request
+     */
+    physical_mode_init(v);
+
+    /* FIXME: only a contiguous PMT table is supported so far */
+    d->arch.pmt = __va(c->pt_base);
+    d->arch.max_pfn = c->pt_max_pfn;
+    v->arch.arch_vmx.vmx_platform.shared_page_va = __va(c->share_io_pg);
+    memset((char *)__va(c->share_io_pg),0,PAGE_SIZE);
+
+    if (c->flags & VGCF_VMX_GUEST) {
+       if (!vmx_enabled)
+           panic("No VMX hardware feature for vmx domain.\n");
+
+       vmx_final_setup_domain(d);
+
+       /* One more step to enable interrupt assist */
+       set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
+    }
+
+    vlsapic_reset(v);
+    vtm_init(v);
+
+    /* Only open one port for I/O and interrupt emulation */
+    if (v == d->vcpu[0]) {
+       memset(&d->shared_info->evtchn_mask[0], 0xff,
+               sizeof(d->shared_info->evtchn_mask));
+       clear_bit(IOPACKET_PORT, &d->shared_info->evtchn_mask[0]);
+    }
+    /* Set up the domain context. IA-64 is a bit different from x86
+     * here, with almost all system resources better managed by the HV
+     * directly. The CP only needs to provide the start IP of the guest,
+     * which ideally is the load address of the guest firmware.
+     */
+    new_thread(v, c->guest_iip, 0, 0);
+
+
+    d->xen_vastart = 0xf000000000000000;
+    d->xen_vaend = 0xf300000000000000;
+    d->arch.breakimm = 0x1000 + d->domain_id;
+    v->arch._thread.on_ustack = 0;
+
+    /* Don't redo final setup */
+    set_bit(_VCPUF_initialised, &v->vcpu_flags);
+
+    return 0;
+}
+#endif // CONFIG_VTI
 
 void arch_do_boot_vcpu(struct vcpu *v)
 {
@@ -361,7 +440,10 @@
                init_all_rr(v);
 
        if (VMX_DOMAIN(v)) {
-               VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);
+               if (d == dom0) {
+                   VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);
+                   printk("new_thread, done with dom_fw_setup\n");
+               }
                /* Virtual processor context setup */
                VMX_VPD(v, vpsr) = IA64_PSR_BN;
                VPD_CR(v, dcr) = 0;
@@ -556,6 +638,7 @@
 }
 
 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
+#ifndef CONFIG_VTI
 unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
 {
        unsigned long pte = lookup_domain_mpa(d,mpaddr);
@@ -566,6 +649,14 @@
        imva |= mpaddr & ~PAGE_MASK;
        return(imva);
 }
+#else // CONFIG_VTI
+unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
+{
+    unsigned long imva = __gpa_to_mpa(d, mpaddr);
+
+    return __va(imva);
+}
+#endif // CONFIG_VTI
 
 // remove following line if not privifying in memory
 //#define HAVE_PRIVIFY_MEMORY
@@ -812,6 +903,17 @@
     /* ... */
 }
 
+/*
+ * Domain 0 has direct access to all devices. The major point of
+ * this stub, however, is to allow alloc_dom_mem to handle requests
+ * with order > 0: dom0 requires that bit set in order to allocate
+ * memory for other domains.
+ */
+void physdev_init_dom0(struct domain *d)
+{
+       set_bit(_DOMF_physdev_access, &d->domain_flags);
+}
+
 extern unsigned long running_on_sim;
 unsigned int vmx_dom0 = 0;
 int construct_dom0(struct domain *d, 
@@ -963,6 +1065,7 @@
     set_bit(_DOMF_constructed, &d->domain_flags);
     new_thread(v, pkern_entry, 0, 0);
 
+    physdev_init_dom0(d);
     // FIXME: Hack for keyboard input
 #ifdef CLONE_DOMAIN0
 if (d == dom0)
@@ -978,6 +1081,8 @@
 
     return 0;
 }
+
+
 #else //CONFIG_VTI
 
 int construct_dom0(struct domain *d, 
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/ia64_int.h
--- a/xen/include/asm-ia64/ia64_int.h   Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/ia64_int.h   Fri Jul 15 13:39:50 2005
@@ -3,11 +3,11 @@
 
 //#include "ia64.h"
 
-#define        IA64_VHPT_TRANS_VECTOR                  0x0000  /* UNUSED */
+#define        IA64_VHPT_TRANS_VECTOR                  0x0000
 #define IA64_INST_TLB_VECTOR                   0x0400
 #define IA64_DATA_TLB_VECTOR                   0x0800
-#define IA64_ALT_INST_TLB_VECTOR               0x0c00  /* UNUSED */
-#define IA64_ALT_DATA_TLB_VECTOR               0x1000  /* UNUSED */
+#define IA64_ALT_INST_TLB_VECTOR               0x0c00
+#define IA64_ALT_DATA_TLB_VECTOR               0x1000
 #define IA64_DATA_NESTED_TLB_VECTOR            0x1400
 #define IA64_INST_KEY_MISS_VECTOR              0x1800
 #define IA64_DATA_KEY_MISS_VECTOR              0x1c00
@@ -33,12 +33,11 @@
 #define IA64_TAKEN_BRANCH_TRAP_VECTOR          0x5f00
 #define IA64_SINGLE_STEP_TRAP_VECTOR           0x6000
 
-#define        IA64_NO_FAULT           0x0000
-#define        IA64_RFI_IN_PROGRESS    0x0001
-#define IA64_RETRY              0x0002
+#define        IA64_NO_FAULT           0x0001
+#define        IA64_RFI_IN_PROGRESS    0x0002
+#define IA64_RETRY              0x0003
 #ifdef  CONFIG_VTI
-#define IA64_FAULT             0x0001
-#define IA64_INJ_FAULT         0x0005
+#define IA64_FAULT             0x0002
 #endif      //CONFIG_VTI
 #define IA64_FORCED_IFA         0x0004
 #define        IA64_ILLOP_FAULT        (IA64_GENEX_VECTOR | 0x00)
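The renumbering above reads as a consequence of the new vcpu_translate() path in xen/arch/ia64/process.c below: its return value is passed straight to reflect_interruption() as an interruption vector, so the status codes must stay clear of real vector offsets such as 0x0000, which this changeset puts back into use. A sketch of the resulting dispatch (condensed from the ia64_do_page_fault() hunk that follows):

    fault = vcpu_translate(current, address, is_data, &pteval, &itir);
    if (fault == IA64_NO_FAULT) {   /* now 0x0001, off the vector space */
        /* translation found: insert it into the TLB and retry */
    } else {
        /* 'fault' is a vector offset (0x0000, 0x0400, 0x0800, ...) and
         * can be handed to the guest unchanged */
        reflect_interruption(address, isr, 0, regs, fault);
    }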
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/process.c
--- a/xen/arch/ia64/process.c   Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/process.c   Fri Jul 15 13:39:50 2005
@@ -75,8 +75,6 @@
        }
 #endif // CONFIG_VTI
 }
-
-extern TR_ENTRY *match_tr(struct vcpu *v, unsigned long ifa);
 
 void tdpfoo(void) { }
 
@@ -260,140 +258,29 @@
                        ++pending_false_positive;
        }
 }
+unsigned long lazy_cover_count = 0;
 
 int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
 {
        if (!PSCB(v,interrupt_collection_enabled)) {
-               if (isr & IA64_ISR_IR) {
-//                     printf("Handling lazy cover\n");
-                       PSCB(v,ifs) = regs->cr_ifs;
-                       PSCB(v,incomplete_regframe) = 1;
-                       regs->cr_ifs = 0;
-                       return(1); // retry same instruction with cr.ifs off
-               }
+               PSCB(v,ifs) = regs->cr_ifs;
+               PSCB(v,incomplete_regframe) = 1;
+               regs->cr_ifs = 0;
+               lazy_cover_count++;
+               return(1); // retry same instruction with cr.ifs off
        }
        return(0);
 }
 
-#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
-
-void xen_handle_domain_access(unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
-{
-       struct domain *d = (struct domain *) current->domain;
-       struct domain *ed = (struct vcpu *) current;
-       TR_ENTRY *trp;
-       unsigned long psr = regs->cr_ipsr, mask, flags;
+void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
+{
        unsigned long iip = regs->cr_iip;
        // FIXME should validate address here
-       unsigned long pteval, mpaddr, ps;
-       unsigned long lookup_domain_mpa(struct domain *,unsigned long);
-       unsigned long match_dtlb(struct vcpu *,unsigned long, unsigned long *, unsigned long *);
+       unsigned long pteval;
+       unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
        IA64FAULT fault;
 
-// NEED TO HANDLE THREE CASES:
-// 1) domain is in metaphysical mode
-// 2) domain address is in TR
-// 3) domain address is not in TR (reflect data miss)
-
-               // got here trying to read a privop bundle
-               //if (d->metaphysical_mode) {
-       if (PSCB(current,metaphysical_mode) && !(address>>61)) {  //FIXME
-               if (d == dom0) {
-                       if (address < dom0_start || address >= dom0_start + dom0_size) {
-                               printk("xen_handle_domain_access: out-of-bounds"
-                                  "dom0 mpaddr %p! continuing...\n",mpaddr);
-                               tdpfoo();
-                       }
-               }
-               pteval = lookup_domain_mpa(d,address);
-               //FIXME: check return value?
-               // would be nice to have a counter here
-               vcpu_itc_no_srlz(ed,2,address,pteval,-1UL,PAGE_SHIFT);
-               return;
-       }
-if (address < 0x4000) printf("WARNING: page_fault @%p, iip=%p\n",address,iip);
-               
-       if (trp = match_tr(current,address)) {
-               // FIXME address had better be pre-validated on insert
-               pteval = translate_domain_pte(trp->page_flags,address,trp->itir);
-               vcpu_itc_no_srlz(current,6,address,pteval,-1UL,(trp->itir>>2)&0x3f);
-               return;
-       }
-       // if we are fortunate enough to have it in the 1-entry TLB...
-       if (pteval = match_dtlb(ed,address,&ps,NULL)) {
-               vcpu_itc_no_srlz(ed,6,address,pteval,-1UL,ps);
-               return;
-       }
-       if (ia64_done_with_exception(regs)) {
-//if (!(uacnt++ & 0x3ff)) printk("*** xen_handle_domain_access: successfully handled cnt=%d iip=%p, addr=%p...\n",uacnt,iip,address);
-                       return;
-       }
-       else {
-               // should never happen.  If it does, region 0 addr may
-               // indicate a bad xen pointer
-               printk("*** xen_handle_domain_access: exception table"
-                       " lookup failed, iip=%p, addr=%p, spinning...\n",
-                       iip,address);
-               panic_domain(regs,"*** xen_handle_domain_access: exception table"
-                       " lookup failed, iip=%p, addr=%p, spinning...\n",
-                       iip,address);
-       }
-}
-
-void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
-{
-       struct domain *d = (struct domain *) current->domain;
-       TR_ENTRY *trp;
-       unsigned long psr = regs->cr_ipsr, mask, flags;
-       unsigned long iip = regs->cr_iip;
-       // FIXME should validate address here
-       unsigned long iha, pteval, mpaddr;
-       unsigned long lookup_domain_mpa(struct domain *,unsigned long);
-       unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
-       unsigned long vector;
-       IA64FAULT fault;
-
-
-       //The right way is put in VHPT and take another miss!
-
-       // weak attempt to avoid doing both I/D tlb insert to avoid
-       // problems for privop bundle fetch, doesn't work, deal with later
-       if (IS_XEN_ADDRESS(d,iip) && !IS_XEN_ADDRESS(d,address)) {
-               xen_handle_domain_access(address, isr, regs, itir);
-
-               return;
-       }
-
-       // FIXME: no need to pass itir in to this routine as we need to
-       // compute the virtual itir anyway (based on domain's RR.ps)
-       // AND ACTUALLY reflect_interruption doesn't use it anyway!
-       itir = vcpu_get_itir_on_fault(current,address);
-
-       if (PSCB(current,metaphysical_mode) && (is_data || !(address>>61))) {  //FIXME
-               // FIXME should validate mpaddr here
-               if (d == dom0) {
-                       if (address < dom0_start || address >= dom0_start + dom0_size) {
-                               printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, iip=%p! continuing...\n",address,iip);
-                               printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, old iip=%p!\n",address,current->vcpu_info->arch.iip);
-                               tdpfoo();
-                       }
-               }
-               pteval = lookup_domain_mpa(d,address);
-               // FIXME, must be inlined or potential for nested fault here!
-               vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,PAGE_SHIFT);
-               return;
-       }
-       if (trp = match_tr(current,address)) {
-               // FIXME address had better be pre-validated on insert
-               pteval = translate_domain_pte(trp->page_flags,address,trp->itir);
-               vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(trp->itir>>2)&0x3f);
-               return;
-       }
-
-       if (handle_lazy_cover(current, isr, regs)) return;
-if (!(address>>61)) {
-panic_domain(0,"ia64_do_page_fault: @%p???, iip=%p, b0=%p, itc=%p (spinning...)\n",address,iip,regs->b0,ia64_get_itc());
-}
+       if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return;
        if ((isr & IA64_ISR_SP)
           || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
        {
@@ -406,37 +293,29 @@
                return;
        }
 
-       if (vcpu_get_rr_ve(current, address) && (PSCB(current,pta) & IA64_PTA_VE))
+       fault = vcpu_translate(current,address,is_data,&pteval,&itir);
+       if (fault == IA64_NO_FAULT)
        {
-               if (PSCB(current,pta) & IA64_PTA_VF)
-               {
-                       /* long format VHPT - not implemented */
-                       vector = is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR;
-               }
-               else
-               {
-                       /* short format VHPT */
-                       vcpu_thash(current, address, &iha);
-                       if (__copy_from_user(&pteval, iha, sizeof(pteval)) == 0)
-                       {
-                               /* 
-                                * Optimisation: this VHPT walker aborts on not-present pages
-                                * instead of inserting a not-present translation, this allows
-                                * vectoring directly to the miss handler.
-       \                        */
-                               if (pteval & _PAGE_P)
-                               {
-                                       pteval = translate_domain_pte(pteval,address,itir);
-                                       vcpu_itc_no_srlz(current,is_data?6:1,address,pteval,-1UL,(itir>>2)&0x3f);
-                                       return;
-                               }
-                               else vector = is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR;
-                       }
-                       else vector = IA64_VHPT_TRANS_VECTOR;
-               }
-       }
-       else vector = is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR;
-       reflect_interruption(address, isr, itir, regs, vector);
+               pteval = translate_domain_pte(pteval,address,itir);
+               vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f);
+               return;
+       }
+       else if (IS_VMM_ADDRESS(iip))
+       {
+               if (!ia64_done_with_exception(regs)) {
+                       // should never happen.  If it does, region 0 addr may
+                       // indicate a bad xen pointer
+                       printk("*** xen_handle_domain_access: exception table"
+                              " lookup failed, iip=%p, addr=%p, spinning...\n",
+                               iip,address);
+                       panic_domain(regs,"*** xen_handle_domain_access: exception table"
+                              " lookup failed, iip=%p, addr=%p, spinning...\n",
+                               iip,address);
+               }
+               return;
+       }
+
+       reflect_interruption(address, isr, 0, regs, fault);
 }
 
 void
@@ -865,6 +744,6 @@
                while(vector);
                return;
        }
-       if (check_lazy_cover && handle_lazy_cover(v, isr, regs)) return;
+       if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return;
        reflect_interruption(ifa,isr,itir,regs,vector);
 }
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/xensetup.c
--- a/xen/arch/ia64/xensetup.c  Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/xensetup.c  Fri Jul 15 13:39:50 2005
@@ -14,7 +14,7 @@
 //#include <xen/delay.h>
 #include <xen/compile.h>
 //#include <xen/console.h>
-//#include <xen/serial.h>
+#include <xen/serial.h>
 #include <xen/trace.h>
 #include <asm/meminit.h>
 #include <asm/page.h>
@@ -30,7 +30,6 @@
 #ifdef CLONE_DOMAIN0
 struct domain *clones[CLONE_DOMAIN0];
 #endif
-extern struct domain *dom0;
 extern unsigned long domain0_ready;
 
 int find_max_pfn (unsigned long, unsigned long, void *);
diff -r 35b74976f598 -r a83ac0806d6b xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Fri Jul 15 12:52:02 2005
+++ b/xen/include/public/arch-ia64.h    Fri Jul 15 13:39:50 2005
@@ -181,6 +181,16 @@
 } arch_shared_info_t;          // DON'T PACK 
 
 typedef struct vcpu_guest_context {
+#define VGCF_FPU_VALID (1<<0)
+#define VGCF_VMX_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+       unsigned long flags;       /* VGCF_* flags */
+       unsigned long pt_base;     /* PMT table base */
+       unsigned long pt_max_pfn;  /* Max pfn including holes */
+       unsigned long share_io_pg; /* Shared page for I/O emulation */
+       unsigned long vm_assist;   /* VMASST_TYPE_* bitmap, now none on IPF */
+       unsigned long guest_iip;   /* Guest entry point */
+
        struct pt_regs regs;
        arch_vcpu_info_t vcpu;
        arch_shared_info_t shared;
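A hypothetical builder-side sketch of how the new fields might be filled in for a VT-i guest; the local variable names are illustrative, only the field and flag names come from the header:

    struct vcpu_guest_context c;

    memset(&c, 0, sizeof(c));
    c.flags       = VGCF_VMX_GUEST;  /* full virtualization; needs VMX hardware */
    c.pt_base     = pmt_table_addr;  /* PMT table; Xen maps it with __va() */
    c.pt_max_pfn  = max_guest_pfn;   /* highest guest pfn, holes included */
    c.share_io_pg = io_page_addr;    /* page shared with the I/O emulation model */
    c.guest_iip   = guest_fw_entry;  /* entry IP, ideally the guest firmware load address */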
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c   Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/xenmisc.c   Fri Jul 15 13:39:50 2005
@@ -257,6 +257,8 @@
 void cs10foo(void) {}
 void cs01foo(void) {}
 
+unsigned long context_switch_count = 0;
+
 // context_switch
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
@@ -276,6 +278,7 @@
        /* Housekeeping for prev domain */
 #endif // CONFIG_VTI
 
+       context_switch_count++;
        switch_to(prev,next,prev);
 #ifdef CONFIG_VTI
        /* Post-setup for new domain */
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h     Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/config.h     Fri Jul 15 13:39:50 2005
@@ -49,6 +49,7 @@
 extern unsigned long xenheap_phys_end;
 extern unsigned long xen_pstart;
 extern unsigned long xenheap_size;
+extern struct domain *dom0;
 extern unsigned long dom0_start;
 extern unsigned long dom0_size;
 
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/vcpu.h
--- a/xen/include/asm-ia64/vcpu.h       Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/vcpu.h       Fri Jul 15 13:39:50 2005
@@ -135,6 +135,7 @@
 extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
 extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
 extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
+extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir);
 extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
 /* misc */
 extern IA64FAULT vcpu_rfi(VCPU *vcpu);
@@ -150,5 +151,4 @@
 extern UINT64 vcpu_get_tmp(VCPU *, UINT64);
 extern void vcpu_set_tmp(VCPU *, UINT64, UINT64);
 
-
 #endif
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/ivt.S
--- a/xen/arch/ia64/ivt.S       Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/ivt.S       Fri Jul 15 13:39:50 2005
@@ -666,7 +666,12 @@
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 #ifdef XEN
-       REFLECT(9)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=9
+       movl r20=0x2400
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(9)
        // Like Entry 8, except for instruction access
@@ -734,7 +739,12 @@
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 #ifdef XEN
-       REFLECT(10)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=10
+       movl r20=0x2800
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(10)
        // Like Entry 8, except for data access
@@ -921,10 +931,10 @@
        ;;
 #ifdef XEN
        mov r30=cr.ivr          // pass cr.ivr as first arg
-       // FIXME: this is a hack... use cpuinfo.pgd_quick because its
+       // FIXME: this is a hack... use cpuinfo.ksoftirqd because it's
        // not used anywhere else and we need a place to stash ivr and
        // there's no registers available unused by SAVE_MIN/REST
-       movl r29=(PERCPU_ADDR)+IA64_CPUINFO_PGD_QUICK_OFFSET;;
+       movl r29=(PERCPU_ADDR)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
        st8 [r29]=r30;;
        movl r28=slow_interrupt;;
        mov r29=rp;;
@@ -944,7 +954,7 @@
        ;;
        alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
 #ifdef XEN
-       movl out0=(PERCPU_ADDR)+IA64_CPUINFO_PGD_QUICK_OFFSET;;
+       movl out0=(PERCPU_ADDR)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
        ld8 out0=[out0];;
 #else
        mov out0=cr.ivr         // pass cr.ivr as first arg
@@ -1395,7 +1405,12 @@
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(daccess_rights)
 #ifdef XEN
-       REFLECT(23)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=23
+       movl r20=0x5300
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(23)
        mov r16=cr.ifa
@@ -1821,7 +1836,7 @@
 
 #ifdef XEN
        .org ia64_ivt+0x8000
-ENTRY(dispatch_reflection)
+GLOBAL_ENTRY(dispatch_reflection)
        /*
         * Input:
         *      psr.ic: off
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/vmx_phy_mode.h
--- a/xen/include/asm-ia64/vmx_phy_mode.h       Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/vmx_phy_mode.h       Fri Jul 15 13:39:50 2005
@@ -83,6 +83,7 @@
 #define IA64_RSC_MODE       0x0000000000000003
 #define XEN_RR7_RID    (0xf00010)
 #define GUEST_IN_PHY    0x1
+#define GUEST_PHY_EMUL 0x2
 extern int valid_mm_mode[];
 extern int mm_switch_table[][8];
 extern void physical_mode_init(VCPU *);
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/asm-offsets.c       Fri Jul 15 13:39:50 2005
@@ -46,6 +46,8 @@
        DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.interrupt_collection_enabled)));
        DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, arch.interrupt_delivery_enabled));
        DEFINE(XSI_IIP_OFS, offsetof(vcpu_info_t, arch.iip));
+       DEFINE(XSI_IFA_OFS, offsetof(vcpu_info_t, arch.ifa));
+       DEFINE(XSI_ITIR_OFS, offsetof(vcpu_info_t, arch.itir));
        DEFINE(XSI_IPSR, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.ipsr)));
        DEFINE(XSI_IPSR_OFS, offsetof(vcpu_info_t, arch.ipsr));
        DEFINE(XSI_IFS_OFS, offsetof(vcpu_info_t, arch.ifs));
@@ -61,6 +63,7 @@
        DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption));
        DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
        DEFINE(XSI_TPR_OFS, offsetof(vcpu_info_t, arch.tpr));
+       DEFINE(XSI_PTA_OFS, offsetof (vcpu_info_t, arch.pta));
        DEFINE(XSI_ITV_OFS, offsetof(vcpu_info_t, arch.itv));
        //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
        //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
@@ -85,10 +88,12 @@
        DEFINE(IA64_VCPU_ENDING_RID_OFFSET, offsetof (struct vcpu, arch.ending_rid));
        DEFINE(IA64_VCPU_DOMAIN_ITM_OFFSET, offsetof (struct vcpu, arch.domain_itm));
        DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, arch.domain_itm_last));
+       DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
+       DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
 
        BLANK();
        DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next));
-       DEFINE(IA64_CPUINFO_PGD_QUICK_OFFSET, offsetof (struct cpuinfo_ia64, pgd_quick));
+       DEFINE(IA64_CPUINFO_KSOFTIRQD_OFFSET, offsetof (struct cpuinfo_ia64, ksoftirqd));
 
        //DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
 
@@ -219,6 +224,7 @@
 
 #ifdef  CONFIG_VTI
        DEFINE(IA64_VPD_BASE_OFFSET, offsetof (struct vcpu, arch.arch_vmx.vpd));
+       DEFINE(IA64_VLSAPIC_INSVC_BASE_OFFSET, offsetof (struct vcpu, arch.arch_vmx.in_service[0]));
        DEFINE(IA64_VPD_CR_VPTA_OFFSET, offsetof (cr_t, pta));
        DEFINE(XXX_THASH_SIZE, sizeof (thash_data_t));
 
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vtlb.c
--- a/xen/arch/ia64/vtlb.c      Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vtlb.c      Fri Jul 15 13:39:50 2005
@@ -26,7 +26,7 @@
 #include <asm/vmx_mm_def.h>
 #include <asm/gcc_intrin.h>
 #include <xen/interrupt.h>
-#include <asm/vcpu.h>
+#include <asm/vmx_vcpu.h>
 #define  MAX_CCH_LENGTH     40
 
 
@@ -401,6 +401,8 @@
             panic("Can't convert to machine VHPT entry\n");
         }
         hash_table->next = cch;
+        if(hash_table->tag==hash_table->next->tag)
+            while(1);
     }
     return /*hash_table*/;
 }
@@ -466,10 +468,11 @@
 static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch)
 {
     thash_data_t *next;
-    
+
     if ( ++cch_depth > MAX_CCH_LENGTH ) {
         printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n");
-    }
+        while(1);
+   }
     if ( cch -> next ) {
         next = thash_rem_cch(hcb, cch->next);
     }
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h        Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/vmx.h        Fri Jul 15 13:39:50 2005
@@ -35,4 +35,6 @@
 extern void vmx_purge_double_mapping(u64, u64, u64);
 extern void vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7);
 
+extern void vmx_wait_io(void);
+extern void vmx_io_assist(struct vcpu *v);
 #endif /* _ASM_IA64_VT_H */
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vlsapic.c
--- a/xen/arch/ia64/vlsapic.c   Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vlsapic.c   Fri Jul 15 13:39:50 2005
@@ -133,7 +133,7 @@
     // FIXME: should use local_irq_disable & local_irq_enable ??
     local_irq_save(spsr);
     guest_itc = now_itc(vtm);
-    update_last_itc(vtm, guest_itc);
+//    update_last_itc(vtm, guest_itc);
 
     local_irq_restore(spsr);
     return guest_itc;
@@ -174,12 +174,12 @@
 /* Interrupt must be disabled at this point */
 
 extern u64 tick_to_ns(u64 tick);
-#define TIMER_SLOP (50*1000) /* ns */  /* copy from ac_timer.c */
+#define TIMER_SLOP (50*1000) /* ns */  /* copy from ac_timer.c */
 void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
 {
     uint64_t    cur_itc,vitm,vitv;
     uint64_t    expires;
-    long       diff_now, diff_last;
+    long        diff_now, diff_last;
     uint64_t    spsr;
     
     vitv = VPD_CR(vcpu, itv);
@@ -237,21 +237,30 @@
 
 #define  NMI_VECTOR         2
 #define  ExtINT_VECTOR      0
-
+#define  NULL_VECTOR        -1
 #define  VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i])
-/*
- * LID-CR64: Keep in vpd.
- * IVR-CR65: (RO) see guest_read_ivr().
- * TPR-CR66: Keep in vpd, acceleration enabled.
- * EOI-CR67: see guest_write_eoi().
- * IRR0-3 - CR68-71: (RO) Keep in vpd irq_pending[]
- *          can move to vpd for optimization.
- * ITV: in time virtualization.
- * PMV: Keep in vpd initialized as 0x10000.
- * CMCV: Keep in vpd initialized as 0x10000.
- * LRR0-1: Keep in vpd, initialized as 0x10000.
- *
- */
+static void update_vhpi(VCPU *vcpu, int vec)
+{
+    u64     vhpi;
+    if ( vec == NULL_VECTOR ) {
+        vhpi = 0;
+    }
+    else if ( vec == NMI_VECTOR ) { // NMI
+        vhpi = 32;
+    } else if (vec == ExtINT_VECTOR) { //ExtINT
+        vhpi = 16;
+    }
+    else {
+        vhpi = vec / 16;
+    }
+
+    VMX_VPD(vcpu,vhpi) = vhpi;
+    // TODO: Add support for XENO
+    if ( VMX_VPD(vcpu,vac).a_int ) {
+        ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, 
+                (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0);
+    }
+}
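The VHPI encoding that update_vhpi() produces, summarized as a reading of the code above:

    /*  vec argument           VMX_VPD(vcpu, vhpi)
     *  NULL_VECTOR (-1)   ->  0       (nothing pending)
     *  ExtINT_VECTOR (0)  ->  16
     *  NMI_VECTOR (2)     ->  32
     *  other vector v     ->  v / 16  (its priority class, 1..15)
     */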
 
 void vlsapic_reset(VCPU *vcpu)
 {
@@ -268,9 +277,11 @@
     VPD_CR(vcpu, cmcv) = 0x10000;
     VPD_CR(vcpu, lrr0) = 0x10000;   // default reset value?
     VPD_CR(vcpu, lrr1) = 0x10000;   // default reset value?
+    update_vhpi(vcpu, NULL_VECTOR);
     for ( i=0; i<4; i++) {
         VLSAPIC_INSVC(vcpu,i) = 0;
     }
+    DPRINTK("VLSAPIC inservice base=%lp\n", &VLSAPIC_INSVC(vcpu,0) );
 }
 
 /*
@@ -281,7 +292,7 @@
  */
 static __inline__ int highest_bits(uint64_t *dat)
 {
-    uint64_t  bits, bitnum=-1;
+    uint64_t  bits, bitnum;
     int i;
     
     /* loop for all 256 bits */
@@ -292,12 +303,12 @@
             return i*64+bitnum;
         }
     }
-   return -1;
+   return NULL_VECTOR;
 }
 
 /*
  * Return 0-255 for pending irq.
- *        -1 when no pending.
+ *        NULL_VECTOR: when no pending.
  */
 static int highest_pending_irq(VCPU *vcpu)
 {
@@ -320,7 +331,7 @@
 static int is_higher_irq(int pending, int inservice)
 {
     return ( (pending >> 4) > (inservice>>4) || 
-                ((pending != -1) && (inservice == -1)) );
+                ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) );
 }
 
 static int is_higher_class(int pending, int mic)
@@ -332,41 +343,97 @@
 {
     return (vec == 1 || ((vec <= 14 && vec >= 3)));
 }
+
+#define   IRQ_NO_MASKED         0
+#define   IRQ_MASKED_BY_VTPR    1
+#define   IRQ_MASKED_BY_INSVC   2   // masked by inservice IRQ
 
 /* See Table 5-8 in SDM vol2 for the definition */
 static int
-irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
-{
-    uint64_t    vtpr;
-    
-    vtpr = VPD_CR(vcpu, tpr);
-
-    if ( h_pending == NMI_VECTOR && h_inservice != NMI_VECTOR )
+_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+    tpr_t    vtpr;
+    uint64_t    mmi;
+    
+    vtpr.val = VPD_CR(vcpu, tpr);
+
+    if ( h_inservice == NMI_VECTOR ) {
+        return IRQ_MASKED_BY_INSVC;
+    }
+    if ( h_pending == NMI_VECTOR ) {
         // Non Maskable Interrupt
-        return 0;
-
-    if ( h_pending == ExtINT_VECTOR && h_inservice >= 16)
-        return (vtpr>>16)&1;    // vtpr.mmi
-
-    if ( !(vtpr&(1UL<<16)) &&
-          is_higher_irq(h_pending, h_inservice) &&
-          is_higher_class(h_pending, (vtpr>>4)&0xf) )
-        return 0;
-
-    return 1;
-}
-
+        return IRQ_NO_MASKED;
+    }
+    if ( h_inservice == ExtINT_VECTOR ) {
+        return IRQ_MASKED_BY_INSVC;
+    }
+    mmi = vtpr.mmi;
+    if ( h_pending == ExtINT_VECTOR ) {
+        if ( mmi ) {
+            // mask all external IRQ
+            return IRQ_MASKED_BY_VTPR;
+        }
+        else {
+            return IRQ_NO_MASKED;
+        }
+    }
+
+    if ( is_higher_irq(h_pending, h_inservice) ) {
+        if ( !mmi && is_higher_class(h_pending, vtpr.mic) ) {
+            return IRQ_NO_MASKED;
+        }
+        else {
+            return IRQ_MASKED_BY_VTPR;
+        }
+    }
+    else {
+        return IRQ_MASKED_BY_INSVC;
+    }
+}
+
+static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+    int mask;
+    
+    mask = _xirq_masked(vcpu, h_pending, h_inservice);
+    return mask;
+}
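A worked example of the masking rules, not from the changeset: take pending vector 0x45, whose priority class is 0x45 >> 4 = 4:

    irq_masked(vcpu, 0x45, NULL_VECTOR); /* vtpr.mmi=0, mic=3: 4 > 3,
                                          * -> IRQ_NO_MASKED         */
    irq_masked(vcpu, 0x45, NULL_VECTOR); /* vtpr.mmi=0, mic=9: 4 > 9 fails,
                                          * -> IRQ_MASKED_BY_VTPR    */
    irq_masked(vcpu, 0x45, NMI_VECTOR);  /* NMI in service masks everything,
                                          * -> IRQ_MASKED_BY_INSVC   */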
+
+
+/*
+ * May come from virtualization fault or
+ * nested host interrupt.
+ */
 void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
 {
     uint64_t    spsr;
 
     if (vector & ~0xff) {
-        printf("vmx_vcpu_pend_interrupt: bad vector\n");
+        DPRINTK("vmx_vcpu_pend_interrupt: bad vector\n");
         return;
     }
     local_irq_save(spsr);
     VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63);
     local_irq_restore(spsr);
+    vcpu->arch.irq_new_pending = 1;
+}
+
+/*
+ * Add a batch of pending interrupts.
+ * The interrupt sources are contained in pend_irr[0-3], with
+ * each bit standing for one interrupt.
+ */
+void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, UINT64 *pend_irr)
+{
+    uint64_t    spsr;
+    int     i;
+
+    local_irq_save(spsr);
+    for (i=0 ; i<4; i++ ) {
+        VPD_CR(vcpu,irr[i]) |= pend_irr[i];
+    }
+    local_irq_restore(spsr);
+    vcpu->arch.irq_new_pending = 1;
 }
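A hypothetical caller sketch for the batch interface (how the bits get accumulated is illustrative):

    u64 pend_irr[4] = { 0, 0, 0, 0 };

    pend_irr[vec >> 6] |= 1UL << (vec & 63);       /* queue vector 'vec' */
    vmx_vcpu_pend_batch_interrupt(vcpu, pend_irr); /* OR all 256 bits into the vIRR */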
 
 /*
@@ -383,7 +450,7 @@
  */
 int vmx_check_pending_irq(VCPU *vcpu)
 {
-    uint64_t  spsr;
+    uint64_t  spsr, mask;
     int     h_pending, h_inservice;
     int injected=0;
     uint64_t    isr;
@@ -391,22 +458,25 @@
 
     local_irq_save(spsr);
     h_pending = highest_pending_irq(vcpu);
-    if ( h_pending == -1 ) goto chk_irq_exit;
+    if ( h_pending == NULL_VECTOR ) goto chk_irq_exit;
     h_inservice = highest_inservice_irq(vcpu);
 
     vpsr.val = vmx_vcpu_get_psr(vcpu);
-    if (  vpsr.i &&
-        !irq_masked(vcpu, h_pending, h_inservice) ) {
-        //inject_guest_irq(v);
+    mask = irq_masked(vcpu, h_pending, h_inservice);
+    if (  vpsr.i && IRQ_NO_MASKED == mask ) {
         isr = vpsr.val & IA64_PSR_RI;
         if ( !vpsr.ic )
             panic("Interrupt when IC=0\n");
         vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
         injected = 1;
     }
-    else if ( VMX_VPD(vcpu,vac).a_int && 
-            is_higher_irq(h_pending,h_inservice) ) {
-        vmx_inject_vhpi(vcpu,h_pending);
+    else if ( mask == IRQ_MASKED_BY_INSVC ) {
+        // can't inject VHPI
+//        DPRINTK("IRQ masked by higher inservice\n");
+    }
+    else {
+        // masked by vpsr.i or vtpr.
+        update_vhpi(vcpu,h_pending);
     }
 
 chk_irq_exit:
@@ -414,17 +484,21 @@
     return injected;
 }
 
+/*
+ * Only called from the virtualization fault path.
+ */
 void guest_write_eoi(VCPU *vcpu)
 {
     int vec;
     uint64_t  spsr;
 
     vec = highest_inservice_irq(vcpu);
-    if ( vec < 0 ) panic("Wrong vector to EOI\n");
+    if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
     local_irq_save(spsr);
     VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
     local_irq_restore(spsr);
     VPD_CR(vcpu, eoi)=0;    // overwrite the data
+    vmx_check_pending_irq(vcpu);
 }
 
 uint64_t guest_read_vivr(VCPU *vcpu)
@@ -435,37 +509,54 @@
     local_irq_save(spsr);
     vec = highest_pending_irq(vcpu);
     h_inservice = highest_inservice_irq(vcpu);
-    if ( vec < 0 || irq_masked(vcpu, vec, h_inservice) ) {
+    if ( vec == NULL_VECTOR || 
+        irq_masked(vcpu, vec, h_inservice) != IRQ_NO_MASKED ) {
         local_irq_restore(spsr);
         return IA64_SPURIOUS_INT_VECTOR;
     }
  
     VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63));
     VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63));
-
-    h_inservice = highest_inservice_irq(vcpu);
-    next = highest_pending_irq(vcpu);
-    if ( VMX_VPD(vcpu,vac).a_int &&
-        (is_higher_irq(next, h_inservice) || (next == -1)) )
-        vmx_inject_vhpi(vcpu, next);
+    update_vhpi(vcpu, NULL_VECTOR);     // clear VHPI till EOI or IRR write
     local_irq_restore(spsr);
     return (uint64_t)vec;
 }
 
-void vmx_inject_vhpi(VCPU *vcpu, u8 vec)
-{
-        VMX_VPD(vcpu,vhpi) = vec / 16;
-
-
-        // non-maskable
-        if ( vec == NMI_VECTOR ) // NMI
-                VMX_VPD(vcpu,vhpi) = 32;
-        else if (vec == ExtINT_VECTOR) //ExtINT
-                VMX_VPD(vcpu,vhpi) = 16;
-        else if (vec == -1)
-                VMX_VPD(vcpu,vhpi) = 0; /* Nothing pending */
-
-        ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, 
-            (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0);
-}
-
+static void generate_exirq(VCPU *vcpu)
+{
+    IA64_PSR    vpsr;
+    uint64_t    isr;
+    
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    update_vhpi(vcpu, NULL_VECTOR);
+    isr = vpsr.val & IA64_PSR_RI;
+    if ( !vpsr.ic )
+        panic("Interrupt when IC=0\n");
+    vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
+}
+
+vhpi_detection(VCPU *vcpu)
+{
+    uint64_t    threshold,vhpi;
+    tpr_t       vtpr;
+    IA64_PSR    vpsr;
+    
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    vtpr.val = VPD_CR(vcpu, tpr);
+
+    threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+    vhpi = VMX_VPD(vcpu,vhpi);
+    if ( vhpi > threshold ) {
+        // interrupt activated
+        generate_exirq (vcpu);
+    }
+}
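A worked example of the threshold test above (values illustrative):

    /*  threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic
     *
     *  psr.i = 1, mmi = 0, mic = 3  ->  threshold = 3;
     *      a pending vector 0x45 has vhpi = 4, and 4 > 3, so
     *      generate_exirq() fires.
     *  psr.i = 0                    ->  threshold >= 32, which no vhpi
     *      value exceeds, so delivery waits until interrupts are
     *      re-enabled.
     */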
+
+vmx_vexirq(VCPU *vcpu)
+{
+    static  uint64_t  vexirq_count=0;
+
+    vexirq_count ++;
+    printk("Virtual ex-irq %ld\n", vexirq_count);
+    generate_exirq (vcpu);
+}
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/xensystem.h
--- a/xen/include/asm-ia64/xensystem.h  Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/xensystem.h  Fri Jul 15 13:39:50 2005
@@ -33,6 +33,8 @@
 #ifdef CONFIG_VTI
 extern struct task_struct *vmx_ia64_switch_to (void *next_task);
 #define __switch_to(prev,next,last) do {       \
+       ia64_save_fpu(prev->arch._thread.fph);  \
+       ia64_load_fpu(next->arch._thread.fph);  \
        if (VMX_DOMAIN(prev))                   \
                vmx_save_state(prev);           \
        else {                                  \
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/mmio.c
--- a/xen/arch/ia64/mmio.c      Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/mmio.c      Fri Jul 15 13:39:50 2005
@@ -27,7 +27,13 @@
 #include <asm/gcc_intrin.h>
 #include <xen/interrupt.h>
 #include <asm/vmx_vcpu.h>
-
+#include <asm/privop.h>
+#include <asm/types.h>
+#include <public/io/ioreq.h>
+#include <asm/mm.h>
+#include <asm/vmx.h>
+
+/*
 struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
 {
     int     i;
@@ -37,65 +43,194 @@
     }
     return NULL;
 }
-
-
-extern void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma);
-static inline void mmio_write(VCPU *vcpu, void *src, u64 dest_pa, size_t s, int ma)
+*/
+
+#define        PIB_LOW_HALF(ofst)      !(ofst&(1<<20))
+#define PIB_OFST_INTA           0x1E0000
+#define PIB_OFST_XTP            0x1E0008
+
+static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
+{
+    switch (pib_off) {
+    case PIB_OFST_INTA:
+        panic("Undefined write on PIB INTA\n");
+        break;
+    case PIB_OFST_XTP:
+        if ( s == 1 && ma == 4 /* UC */) {
+            vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src;
+        }
+        else {
+            panic("Undefined write on PIB XTP\n");
+        }
+        break;
+    default:
+        if ( PIB_LOW_HALF(pib_off) ) {   // lower half
+            if ( s != 8 || ma != 0x4 /* UC */ ) {
+                panic("Undefined IPI-LHF write!\n");
+            }
+            else {
+                write_ipi(vcpu, pib_off, *(uint64_t *)src);
+                // TODO for SM-VP
+            }
+        }
+        else {      // upper half
+            printf("IPI-UHF write %lx\n",pib_off);
+            panic("Not support yet for SM-VP\n");
+        }
+        break;
+    }
+}
+
+static void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma)
+{
+    switch (pib_off) {
+    case PIB_OFST_INTA:
+        // todo --- emit on processor system bus.
+        if ( s == 1 && ma == 4) { // 1 byte load
+            // TODO: INTA read from IOSAPIC
+        }
+        else {
+            panic("Undefined read on PIB INTA\n");
+        }
+        break;
+    case PIB_OFST_XTP:
+        if ( s == 1 && ma == 4) {
+            *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp;
+        }
+        else {
+            panic("Undefined read on PIB XTP\n");
+        }
+        break;
+    default:
+        if ( PIB_LOW_HALF(pib_off) ) {   // lower half
+            if ( s != 8 || ma != 4 ) {
+                panic("Undefined IPI-LHF read!\n");
+            }
+            else {
+#ifdef  IPI_DEBUG
+                printf("IPI-LHF read %lx\n",pib_off);
+#endif
+                *(uint64_t *)dest = 0;  // TODO for SM-VP
+            }
+        }
+        else {      // upper half
+            if ( s != 1 || ma != 4 ) {
+                panic("Undefined PIB-UHF read!\n");
+            }
+            else {
+#ifdef  IPI_DEBUG
+                printf("IPI-UHF read %lx\n",pib_off);
+#endif
+                *(uint8_t *)dest = 0;   // TODO for SM-VP
+            }
+        }
+        break;
+    }
+}
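The PIB decode implemented by the two handlers above, summarized (a reading of the code, not a spec quote):

    /*  offset < 0x100000 (bit 20 clear, 'lower half'):
     *      only 8-byte UC accesses -> per-processor IPI via write_ipi()
     *  offset 0x1E0000: INTA byte (interrupt acknowledge; read is stubbed)
     *  offset 0x1E0008: XTP byte  (external task priority, 1-byte UC)
     *  other upper-half offsets: rejected until SM-VP support exists
     */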
+
+static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    unsigned long addr;
+
+    vio = (vcpu_iodata_t *) v->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == 0) {
+        panic("bad shared page: %lx", (unsigned long)vio);
+    }
+    p = &vio->vp_ioreq;
+    p->addr = pa;
+    p->size = 1<<s;
+    p->count = 1;
+    p->dir = dir;
+    if(dir==IOREQ_WRITE)     //write;
+        p->u.data = *val;
+    p->pdata_valid = 0;
+    p->port_mm = 1;
+    p->df = 0;
+
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+    p->state = STATE_IOREQ_READY;
+    evtchn_send(IOPACKET_PORT);
+    vmx_wait_io();
+    if(dir){ //read
+        *val=p->u.data;
+    }
+    return;
+}
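The round trip to the device model encoded above, step by step (ioreq_t and its fields come from public/io/ioreq.h):

    /*  1. fill vp_ioreq in the shared page: addr, size = 1<<s, dir,
     *     and u.data for writes
     *  2. set ARCH_VMX_IO_WAIT on the vcpu, mark STATE_IOREQ_READY
     *  3. evtchn_send(IOPACKET_PORT) notifies the device model
     *  4. vmx_wait_io() blocks this vcpu until the request completes
     *  5. for reads, the result is picked up from p->u.data
     */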
+#define TO_LEGACY_IO(pa)  (((pa)>>12<<2)|((pa)&0x3))
+
+static void legacy_io_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    unsigned long addr;
+
+    vio = (vcpu_iodata_t *) v->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == 0) {
+        panic("bad shared page: %lx", (unsigned long)vio);
+    }
+    p = &vio->vp_ioreq;
+    p->addr = TO_LEGACY_IO(pa&0x3ffffffUL);
+    p->size = 1<<s;
+    p->count = 1;
+    p->dir = dir;
+    if(dir==IOREQ_WRITE)     //write;
+        p->u.data = *val;
+    p->pdata_valid = 0;
+    p->port_mm = 0;
+    p->df = 0;
+
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+    p->state = STATE_IOREQ_READY;
+    evtchn_send(IOPACKET_PORT);
+    vmx_wait_io();
+    if(dir){ //read
+        *val=p->u.data;
+    }
+    return;
+}
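A worked example of the TO_LEGACY_IO translation (values illustrative): each 4KB page of the legacy I/O window carries four port bytes, with the page number selecting the port group:

    /*  TO_LEGACY_IO(pa) = ((pa >> 12) << 2) | (pa & 0x3)
     *
     *  pa 0x2000  ->  (0x2 << 2) | 0  =  port 0x8
     *  pa 0x3001  ->  (0x3 << 2) | 1  =  port 0xd
     */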
+
+static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, int dir)
 {
     struct virutal_platform_def *v_plat;
-    struct mmio_list    *mio;
-    
+    //mmio_type_t iot;
+    unsigned long iot;
+    iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT);
     v_plat = vmx_vcpu_get_plat(vcpu);
-    mio = lookup_mmio(dest_pa, v_plat->mmio);
-    if ( mio == NULL ) 
-        panic ("Wrong address for MMIO\n");
-    
-    switch (mio->iot) {
-    case PIB_MMIO:
-        pib_write(vcpu, src, dest_pa - v_plat->pib_base, s, ma);
-        break;
-    case VGA_BUFF:
-    case CHIPSET_IO:
-    case LOW_MMIO:
-    case LEGACY_IO:
-    case IO_SAPIC:
+
+    switch (iot) {
+    case GPFN_PIB:
+        if(!dir)
+            pib_write(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
+        else
+            pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
+        break;
+    case GPFN_GFW:
+        break;
+    case GPFN_FRAME_BUFFER:
+    case GPFN_LOW_MMIO:
+        low_mmio_access(vcpu, src_pa, dest, s, dir);
+        break;
+    case GPFN_LEGACY_IO:
+        legacy_io_access(vcpu, src_pa, dest, s, dir);
+        break;
+    case GPFN_IOSAPIC:
     default:
+        panic("Bad I/O access\n");
         break;
     }
     return;
 }
 
-static inline void mmio_read(VCPU *vcpu, u64 src_pa, void *dest, size_t s, int ma)
-{
-    struct virutal_platform_def *v_plat;
-    struct mmio_list    *mio;
-    
-    v_plat = vmx_vcpu_get_plat(vcpu);
-    mio = lookup_mmio(src_pa, v_plat->mmio);
-    if ( mio == NULL ) 
-        panic ("Wrong address for MMIO\n");
-    
-    switch (mio->iot) {
-    case PIB_MMIO:
-        pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
-        break;
-    case VGA_BUFF:
-    case CHIPSET_IO:
-    case LOW_MMIO:
-    case LEGACY_IO:
-    case IO_SAPIC:
-    default:
-        break;
-    }
-    return;
-}
-
 /*
  * Read or write data in guest virtual address mode.
  */
- 
+/*
 void
-memwrite_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s)
+memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
 {
     uint64_t pa;
 
@@ -108,7 +243,7 @@
 
 
 void
-memwrite_p(VCPU *vcpu, void *src, void *dest, size_t s)
+memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
 {
     uint64_t pa = (uint64_t)dest;
     int    ma;
@@ -116,9 +251,9 @@
     if ( pa & (1UL <<63) ) {
         // UC
         ma = 4;
-        pa <<=1; 
+        pa <<=1;
         pa >>=1;
-    } 
+    }
     else {
         // WBL
         ma = 0;     // using WB for WBL
@@ -127,7 +262,7 @@
 }
 
 void
-memread_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s)
+memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
 {
     uint64_t pa;
 
@@ -135,12 +270,12 @@
         panic("Normal memory write shouldn't go to this point!");
     pa = PPN_2_PA(vtlb->ppn);
     pa += POFFSET((u64)src, vtlb->ps);
-    
+
     mmio_read(vcpu, pa, dest, s, vtlb->ma);
 }
 
 void
-memread_p(VCPU *vcpu, void *src, void *dest, size_t s)
+memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
 {
     uint64_t pa = (uint64_t)src;
     int    ma;
@@ -148,19 +283,16 @@
     if ( pa & (1UL <<63) ) {
         // UC
         ma = 4;
-        pa <<=1; 
+        pa <<=1;
         pa >>=1;
-    } 
+    }
     else {
         // WBL
         ma = 0;     // using WB for WBL
     }
     mmio_read(vcpu, pa, dest, s, ma);
 }
-
-#define        PIB_LOW_HALF(ofst)      !(ofst&(1<<20))
-#define PIB_OFST_INTA           0x1E0000
-#define PIB_OFST_XTP            0x1E0008
+*/
 
 
 /*
@@ -182,23 +314,22 @@
         panic ("Inject guest PMI!\n");
         break;
     case 4:     // NMI
-        vmx_vcpu_pend_interrupt (vcpu, 2);     
+        vmx_vcpu_pend_interrupt (vcpu, 2);
         break;
     case 5:     // INIT
         // TODO -- inject guest INIT
         panic ("Inject guest INIT!\n");
         break;
     case 7:     // ExtINT
-        vmx_vcpu_pend_interrupt (vcpu, 0);     
-        break;
-        
+        vmx_vcpu_pend_interrupt (vcpu, 0);
+        break;
     case 1:
     case 3:
     case 6:
     default:
         panic ("Deliver reserved IPI!\n");
         break;
-    }   
+    }
 }
 
 /*
@@ -209,7 +340,6 @@
        int   i;
        VCPU  *vcpu;
        LID       lid;
-       
        for (i=0; i<MAX_VIRT_CPUS; i++) {
                vcpu = d->vcpu[i];
                lid.val = VPD_CR(vcpu, lid);
@@ -226,7 +356,7 @@
 static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value)
 {
     VCPU   *target_cpu;
-    
+ 
     target_cpu = lid_2_vcpu(vcpu->domain, 
                                ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid);
     if ( target_cpu == NULL ) panic("Unknown IPI cpu\n");
@@ -243,83 +373,89 @@
     }
 }
 
-void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
-{
-    
-    switch (pib_off) {
-    case PIB_OFST_INTA:
-        panic("Undefined write on PIB INTA\n");
-        break;
-    case PIB_OFST_XTP:
-        if ( s == 1 && ma == 4 /* UC */) {
-            vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src;
-        }
-        else {
-            panic("Undefined write on PIB XTP\n");
-        }
-        break;
-    default:
-        if ( PIB_LOW_HALF(pib_off) ) {   // lower half
-            if ( s != 8 || ma != 0x4 /* UC */ ) {
-                panic("Undefined IPI-LHF write!\n");
-            }
-            else {
-                write_ipi(vcpu, pib_off, *(uint64_t *)src);
-                // TODO for SM-VP
-            }
-        }
-        else {      // upper half
-            printf("IPI-UHF write %lx\n",pib_off);
-            panic("Not support yet for SM-VP\n");
-        }
-        break;
-    }
-}
-
-void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma)
-{
-    switch (pib_off) {
-    case PIB_OFST_INTA:
-        // todo --- emit on processor system bus.
-        if ( s == 1 && ma == 4) { // 1 byte load
-            // TODO: INTA read from IOSAPIC
-        }
-        else {
-            panic("Undefined read on PIB INTA\n");
-        }
-        break;
-    case PIB_OFST_XTP:
-        if ( s == 1 && ma == 4) {
-            *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp;
-        }
-        else {
-            panic("Undefined read on PIB XTP\n");
-        }
-        break;
-    default:
-        if ( PIB_LOW_HALF(pib_off) ) {   // lower half
-            if ( s != 8 || ma != 4 ) {
-                panic("Undefined IPI-LHF read!\n");
-            }
-            else {
-#ifdef  IPI_DEBUG
-                printf("IPI-LHF read %lx\n",pib_off);
-#endif
-                *(uint64_t *)dest = 0;  // TODO for SM-VP
-            }
-        }
-        else {      // upper half
-            if ( s != 1 || ma != 4 ) {
-                panic("Undefined PIB-UHF read!\n");
-            }
-            else {
-#ifdef  IPI_DEBUG
-                printf("IPI-UHF read %lx\n",pib_off);
-#endif
-                *(uint8_t *)dest = 0;   // TODO for SM-VP
-            }
-        }
-        break;
-    }
-}
-
+
+/*
+ *  dir: 1 = read, 0 = write
+ *  inst_type: 0 = integer, 1 = floating point
+ */
+extern IA64_BUNDLE __vmx_get_domain_bundle(u64 iip);
+
+
+void emulate_io_inst(VCPU *vcpu, u64 padr, u64 ma)
+{
+    REGS *regs;
+    IA64_BUNDLE bundle;
+    int slot, dir, inst_type=0;
+    size_t size;
+    u64 data, value, slot1a, slot1b;
+    INST64 inst;
+    regs=vcpu_regs(vcpu);
+    bundle = __vmx_get_domain_bundle(regs->cr_iip);
+    slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+    if (!slot) inst.inst = bundle.slot0;
+    else if (slot == 1){
+        slot1a=bundle.slot1a;
+        slot1b=bundle.slot1b;
+        inst.inst =slot1a + (slot1b<<18);
+    }
+    else if (slot == 2) inst.inst = bundle.slot2;
+
+    if(inst.M1.major==4&&inst.M1.m==0&&inst.M1.x==0){
+        inst_type=0;  // integer
+        size=(inst.M1.x6&0x3);
+        if((inst.M1.x6>>2)>0xb){      // write
+            vmx_vcpu_get_gr(vcpu,inst.M4.r2,&data);
+            dir=IOREQ_WRITE;     //write
+        }else if((inst.M1.x6>>2)<0xb){   //  read
+            vmx_vcpu_get_gr(vcpu,inst.M1.r1,&value);
+            dir=IOREQ_READ;
+        }else{
+            printf("This memory access instruction can't be emulated one : %lx\n",inst.inst);
+            while(1);
+        }
+    }else if(inst.M6.major==6&&inst.M6.m==0&&inst.M6.x==0&&inst.M6.x6==3){
+        inst_type=1;  //fp
+        dir=IOREQ_READ;
+        size=3;     //ldfd
+    }else{
+        printf("This memory access instruction can't be emulated two: %lx\n ",inst.inst);
+        while(1);
+    }
+
+    if(dir==IOREQ_WRITE){
+        mmio_access(vcpu, padr, &data, size, ma, dir);
+    }else{
+        mmio_access(vcpu, padr, &data, size, ma, dir);
+        if(size==0)
+            data = (value & 0xffffffffffffff00U) | (data & 0xffU);
+        else if(size==1)
+            data = (value & 0xffffffffffff0000U) | (data & 0xffffU);
+        else if(size==2)
+            data = (value & 0xffffffff00000000U) | (data & 0xffffffffU);
+
+        if(inst_type==0){       //gp
+            vmx_vcpu_set_gr(vcpu,inst.M1.r1,data,0);
+        }else{
+            panic("Don't support ldfd now !");
+/*            switch(inst.M6.f1){
+
+            case 6:
+                regs->f6=(struct ia64_fpreg)data;
+            case 7:
+                regs->f7=(struct ia64_fpreg)data;
+            case 8:
+                regs->f8=(struct ia64_fpreg)data;
+            case 9:
+                regs->f9=(struct ia64_fpreg)data;
+            case 10:
+                regs->f10=(struct ia64_fpreg)data;
+            case 11:
+                regs->f11=(struct ia64_fpreg)data;
+            default :
+                ia64_ldfs(inst.M6.f1,&data);
+            }
+*/
+        }
+    }
+    vmx_vcpu_increment_iip(vcpu);
+}
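A condensed reading of the decode above: for integer M-unit accesses (major opcode 4, m = 0, x = 0) the low two bits of x6 give log2 of the access size and x6 >> 2 separates loads (< 0xb) from stores (> 0xb); the only floating-point form handled is ldfd. On a read, the fetched bytes are merged into the previous register value, e.g. for a 4-byte load:

    /* size == 2: keep the upper half of the old register value */
    data = (value & 0xffffffff00000000UL) | (data & 0xffffffffUL);
    vmx_vcpu_set_gr(vcpu, inst.M1.r1, data, 0);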
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_minstate.h
--- a/xen/arch/ia64/vmx_minstate.h      Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_minstate.h      Fri Jul 15 13:39:50 2005
@@ -148,10 +148,14 @@
     mov r20=r1;         /* A */                         \
     mov r26=ar.unat;        /* M */                         \
     mov r29=cr.ipsr;        /* M */                         \
+    mov r18=cr.isr;         \
     COVER;              /* B;; (or nothing) */                  \
     ;;                                          \
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+    tbit.z p6,p0=r29,IA64_PSR_VM_BIT;       \
+    tbit.nz.or p6,p0 = r18,39; \
+    ;;        \
 (p6) br.sptk.few vmx_panic;        \
+    tbit.z p0,p15=r29,IA64_PSR_I_BIT;   \
     mov r1=r16;                     \
 /*    mov r21=r16;     */              \
     /* switch from user to kernel RBS: */                           \
diff -r 35b74976f598 -r a83ac0806d6b xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h   Fri Jul 15 12:52:02 2005
+++ b/xen/include/asm-ia64/vmx_vcpu.h   Fri Jul 15 13:39:50 2005
@@ -53,7 +53,7 @@
 
 #define VMM_RR_SHIFT    20
 #define VMM_RR_MASK     ((1UL<<VMM_RR_SHIFT)-1)
-#define VRID_2_MRID(vcpu,rid)  ((rid) & VMM_RR_MASK) | \
+//#define VRID_2_MRID(vcpu,rid)  ((rid) & VMM_RR_MASK) | \
                 ((vcpu->domain->domain_id) << VMM_RR_SHIFT)
 extern u64 indirect_reg_igfld_MASK ( int type, int index, u64 value);
 extern u64 cr_igfld_mask (int index, u64 value);
@@ -69,7 +69,7 @@
 extern IA64FAULT vmx_vcpu_cover(VCPU *vcpu);
 extern thash_cb_t *vmx_vcpu_get_vtlb(VCPU *vcpu);
 extern thash_cb_t *vmx_vcpu_get_vhpt(VCPU *vcpu);
-ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
+extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
 extern IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val);
 extern IA64FAULT vmx_vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval);
 extern IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval);
@@ -112,10 +112,10 @@
 extern void vmx_inject_vhpi(VCPU *vcpu, u8 vec);
 extern void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector);
 extern struct virutal_platform_def *vmx_vcpu_get_plat(VCPU *vcpu);
-extern void memread_p(VCPU *vcpu, void *src, void *dest, size_t s);
-extern void memread_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s);
-extern void memwrite_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s);
-extern void memwrite_p(VCPU *vcpu, void *src, void *dest, size_t s);
+extern void memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s);
+extern void memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s);
+extern void memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s);
+extern void memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s);
 
 
 /**************************************************************************
@@ -401,14 +401,8 @@
     VPD_CR(vcpu,lid)=val;
     return IA64_NO_FAULT;
 }
-static inline
-IA64FAULT
-vmx_vcpu_set_tpr(VCPU *vcpu, u64 val)
-{
-    VPD_CR(vcpu,tpr)=val;
-    //TODO
-    return IA64_NO_FAULT;
-}
+extern IA64FAULT vmx_vcpu_set_tpr(VCPU *vcpu, u64 val);
+
 static inline
 IA64FAULT
 vmx_vcpu_set_eoi(VCPU *vcpu, u64 val)
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vmx_ivt.S
--- a/xen/arch/ia64/vmx_ivt.S   Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vmx_ivt.S   Fri Jul 15 13:39:50 2005
@@ -347,11 +347,16 @@
        mov r31=pr
     mov r19=11
     mov r30=cr.iim
-    mov r29=0x1100
-    ;;
-    cmp4.eq  p6,p7=r29,r30
+    movl r29=0x1100
+    ;;
+    cmp.eq p6,p7=r30,r0
+    (p6) br.sptk vmx_fault_11
+    ;;
+    cmp.eq  p6,p7=r29,r30
     (p6) br.dptk.few vmx_hypercall_dispatch
     (p7) br.sptk.many vmx_dispatch_break_fault
+    ;;
+    VMX_FAULT(11);
 END(vmx_break_fault)
 
        .org vmx_ia64_ivt+0x3000
@@ -363,6 +368,8 @@
     mov r29=cr.ipsr
     ;;
     tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+    tbit.z p0,p15=r29,IA64_PSR_I_BIT
+    ;;
 (p7) br.sptk vmx_dispatch_interrupt
     ;;
        mov r27=ar.rsc                  /* M */
@@ -447,7 +454,7 @@
     ;;
     srlz.i
        ;;
-    ssm psr.i
+    (p15) ssm psr.i
        adds r3=8,r2            // set up second base pointer for SAVE_REST
        srlz.i                  // ensure everybody knows psr.ic is back on
        ;;
@@ -508,9 +515,12 @@
        .org vmx_ia64_ivt+0x3400
 
/////////////////////////////////////////////////////////////////////////////////////////
 // 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(vmx_virtual_exirq)
        VMX_DBG_FAULT(13)
-       VMX_FAULT(13)
-
+       mov r31=pr
+       mov r19=13
+       br.sptk vmx_dispatch_vexirq
+END(vmx_virtual_exirq)
 
        .org vmx_ia64_ivt+0x3800
 
/////////////////////////////////////////////////////////////////////////////////////////
@@ -876,7 +886,7 @@
     ;;
     srlz.i                  // guarantee that interruption collection is on
     ;;
-    ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     adds r3=16,r2                // set up second base pointer
     ;;
     VMX_SAVE_REST
@@ -887,8 +897,6 @@
 END(vmx_dispatch_reflection)
 
 ENTRY(vmx_dispatch_virtualization_fault)
-    cmp.eq pEml,pNonEml=r0,r0       /* force pEml =1, save r4 ~ r7 */
-    ;;
     VMX_SAVE_MIN_WITH_COVER_R19
     ;;
    alloc r14=ar.pfs,0,0,3,0        // now it's safe (must be first in insn group!)
@@ -899,7 +907,7 @@
     ;;
     srlz.i                  // guarantee that interruption collection is on
     ;;
-    ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     adds r3=16,r2                // set up second base pointer
     ;;
     VMX_SAVE_REST
@@ -910,6 +918,24 @@
 END(vmx_dispatch_virtualization_fault)
 
 
+ENTRY(vmx_dispatch_vexirq)
+    VMX_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,1,0
+    mov out0=r13
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15) ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    movl r14=ia64_leave_hypervisor
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vexirq
+END(vmx_dispatch_vexirq)
 
 ENTRY(vmx_dispatch_tlb_miss)
     VMX_SAVE_MIN_WITH_COVER_R19
@@ -922,7 +948,7 @@
     ;;
     srlz.i                  // guarantee that interruption collection is on
     ;;
-    ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     adds r3=16,r2                // set up second base pointer
     ;;
     VMX_SAVE_REST
@@ -947,7 +973,7 @@
     ;;
     srlz.i                  // guarantee that interruption collection is on
     ;;
-    ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     adds r3=16,r2                // set up second base pointer
     ;;
     VMX_SAVE_REST
@@ -965,7 +991,7 @@
     ;;
     srlz.i                  // guarantee that interruption collection is on
     ;;
-    ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     adds r3=16,r2                // set up second base pointer
     ;;
     VMX_SAVE_REST
@@ -987,8 +1013,6 @@
 
 
 ENTRY(vmx_dispatch_interrupt)
-    cmp.ne pEml,pNonEml=r0,r0       /* force pNonEml =1, don't save r4 ~ r7 */
-    ;;
        VMX_SAVE_MIN_WITH_COVER_R19     // uses r31; defines r2 and r3
        ;;
        alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
@@ -999,7 +1023,7 @@
        ;;
     srlz.i
     ;;
-    ssm psr.i
+    (p15) ssm psr.i
        adds r3=16,r2           // set up second base pointer for SAVE_REST
        ;;
        VMX_SAVE_REST
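
The recurring change in vmx_ivt.S is a predicate convention: instead of
unconditionally re-enabling interrupts with "ssm psr.i" on the way out of
each dispatcher, the entry path records the interrupted context's PSR.I
bit in p15 ("tbit.z p0,p15=r29,IA64_PSR_I_BIT", with r29 holding cr.ipsr),
and every later "ssm psr.i" is predicated on it.  Condensed from the hunks
above:

    tbit.z p0,p15=r29,IA64_PSR_I_BIT   // p15 <- 1 iff PSR.I was set
    ...
    ssm psr.ic                         // interruption collection back on
    ;;
    srlz.i
    ;;
    (p15) ssm psr.i                    // re-enable only if previously enabled

This keeps a dispatcher from turning interrupts on in a context that had
them masked.
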
diff -r 35b74976f598 -r a83ac0806d6b xen/arch/ia64/vhpt.c
--- a/xen/arch/ia64/vhpt.c      Fri Jul 15 12:52:02 2005
+++ b/xen/arch/ia64/vhpt.c      Fri Jul 15 13:39:50 2005
@@ -90,9 +90,10 @@
 void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps)
 {
        unsigned long mask = (1L << logps) - 1;
+       extern long running_on_sim;
        int i;
 
-       if (logps-PAGE_SHIFT > 10) {
+       if (logps-PAGE_SHIFT > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                printf("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
                while(1);
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,41 @@
+
+#ifndef _X86_64_BOOTSETUP_H
+#define _X86_64_BOOTSETUP_H 1
+
+extern char x86_boot_params[2048];
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+#define PARAM  ((unsigned char *)x86_boot_params)
+#define SCREEN_INFO (*(struct screen_info *) (PARAM+0))
+#define EXT_MEM_K (*(unsigned short *) (PARAM+2))
+#define ALT_MEM_K (*(unsigned int *) (PARAM+0x1e0))
+#define E820_MAP_NR (*(char*) (PARAM+E820NR))
+#define E820_MAP    ((struct e820entry *) (PARAM+E820MAP))
+#define APM_BIOS_INFO (*(struct apm_bios_info *) (PARAM+0x40))
+#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
+#define SYS_DESC_TABLE (*(struct sys_desc_table_struct*)(PARAM+0xa0))
+#define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2))
+#define RAMDISK_FLAGS (*(unsigned short *) (PARAM+0x1F8))
+#define SAVED_VIDEO_MODE (*(unsigned short *) (PARAM+0x1FA))
+#define ORIG_ROOT_DEV (*(unsigned short *) (PARAM+0x1FC))
+#define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF))
+#define LOADER_TYPE (*(unsigned char *) (PARAM+0x210))
+#define KERNEL_START (*(unsigned int *) (PARAM+0x214))
+
+#define INITRD_START (__pa(xen_start_info.mod_start))
+#define INITRD_SIZE (xen_start_info.mod_len)
+#define EDID_INFO   (*(struct edid_info *) (PARAM+0x440))
+
+#define EDD_NR     (*(unsigned char *) (PARAM+EDDNR))
+#define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF))
+#define EDD_MBR_SIGNATURE ((unsigned int *) (PARAM+EDD_MBR_SIG_BUF))
+#define EDD_BUF     ((struct edd_info *) (PARAM+EDDBUF))
+#define COMMAND_LINE saved_command_line
+
+#define RAMDISK_IMAGE_START_MASK       0x07FF
+#define RAMDISK_PROMPT_FLAG            0x8000
+#define RAMDISK_LOAD_FLAG              0x4000  
+
+#endif
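
All of the accessors above are fixed byte offsets into the x86_boot_params
blob.  A hypothetical usage sketch (the function is invented; the macro
names are the real ones defined above):

    /* Illustrative only: reading a few boot parameters via the macros. */
    static void report_boot_params(void)
    {
        printk("loader type %u, alt mem %u KB\n",
               (unsigned int)LOADER_TYPE, (unsigned int)ALT_MEM_K);
        printk("initrd at %lx, %lu bytes\n",
               (unsigned long)INITRD_START, (unsigned long)INITRD_SIZE);
    }
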
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/lib/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/lib/Makefile     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,15 @@
+
+.S.o:
+       $(CC) $(AFLAGS) -c $< -o $*.o
+
+L_TARGET = lib.a
+
+obj-y = checksum.o old-checksum.o delay.o \
+       usercopy.o getuser.o \
+       memcpy.o strstr.o xen_proc.o
+
+obj-$(CONFIG_X86_USE_3DNOW) += mmx.o
+obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+obj-$(CONFIG_DEBUG_IOVIRT)  += iodebug.o
+
+include $(TOPDIR)/Rules.make
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,250 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+#include <linux/compiler.h>
+
+asmlinkage int printk(const char * fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int slock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+       unsigned int break_lock;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT    /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->slock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "jns 3f\n" \
+       "2:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0,%0\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       "3:\n\t"
+
+#define spin_lock_string_flags \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "jns 4f\n\t" \
+       "2:\t" \
+       "testl $0x200, %1\n\t" \
+       "jz 3f\n\t" \
+       "#sti\n\t" \
+       "3:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0, %0\n\t" \
+       "jle 3b\n\t" \
+       "#cli\n\t" \
+       "jmp 1b\n" \
+       "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+       "movb $1,%0" \
+               :"=m" (lock->slock) : : "memory"
+
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(lock->magic != SPINLOCK_MAGIC);
+       BUG_ON(!spin_is_locked(lock));
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#else
+
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "=m" (lock->slock) \
+               :"0" (oldval) : "memory"
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(lock->magic != SPINLOCK_MAGIC);
+       BUG_ON(!spin_is_locked(lock));
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#endif
+
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1"
+               :"=q" (oldval), "=m" (lock->slock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+               printk("eip: %p\n", __builtin_return_address(0));
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->slock) : : "memory");
+}
+
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+               printk("eip: %p\n", __builtin_return_address(0));
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string_flags
+               :"=m" (lock->slock) : "r" (flags) : "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+       unsigned int break_lock;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC   0xdeaf1eed
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT      /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define read_can_lock(x) ((int)(x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores.  See
+ * semaphore.h for details.  -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void _raw_read_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+       __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void _raw_write_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+       __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       atomic_dec(count);
+       if (atomic_read(count) >= 0)
+               return 1;
+       atomic_inc(count);
+       return 0;
+}
+
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+               return 1;
+       atomic_add(RW_LOCK_BIAS, count);
+       return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */
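
For readers unfamiliar with the i386 byte-lock protocol above: slock starts
at 1 (unlocked), a "lock; decb" takes it to 0 on acquisition, contenders
drive it negative and spin until it turns positive again, and unlock is a
plain store of 1.  A rough C model of the trylock/unlock pair -- the real
implementation is the inline asm above; this is only a sketch using a GCC
atomic builtin:

    /* Model only: xchgb in a 0; an old value of 1 means the lock was free. */
    static int model_trylock(volatile signed char *slock)
    {
        signed char old = __sync_lock_test_and_set(slock, 0);
        return old > 0;
    }

    static void model_unlock(volatile signed char *slock)
    {
        *slock = 1;     /* a plain store releases the lock on x86 */
    }

The rwlocks in the same header use the usual biased-counter scheme: readers
subtract 1, writers subtract RW_LOCK_BIAS, and a negative result signals
contention.
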
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,196 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux Kernel Configuration"
+
+config XEN
+       bool
+       default y
+       help
+         This is the Linux Xen port.
+
+config ARCH_XEN
+       bool
+       default y
+
+
+config NO_IDLE_HZ
+       bool
+       default y
+
+
+menu "XEN"
+
+config XEN_PRIVILEGED_GUEST
+       bool "Privileged Guest (domain 0)"
+       default n
+       select XEN_PHYSDEV_ACCESS
+       help
+         Support for privileged operation (domain 0)
+
+config XEN_PHYSDEV_ACCESS
+       bool "Physical device access"
+       default XEN_PRIVILEGED_GUEST
+       help
+         Assume access is available to physical hardware devices
+         (e.g., hard drives, network cards). This allows you to configure
+         such devices and also includes some low-level support that is
+         otherwise not compiled into the kernel.
+
+config XEN_BLKDEV_BACKEND
+       bool "Block-device backend driver"
+       depends on XEN_PHYSDEV_ACCESS
+       default y
+       help
+         The block-device backend driver allows the kernel to export its
+         block devices to other guests via a high-performance shared-memory
+         interface.
+
+config XEN_BLKDEV_TAP_BE
+        bool "Block Tap support for backend driver (DANGEROUS)"
+        depends on XEN_BLKDEV_BACKEND
+        default n
+        help
+          If you intend to use the block tap driver, the backend domain will
+          not know the domain id of the real frontend, and so will not be able
+          to map its data pages.  This modifies the backend to attempt to map
+          from both the tap domain and the real frontend.  This presents a
+          security risk, and so should ONLY be used for development
+          with the blktap.  This option will be removed as the block drivers are
+          modified to use grant tables.
+
+config XEN_BLKDEV_GRANT
+        bool "Grant table substrate for block drivers"
+        depends on !XEN_BLKDEV_TAP_BE
+        default y
+        help
+          This introduces the use of grant tables as a data exchange mechanism
+          between the frontend and backend block drivers. This currently
+          conflicts with the block tap.
+
+config XEN_NETDEV_BACKEND
+       bool "Network-device backend driver"
+       depends on XEN_PHYSDEV_ACCESS
+       default y
+       help
+         The network-device backend driver allows the kernel to export its
+         network devices to other guests via a high-performance shared-memory
+         interface.
+
+config XEN_BLKDEV_FRONTEND
+       bool "Block-device frontend driver"
+       default y
+       help
+         The block-device frontend driver allows the kernel to access block
+         devices mounted within another guest OS. Unless you are building a
+         dedicated device-driver domain or a master control domain
+         (domain 0), you almost certainly want to say Y here.
+
+config XEN_NETDEV_FRONTEND
+       bool "Network-device frontend driver"
+       default y
+       help
+         The network-device frontend driver allows the kernel to access
+         network interfaces within another guest OS. Unless you are building a
+         dedicated device-driver domain or a master control domain
+         (domain 0), you almost certainly want to say Y here.
+
+config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
+       bool "Pipelined transmitter (DANGEROUS)"
+       depends on XEN_NETDEV_FRONTEND
+       default n
+       help
+         The driver will assume that the backend is pipelining packets for
+         transmission: whenever packets are pending in the remote backend,
+         the driver will not send asynchronous notifications when it queues
+         additional packets for transmission.
+         If the backend is a dumb domain, such as a transparent Ethernet
+         bridge with no local IP interface, it is safe to say Y here to get
+         slightly lower network overhead.
+         If the backend has a local IP interface; or may be doing smart things
+         like reassembling packets to perform firewall filtering; or if you
+         are unsure; or if you experience network hangs when this option is
+         enabled; then you must say N here.
+
+config XEN_BLKDEV_TAP
+       bool "Block device tap driver"
+       default n
+       help
+         This driver allows a VM to interact on block device channels
+         to other VMs.  Block messages may be passed through or redirected
+         to a character device, allowing device prototyping in application
+         space.  Odds are that you want to say N here.
+
+config XEN_SHADOW_MODE
+       bool "Fake shadow mode"
+       default n
+       help
+         Fakes out a shadow-mode kernel.
+
+
+config XEN_SCRUB_PAGES
+       bool "Scrub memory before freeing it to Xen"
+       default y
+       help
+         Erase memory contents before freeing it back to Xen's global
+         pool. This ensures that any secrets contained within that
+         memory (e.g., private keys) cannot be found by other guests that
+         may be running on the machine. Most people will want to say Y here.
+         If security is not a concern then you may increase performance by
+         saying N.
+
+choice
+       prompt "Processor Type"
+       default XEN_X86
+
+config XEN_X86
+       bool "X86"
+       help
+         Choose this option if your computer is a X86 architecture.
+
+config XEN_X86_64
+       bool "X86_64"
+       help
+         Choose this option if your computer is a X86_64 architecture.
+
+endchoice
+
+endmenu
+
+config HAVE_ARCH_DEV_ALLOC_SKB
+       bool
+       default y
+
+source "init/Kconfig"
+
+if XEN_X86
+source "arch/xen/i386/Kconfig"
+endif
+
+if XEN_X86_64
+source "arch/xen/x86_64/Kconfig"
+endif
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "arch/xen/Kconfig.drivers"
+
+if XEN_PRIVILEGED_GUEST
+menu "Power management options"
+source "drivers/acpi/Kconfig"
+endmenu
+endif
+
+source "fs/Kconfig"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,103 @@
+#
+# i386/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean", cleaning up for this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713  Artur Skawina <skawina@xxxxxxxxxxxxx>
+#           Added '-march' and '-mpreferred-stack-boundary' support
+#
+
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+LDFLAGS                := -m elf_i386
+LDFLAGS_vmlinux :=
+CHECK          := $(CHECK) -D__i386__=1
+
+CFLAGS += -m32
+AFLAGS += -m32
+
+CFLAGS += -pipe -msoft-float
+
+# prevent gcc from keeping the stack 16 byte aligned
+CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2,)
+
+align := $(cc-option-align)
+cflags-$(CONFIG_M386)          += -march=i386
+cflags-$(CONFIG_M486)          += -march=i486
+cflags-$(CONFIG_M586)          += -march=i586
+cflags-$(CONFIG_M586TSC)       += -march=i586
+cflags-$(CONFIG_M586MMX)       += $(call cc-option,-march=pentium-mmx,-march=i586)
+cflags-$(CONFIG_M686)          += -march=i686
+cflags-$(CONFIG_MPENTIUMII)    += -march=i686 $(call cc-option,-mtune=pentium2)
+cflags-$(CONFIG_MPENTIUMIII)   += -march=i686 $(call cc-option,-mtune=pentium3)
+cflags-$(CONFIG_MPENTIUMM)     += -march=i686 $(call cc-option,-mtune=pentium3)
+cflags-$(CONFIG_MPENTIUM4)     += -march=i686 $(call cc-option,-mtune=pentium4)
+cflags-$(CONFIG_MK6)           += -march=k6
+# Please note that patches that add -march=athlon-xp and friends are pointless.
+# They make zero difference whatsoever to performance at this time.
+cflags-$(CONFIG_MK7)           += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
+cflags-$(CONFIG_MK8)           += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
+cflags-$(CONFIG_MCRUSOE)       += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MEFFICEON)     += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MWINCHIPC6)    += $(call cc-option,-march=winchip-c6,-march=i586)
+cflags-$(CONFIG_MWINCHIP2)     += $(call cc-option,-march=winchip2,-march=i586)
+cflags-$(CONFIG_MWINCHIP3D)    += $(call cc-option,-march=winchip2,-march=i586)
+cflags-$(CONFIG_MCYRIXIII)     += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MVIAC3_2)      += $(call cc-option,-march=c3-2,-march=i686)
+
+# AMD Elan support
+cflags-$(CONFIG_X86_ELAN)      += -march=i486
+
+# -mregparm=3 works ok on gcc-3.0 and later
+#
+GCC_VERSION                    := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
+cflags-$(CONFIG_REGPARM)       += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
+
+# Disable unit-at-a-time mode, it makes gcc use a lot more stack
+# due to the lack of sharing of stacklots.
+CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
+
+CFLAGS += $(cflags-y)
+
+head-y := arch/xen/i386/kernel/head.o arch/xen/i386/kernel/init_task.o
+
+libs-y                                         += arch/i386/lib/
+core-y                                 += arch/xen/i386/kernel/ \
+                                          arch/xen/i386/mm/ \
+                                          arch/xen/i386/mach-default/ \
+                                          arch/i386/crypto/
+# \
+#                                         arch/xen/$(mcore-y)/
+drivers-$(CONFIG_MATH_EMULATION)       += arch/i386/math-emu/
+drivers-$(CONFIG_PCI)                  += arch/xen/i386/pci/
+# must be linked after kernel/
+drivers-$(CONFIG_OPROFILE)             += arch/i386/oprofile/
+drivers-$(CONFIG_PM)                   += arch/i386/power/
+
+# for clean
+obj-   += kernel/ mm/ pci/
+#obj-  += ../../i386/lib/ ../../i386/mm/ 
+#../../i386/$(mcore-y)/
+#obj-  += ../../i386/pci/ ../../i386/oprofile/ ../../i386/power/
+
+xenflags-y += -Iinclude/asm-xen/asm-i386/mach-xen \
+               -Iinclude/asm-i386/mach-default
+CFLAGS += $(xenflags-y)
+AFLAGS += $(xenflags-y)
+
+prepare: include/asm-$(XENARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(XENARCH)/asm_offsets.h
+
+arch/$(XENARCH)/kernel/asm-offsets.s: include/asm include/.asm-ignore \
+       include/linux/version.h include/config/MARKER
+
+include/asm-$(XENARCH)/asm_offsets.h: arch/$(XENARCH)/kernel/asm-offsets.s
+       $(call filechk,gen-asm-offsets)
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/include/asm-xen/vga.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/vga.h     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,42 @@
+/*
+ *     Access to VGA videoram
+ *
+ *     (c) 1998 Martin Mares <mj@xxxxxx>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+#include <asm/io.h>
+
+extern unsigned char *vgacon_mmap;
+
+static unsigned long VGA_MAP_MEM(unsigned long x)
+{
+    if( vgacon_mmap == NULL )
+    {
+        /* This is our first time in this function. This whole thing
+           is a rather grim hack. We know we're going to get asked 
+           to map a 32KB region between 0xb0000 and 0xb8000 because
+           that's what VGAs are. We used the boot time permanent 
+           fixed map region, and map it to machine pages.
+        */
+        if( x != 0xb8000 )
+            panic("Argghh! VGA Console is weird. 1:%08lx\n",x);
+
+        vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 );
+        return (unsigned long) (vgacon_mmap+x-0xa0000);
+    }
+    else
+    {
+        if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */
+            panic("Argghh! VGA Console is weird. 2:%08lx\n",x);  
+       return (unsigned long) (vgacon_mmap+x-0xa0000);
+    }
+    return 0;
+}
+
+static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); }
+static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); 
}
+
+#endif
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,18 @@
+#
+# Makefile for the linux kernel.
+#
+
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+CPPFLAGS_vmlinux.lds += -U$(XENARCH)
+
+$(obj)/vmlinux.lds.S:
+       @ln -fsn $(srctree)/arch/$(XENARCH)/kernel/vmlinux.lds.S $@
+
+extra-y += vmlinux.lds
+
+obj-y   := ctrl_if.o evtchn.o fixup.o reboot.o gnttab.o devmem.o
+
+obj-$(CONFIG_PROC_FS) += xen_proc.o
+obj-$(CONFIG_NET)     += skbuff.o
+obj-$(CONFIG_SMP)     += smp.o
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,474 @@
+/*
+ * include/asm-x86_64/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_X86_64_PROCESSOR_H
+#define __ASM_X86_64_PROCESSOR_H
+
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/msr.h>
+#include <asm/current.h>
+#include <asm/system.h>
+#include <asm/mmsegment.h>
+#include <asm/percpu.h>
+#include <linux/personality.h>
+
+#define TF_MASK                0x00000100
+#define IF_MASK                0x00000200
+#define IOPL_MASK      0x00003000
+#define NT_MASK                0x00004000
+#define VM_MASK                0x00020000
+#define AC_MASK                0x00040000
+#define VIF_MASK       0x00080000      /* virtual interrupt flag */
+#define VIP_MASK       0x00100000      /* virtual interrupt pending */
+#define ID_MASK                0x00200000
+
+#define desc_empty(desc) \
+               (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+               (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ */
+
+struct cpuinfo_x86 {
+       __u8    x86;            /* CPU family */
+       __u8    x86_vendor;     /* CPU vendor */
+       __u8    x86_model;
+       __u8    x86_mask;
+       int     cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
+       __u32   x86_capability[NCAPINTS];
+       char    x86_vendor_id[16];
+       char    x86_model_id[64];
+       int     x86_cache_size;  /* in KB */
+       int     x86_clflush_size;
+       int     x86_cache_alignment;
+       int     x86_tlbsize;    /* number of 4K pages in DTLB/ITLB combined (in pages) */
+        __u8    x86_virt_bits, x86_phys_bits;
+       __u8    x86_num_cores;
+       __u8    x86_apicid;
+        __u32   x86_power;     
+       __u32   x86_cpuid_level;        /* Max CPUID function supported */
+       unsigned long loops_per_jiffy;
+} ____cacheline_aligned;
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_NUM 8
+#define X86_VENDOR_UNKNOWN 0xff
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF  0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF  0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF  0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF  0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF  0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF  0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF  0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL        0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT  0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF  0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM  0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC  0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID  0x00200000 /* CPUID detection flag */
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME            0x0001  /* enable vm86 extensions */
+#define X86_CR4_PVI            0x0002  /* virtual interrupts flag enable */
+#define X86_CR4_TSD            0x0004  /* disable time stamp at ipl 3 */
+#define X86_CR4_DE             0x0008  /* enable debugging extensions */
+#define X86_CR4_PSE            0x0010  /* enable page size extensions */
+#define X86_CR4_PAE            0x0020  /* enable physical address extensions */
+#define X86_CR4_MCE            0x0040  /* Machine check enable */
+#define X86_CR4_PGE            0x0080  /* enable global pages */
+#define X86_CR4_PCE            0x0100  /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR         0x0200  /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT     0x0400  /* enable unmasked SSE exceptions */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+       mmu_cr4_features |= mask;
+       switch (mask) {
+       case X86_CR4_OSFXSR:
+       case X86_CR4_OSXMMEXCPT:
+               break;
+       default:
+               do {
+                       const char *msg = "Xen unsupported cr4 update\n";
+                       (void)HYPERVISOR_console_io(
+                               CONSOLEIO_write, __builtin_strlen(msg),
+                               (char *)msg);
+                       BUG();
+               } while (0);
+       }
+}
+
+#define load_cr3(pgdir) do {                           \
+       xen_pt_switch(__pa(pgdir));                     \
+       per_cpu(cur_pgd, smp_processor_id()) = pgdir;   \
+} while (/* CONSTCOND */0)
+
+/*
+ * Bus types
+ */
+#define MCA_bus 0
+#define MCA_bus__is_a_macro
+
+
+/*
+ * User space process size. 47bits.
+ */
+#define TASK_SIZE      (0x800000000000UL)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+#define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3)
+#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3) 
+#define TASK_UNMAPPED_BASE     \
+       (test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)  
+
+/*
+ * Size of io_bitmap.
+ */
+#define IO_BITMAP_BITS  65536
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fxsave_struct {
+       u16     cwd;
+       u16     swd;
+       u16     twd;
+       u16     fop;
+       u64     rip;
+       u64     rdp; 
+       u32     mxcsr;
+       u32     mxcsr_mask;
+       u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+       u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 128 bytes */
+       u32     padding[24];
+} __attribute__ ((aligned (16)));
+
+union i387_union {
+       struct i387_fxsave_struct       fxsave;
+};
+
+struct tss_struct {
+       u32 reserved1;
+       u64 rsp0;       
+       u64 rsp1;
+       u64 rsp2;
+       u64 reserved2;
+       u64 ist[7];
+       u32 reserved3;
+       u32 reserved4;
+       u16 reserved5;
+       u16 io_bitmap_base;
+       /*
+        * The extra 1 is there because the CPU will access an
+        * additional byte beyond the end of the IO permission
+        * bitmap. The extra byte must be all 1 bits, and must
+        * be within the limit. Thus we have:
+        *
+        * 128 bytes, the bitmap itself, for ports 0..0x3ff
+        * 8 bytes, for an extra "long" of ~0UL
+        */
+       unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+} __attribute__((packed)) ____cacheline_aligned;
+
+extern struct cpuinfo_x86 boot_cpu_data;
+DECLARE_PER_CPU(struct tss_struct,init_tss);
+DECLARE_PER_CPU(pgd_t *, cur_pgd);
+
+#define ARCH_MIN_TASKALIGN     16
+
+struct thread_struct {
+       unsigned long   rsp0;
+       unsigned long   rsp;
+       unsigned long   userrsp;        /* Copy from PDA */ 
+       unsigned long   fs;
+       unsigned long   gs;
+               unsigned int    io_pl;
+       unsigned short  es, ds, fsindex, gsindex;       
+/* Hardware debugging registers */
+       unsigned long   debugreg0;  
+       unsigned long   debugreg1;  
+       unsigned long   debugreg2;  
+       unsigned long   debugreg3;  
+       unsigned long   debugreg6;  
+       unsigned long   debugreg7;  
+/* fault info */
+       unsigned long   cr2, trap_no, error_code;
+/* floating point info */
+       union i387_union        i387  __attribute__((aligned(16)));
+/* IO permissions. the bitmap could be moved into the GDT, that would make
+   switch faster for a limited number of ioperm using tasks. -AK */
+       int             ioperm;
+       unsigned long   *io_bitmap_ptr;
+       unsigned io_bitmap_max;
+/* cached TLS descriptors. */
+       u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
+} __attribute__((aligned(16)));
+
+#define INIT_THREAD  {}
+
+#define INIT_MMAP \
+{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
+
+#define STACKFAULT_STACK 1
+#define DOUBLEFAULT_STACK 2 
+#define NMI_STACK 3 
+#define DEBUG_STACK 4 
+#define MCE_STACK 5
+#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+#define EXCEPTION_STACK_ORDER 0 
+
+#define start_thread(regs,new_rip,new_rsp) do { \
+       asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));     
 \
+       load_gs_index(0);                                                       
\
+       (regs)->rip = (new_rip);                                                
 \
+       (regs)->rsp = (new_rsp);                                                
 \
+       write_pda(oldrsp, (new_rsp));                                           
 \
+       (regs)->cs = __USER_CS;                                                 
 \
+       (regs)->ss = __USER_DS;                                                 
 \
+       (regs)->eflags = 0x200;                                                 
 \
+       set_fs(USER_DS);                                                        
 \
+} while(0) 
+
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+extern void prepare_to_copy(struct task_struct *tsk);
+
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? It will always be the scheduler or ret_from_fork.
+ */
+#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.rsp - 8))
+
+extern unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk) \
+       (((struct pt_regs *)(tsk->thread.rsp0 - sizeof(struct pt_regs)))->rip)
+#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+
+
+struct microcode_header {
+       unsigned int hdrver;
+       unsigned int rev;
+       unsigned int date;
+       unsigned int sig;
+       unsigned int cksum;
+       unsigned int ldrver;
+       unsigned int pf;
+       unsigned int datasize;
+       unsigned int totalsize;
+       unsigned int reserved[3];
+};
+
+struct microcode {
+       struct microcode_header hdr;
+       unsigned int bits[0];
+};
+
+typedef struct microcode microcode_t;
+typedef struct microcode_header microcode_header_t;
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+       unsigned int sig;
+       unsigned int pf;
+       unsigned int cksum;
+};
+
+struct extended_sigtable {
+       unsigned int count;
+       unsigned int cksum;
+       unsigned int reserved[3];
+       struct extended_signature sigs[0];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE      _IO('6',0)
+
+
+#define ASM_NOP1 K8_NOP1
+#define ASM_NOP2 K8_NOP2
+#define ASM_NOP3 K8_NOP3
+#define ASM_NOP4 K8_NOP4
+#define ASM_NOP5 K8_NOP5
+#define ASM_NOP6 K8_NOP6
+#define ASM_NOP7 K8_NOP7
+#define ASM_NOP8 K8_NOP8
+
+/* Opteron nops */
+#define K8_NOP1 ".byte 0x90\n"
+#define K8_NOP2        ".byte 0x66,0x90\n" 
+#define K8_NOP3        ".byte 0x66,0x66,0x90\n" 
+#define K8_NOP4        ".byte 0x66,0x66,0x66,0x90\n" 
+#define K8_NOP5        K8_NOP3 K8_NOP2 
+#define K8_NOP6        K8_NOP3 K8_NOP3
+#define K8_NOP7        K8_NOP4 K8_NOP3
+#define K8_NOP8        K8_NOP4 K8_NOP4
+
+#define ASM_NOP_MAX 8
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+extern inline void rep_nop(void)
+{
+       __asm__ __volatile__("rep;nop": : :"memory");
+}
+
+/* Stop speculative execution */
+extern inline void sync_core(void)
+{ 
+       int tmp;
+       asm volatile("cpuid" : "=a" (tmp) : "0" (1) : 
"ebx","ecx","edx","memory");
+} 
+
+#define cpu_has_fpu 1
+
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(void *x) 
+{ 
+       asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
+} 
+
+#define ARCH_HAS_PREFETCHW 1
+static inline void prefetchw(void *x) 
+{ 
+       alternative_input(ASM_NOP5,
+                         "prefetchw (%1)",
+                         X86_FEATURE_3DNOW,
+                         "r" (x));
+} 
+
+#define ARCH_HAS_SPINLOCK_PREFETCH 1
+
+#define spin_lock_prefetch(x)  prefetchw(x)
+
+#define cpu_relax()   rep_nop()
+
+/*
+ *      NSC/Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ *      NSC/Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+       outb((reg), 0x22); \
+       outb((data), 0x23); \
+} while (0)
+
+static inline void __monitor(const void *eax, unsigned long ecx,
+               unsigned long edx)
+{
+       /* "monitor %eax,%ecx,%edx;" */
+       asm volatile(
+               ".byte 0x0f,0x01,0xc8;"
+               : :"a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+       /* "mwait %eax,%ecx;" */
+       asm volatile(
+               ".byte 0x0f,0x01,0xc9;"
+               : :"a" (eax), "c" (ecx));
+}
+
+#define stack_current() \
+({                                                             \
+       struct thread_info *ti;                                 \
+       asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));  \
+       ti->task;                                       \
+})
+
+#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
+
+extern unsigned long boot_option_idle_override;
+/* Boot loader type from the setup header */
+extern int bootloader_type;
+
+#endif /* __ASM_X86_64_PROCESSOR_H */
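
One idiom worth noting in the header above: stack_current() recovers the
task pointer by masking rsp down to the base of the naturally aligned,
power-of-two-sized kernel stack, where the thread_info sits.  A rough C
equivalent, assuming CURRENT_MASK is ~(THREAD_SIZE - 1):

    /* Model of stack_current(): thread_info lives at the stack base. */
    static struct task_struct *model_stack_current(unsigned long rsp)
    {
        struct thread_info *ti =
            (struct thread_info *)(rsp & ~(THREAD_SIZE - 1));
        return ti->task;
    }
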
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,30 @@
+/*
+ *  arch/i386/mach-generic/io_ports.h
+ *
+ *  Machine specific IO port address definition for generic.
+ *  Written by Osamu Tomita <tomita@xxxxxxxxxxx>
+ */
+#ifndef _MACH_IO_PORTS_H
+#define _MACH_IO_PORTS_H
+
+/* i8253A PIT registers */
+#define PIT_MODE               0x43
+#define PIT_CH0                        0x40
+#define PIT_CH2                        0x42
+
+/* i8259A PIC registers */
+#define PIC_MASTER_CMD         0x20
+#define PIC_MASTER_IMR         0x21
+#define PIC_MASTER_ISR         PIC_MASTER_CMD
+#define PIC_MASTER_POLL                PIC_MASTER_ISR
+#define PIC_MASTER_OCW3                PIC_MASTER_ISR
+#define PIC_SLAVE_CMD          0xa0
+#define PIC_SLAVE_IMR          0xa1
+
+/* i8259A PIC related value */
+#define PIC_CASCADE_IR         2
+#define MASTER_ICW4_DEFAULT    0x01
+#define SLAVE_ICW4_DEFAULT     0x01
+#define PIC_ICW4_AEOI          2
+
+#endif /* !_MACH_IO_PORTS_H */
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,92 @@
+#
+# x86_64/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713  Artur Skawina <skawina@xxxxxxxxxxxxx>
+#           Added '-march' and '-mpreferred-stack-boundary' support
+# 20000913  Pavel Machek <pavel@xxxxxxx>
+#          Converted for x86_64 architecture
+# 20010105  Andi Kleen, add IA32 compiler.
+#           ....and later removed it again....
+# 20050205  Jun Nakajima <jun.nakajima@xxxxxxxxx> 
+#           Modified for Xen
+#
+# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $
+
+#
+# early bootup linking needs 32bit. You can either use real 32bit tools
+# here or 64bit tools in 32bit mode.
+#
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+IA32_CC := $(CC) $(CPPFLAGS) -m32 -O2 -fomit-frame-pointer
+IA32_LD := $(LD) -m elf_i386
+IA32_AS := $(CC) $(AFLAGS) -m32 -Wa,--32 -traditional -c
+IA32_OBJCOPY := $(CROSS_COMPILE)objcopy
+IA32_CPP := $(CROSS_COMPILE)gcc -m32 -E
+export IA32_CC IA32_LD IA32_AS IA32_OBJCOPY IA32_CPP
+
+
+LDFLAGS                := -m elf_x86_64
+#LDFLAGS_vmlinux := -e stext
+
+CHECKFLAGS      += -D__x86_64__ -m64
+
+cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+CFLAGS += $(cflags-y)
+
+CFLAGS += -mno-red-zone
+CFLAGS += -mcmodel=kernel
+CFLAGS += -pipe
+# this makes reading assembly source easier, but produces worse code
+# actually it makes the kernel smaller too.
+CFLAGS += -fno-reorder-blocks  
+CFLAGS += -Wno-sign-compare
+ifneq ($(CONFIG_DEBUG_INFO),y)
+CFLAGS += -fno-asynchronous-unwind-tables
+# -fweb shrinks the kernel a bit, but the difference is very small
+# it also messes up debugging, so don't use it for now.
+#CFLAGS += $(call cc-option,-fweb)
+endif
+# -funit-at-a-time shrinks the kernel .text considerably
+# unfortunately it makes reading oopses harder.
+CFLAGS += $(call cc-option,-funit-at-a-time,)
+
+head-y := arch/xen/x86_64/kernel/head.o arch/xen/x86_64/kernel/head64.o arch/xen/x86_64/kernel/init_task.o
+
+libs-y                                         += arch/x86_64/lib/
+core-y                                 += arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/
+core-$(CONFIG_IA32_EMULATION)          += arch/xen/x86_64/ia32/
+drivers-$(CONFIG_PCI)                  += arch/xen/x86_64/pci/
+drivers-$(CONFIG_OPROFILE)             += arch/x86_64/oprofile/
+
+# for clean
+obj-   += kernel/ mm/ pci/
+
+xenflags-y += -Iinclude/asm-xen/asm-x86_64/mach-xen
+
+CFLAGS += $(xenflags-y)
+AFLAGS += $(xenflags-y)
+
+prepare: include/asm-$(XENARCH)/asm_offset.h
+CLEAN_FILES += include/asm-$(XENARCH)/asm_offset.h
+
+arch/$(XENARCH)/kernel/asm-offsets.s: include/asm include/.asm-ignore \
+       include/linux/version.h include/config/MARKER
+
+
+include/asm-$(XENARCH)/asm_offset.h: arch/xen/x86_64/kernel/asm-offsets.s
+       $(call filechk,gen-asm-offsets)
+       ln -fsn asm_offset.h include/asm-$(XENARCH)/offset.h
+
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,183 @@
+/******************************************************************************
+ * xhci.h
+ *
+ * Private definitions for the Xen Virtual USB Controller.  Based on
+ * drivers/usb/host/uhci.h from Linux.  Copyright for the imported content is
+ * retained by the original authors.
+ *
+ * Modifications are:
+ * Copyright (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2004, 2005 Mark Williamson
+ */
+
+#ifndef __LINUX_XHCI_H
+#define __LINUX_XHCI_H
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <asm-xen/xen-public/io/usbif.h>
+#include <linux/spinlock.h>
+
+/* xhci_port_t - current known state of a virtual hub's ports */
+typedef struct {
+        unsigned int cs     :1; /* Connection status.         */
+        unsigned int cs_chg :1; /* Connection status change.  */
+        unsigned int pe     :1; /* Port enable.               */
+        unsigned int pe_chg :1; /* Port enable change.        */
+        unsigned int susp   :1; /* Suspended.                 */
+        unsigned int lsda   :1; /* Low speed device attached. */
+        unsigned int pr     :1; /* Port reset.                */
+} xhci_port_t;
+
+/* struct virt_root_hub - state related to the virtual root hub */
+struct virt_root_hub {
+       struct usb_device *dev;
+       int devnum;             /* Address of Root Hub endpoint */
+       struct urb *urb;
+       void *int_addr;
+       int send;
+       int interval;
+       int numports;
+       int c_p_r[8];
+       struct timer_list rh_int_timer;
+        spinlock_t port_state_lock;
+        xhci_port_t *ports;
+};
+
+/* struct xhci - contains the state associated with a single USB interface */
+struct xhci {
+
+#ifdef CONFIG_PROC_FS
+       /* procfs */
+       int num;
+       struct proc_dir_entry *proc_entry;
+#endif
+
+        int evtchn;                        /* Interdom channel to backend */
+        int irq;                           /* Bound to evtchn */
+        enum { 
+                USBIF_STATE_CONNECTED    = 2,
+                USBIF_STATE_DISCONNECTED = 1,
+                USBIF_STATE_CLOSED       = 0
+        } state; /* State of this USB interface */
+        unsigned long recovery; /* boolean recovery in progress flag */
+        
+        unsigned long bandwidth;
+
+       struct usb_bus *bus;
+
+       /* Main list of URB's currently controlled by this HC */
+       spinlock_t urb_list_lock;
+       struct list_head urb_list;              /* P: xhci->urb_list_lock */
+
+       /* List of URB's awaiting completion callback */
+       spinlock_t complete_list_lock;
+       struct list_head complete_list;         /* P: xhci->complete_list_lock */
+
+       struct virt_root_hub rh;        /* private data of the virtual root hub */
+
+        spinlock_t ring_lock;
+        usbif_front_ring_t usb_ring;
+
+        int awaiting_reset;
+};
+
+/* per-URB private data structure for the host controller */
+struct urb_priv {
+       struct urb *urb;
+        usbif_iso_t *schedule;
+       struct usb_device *dev;
+
+        int in_progress : 1;           /* QH was queued (not linked in) */
+       int short_control_packet : 1;   /* If we get a short packet during */
+                                       /*  a control transfer, retrigger */
+                                       /*  the status phase */
+
+       int status;                     /* Final status */
+
+       unsigned long inserttime;       /* In jiffies */
+
+       struct list_head complete_list; /* P: xhci->complete_list_lock */
+};
+
+/*
+ * Locking in xhci.c
+ *
+ * spinlocks are used extensively to protect the many lists and data
+ * structures we have. It's not that pretty, but it's necessary. We
+ * need to be done with all of the locks (except complete_list_lock) when
+ * we call urb->complete. I've tried to make it simple enough so I don't
+ * have to spend hours racking my brain trying to figure out if the
+ * locking is safe.
+ *
+ * Here's the safe locking order to prevent deadlocks:
+ *
+ * #1 xhci->urb_list_lock
+ * #2 urb->lock
+ * #3 xhci->urb_remove_list_lock
+ * #4 xhci->complete_list_lock
+ *
+ * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
+ * at the lowest level FIRST and NEVER grab locks at the same level at the
+ * same time.
+ * 
+ * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
+ */
+
+/* -------------------------------------------------------------------------
+   Virtual Root HUB
+   ------------------------------------------------------------------------- */
+/* destination of request */
+#define RH_DEVICE              0x00
+#define RH_INTERFACE           0x01
+#define RH_ENDPOINT            0x02
+#define RH_OTHER               0x03
+
+#define RH_CLASS               0x20
+#define RH_VENDOR              0x40
+
+/* Requests: bRequest << 8 | bmRequestType */
+#define RH_GET_STATUS          0x0080
+#define RH_CLEAR_FEATURE       0x0100
+#define RH_SET_FEATURE         0x0300
+#define RH_SET_ADDRESS         0x0500
+#define RH_GET_DESCRIPTOR      0x0680
+#define RH_SET_DESCRIPTOR      0x0700
+#define RH_GET_CONFIGURATION   0x0880
+#define RH_SET_CONFIGURATION   0x0900
+#define RH_GET_STATE           0x0280
+#define RH_GET_INTERFACE       0x0A80
+#define RH_SET_INTERFACE       0x0B00
+#define RH_SYNC_FRAME          0x0C80
+/* Our Vendor Specific Request */
+#define RH_SET_EP              0x2000
+
+/* Hub port features */
+#define RH_PORT_CONNECTION     0x00
+#define RH_PORT_ENABLE         0x01
+#define RH_PORT_SUSPEND                0x02
+#define RH_PORT_OVER_CURRENT   0x03
+#define RH_PORT_RESET          0x04
+#define RH_PORT_POWER          0x08
+#define RH_PORT_LOW_SPEED      0x09
+#define RH_C_PORT_CONNECTION   0x10
+#define RH_C_PORT_ENABLE       0x11
+#define RH_C_PORT_SUSPEND      0x12
+#define RH_C_PORT_OVER_CURRENT 0x13
+#define RH_C_PORT_RESET                0x14
+
+/* Hub features */
+#define RH_C_HUB_LOCAL_POWER   0x00
+#define RH_C_HUB_OVER_CURRENT  0x01
+#define RH_DEVICE_REMOTE_WAKEUP        0x00
+#define RH_ENDPOINT_STALL      0x01
+
+/* Our Vendor Specific feature */
+#define RH_REMOVE_EP           0x00
+
+#define RH_ACK                 0x01
+#define RH_REQ_ERR             -1
+#define RH_NACK                        0x00
+
+#endif
+
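
The lock-ordering comment in xhci.h above is the part most worth
internalizing.  A hypothetical illustration (the function and its body are
invented) of taking two of those locks in the documented order:

    /* Always #1 xhci->urb_list_lock before #2 urb->lock, never reversed. */
    static void example_walk_urb(struct xhci *xhci, struct urb *urb)
    {
        spin_lock(&xhci->urb_list_lock);
        spin_lock(&urb->lock);
        /* ... inspect or re-queue the URB ... */
        spin_unlock(&urb->lock);
        spin_unlock(&xhci->urb_list_lock);
    }
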
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/drivers/char/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/drivers/char/Makefile     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,361 @@
+#
+# Makefile for the kernel character device drivers.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now inherited from the
+# parent makes..
+#
+
+#
+# This file contains the font map for the default (hardware) font
+#
+FONTMAPFILE = cp437.uni
+
+O_TARGET := char.o
+
+obj-y   += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o
+
+# All of the (potential) objects that export symbols.
+# This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'.
+
+export-objs     :=     busmouse.o console.o keyboard.o sysrq.o \
+                       misc.o pty.o random.o selection.o serial.o \
+                       sonypi.o tty_io.o tty_ioctl.o generic_serial.o \
+                       au1000_gpio.o vac-serial.o hp_psaux.o nvram.o \
+                       scx200.o fetchop.o
+
+mod-subdirs    :=      joystick ftape drm drm-4.0 pcmcia
+
+list-multi     :=      
+
+KEYMAP   =defkeymap.o
+KEYBD    =pc_keyb.o
+CONSOLE  =console.o
+SERIAL   =serial.o
+
+ifeq ($(ARCH),xen)
+  ifneq ($(CONFIG_XEN_PHYSDEV_ACCESS),y)
+    KEYBD  =
+  endif
+endif
+
+ifeq ($(ARCH),s390)
+  KEYMAP   =
+  KEYBD    =
+  CONSOLE  =
+  SERIAL   =
+endif
+
+ifeq ($(ARCH),mips)
+  ifneq ($(CONFIG_PC_KEYB),y)
+    KEYBD    =
+  endif
+  ifeq ($(CONFIG_VR41XX_KIU),y)
+    KEYMAP   =
+    KEYBD    = vr41xx_keyb.o
+  endif
+endif
+
+ifeq ($(ARCH),s390x)
+  KEYMAP   =
+  KEYBD    =
+  CONSOLE  =
+  SERIAL   =
+endif
+
+ifeq ($(ARCH),m68k)
+   ifdef CONFIG_AMIGA
+      KEYBD = amikeyb.o
+   else
+      ifndef CONFIG_MAC
+        KEYBD =
+      endif
+   endif
+   SERIAL   =
+endif
+
+ifeq ($(ARCH),parisc)
+   ifdef CONFIG_GSC_PS2
+      KEYBD   = hp_psaux.o hp_keyb.o
+   else
+      KEYBD   =
+   endif
+   ifdef CONFIG_SERIAL_MUX
+      CONSOLE += mux.o
+   endif
+   ifdef CONFIG_PDC_CONSOLE
+      CONSOLE += pdc_console.o
+   endif
+endif
+
+ifdef CONFIG_Q40
+  KEYBD += q40_keyb.o
+  SERIAL = serial.o
+endif
+
+ifdef CONFIG_APOLLO
+  KEYBD += dn_keyb.o
+endif
+
+ifeq ($(ARCH),arm)
+  ifneq ($(CONFIG_PC_KEYMAP),y)
+    KEYMAP   =
+  endif
+  ifneq ($(CONFIG_PC_KEYB),y)
+    KEYBD    =
+  endif
+endif
+
+ifeq ($(ARCH),sh)
+  KEYMAP   =
+  KEYBD    =
+  CONSOLE  =
+  ifeq ($(CONFIG_SH_HP600),y)
+  KEYMAP   = defkeymap.o
+  KEYBD    = scan_keyb.o hp600_keyb.o
+  CONSOLE  = console.o
+  endif
+  ifeq ($(CONFIG_SH_DMIDA),y)
+  # DMIDA does not connect the HD64465 PS/2 keyboard port
+  # but we allow for USB keyboards to be plugged in.
+  KEYMAP   = defkeymap.o
+  KEYBD    = # hd64465_keyb.o pc_keyb.o
+  CONSOLE  = console.o
+  endif
+  ifeq ($(CONFIG_SH_EC3104),y)
+  KEYMAP   = defkeymap.o
+  KEYBD    = ec3104_keyb.o
+  CONSOLE  = console.o
+  endif
+  ifeq ($(CONFIG_SH_DREAMCAST),y)
+  KEYMAP   = defkeymap.o
+  KEYBD    =
+  CONSOLE  = console.o
+  endif
+endif
+
+ifeq ($(CONFIG_DECSTATION),y)
+  KEYMAP   =
+  KEYBD    =
+endif
+
+ifeq ($(CONFIG_BAGET_MIPS),y)
+  KEYBD    =
+  SERIAL   = vac-serial.o
+endif
+
+ifeq ($(CONFIG_NINO),y)
+  SERIAL   =
+endif
+
+ifneq ($(CONFIG_SUN_SERIAL),)
+  SERIAL   =
+endif
+
+ifeq ($(CONFIG_QTRONIX_KEYBOARD),y)
+  KEYBD    = qtronix.o
+  KEYMAP   = qtronixmap.o
+endif
+
+ifeq ($(CONFIG_DUMMY_KEYB),y)
+  KEYBD = dummy_keyb.o
+endif
+
+obj-$(CONFIG_VT) += vt.o vc_screen.o consolemap.o consolemap_deftbl.o $(CONSOLE) selection.o
+obj-$(CONFIG_SERIAL) += $(SERIAL)
+obj-$(CONFIG_PARPORT_SERIAL) += parport_serial.o
+obj-$(CONFIG_SERIAL_HCDP) += hcdp_serial.o
+obj-$(CONFIG_SERIAL_21285) += serial_21285.o
+obj-$(CONFIG_SERIAL_SA1100) += serial_sa1100.o
+obj-$(CONFIG_SERIAL_AMBA) += serial_amba.o
+obj-$(CONFIG_TS_AU1X00_ADS7846) += au1000_ts.o
+obj-$(CONFIG_SERIAL_DEC) += decserial.o
+
+ifndef CONFIG_SUN_KEYBOARD
+  obj-$(CONFIG_VT) += keyboard.o $(KEYMAP) $(KEYBD)
+else
+  obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP)
+endif
+
+obj-$(CONFIG_HIL) += hp_keyb.o
+obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
+obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o
+obj-$(CONFIG_ROCKETPORT) += rocket.o
+obj-$(CONFIG_MOXA_SMARTIO) += mxser.o
+obj-$(CONFIG_MOXA_INTELLIO) += moxa.o
+obj-$(CONFIG_DIGI) += pcxx.o
+obj-$(CONFIG_DIGIEPCA) += epca.o
+obj-$(CONFIG_CYCLADES) += cyclades.o
+obj-$(CONFIG_STALLION) += stallion.o
+obj-$(CONFIG_ISTALLION) += istallion.o
+obj-$(CONFIG_SIBYTE_SB1250_DUART) += sb1250_duart.o
+obj-$(CONFIG_COMPUTONE) += ip2.o ip2main.o
+obj-$(CONFIG_RISCOM8) += riscom8.o
+obj-$(CONFIG_ISI) += isicom.o
+obj-$(CONFIG_ESPSERIAL) += esp.o
+obj-$(CONFIG_SYNCLINK) += synclink.o
+obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o
+obj-$(CONFIG_N_HDLC) += n_hdlc.o
+obj-$(CONFIG_SPECIALIX) += specialix.o
+obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
+obj-$(CONFIG_A2232) += ser_a2232.o generic_serial.o
+obj-$(CONFIG_SX) += sx.o generic_serial.o
+obj-$(CONFIG_RIO) += rio/rio.o generic_serial.o
+obj-$(CONFIG_SH_SCI) += sh-sci.o generic_serial.o
+obj-$(CONFIG_SERIAL167) += serial167.o
+obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o
+obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o
+obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o
+obj-$(CONFIG_HVC_CONSOLE) += hvc_console.o
+obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o
+obj-$(CONFIG_TXX927_SERIAL) += serial_txx927.o
+obj-$(CONFIG_SERIAL_TXX9) += generic_serial.o serial_txx9.o
+obj-$(CONFIG_IP22_SERIAL) += sgiserial.o
+obj-$(CONFIG_AU1X00_UART) += au1x00-serial.o
+obj-$(CONFIG_SGI_L1_SERIAL) += sn_serial.o
+
+subdir-$(CONFIG_RIO) += rio
+subdir-$(CONFIG_INPUT) += joystick
+
+obj-$(CONFIG_ATIXL_BUSMOUSE) += atixlmouse.o
+obj-$(CONFIG_LOGIBUSMOUSE) += logibusmouse.o
+obj-$(CONFIG_PRINTER) += lp.o
+obj-$(CONFIG_TIPAR) += tipar.o
+obj-$(CONFIG_OBMOUSE) += obmouse.o
+
+ifeq ($(CONFIG_INPUT),y)
+obj-y += joystick/js.o
+endif
+
+obj-$(CONFIG_FETCHOP) += fetchop.o
+obj-$(CONFIG_BUSMOUSE) += busmouse.o
+obj-$(CONFIG_DTLK) += dtlk.o
+obj-$(CONFIG_R3964) += n_r3964.o
+obj-$(CONFIG_APPLICOM) += applicom.o
+obj-$(CONFIG_SONYPI) += sonypi.o
+obj-$(CONFIG_MS_BUSMOUSE) += msbusmouse.o
+obj-$(CONFIG_82C710_MOUSE) += qpmouse.o
+obj-$(CONFIG_AMIGAMOUSE) += amigamouse.o
+obj-$(CONFIG_ATARIMOUSE) += atarimouse.o
+obj-$(CONFIG_ADBMOUSE) += adbmouse.o
+obj-$(CONFIG_PC110_PAD) += pc110pad.o
+obj-$(CONFIG_MK712_MOUSE) += mk712.o
+obj-$(CONFIG_RTC) += rtc.o
+obj-$(CONFIG_GEN_RTC) += genrtc.o
+obj-$(CONFIG_EFI_RTC) += efirtc.o
+obj-$(CONFIG_SGI_DS1286) += ds1286.o
+obj-$(CONFIG_MIPS_RTC) += mips_rtc.o
+obj-$(CONFIG_SGI_IP27_RTC) += ip27-rtc.o
+ifeq ($(CONFIG_PPC),)
+  obj-$(CONFIG_NVRAM) += nvram.o
+endif
+obj-$(CONFIG_TOSHIBA) += toshiba.o
+obj-$(CONFIG_I8K) += i8k.o
+obj-$(CONFIG_DS1620) += ds1620.o
+obj-$(CONFIG_DS1742) += ds1742.o
+obj-$(CONFIG_INTEL_RNG) += i810_rng.o
+obj-$(CONFIG_AMD_RNG) += amd768_rng.o
+obj-$(CONFIG_HW_RANDOM) += hw_random.o
+obj-$(CONFIG_AMD_PM768) += amd76x_pm.o
+obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o
+
+obj-$(CONFIG_ITE_GPIO) += ite_gpio.o
+obj-$(CONFIG_AU1X00_GPIO) += au1000_gpio.o
+obj-$(CONFIG_AU1X00_USB_TTY) += au1000_usbtty.o
+obj-$(CONFIG_AU1X00_USB_RAW) += au1000_usbraw.o
+obj-$(CONFIG_COBALT_LCD) += lcd.o
+
+obj-$(CONFIG_QIC02_TAPE) += tpqic02.o
+
+subdir-$(CONFIG_FTAPE) += ftape
+subdir-$(CONFIG_DRM_OLD) += drm-4.0
+subdir-$(CONFIG_DRM_NEW) += drm
+subdir-$(CONFIG_PCMCIA) += pcmcia
+subdir-$(CONFIG_AGP) += agp
+
+ifeq ($(CONFIG_FTAPE),y)
+obj-y       += ftape/ftape.o
+endif
+
+obj-$(CONFIG_H8) += h8.o
+obj-$(CONFIG_PPDEV) += ppdev.o
+obj-$(CONFIG_DZ) += dz.o
+obj-$(CONFIG_NWBUTTON) += nwbutton.o
+obj-$(CONFIG_NWFLASH) += nwflash.o
+obj-$(CONFIG_SCx200) += scx200.o
+obj-$(CONFIG_SCx200_GPIO) += scx200_gpio.o
+
+# Only one watchdog can succeed. We probe the hardware watchdog
+# drivers first, then the softdog driver.  This means if your hardware
+# watchdog dies or is 'borrowed' for some reason the software watchdog
+# still gives you some cover.
+
+obj-$(CONFIG_PCWATCHDOG) += pcwd.o
+obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
+obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o
+obj-$(CONFIG_IB700_WDT) += ib700wdt.o
+obj-$(CONFIG_MIXCOMWD) += mixcomwd.o
+obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o
+obj-$(CONFIG_W83877F_WDT) += w83877f_wdt.o
+obj-$(CONFIG_SC520_WDT) += sc520_wdt.o
+obj-$(CONFIG_WDT) += wdt.o
+obj-$(CONFIG_WDTPCI) += wdt_pci.o
+obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
+obj-$(CONFIG_977_WATCHDOG) += wdt977.o
+obj-$(CONFIG_I810_TCO) += i810-tco.o
+obj-$(CONFIG_MACHZ_WDT) += machzwd.o
+obj-$(CONFIG_SH_WDT) += shwdt.o
+obj-$(CONFIG_EUROTECH_WDT) += eurotechwdt.o
+obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o
+obj-$(CONFIG_ALIM1535_WDT) += alim1535d_wdt.o
+obj-$(CONFIG_INDYDOG) += indydog.o
+obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
+obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
+obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o
+obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o
+obj-$(CONFIG_8xx_WDT) += mpc8xx_wdt.o
+
+subdir-$(CONFIG_MWAVE) += mwave
+ifeq ($(CONFIG_MWAVE),y)
+  obj-y += mwave/mwave.o
+endif
+
+subdir-$(CONFIG_IPMI_HANDLER) += ipmi
+ifeq ($(CONFIG_IPMI_HANDLER),y)
+  obj-y += ipmi/ipmi.o
+endif
+
+include $(TOPDIR)/Rules.make
+
+fastdep:
+
+conmakehash: conmakehash.c
+       $(HOSTCC) $(HOSTCFLAGS) -o conmakehash conmakehash.c
+
+consolemap_deftbl.c: $(FONTMAPFILE) conmakehash
+       ./conmakehash $(FONTMAPFILE) > consolemap_deftbl.c
+
+consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h
+
+.DELETE_ON_ERROR:
+
+defkeymap.c: defkeymap.map
+       set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
+
+qtronixmap.c: qtronixmap.map
+       set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
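Aside: the conmakehash and loadkeys rules above follow the classic host-tool
pattern: a helper built with $(HOSTCC) runs at build time and emits a
generated C source (here consolemap_deftbl.c from the cp437.uni font map). A
rough standalone sketch of the idea, greatly simplified -- the real
conmakehash parses .uni unicode maps rather than raw bytes:

#include <stdio.h>

int main(int argc, char **argv)
{
	FILE *in;
	int c, n = 0;

	if (argc < 2 || !(in = fopen(argv[1], "r"))) {
		fprintf(stderr, "usage: %s mapfile\n", argv[0]);
		return 1;
	}
	/* Emit the input as an initialized C array on stdout, which a
	 * Makefile rule would redirect into a generated .c file. */
	printf("unsigned char dflt_table[] = {");
	while ((c = fgetc(in)) != EOF)
		printf("%s0x%02x,", (n++ % 12) ? " " : "\n\t", c);
	printf("\n};\n");
	fclose(in);
	return 0;
}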
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,36 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+/*
+ *     linux/include/asm/irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+/* include comes from machine specific directory */
+#include "irq_vectors.h"
+#include <asm/thread_info.h>
+
+static __inline__ int irq_canonicalize(int irq)
+{
+       return ((irq == 2) ? 9 : irq);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define ARCH_HAS_NMI_WATCHDOG          /* See include/linux/nmi.h */
+#endif
+
+#define KDB_VECTOR     0xf9
+
+# define irq_ctx_init(cpu) do { } while (0)
+
+struct irqaction;
+struct pt_regs;
+int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+
+#endif /* _ASM_IRQ_H */
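Aside: irq_canonicalize() above encodes an old PC/AT quirk: IRQ 2 is the
slave-PIC cascade input, so a device nominally wired to IRQ 2 actually
raises IRQ 9. A caller's-eye sketch (illustrative only; a standalone
restatement of the inline above):

#include <stdio.h>

static int irq_canonicalize(int irq)
{
	return (irq == 2) ? 9 : irq;	/* cascade: IRQ 2 shows up as IRQ 9 */
}

int main(void)
{
	/* A driver configured for "IRQ 2" must request the canonical line. */
	printf("irq 2 -> %d, irq 5 -> %d\n",
	       irq_canonicalize(2), irq_canonicalize(5));
	return 0;
}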
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,768 @@
+/*
+ *  linux/arch/i386/kernel/process.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <stdarg.h>
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/utsname.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#include <asm-xen/xen-public/physdev.h>
+#ifdef CONFIG_MATH_EMULATION
+#include <asm/math_emu.h>
+#endif
+
+#include <linux/irq.h>
+#include <linux/err.h>
+
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+int hlt_counter;
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+       return ((unsigned long *)tsk->thread.esp)[3];
+}
+
+/*
+ * Powermanagement idle function, if any..
+ */
+void (*pm_idle)(void);
+static cpumask_t cpu_idle_map;
+
+void disable_hlt(void)
+{
+       hlt_counter++;
+}
+
+EXPORT_SYMBOL(disable_hlt);
+
+void enable_hlt(void)
+{
+       hlt_counter--;
+}
+
+EXPORT_SYMBOL(enable_hlt);
+
+/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
+extern void stop_hz_timer(void);
+extern void start_hz_timer(void);
+void xen_idle(void)
+{
+       local_irq_disable();
+
+       if (need_resched()) {
+               local_irq_enable();
+       } else {
+               stop_hz_timer();
+               HYPERVISOR_block(); /* implicit local_irq_enable() */
+               start_hz_timer();
+       }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+       /* Ack it */
+       __get_cpu_var(cpu_state) = CPU_DEAD;
+
+       /* We shouldn't have to disable interrupts while dead, but
+        * some interrupts just don't seem to go away, and this makes
+        * it "work" for testing purposes. */
+       /* Death loop */
+       while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+               HYPERVISOR_yield();
+
+       local_irq_disable();
+       __flush_tlb_all();
+       cpu_set(smp_processor_id(), cpu_online_map);
+       local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+       int cpu = _smp_processor_id();
+
+       /* endless idle loop with no priority at all */
+       while (1) {
+               while (!need_resched()) {
+
+                       if (cpu_isset(cpu, cpu_idle_map))
+                               cpu_clear(cpu, cpu_idle_map);
+                       rmb();
+
+                       if (cpu_is_offline(cpu)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
+                               /* Tell hypervisor to take vcpu down. */
+                               HYPERVISOR_vcpu_down(cpu);
+#endif
+                               play_dead();
+                       }
+
+                       irq_stat[cpu].idle_timestamp = jiffies;
+                       xen_idle();
+               }
+               schedule();
+       }
+}
+
+void cpu_idle_wait(void)
+{
+       int cpu;
+       cpumask_t map;
+
+       for_each_online_cpu(cpu)
+               cpu_set(cpu, cpu_idle_map);
+
+       wmb();
+       do {
+               ssleep(1);
+               cpus_and(map, cpu_idle_map, cpu_online_map);
+       } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
+/* Always use xen_idle() instead. */
+void __init select_idle_routine(const struct cpuinfo_x86 *c) {}
+
+void show_regs(struct pt_regs * regs)
+{
+       printk("\n");
+       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+       printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, 
smp_processor_id());
+       print_symbol("EIP is at %s\n", regs->eip);
+
+       if (regs->xcs & 2)
+               printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+       printk(" EFLAGS: %08lx    %s  (%s)\n",
+              regs->eflags, print_tainted(), system_utsname.release);
+       printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+               regs->eax,regs->ebx,regs->ecx,regs->edx);
+       printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+               regs->esi, regs->edi, regs->ebp);
+       printk(" DS: %04x ES: %04x\n",
+               0xffff & regs->xds,0xffff & regs->xes);
+
+       show_trace(NULL, &regs->esp);
+}
+
+/*
+ * This gets run with %ebx containing the
+ * function to call, and %edx containing
+ * the "args".
+ */
+extern void kernel_thread_helper(void);
+__asm__(".section .text\n"
+       ".align 4\n"
+       "kernel_thread_helper:\n\t"
+       "movl %edx,%eax\n\t"
+       "pushl %edx\n\t"
+       "call *%ebx\n\t"
+       "pushl %eax\n\t"
+       "call do_exit\n"
+       ".previous");
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+       struct pt_regs regs;
+
+       memset(&regs, 0, sizeof(regs));
+
+       regs.ebx = (unsigned long) fn;
+       regs.edx = (unsigned long) arg;
+
+       regs.xds = __USER_DS;
+       regs.xes = __USER_DS;
+       regs.orig_eax = -1;
+       regs.eip = (unsigned long) kernel_thread_helper;
+       regs.xcs = __KERNEL_CS;
+       regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+       /* Ok, create the new process.. */
+       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+       struct task_struct *tsk = current;
+       struct thread_struct *t = &tsk->thread;
+
+       /* The process may have allocated an io port bitmap... nuke it. */
+       if (unlikely(NULL != t->io_bitmap_ptr)) {
+               physdev_op_t op = { 0 };
+               op.cmd = PHYSDEVOP_SET_IOBITMAP;
+               HYPERVISOR_physdev_op(&op);
+               kfree(t->io_bitmap_ptr);
+               t->io_bitmap_ptr = NULL;
+       }
+}
+
+void flush_thread(void)
+{
+       struct task_struct *tsk = current;
+
+       memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+       memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));        
+       /*
+        * Forget coprocessor state..
+        */
+       clear_fpu(tsk);
+       clear_used_math();
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+       if (dead_task->mm) {
+               // temporary debugging check
+               if (dead_task->mm->context.size) {
+                       printk("WARNING: dead process %8s still has LDT? 
<%p/%d>\n",
+                                       dead_task->comm,
+                                       dead_task->mm->context.ldt,
+                                       dead_task->mm->context.size);
+                       BUG();
+               }
+       }
+
+       release_vm86_irqs(dead_task);
+}
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+       unlazy_fpu(tsk);
+}
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+       unsigned long unused,
+       struct task_struct * p, struct pt_regs * regs)
+{
+       struct pt_regs * childregs;
+       struct task_struct *tsk;
+       int err;
+
+       childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+       *childregs = *regs;
+       childregs->eax = 0;
+       childregs->esp = esp;
+
+       p->thread.esp = (unsigned long) childregs;
+       p->thread.esp0 = (unsigned long) (childregs+1);
+
+       p->thread.eip = (unsigned long) ret_from_fork;
+
+       savesegment(fs,p->thread.fs);
+       savesegment(gs,p->thread.gs);
+
+       tsk = current;
+       if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
+               p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+               if (!p->thread.io_bitmap_ptr) {
+                       p->thread.io_bitmap_max = 0;
+                       return -ENOMEM;
+               }
+               memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
+                       IO_BITMAP_BYTES);
+       }
+
+       /*
+        * Set a new TLS for the child thread?
+        */
+       if (clone_flags & CLONE_SETTLS) {
+               struct desc_struct *desc;
+               struct user_desc info;
+               int idx;
+
+               err = -EFAULT;
+               if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
+                       goto out;
+               err = -EINVAL;
+               if (LDT_empty(&info))
+                       goto out;
+
+               idx = info.entry_number;
+               if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+                       goto out;
+
+               desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+               desc->a = LDT_entry_a(&info);
+               desc->b = LDT_entry_b(&info);
+       }
+
+       p->thread.io_pl = current->thread.io_pl;
+
+       err = 0;
+ out:
+       if (err && p->thread.io_bitmap_ptr) {
+               kfree(p->thread.io_bitmap_ptr);
+               p->thread.io_bitmap_max = 0;
+       }
+       return err;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+       int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+       dump->magic = CMAGIC;
+       dump->start_code = 0;
+       dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+       dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+       dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+       dump->u_dsize -= dump->u_tsize;
+       dump->u_ssize = 0;
+       for (i = 0; i < 8; i++)
+               dump->u_debugreg[i] = current->thread.debugreg[i];  
+
+       if (dump->start_stack < TASK_SIZE)
+               dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+       dump->regs.ebx = regs->ebx;
+       dump->regs.ecx = regs->ecx;
+       dump->regs.edx = regs->edx;
+       dump->regs.esi = regs->esi;
+       dump->regs.edi = regs->edi;
+       dump->regs.ebp = regs->ebp;
+       dump->regs.eax = regs->eax;
+       dump->regs.ds = regs->xds;
+       dump->regs.es = regs->xes;
+       savesegment(fs,dump->regs.fs);
+       savesegment(gs,dump->regs.gs);
+       dump->regs.orig_eax = regs->orig_eax;
+       dump->regs.eip = regs->eip;
+       dump->regs.cs = regs->xcs;
+       dump->regs.eflags = regs->eflags;
+       dump->regs.esp = regs->esp;
+       dump->regs.ss = regs->xss;
+
+       dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+/* 
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+       struct pt_regs ptregs;
+       
+       ptregs = *(struct pt_regs *)
+               ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
+       ptregs.xcs &= 0xffff;
+       ptregs.xds &= 0xffff;
+       ptregs.xes &= 0xffff;
+       ptregs.xss &= 0xffff;
+
+       elf_core_copy_regs(regs, &ptregs);
+
+       return 1;
+}
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+               HYPERVISOR_set_debugreg((register),     \
+                       (thread->debugreg[register]))
+
+/*
+ *     switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPU's, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is a fairly red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ *
+ * The return value (in %eax) will be the "prev" task after
+ * the task-switch, and shows up in ret_from_fork in entry.S,
+ * for example.
+ */
+struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+       struct thread_struct *prev = &prev_p->thread,
+                                *next = &next_p->thread;
+       int cpu = smp_processor_id();
+       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       physdev_op_t iopl_op, iobmp_op;
+       multicall_entry_t _mcl[8], *mcl = _mcl;
+
+       /* XEN NOTE: FS/GS saved in switch_mm(), not here. */
+
+       /*
+        * This is basically '__unlazy_fpu', except that we queue a
+        * multicall to indicate FPU task switch, rather than
+        * synchronously trapping to Xen.
+        */
+       if (prev_p->thread_info->status & TS_USEDFPU) {
+               __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
+               mcl->op      = __HYPERVISOR_fpu_taskswitch;
+               mcl->args[0] = 1;
+               mcl++;
+       }
+
+       /*
+        * Reload esp0, LDT and the page table pointer:
+        * This is load_esp0(tss, next) with a multicall.
+        */
+       tss->esp0 = next->esp0;
+       mcl->op      = __HYPERVISOR_stack_switch;
+       mcl->args[0] = tss->ss0;
+       mcl->args[1] = tss->esp0;
+       mcl++;
+
+       /*
+        * Load the per-thread Thread-Local Storage descriptor.
+        * This is load_TLS(next, cpu) with multicalls.
+        */
+#define C(i) do {                                                       \
+       if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||    \
+                    next->tls_array[i].b != prev->tls_array[i].b)) {   \
+               mcl->op      = __HYPERVISOR_update_descriptor;          \
+               mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu)  \
+                                        [GDT_ENTRY_TLS_MIN + i]);      \
+               mcl->args[1] = ((u32 *)&next->tls_array[i])[0];         \
+               mcl->args[2] = ((u32 *)&next->tls_array[i])[1];         \
+               mcl++;                                                  \
+       }                                                               \
+} while (0)
+       C(0); C(1); C(2);
+#undef C
+
+       if (unlikely(prev->io_pl != next->io_pl)) {
+               iopl_op.cmd             = PHYSDEVOP_SET_IOPL;
+               iopl_op.u.set_iopl.iopl = next->io_pl;
+               mcl->op      = __HYPERVISOR_physdev_op;
+               mcl->args[0] = (unsigned long)&iopl_op;
+               mcl++;
+       }
+
+       if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
+               iobmp_op.cmd                     =
+                       PHYSDEVOP_SET_IOBITMAP;
+               iobmp_op.u.set_iobitmap.bitmap   =
+                       (unsigned long)next->io_bitmap_ptr;
+               iobmp_op.u.set_iobitmap.nr_ports =
+                       next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+               mcl->op      = __HYPERVISOR_physdev_op;
+               mcl->args[0] = (unsigned long)&iobmp_op;
+               mcl++;
+       }
+
+       (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
+
+       /*
+        * Restore %fs and %gs if needed.
+        */
+       if (unlikely(next->fs | next->gs)) {
+               loadsegment(fs, next->fs);
+               loadsegment(gs, next->gs);
+       }
+
+       /*
+        * Now maybe reload the debug registers
+        */
+       if (unlikely(next->debugreg[7])) {
+               loaddebug(next, 0);
+               loaddebug(next, 1);
+               loaddebug(next, 2);
+               loaddebug(next, 3);
+               /* no 4 and 5 */
+               loaddebug(next, 6);
+               loaddebug(next, 7);
+       }
+
+       return prev_p;
+}
+
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+       return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+}
+
+asmlinkage int sys_clone(struct pt_regs regs)
+{
+       unsigned long clone_flags;
+       unsigned long newsp;
+       int __user *parent_tidptr, *child_tidptr;
+
+       clone_flags = regs.ebx;
+       newsp = regs.ecx;
+       parent_tidptr = (int __user *)regs.edx;
+       child_tidptr = (int __user *)regs.edi;
+       if (!newsp)
+               newsp = regs.esp;
+       return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, 
child_tidptr);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(struct pt_regs regs)
+{
+       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, 
NULL, NULL);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+       int error;
+       char * filename;
+
+       filename = getname((char __user *) regs.ebx);
+       error = PTR_ERR(filename);
+       if (IS_ERR(filename))
+               goto out;
+       error = do_execve(filename,
+                       (char __user * __user *) regs.ecx,
+                       (char __user * __user *) regs.edx,
+                       &regs);
+       if (error == 0) {
+               task_lock(current);
+               current->ptrace &= ~PT_DTRACE;
+               task_unlock(current);
+               /* Make sure we don't return using sysenter.. */
+               set_thread_flag(TIF_IRET);
+       }
+       putname(filename);
+out:
+       return error;
+}
+
+#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
+#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))
+
+unsigned long get_wchan(struct task_struct *p)
+{
+       unsigned long ebp, esp, eip;
+       unsigned long stack_page;
+       int count = 0;
+       if (!p || p == current || p->state == TASK_RUNNING)
+               return 0;
+       stack_page = (unsigned long)p->thread_info;
+       esp = p->thread.esp;
+       if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
+               return 0;
+       /* include/asm-i386/system.h:switch_to() pushes ebp last. */
+       ebp = *(unsigned long *) esp;
+       do {
+               if (ebp < stack_page || ebp > top_ebp+stack_page)
+                       return 0;
+               eip = *(unsigned long *) (ebp+4);
+               if (!in_sched_functions(eip))
+                       return eip;
+               ebp = *(unsigned long *) ebp;
+       } while (count++ < 16);
+       return 0;
+}
+
+/*
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
+ */
+static int get_free_idx(void)
+{
+       struct thread_struct *t = &current->thread;
+       int idx;
+
+       for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+               if (desc_empty(t->tls_array + idx))
+                       return idx + GDT_ENTRY_TLS_MIN;
+       return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
+{
+       struct thread_struct *t = &current->thread;
+       struct user_desc info;
+       struct desc_struct *desc;
+       int cpu, idx;
+
+       if (copy_from_user(&info, u_info, sizeof(info)))
+               return -EFAULT;
+       idx = info.entry_number;
+
+       /*
+        * index -1 means the kernel should try to find and
+        * allocate an empty descriptor:
+        */
+       if (idx == -1) {
+               idx = get_free_idx();
+               if (idx < 0)
+                       return idx;
+               if (put_user(idx, &u_info->entry_number))
+                       return -EFAULT;
+       }
+
+       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+               return -EINVAL;
+
+       desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+       /*
+        * We must not get preempted while modifying the TLS.
+        */
+       cpu = get_cpu();
+
+       if (LDT_empty(&info)) {
+               desc->a = 0;
+               desc->b = 0;
+       } else {
+               desc->a = LDT_entry_a(&info);
+               desc->b = LDT_entry_b(&info);
+       }
+       load_TLS(t, cpu);
+
+       put_cpu();
+
+       return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+       (((desc)->a >> 16) & 0x0000ffff) | \
+       (((desc)->b << 16) & 0x00ff0000) | \
+       ( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+       ((desc)->a & 0x0ffff) | \
+        ((desc)->b & 0xf0000) )
+       
+#define GET_32BIT(desc)                (((desc)->b >> 22) & 1)
+#define GET_CONTENTS(desc)     (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)     (((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)  (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)      (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)      (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
+{
+       struct user_desc info;
+       struct desc_struct *desc;
+       int idx;
+
+       if (get_user(idx, &u_info->entry_number))
+               return -EFAULT;
+       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+               return -EINVAL;
+
+       desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+       info.entry_number = idx;
+       info.base_addr = GET_BASE(desc);
+       info.limit = GET_LIMIT(desc);
+       info.seg_32bit = GET_32BIT(desc);
+       info.contents = GET_CONTENTS(desc);
+       info.read_exec_only = !GET_WRITABLE(desc);
+       info.limit_in_pages = GET_LIMIT_PAGES(desc);
+       info.seg_not_present = !GET_PRESENT(desc);
+       info.useable = GET_USEABLE(desc);
+
+       if (copy_to_user(u_info, &info, sizeof(info)))
+               return -EFAULT;
+       return 0;
+}
+
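Aside on xen_idle() above: interrupts are disabled before the final
need_resched() test, so a wakeup cannot slip in between the test and the
blocking hypercall; HYPERVISOR_block() then re-enables event delivery
atomically. A toy model of that check-then-sleep pattern (the functions
here are stand-ins, not the real hypercalls):

#include <stdbool.h>
#include <stdio.h>

static bool need_resched_flag;	/* stand-in for need_resched() */

static void irq_disable(void)  { puts("cli"); }
static void irq_enable(void)   { puts("sti"); }
static void block_in_xen(void) { puts("block (irqs re-enabled atomically)"); }

static void toy_idle(void)
{
	irq_disable();
	if (need_resched_flag)
		irq_enable();	/* work arrived first: don't sleep */
	else
		block_in_xen();	/* no race: irqs were already off */
}

int main(void)
{
	toy_idle();
	return 0;
}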
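Aside on __switch_to() above: rather than trapping into Xen once per
operation (FPU task switch, stack switch, TLS descriptor updates, iopl and
io-bitmap changes), the code queues multicall_entry_t records in _mcl[] and
issues them with one HYPERVISOR_multicall(), amortizing the hypercall cost
across the whole context switch. A self-contained toy of that batching
shape (toy types; the real ones come from Xen's public headers):

#include <stdio.h>

struct toy_mcall { int op; unsigned long args[3]; };

/* Stand-in for HYPERVISOR_multicall(): the real call executes every
 * queued entry inside a single trap into the hypervisor. */
static void toy_multicall(struct toy_mcall *calls, int n)
{
	for (int i = 0; i < n; i++)
		printf("op=%d arg0=%lu\n", calls[i].op, calls[i].args[0]);
}

int main(void)
{
	struct toy_mcall _mcl[8], *mcl = _mcl;

	mcl->op = 1; mcl->args[0] = 0x1000; mcl++;	/* e.g. stack switch */
	mcl->op = 2; mcl->args[0] = 1;      mcl++;	/* e.g. fpu taskswitch */

	toy_multicall(_mcl, (int)(mcl - _mcl));		/* one batched "trap" */
	return 0;
}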
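Aside on the GET_BASE()/GET_LIMIT() macros above: an x86 segment descriptor
scatters the 32-bit base across three fields of the words a and b (base
15:0 in a[31:16], base 23:16 in b[7:0], base 31:24 in b[31:24]); the macro
stitches them back together. A standalone check (toy struct standing in
for desc_struct):

#include <stdio.h>

struct toy_desc { unsigned int a, b; };

#define GET_BASE(desc) ( \
	(((desc)->a >> 16) & 0x0000ffff) | \
	(((desc)->b << 16) & 0x00ff0000) | \
	( (desc)->b        & 0xff000000)   )

int main(void)
{
	/* Encode base 0x12345678 into the split fields... */
	struct toy_desc d = { .a = 0x5678u << 16, .b = 0x12000034u };

	/* ...and recover it: prints base=0x12345678 */
	printf("base=0x%08x\n", GET_BASE(&d));
	return 0;
}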
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/mm/highmem.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/mm/highmem.c      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,614 @@
+/*
+ * High memory handling common code and variables.
+ *
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@xxxxxxx
+ *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@xxxxxxxxxxxxxx
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * 64-bit physical space. With current x86 CPUs this
+ * means up to 64 Gigabytes physical RAM.
+ *
+ * Rewrote high memory support to move the page cache into
+ * high memory. Implemented permanent (schedulable) kmaps
+ * based on Linus' idea.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx>
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/bio.h>
+#include <linux/pagemap.h>
+#include <linux/mempool.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/highmem.h>
+#include <asm/tlbflush.h>
+
+static mempool_t *page_pool, *isa_page_pool;
+
+static void *page_pool_alloc(int gfp_mask, void *data)
+{
+       int gfp = gfp_mask | (int) (long) data;
+
+       return alloc_page(gfp);
+}
+
+static void page_pool_free(void *page, void *data)
+{
+       __free_page(page);
+}
+
+/*
+ * Virtual_count is not a pure "count".
+ *  0 means that it is not mapped, and has not been mapped
+ *    since a TLB flush - it is usable.
+ *  1 means that there are no users, but it has been mapped
+ *    since the last TLB flush - so we can't use it.
+ *  n means that there are (n-1) current users of it.
+ */
+#ifdef CONFIG_HIGHMEM
+static int pkmap_count[LAST_PKMAP];
+static unsigned int last_pkmap_nr;
+static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
+
+pte_t * pkmap_page_table;
+
+static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
+
+static void flush_all_zero_pkmaps(void)
+{
+       int i;
+
+       flush_cache_kmaps();
+
+       for (i = 0; i < LAST_PKMAP; i++) {
+               struct page *page;
+
+               /*
+                * zero means we don't have anything to do,
+                * >1 means that it is still in use. Only
+                * a count of 1 means that it is free but
+                * needs to be unmapped
+                */
+               if (pkmap_count[i] != 1)
+                       continue;
+               pkmap_count[i] = 0;
+
+               /* sanity check */
+               if (pte_none(pkmap_page_table[i]))
+                       BUG();
+
+               /*
+                * Don't need an atomic fetch-and-clear op here;
+                * no-one has the page mapped, and cannot get at
+                * its virtual address (and hence PTE) without first
+                * getting the kmap_lock (which is held here).
+                * So no dangers, even with speculative execution.
+                */
+               page = pte_page(pkmap_page_table[i]);
+               pte_clear(&pkmap_page_table[i]);
+
+               set_page_address(page, NULL);
+       }
+       flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
+}
+
+static inline unsigned long map_new_virtual(struct page *page)
+{
+       unsigned long vaddr;
+       int count;
+
+start:
+       count = LAST_PKMAP;
+       /* Find an empty entry */
+       for (;;) {
+               last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
+               if (!last_pkmap_nr) {
+                       flush_all_zero_pkmaps();
+                       count = LAST_PKMAP;
+               }
+               if (!pkmap_count[last_pkmap_nr])
+                       break;  /* Found a usable entry */
+               if (--count)
+                       continue;
+
+               /*
+                * Sleep for somebody else to unmap their entries
+                */
+               {
+                       DECLARE_WAITQUEUE(wait, current);
+
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       add_wait_queue(&pkmap_map_wait, &wait);
+                       spin_unlock(&kmap_lock);
+                       schedule();
+                       remove_wait_queue(&pkmap_map_wait, &wait);
+                       spin_lock(&kmap_lock);
+
+                       /* Somebody else might have mapped it while we slept */
+                       if (page_address(page))
+                               return (unsigned long)page_address(page);
+
+                       /* Re-start */
+                       goto start;
+               }
+       }
+       vaddr = PKMAP_ADDR(last_pkmap_nr);
+       set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
+
+       pkmap_count[last_pkmap_nr] = 1;
+       set_page_address(page, (void *)vaddr);
+
+       return vaddr;
+}
+
+void kmap_flush_unused(void)
+{
+       spin_lock(&kmap_lock);
+       flush_all_zero_pkmaps();
+       spin_unlock(&kmap_lock);
+}
+
+EXPORT_SYMBOL(kmap_flush_unused);
+
+void fastcall *kmap_high(struct page *page)
+{
+       unsigned long vaddr;
+
+       /*
+        * For highmem pages, we can't trust "virtual" until
+        * after we have the lock.
+        *
+        * We cannot call this from interrupts, as it may block
+        */
+       spin_lock(&kmap_lock);
+       vaddr = (unsigned long)page_address(page);
+       if (!vaddr)
+               vaddr = map_new_virtual(page);
+       pkmap_count[PKMAP_NR(vaddr)]++;
+       if (pkmap_count[PKMAP_NR(vaddr)] < 2)
+               BUG();
+       spin_unlock(&kmap_lock);
+       return (void*) vaddr;
+}
+
+EXPORT_SYMBOL(kmap_high);
+
+void fastcall kunmap_high(struct page *page)
+{
+       unsigned long vaddr;
+       unsigned long nr;
+       int need_wakeup;
+
+       spin_lock(&kmap_lock);
+       vaddr = (unsigned long)page_address(page);
+       if (!vaddr)
+               BUG();
+       nr = PKMAP_NR(vaddr);
+
+       /*
+        * A count must never go down to zero
+        * without a TLB flush!
+        */
+       need_wakeup = 0;
+       switch (--pkmap_count[nr]) {
+       case 0:
+               BUG();
+       case 1:
+               /*
+                * Avoid an unnecessary wake_up() function call.
+                * The common case is pkmap_count[] == 1, but
+                * no waiters.
+                * The tasks queued in the wait-queue are guarded
+                * by both the lock in the wait-queue-head and by
+                * the kmap_lock.  As the kmap_lock is held here,
+                * no need for the wait-queue-head's lock.  Simply
+                * test if the queue is empty.
+                */
+               need_wakeup = waitqueue_active(&pkmap_map_wait);
+       }
+       spin_unlock(&kmap_lock);
+
+       /* do wake-up, if needed, race-free outside of the spin lock */
+       if (need_wakeup)
+               wake_up(&pkmap_map_wait);
+}
+
+EXPORT_SYMBOL(kunmap_high);
+
+#define POOL_SIZE      64
+
+static __init int init_emergency_pool(void)
+{
+       struct sysinfo i;
+       si_meminfo(&i);
+       si_swapinfo(&i);
+        
+       if (!i.totalhigh)
+               return 0;
+
+       page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
+       if (!page_pool)
+               BUG();
+       printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
+
+       return 0;
+}
+
+__initcall(init_emergency_pool);
+
+/*
+ * highmem version, map in to vec
+ */
+static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
+{
+       unsigned long flags;
+       unsigned char *vto;
+
+       local_irq_save(flags);
+       vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
+       memcpy(vto + to->bv_offset, vfrom, to->bv_len);
+       kunmap_atomic(vto, KM_BOUNCE_READ);
+       local_irq_restore(flags);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+#define bounce_copy_vec(to, vfrom)     \
+       memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
+
+#endif
+
+#define ISA_POOL_SIZE  16
+
+/*
+ * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
+ * as the max address, so check if the pool has already been created.
+ */
+int init_emergency_isa_pool(void)
+{
+       if (isa_page_pool)
+               return 0;
+
+       isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
+       if (!isa_page_pool)
+               BUG();
+
+       printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
+       return 0;
+}
+
+/*
+ * Simple bounce buffer support for highmem pages. Depending on the
+ * queue gfp mask set, *to may or may not be a highmem page. kmap it
+ * always, it will do the Right Thing
+ */
+static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
+{
+       unsigned char *vfrom;
+       struct bio_vec *tovec, *fromvec;
+       int i;
+
+       __bio_for_each_segment(tovec, to, i, 0) {
+               fromvec = from->bi_io_vec + i;
+
+               /*
+                * not bounced
+                */
+               if (tovec->bv_page == fromvec->bv_page)
+                       continue;
+
+               /*
+                * fromvec->bv_offset and fromvec->bv_len might have been
+                * modified by the block layer, so use the original copy,
+                * bounce_copy_vec already uses tovec->bv_len
+                */
+               vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
+
+               flush_dcache_page(tovec->bv_page);
+               bounce_copy_vec(tovec, vfrom);
+       }
+}
+
+static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
+{
+       struct bio *bio_orig = bio->bi_private;
+       struct bio_vec *bvec, *org_vec;
+       int i;
+
+       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
+               set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
+
+       /*
+        * free up bounce indirect pages used
+        */
+       __bio_for_each_segment(bvec, bio, i, 0) {
+               org_vec = bio_orig->bi_io_vec + i;
+               if (bvec->bv_page == org_vec->bv_page)
+                       continue;
+
+               mempool_free(bvec->bv_page, pool);      
+       }
+
+       bio_endio(bio_orig, bio_orig->bi_size, err);
+       bio_put(bio);
+}
+
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       bounce_end_io(bio, page_pool, err);
+       return 0;
+}
+
+static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       bounce_end_io(bio, isa_page_pool, err);
+       return 0;
+}
+
+static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
+{
+       struct bio *bio_orig = bio->bi_private;
+
+       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+               copy_to_high_bio_irq(bio_orig, bio);
+
+       bounce_end_io(bio, pool, err);
+}
+
+static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       __bounce_end_io_read(bio, page_pool, err);
+       return 0;
+}
+
+static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       __bounce_end_io_read(bio, isa_page_pool, err);
+       return 0;
+}
+
+static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
+                       mempool_t *pool)
+{
+       struct page *page;
+       struct bio *bio = NULL;
+       int i, rw = bio_data_dir(*bio_orig);
+       struct bio_vec *to, *from;
+
+       bio_for_each_segment(from, *bio_orig, i) {
+               page = from->bv_page;
+
+               /*
+                * is destination page below bounce pfn?
+                */
+               if (page_to_pfn(page) < q->bounce_pfn)
+                       continue;
+
+               /*
+                * irk, bounce it
+                */
+               if (!bio)
+                       bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
+
+               to = bio->bi_io_vec + i;
+
+               to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+               to->bv_len = from->bv_len;
+               to->bv_offset = from->bv_offset;
+
+               if (rw == WRITE) {
+                       char *vto, *vfrom;
+
+                       flush_dcache_page(from->bv_page);
+                       vto = page_address(to->bv_page) + to->bv_offset;
+                       vfrom = kmap(from->bv_page) + from->bv_offset;
+                       memcpy(vto, vfrom, to->bv_len);
+                       kunmap(from->bv_page);
+               }
+       }
+
+       /*
+        * no pages bounced
+        */
+       if (!bio)
+               return;
+
+       /*
+        * at least one page was bounced, fill in possible non-highmem
+        * pages
+        */
+       __bio_for_each_segment(from, *bio_orig, i, 0) {
+               to = bio_iovec_idx(bio, i);
+               if (!to->bv_page) {
+                       to->bv_page = from->bv_page;
+                       to->bv_len = from->bv_len;
+                       to->bv_offset = from->bv_offset;
+               }
+       }
+
+       bio->bi_bdev = (*bio_orig)->bi_bdev;
+       bio->bi_flags |= (1 << BIO_BOUNCED);
+       bio->bi_sector = (*bio_orig)->bi_sector;
+       bio->bi_rw = (*bio_orig)->bi_rw;
+
+       bio->bi_vcnt = (*bio_orig)->bi_vcnt;
+       bio->bi_idx = (*bio_orig)->bi_idx;
+       bio->bi_size = (*bio_orig)->bi_size;
+
+       if (pool == page_pool) {
+               bio->bi_end_io = bounce_end_io_write;
+               if (rw == READ)
+                       bio->bi_end_io = bounce_end_io_read;
+       } else {
+               bio->bi_end_io = bounce_end_io_write_isa;
+               if (rw == READ)
+                       bio->bi_end_io = bounce_end_io_read_isa;
+       }
+
+       bio->bi_private = *bio_orig;
+       *bio_orig = bio;
+}
+
+void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
+{
+       mempool_t *pool;
+
+       /*
+        * for non-isa bounce case, just check if the bounce pfn is equal
+        * to or bigger than the highest pfn in the system -- in that case,
+        * don't waste time iterating over bio segments
+        */
+       if (!(q->bounce_gfp & GFP_DMA)) {
+               if (q->bounce_pfn >= blk_max_pfn)
+                       return;
+               pool = page_pool;
+       } else {
+               BUG_ON(!isa_page_pool);
+               pool = isa_page_pool;
+       }
+
+       /*
+        * slow path
+        */
+       __blk_queue_bounce(q, bio_orig, pool);
+}
+
+EXPORT_SYMBOL(blk_queue_bounce);
+
+#if defined(HASHED_PAGE_VIRTUAL)
+
+#define PA_HASH_ORDER  7
+
+/*
+ * Describes one page->virtual association
+ */
+struct page_address_map {
+       struct page *page;
+       void *virtual;
+       struct list_head list;
+};
+
+/*
+ * page_address_map freelist, allocated from page_address_maps.
+ */
+static struct list_head page_address_pool;     /* freelist */
+static spinlock_t pool_lock;                   /* protects page_address_pool */
+
+/*
+ * Hash table bucket
+ */
+static struct page_address_slot {
+       struct list_head lh;                    /* List of page_address_maps */
+       spinlock_t lock;                        /* Protect this bucket's list */
+} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
+
+static struct page_address_slot *page_slot(struct page *page)
+{
+       return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
+}
+
+void *page_address(struct page *page)
+{
+       unsigned long flags;
+       void *ret;
+       struct page_address_slot *pas;
+
+       if (!PageHighMem(page))
+               return lowmem_page_address(page);
+
+       pas = page_slot(page);
+       ret = NULL;
+       spin_lock_irqsave(&pas->lock, flags);
+       if (!list_empty(&pas->lh)) {
+               struct page_address_map *pam;
+
+               list_for_each_entry(pam, &pas->lh, list) {
+                       if (pam->page == page) {
+                               ret = pam->virtual;
+                               goto done;
+                       }
+               }
+       }
+done:
+       spin_unlock_irqrestore(&pas->lock, flags);
+       return ret;
+}
+
+EXPORT_SYMBOL(page_address);
+
+void set_page_address(struct page *page, void *virtual)
+{
+       unsigned long flags;
+       struct page_address_slot *pas;
+       struct page_address_map *pam;
+
+       BUG_ON(!PageHighMem(page));
+
+       pas = page_slot(page);
+       if (virtual) {          /* Add */
+               BUG_ON(list_empty(&page_address_pool));
+
+               spin_lock_irqsave(&pool_lock, flags);
+               pam = list_entry(page_address_pool.next,
+                               struct page_address_map, list);
+               list_del(&pam->list);
+               spin_unlock_irqrestore(&pool_lock, flags);
+
+               pam->page = page;
+               pam->virtual = virtual;
+
+               spin_lock_irqsave(&pas->lock, flags);
+               list_add_tail(&pam->list, &pas->lh);
+               spin_unlock_irqrestore(&pas->lock, flags);
+       } else {                /* Remove */
+               spin_lock_irqsave(&pas->lock, flags);
+               list_for_each_entry(pam, &pas->lh, list) {
+                       if (pam->page == page) {
+                               list_del(&pam->list);
+                               spin_unlock_irqrestore(&pas->lock, flags);
+                               spin_lock_irqsave(&pool_lock, flags);
+                               list_add_tail(&pam->list, &page_address_pool);
+                               spin_unlock_irqrestore(&pool_lock, flags);
+                               goto done;
+                       }
+               }
+               spin_unlock_irqrestore(&pas->lock, flags);
+       }
+done:
+       return;
+}
+
+static struct page_address_map page_address_maps[LAST_PKMAP];
+
+void __init page_address_init(void)
+{
+       int i;
+
+       INIT_LIST_HEAD(&page_address_pool);
+       for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
+               list_add(&page_address_maps[i].list, &page_address_pool);
+       for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
+               INIT_LIST_HEAD(&page_address_htable[i].lh);
+               spin_lock_init(&page_address_htable[i].lock);
+       }
+       spin_lock_init(&pool_lock);
+}
+
+#endif /* defined(HASHED_PAGE_VIRTUAL) */
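Aside on the pkmap_count[] convention documented above: 0 means unmapped
and TLB-flushed (immediately reusable), 1 means mapped but unused since the
last flush (reusable only after flush_all_zero_pkmaps()), and n means n-1
active users. A toy state machine of just that counting rule (not kernel
code):

#include <assert.h>
#include <stdio.h>

enum { SLOTS = 4 };
static int count[SLOTS];

static void toy_kmap(int i)   { count[i] = (count[i] ? count[i] : 1) + 1; }
static void toy_kunmap(int i) { assert(count[i] > 1); count[i]--; }

/* Stand-in for flush_all_zero_pkmaps(): only count==1 slots (unused but
 * possibly stale in the TLB) drop to 0 and become reusable. */
static int toy_flush(void)
{
	int i, freed = 0;

	for (i = 0; i < SLOTS; i++)
		if (count[i] == 1) { count[i] = 0; freed++; }
	return freed;
}

int main(void)
{
	int freed;

	toy_kmap(0);	/* 0 -> 2: mapped, one user */
	toy_kunmap(0);	/* 2 -> 1: unused, TLB may still cache it */
	freed = toy_flush();
	printf("flushed %d slot(s), count[0]=%d\n", freed, count[0]);
	return 0;
}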
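Aside on __blk_queue_bounce() above: any bio segment whose page frame sits
at or above the queue's bounce_pfn is copied through ("bounced" via) a page
the device can actually address, drawn from page_pool or isa_page_pool;
segments already below the limit pass through untouched. A toy of that
decision (the threshold and the replacement pfn are invented; in the real
code mempool_alloc() supplies the low page):

#include <stdio.h>

#define BOUNCE_PFN 256UL	/* hypothetical device DMA reach */

static unsigned long bounce_if_needed(unsigned long pfn)
{
	if (pfn < BOUNCE_PFN)
		return pfn;	/* reachable: use as-is */

	/* Stand-in for mempool_alloc() + memcpy() through a low page. */
	unsigned long low_pfn = 42;
	printf("bounce pfn %lu -> %lu\n", pfn, low_pfn);
	return low_pfn;
}

int main(void)
{
	bounce_if_needed(10);	/* passes straight through */
	bounce_if_needed(1000);	/* copied via a bounce page */
	return 0;
}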
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blkfront/Kconfig
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/Kconfig      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,6 @@
+
+config XENBLOCK
+       tristate "Block device driver"
+       depends on ARCH_XEN
+       help
+         Block device driver for Xen
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/drivers/console/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/drivers/console/Makefile Fri Jul 15 13:39:50 2005
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-$(CONFIG_XEN_CONSOLE) := console.o
+include $(TOPDIR)/Rules.make
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/Makefile  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,66 @@
+#
+# Makefile for the Linux kernel device drivers.
+#
+# 15 Sep 2000, Christoph Hellwig <hch@xxxxxxxxxxxxx>
+# Rewritten to use lists instead of if-statements.
+#
+
+obj-$(CONFIG_PCI)              += pci/
+obj-$(CONFIG_PARISC)           += parisc/
+obj-y                          += video/
+obj-$(CONFIG_ACPI_BOOT)                += acpi/
+# PnP must come after ACPI since it will eventually need to check if acpi
+# was used and do nothing if so
+obj-$(CONFIG_PNP)              += pnp/
+
+# char/ comes before serial/ etc so that the VT console is the boot-time
+# default.
+obj-y                          += char/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810)           += video/i810/
+obj-$(CONFIG_FB_INTEL)          += video/intelfb/
+
+# we also need input/serio early so serio bus is initialized by the time
+# serial drivers start registering their serio ports
+obj-$(CONFIG_SERIO)            += input/serio/
+obj-y                          += serial/
+obj-$(CONFIG_PARPORT)          += parport/
+obj-y                          += base/ block/ misc/ net/ media/
+obj-$(CONFIG_NUBUS)            += nubus/
+obj-$(CONFIG_ATM)              += atm/
+obj-$(CONFIG_PPC_PMAC)         += macintosh/
+obj-$(CONFIG_ARCH_XEN)         += xen/
+obj-$(CONFIG_IDE)              += ide/
+obj-$(CONFIG_FC4)              += fc4/
+obj-$(CONFIG_SCSI)             += scsi/
+obj-$(CONFIG_FUSION)           += message/
+obj-$(CONFIG_IEEE1394)         += ieee1394/
+obj-y                          += cdrom/
+obj-$(CONFIG_MTD)              += mtd/
+obj-$(CONFIG_PCCARD)           += pcmcia/
+obj-$(CONFIG_DIO)              += dio/
+obj-$(CONFIG_SBUS)             += sbus/
+obj-$(CONFIG_ZORRO)            += zorro/
+obj-$(CONFIG_MAC)              += macintosh/
+obj-$(CONFIG_ATA_OVER_ETH)     += block/aoe/
+obj-$(CONFIG_PARIDE)           += block/paride/
+obj-$(CONFIG_TC)               += tc/
+obj-$(CONFIG_USB)              += usb/
+obj-$(CONFIG_USB_GADGET)       += usb/gadget/
+obj-$(CONFIG_INPUT)            += input/
+obj-$(CONFIG_GAMEPORT)         += input/gameport/
+obj-$(CONFIG_I2O)              += message/
+obj-$(CONFIG_I2C)              += i2c/
+obj-$(CONFIG_W1)               += w1/
+obj-$(CONFIG_PHONE)            += telephony/
+obj-$(CONFIG_MD)               += md/
+obj-$(CONFIG_BT)               += bluetooth/
+obj-$(CONFIG_ISDN)             += isdn/
+obj-$(CONFIG_MCA)              += mca/
+obj-$(CONFIG_EISA)             += eisa/
+obj-$(CONFIG_CPU_FREQ)         += cpufreq/
+obj-$(CONFIG_MMC)              += mmc/
+obj-$(CONFIG_INFINIBAND)       += infiniband/
+obj-y                          += firmware/
+obj-$(CONFIG_CRYPTO)           += crypto/
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,102 @@
+#ifndef _I386_TLBFLUSH_H
+#define _I386_TLBFLUSH_H
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+
+#define __flush_tlb() xen_tlb_flush()
+#define __flush_tlb_global() xen_tlb_flush()
+#define __flush_tlb_all() xen_tlb_flush()
+
+extern unsigned long pgkern_mask;
+
+#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
+
+#define __flush_tlb_single(addr) xen_invlpg(addr)
+
+#define __flush_tlb_one(addr) __flush_tlb_single(addr)
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+       if (mm == current->active_mm)
+               __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+       unsigned long addr)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+       unsigned long start, unsigned long end)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb();
+}
+
+#else
+
+#include <asm/smp.h>
+
+#define local_flush_tlb() \
+       __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb()    flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+{
+       flush_tlb_mm(vma->vm_mm);
+}
+
+#define TLBSTATE_OK    1
+#define TLBSTATE_LAZY  2
+
+struct tlb_state
+{
+       struct mm_struct *active_mm;
+       int state;
+       char __cacheline_padding[L1_CACHE_BYTES-8];
+};
+DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+
+
+#endif
+
+#define flush_tlb_kernel_range(start, end) flush_tlb_all()
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+       /* i386 does not keep any page table caches in TLB */
+}
+
+#endif /* _I386_TLBFLUSH_H */
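
The flushing entry points listed above are easiest to see with a caller.
A minimal sketch follows; the function and its names are hypothetical
illustration only, not part of this patch. It pairs a single PTE update
with the matching page-granular flush (which, via the macros at the top
of this header, reduces to xen_invlpg() on UP):

    static void remap_one_page(struct vm_area_struct *vma, unsigned long addr,
                               pte_t *ptep, pte_t newval)
    {
            set_pte(ptep, newval);          /* install the new translation */
            flush_tlb_page(vma, addr);      /* discard any stale TLB entry */
    }
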
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h   Fri Jul 15 13:39:50 2005
@@ -0,0 +1,27 @@
+#ifndef _ASM_ARCH_HOOKS_H
+#define _ASM_ARCH_HOOKS_H
+
+#include <linux/interrupt.h>
+
+/*
+ *     linux/include/asm/arch_hooks.h
+ *
+ *     define the architecture specific hooks 
+ */
+
+/* these aren't arch hooks, they are generic routines
+ * that can be used by the hooks */
+extern void init_ISA_irqs(void);
+extern void apic_intr_init(void);
+extern void smp_intr_init(void);
+extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+/* these are the defined hooks */
+extern void intr_init_hook(void);
+extern void pre_intr_init_hook(void);
+extern void pre_setup_arch_hook(void);
+extern void trap_init_hook(void);
+extern void time_init_hook(void);
+extern void mca_nmi_hook(void);
+
+#endif
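
Each of the "defined hooks" above is supplied by the subarchitecture. As a
hedged sketch (assuming a platform with no extra trap setup, in the style of
the mach-default versions), such a definition can be as small as:

    void __init trap_init_hook(void)
    {
            /* nothing platform-specific to do here */
    }
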
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,500 @@
+/******************************************************************************
+ * vbd.c
+ * 
+ * XenLinux virtual block-device driver (xvd).
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004-2005, Christian Limpach
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "block.h"
+#include <linux/blkdev.h>
+#include <linux/list.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.,
+ * potentially combinations of the two) in the naming scheme and in a few other
+ * places.
+ */
+
+#define NUM_IDE_MAJORS 10
+#define NUM_SCSI_MAJORS 9
+#define NUM_VBD_MAJORS 1
+
+struct lvdisk
+{
+    blkif_sector_t capacity; /*  0: Size in terms of 512-byte sectors.   */
+    blkif_vdev_t   device;   /*  8: Device number (opaque 16 bit value). */
+    u16            info; 
+    struct list_head list;
+};
+
+static struct xlbd_type_info xlbd_ide_type = {
+    .partn_shift = 6,
+    .disks_per_major = 2,
+    .devname = "ide",
+    .diskname = "hd",
+};
+
+static struct xlbd_type_info xlbd_scsi_type = {
+    .partn_shift = 4,
+    .disks_per_major = 16,
+    .devname = "sd",
+    .diskname = "sd",
+};
+
+static struct xlbd_type_info xlbd_vbd_type = {
+    .partn_shift = 4,
+    .disks_per_major = 16,
+    .devname = "xvd",
+    .diskname = "xvd",
+};
+
+static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
+                                         NUM_VBD_MAJORS];
+
+#define XLBD_MAJOR_IDE_START    0
+#define XLBD_MAJOR_SCSI_START   (NUM_IDE_MAJORS)
+#define XLBD_MAJOR_VBD_START    (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
+
+#define XLBD_MAJOR_IDE_RANGE    XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
+#define XLBD_MAJOR_SCSI_RANGE   XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
+#define XLBD_MAJOR_VBD_RANGE    XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+struct list_head vbds_list;
+
+#define MAJOR_XEN(dev) ((dev)>>8)
+#define MINOR_XEN(dev) ((dev) & 0xff)
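+/*
+ * Worked example: a device value of 0x0301 decodes as
+ * MAJOR_XEN() == 3 (IDE0_MAJOR) and MINOR_XEN() == 1, i.e. hda1.
+ */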
+
+static struct block_device_operations xlvbd_block_fops = 
+{
+    .owner  = THIS_MODULE,
+    .open  = blkif_open,
+    .release = blkif_release,
+    .ioctl  = blkif_ioctl,
+};
+
+spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+
+static struct lvdisk *xlvbd_device_alloc(void)
+{
+    struct lvdisk *disk;
+
+    disk = kmalloc(sizeof(*disk), GFP_KERNEL);
+    if (disk != NULL) {
+        memset(disk, 0, sizeof(*disk));
+        INIT_LIST_HEAD(&disk->list);
+    }
+    return disk;
+}
+
+static void xlvbd_device_free(struct lvdisk *disk)
+{
+    list_del(&disk->list);
+    kfree(disk);
+}
+
+static vdisk_t *xlvbd_probe(int *ret)
+{
+    blkif_response_t rsp;
+    blkif_request_t req;
+    vdisk_t *disk_info = NULL;
+    unsigned long buf;
+    int nr;
+
+    buf = __get_free_page(GFP_KERNEL);
+    if ((void *)buf == NULL)
+        goto out;
+
+    memset(&req, 0, sizeof(req));
+    req.operation = BLKIF_OP_PROBE;
+    req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    blkif_control_probe_send(&req, &rsp,
+                             (unsigned long)(virt_to_machine(buf)));
+#else
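+    /* The low bits of frame_and_sects encode the first and last sector
+     * of interest within the frame; or'ing in 7 asks for sectors 0..7,
+     * i.e. the whole 4KB probe page allocated above. */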
+    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
+
+    blkif_control_send(&req, &rsp);
+#endif
+    if ( rsp.status <= 0 ) {
+        WPRINTK("Could not probe disks (%d)\n", rsp.status);
+        goto out;
+    }
+    nr = rsp.status;
+    if ( nr > MAX_VBDS )
+        nr = MAX_VBDS;
+
+    disk_info = kmalloc(nr * sizeof(vdisk_t), GFP_KERNEL);
+    if (disk_info != NULL)
+        memcpy(disk_info, (void *) buf, nr * sizeof(vdisk_t));
+
+    if (ret != NULL)
+        *ret = nr;
+
+out:
+    free_page(buf);
+    return disk_info;
+}
+
+static struct xlbd_major_info *xlbd_alloc_major_info(
+    int major, int minor, int index)
+{
+    struct xlbd_major_info *ptr;
+
+    ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
+    if (ptr == NULL)
+        return NULL;
+
+    memset(ptr, 0, sizeof(struct xlbd_major_info));
+
+    ptr->major = major;
+
+    switch (index) {
+    case XLBD_MAJOR_IDE_RANGE:
+        ptr->type = &xlbd_ide_type;
+        ptr->index = index - XLBD_MAJOR_IDE_START;
+        break;
+    case XLBD_MAJOR_SCSI_RANGE:
+        ptr->type = &xlbd_scsi_type;
+        ptr->index = index - XLBD_MAJOR_SCSI_START;
+        break;
+    case XLBD_MAJOR_VBD_RANGE:
+        ptr->type = &xlbd_vbd_type;
+        ptr->index = index - XLBD_MAJOR_VBD_START;
+        break;
+    }
+    
+    if (register_blkdev(ptr->major, ptr->type->devname)) {
+        WPRINTK("can't get major %d with name %s\n",
+                ptr->major, ptr->type->devname);
+        kfree(ptr);
+        return NULL;
+    }
+
+    devfs_mk_dir(ptr->type->devname);
+    major_info[index] = ptr;
+    return ptr;
+}
+
+static struct xlbd_major_info *xlbd_get_major_info(int device)
+{
+    int major, minor, index;
+
+    major = MAJOR_XEN(device);
+    minor = MINOR_XEN(device);
+
+    switch (major) {
+    case IDE0_MAJOR: index = 0; break;
+    case IDE1_MAJOR: index = 1; break;
+    case IDE2_MAJOR: index = 2; break;
+    case IDE3_MAJOR: index = 3; break;
+    case IDE4_MAJOR: index = 4; break;
+    case IDE5_MAJOR: index = 5; break;
+    case IDE6_MAJOR: index = 6; break;
+    case IDE7_MAJOR: index = 7; break;
+    case IDE8_MAJOR: index = 8; break;
+    case IDE9_MAJOR: index = 9; break;
+    case SCSI_DISK0_MAJOR: index = 10; break;
+    case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+        index = 11 + major - SCSI_DISK1_MAJOR;
+        break;
+    case SCSI_CDROM_MAJOR: index = 18; break;
+    default: index = 19; break;
+    }
+
+    return ((major_info[index] != NULL) ? major_info[index] :
+            xlbd_alloc_major_info(major, minor, index));
+}
+
+static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk)
+{
+    request_queue_t *rq;
+
+    rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+    if (rq == NULL)
+        return -1;
+
+    elevator_init(rq, "noop");
+
+    /* Hard sector size and max sectors impersonate the equiv. hardware. */
+    blk_queue_hardsect_size(rq, disk->sector_size);
+    blk_queue_max_sectors(rq, 512);
+
+    /* Each segment in a request is up to an aligned page in size. */
+    blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+    blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+    /* Ensure a merged request will fit in a single I/O ring slot. */
+    blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+    blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+    /* Make sure buffer addresses are sector-aligned. */
+    blk_queue_dma_alignment(rq, 511);
+
+    gd->queue = rq;
+
+    return 0;
+}
+
+struct gendisk *xlvbd_alloc_gendisk(
+    struct xlbd_major_info *mi, int minor, vdisk_t *disk)
+{
+    struct gendisk *gd;
+    struct xlbd_disk_info *di;
+    int nr_minors = 1;
+
+    di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
+    if (di == NULL)
+        return NULL;
+    memset(di, 0, sizeof(*di));
+    di->mi = mi;
+    di->xd_device = disk->device;
+
+    if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
+        nr_minors = 1 << mi->type->partn_shift;
+
+    gd = alloc_disk(nr_minors);
+    if (gd == NULL)
+        goto out;
+
+    if (nr_minors > 1)
+        sprintf(gd->disk_name, "%s%c", mi->type->diskname,
+                'a' + mi->index * mi->type->disks_per_major +
+                    (minor >> mi->type->partn_shift));
+    else
+        sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
+                'a' + mi->index * mi->type->disks_per_major +
+                (minor >> mi->type->partn_shift),
+                minor & ((1 << mi->type->partn_shift) - 1));
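+    /*
+     * Naming example: for the IDE type (partn_shift = 6,
+     * disks_per_major = 2) at index 0, minor 64 yields
+     * 'a' + (64 >> 6) = 'b' and names the whole disk "hdb";
+     * minor 65 takes the partitioned branch and yields "hdb1".
+     */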
+
+    gd->major = mi->major;
+    gd->first_minor = minor;
+    gd->fops = &xlvbd_block_fops;
+    gd->private_data = di;
+    set_capacity(gd, disk->capacity);
+
+    if (xlvbd_init_blk_queue(gd, disk)) {
+        del_gendisk(gd);
+        goto out;
+    }
+
+    di->rq = gd->queue;
+
+    if (disk->info & VDISK_READONLY)
+        set_disk_ro(gd, 1);
+
+    if (disk->info & VDISK_REMOVABLE)
+        gd->flags |= GENHD_FL_REMOVABLE;
+
+    if (disk->info & VDISK_CDROM)
+        gd->flags |= GENHD_FL_CD;
+
+    add_disk(gd);
+
+    return gd;
+
+out:
+    kfree(di);
+    return NULL;
+}
+
+static int xlvbd_device_add(struct list_head *list, vdisk_t *disk)
+{
+    struct lvdisk *new;
+    int minor;
+    dev_t device;
+    struct block_device *bd;
+    struct gendisk *gd;
+    struct xlbd_major_info *mi;
+
+    mi = xlbd_get_major_info(disk->device);
+    if (mi == NULL)
+        return -EPERM;
+
+    new = xlvbd_device_alloc();
+    if (new == NULL)
+        return -1;
+    new->capacity = disk->capacity;
+    new->device = disk->device;
+    new->info = disk->info;
+    
+    minor = MINOR_XEN(disk->device);
+    device = MKDEV(mi->major, minor);
+    
+    bd = bdget(device);
+    if (bd == NULL)
+        goto out;
+    
+    gd = xlvbd_alloc_gendisk(mi, minor, disk);
+    if (gd == NULL)
+        goto out_bd;
+
+    list_add(&new->list, list);
+out_bd:
+    bdput(bd);
+out:
+    return 0;
+}
+
+static int xlvbd_device_del(struct lvdisk *disk)
+{
+    dev_t device;
+    struct block_device *bd;
+    struct gendisk *gd;
+    struct xlbd_disk_info *di;
+    int ret = 0, unused;
+    request_queue_t *rq;
+
+    device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device));
+
+    bd = bdget(device);
+    if (bd == NULL)
+        return -1;
+
+    gd = get_gendisk(device, &unused);
+    di = gd->private_data;
+
+    if (di->mi->usage != 0) {
+        WPRINTK("disk removal failed: used [dev=%x]\n", device);
+        ret = -1;
+        goto out;
+    }
+
+    rq = gd->queue;
+    del_gendisk(gd);
+    put_disk(gd);
+    blk_cleanup_queue(rq);
+
+    xlvbd_device_free(disk);
+out:
+    bdput(bd);
+    return ret;
+}
+
+static int xlvbd_device_update(struct lvdisk *ldisk, vdisk_t *disk)
+{
+    dev_t device;
+    struct block_device *bd;
+    struct gendisk *gd;
+    int unused;
+
+    if ((ldisk->capacity == disk->capacity) && (ldisk->info == disk->info))
+        return 0;    
+
+    device = MKDEV(MAJOR_XEN(ldisk->device), MINOR_XEN(ldisk->device));
+
+    bd = bdget(device);
+    if (bd == NULL)
+        return -1;
+
+    gd = get_gendisk(device, &unused);
+    set_capacity(gd, disk->capacity);    
+    ldisk->capacity = disk->capacity;
+
+    bdput(bd);
+
+    return 0;
+}
+
+void xlvbd_refresh(void)
+{
+    vdisk_t *newdisks;
+    struct list_head *tmp, *tmp2;
+    struct lvdisk *disk;
+    int i, nr;
+
+    newdisks = xlvbd_probe(&nr);
+    if (newdisks == NULL) {
+        WPRINTK("failed to probe\n");
+        return;
+    }
+    
+    i = 0;
+    list_for_each_safe(tmp, tmp2, &vbds_list) {
+        disk = list_entry(tmp, struct lvdisk, list);
+        
+        for (i = 0; i < nr; i++) {
+            if ( !newdisks[i].device )
+                continue;
+            if ( disk->device == newdisks[i].device ) {
+                xlvbd_device_update(disk, &newdisks[i]);
+                newdisks[i].device = 0;
+                break;
+            }
+        }
+        if (i == nr) {
+            /* gone from the new probe results: tear the device down */
+            xlvbd_device_del(disk);
+        }
+    }
+    for (i = 0; i < nr; i++)
+        if ( newdisks[i].device )
+            xlvbd_device_add(&vbds_list, &newdisks[i]);
+    kfree(newdisks);
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and updates driver
+ * state. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+void xlvbd_update_vbds(void)
+{
+    xlvbd_refresh();
+}
+
+/*
+ * Set up all the linux device goop for the virtual block devices
+ * (vbd's) that we know about. Note that although from the backend
+ * driver's p.o.v. VBDs are addressed simply as an opaque 16-bit device
+ * number, the domain creation tools conventionally allocate these
+ * numbers to correspond to those used by 'real' linux -- this is just
+ * for convenience as it means e.g. that the same /etc/fstab can be
+ * used when booting with or without Xen.
+ */
+int xlvbd_init(void)
+{
+    int i, nr;
+    vdisk_t *disks;
+
+    INIT_LIST_HEAD(&vbds_list);
+
+    memset(major_info, 0, sizeof(major_info));
+    
+    disks = xlvbd_probe(&nr);
+    if (disks == NULL) {
+        WPRINTK("failed to probe\n");
+        return -1;
+    }
+
+    for (i = 0; i < nr; i++)
+        xlvbd_device_add(&vbds_list, &disks[i]);
+
+    kfree(disks);
+    return 0;
+}
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,200 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ *     Maciej W. Rozycki       :       Various updates and fixes.
+ *     Mikael Pettersson       :       Power Management for UP-APIC.
+ *     Pavel Machek and
+ *     Mikael Pettersson       :       PM converted to driver model.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+
+#include "io_ports.h"
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+int disable_apic;
+
+void smp_local_timer_interrupt(struct pt_regs *regs)
+{
+       int cpu = smp_processor_id();
+
+       profile_tick(CPU_PROFILING, regs);
+#if 0
+       if (--per_cpu(prof_counter, cpu) <= 0) {
+               /*
+                * The multiplier may have changed since the last time we got
+                * to this point as a result of the user writing to
+                * /proc/profile. In this case we need to adjust the APIC
+                * timer accordingly.
+                *
+                * Interrupts are already masked off at this point.
+                */
+               per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
+               if (per_cpu(prof_counter, cpu) != 
+                   per_cpu(prof_old_multiplier, cpu)) {
+                       __setup_APIC_LVTT(calibration_result/
+                                       per_cpu(prof_counter, cpu));
+                       per_cpu(prof_old_multiplier, cpu) =
+                               per_cpu(prof_counter, cpu);
+               }
+
+#ifdef CONFIG_SMP
+               update_process_times(user_mode(regs));
+#endif
+       }
+#endif
+
+       /*
+        * We take the 'long' return path, and there every subsystem
+        * grabs the appropriate locks (kernel lock/ irq lock).
+        *
+        * we might want to decouple profiling from the 'long path',
+        * and do the profiling totally in assembly.
+        *
+        * Currently this isn't too much of an issue (performance-wise);
+        * we can take more than 100K local irqs per second on a 100 MHz P5.
+        */
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+void smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+       /*
+        * the NMI deadlock-detector uses this.
+        */
+       add_pda(apic_timer_irqs, 1);
+
+       /*
+        * NOTE! We'd better ACK the irq immediately,
+        * because timer handling can be slow.
+        */
+       ack_APIC_irq();
+       /*
+        * update_process_times() expects us to have done irq_enter().
+        * Besides, if we don't, timer interrupts ignore the global
+        * interrupt lock, which is the WrongThing (tm) to do.
+        */
+       irq_enter();
+       smp_local_timer_interrupt(regs);
+       irq_exit();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+       unsigned int v;
+       irq_enter();
+       /*
+        * Check if this really is a spurious interrupt and ACK it
+        * if it is a vectored one.  Just in case...
+        * Spurious interrupts should not be ACKed.
+        */
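+       /*
+        * The ISR lives in eight 32-bit registers spaced 16 bytes apart,
+        * so (vector & ~0x1f) >> 1 selects the register offset and bit
+        * (vector & 0x1f) selects the entry within it.
+        */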
+       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+               ack_APIC_irq();
+
+#if 0
+       static unsigned long last_warning; 
+       static unsigned long skipped; 
+
+       /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+       if (time_before(last_warning+30*HZ,jiffies)) { 
+               printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
+                      smp_processor_id(), skipped);
+               last_warning = jiffies; 
+               skipped = 0;
+       } else { 
+               skipped++; 
+       } 
+#endif 
+       irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+       unsigned int v, v1;
+
+       irq_enter();
+       /* First tickle the hardware, only then report what went on. -- REW */
+       v = apic_read(APIC_ESR);
+       apic_write(APIC_ESR, 0);
+       v1 = apic_read(APIC_ESR);
+       ack_APIC_irq();
+       atomic_inc(&irq_err_count);
+
+       /* Here is what the APIC error bits mean:
+          0: Send CS error
+          1: Receive CS error
+          2: Send accept error
+          3: Receive accept error
+          4: Reserved
+          5: Send illegal vector
+          6: Received illegal vector
+          7: Illegal register address
+       */
+       printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+               smp_processor_id(), v , v1);
+       irq_exit();
+}
+
+int get_physical_broadcast(void)
+{
+        return 0xff;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (smp_found_config)
+               if (!skip_ioapic_setup && nr_ioapics)
+                       setup_IO_APIC();
+#endif
+
+       return 0;
+}
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0   Fri Jul 15 13:39:50 2005
@@ -0,0 +1,927 @@
+#
+# Automatically generated by make menuconfig: don't edit
+#
+CONFIG_XEN=y
+CONFIG_X86=y
+CONFIG_ISA=y
+# CONFIG_SBUS is not set
+CONFIG_UID16=y
+
+#
+# Xen
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+CONFIG_XEN_PHYSDEV_ACCESS=y
+# CONFIG_XEN_USB_BACKEND is not set
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_GRANT=y
+# CONFIG_XEN_USB_FRONTEND is not set
+CONFIG_NO_IDLE_HZ=y
+CONFIG_FOREIGN_PAGES=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# Processor type and features
+#
+CONFIG_M686=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MVIAC3_2 is not set
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_PGE=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=11
+
+#
+# General setup
+#
+CONFIG_NET=y
+CONFIG_PCI=y
+CONFIG_PCI_NAMES=y
+CONFIG_HOTPLUG=y
+
+#
+# PCMCIA/CardBus support
+#
+# CONFIG_PCMCIA is not set
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+# CONFIG_HOTPLUG_PCI_COMPAQ is not set
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set
+# CONFIG_HOTPLUG_PCI_PCIE is not set
+# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set
+CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+CONFIG_BINFMT_AOUT=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_OOM_KILLER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play configuration
+#
+CONFIG_PNP=y
+# CONFIG_ISAPNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_CISS_MONITOR_THREAD is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_BLK_STATS is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+# CONFIG_MD_RAID0 is not set
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_MULTIPATH is not set
+CONFIG_BLK_DEV_LVM=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+
+#
+#   IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_FTP=m
+# CONFIG_IP_NF_AMANDA is not set
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_IRC=m
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=y
+# CONFIG_IP_NF_MATCH_LIMIT is not set
+# CONFIG_IP_NF_MATCH_MAC is not set
+# CONFIG_IP_NF_MATCH_PKTTYPE is not set
+# CONFIG_IP_NF_MATCH_MARK is not set
+# CONFIG_IP_NF_MATCH_MULTIPORT is not set
+# CONFIG_IP_NF_MATCH_TOS is not set
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+# CONFIG_IP_NF_MATCH_LENGTH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+# CONFIG_IP_NF_MATCH_TCPMSS is not set
+# CONFIG_IP_NF_MATCH_HELPER is not set
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+# CONFIG_IP_NF_MATCH_UNCLEAN is not set
+# CONFIG_IP_NF_MATCH_OWNER is not set
+CONFIG_IP_NF_MATCH_PHYSDEV=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+# CONFIG_IP_NF_TARGET_MIRROR is not set
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+# CONFIG_IP_NF_MANGLE is not set
+CONFIG_IP_NF_TARGET_LOG=y
+CONFIG_IP_NF_TARGET_ULOG=y
+# CONFIG_IP_NF_TARGET_TCPMSS is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+#   IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+# CONFIG_KHTTPD is not set
+
+#
+#    SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+CONFIG_VLAN_8021Q=y
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_DECNET is not set
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_IPF=m
+CONFIG_BRIDGE_EBT_ARPF=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_VLANF=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_MARKF=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+
+#
+# ATA/IDE/MFM/RLL support
+#
+CONFIG_IDE=y
+
+#
+# IDE, ATA and ATAPI Block devices
+#
+CONFIG_BLK_DEV_IDE=y
+# CONFIG_BLK_DEV_HD_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_IDEDISK_STROKE=y
+# CONFIG_BLK_DEV_IDECS is not set
+# CONFIG_BLK_DEV_DELKIN is not set
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_IDETAPE=y
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=y
+CONFIG_IDE_TASK_IOCTL=y
+CONFIG_BLK_DEV_CMD640=y
+CONFIG_BLK_DEV_CMD640_ENHANCED=y
+# CONFIG_BLK_DEV_ISAPNP is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+CONFIG_BLK_DEV_OFFBOARD=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_PCI_WIP is not set
+CONFIG_BLK_DEV_ADMA100=y
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
+CONFIG_WDC_ALI15X3=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_AMD74XX_OVERRIDE=y
+# CONFIG_BLK_DEV_ATIIXP is not set
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_NS87415=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_PDC202XX_FORCE=y
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_SC1200=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+CONFIG_BLK_DEV_TRM290=y
+CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_IDE_CHIPSETS=y
+# CONFIG_BLK_DEV_4DRIVES is not set
+# CONFIG_BLK_DEV_ALI14XX is not set
+# CONFIG_BLK_DEV_DTC2278 is not set
+# CONFIG_BLK_DEV_HT6560B is not set
+# CONFIG_BLK_DEV_PDC4030 is not set
+# CONFIG_BLK_DEV_QD65XX is not set
+# CONFIG_BLK_DEV_UMC8672 is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_DMA_NONPCI is not set
+CONFIG_BLK_DEV_PDC202XX=y
+# CONFIG_BLK_DEV_ATARAID is not set
+# CONFIG_BLK_DEV_ATARAID_PDC is not set
+# CONFIG_BLK_DEV_ATARAID_HPT is not set
+# CONFIG_BLK_DEV_ATARAID_MEDLEY is not set
+# CONFIG_BLK_DEV_ATARAID_SII is not set
+
+#
+# SCSI support
+#
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SD_EXTRA_DEVS=40
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+# CONFIG_SCSI_DEBUG_QUEUES is not set
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AHA1740 is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_PROBE_EISA_VL is not set
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_AM53C974 is not set
+CONFIG_SCSI_MEGARAID=y
+# CONFIG_SCSI_MEGARAID2 is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIS=y
+# CONFIG_SCSI_SATA_ULI is not set
+CONFIG_SCSI_SATA_VIA=y
+CONFIG_SCSI_SATA_VITESSE=y
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_DMA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_NCR53C7xx is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_SEAGATE is not set
+# CONFIG_SCSI_SIM710 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_BOOT is not set
+# CONFIG_FUSION_ISENSE is not set
+# CONFIG_FUSION_CTL is not set
+# CONFIG_FUSION_LAN is not set
+
+#
+# IEEE 1394 (FireWire) support (EXPERIMENTAL)
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+# CONFIG_I2O_PCI is not set
+# CONFIG_I2O_BLOCK is not set
+# CONFIG_I2O_LAN is not set
+# CONFIG_I2O_SCSI is not set
+# CONFIG_I2O_PROC is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_ETHERTAP is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+# CONFIG_SUNLANCE is not set
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNBMAC is not set
+# CONFIG_SUNQE is not set
+# CONFIG_SUNGEM is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+# CONFIG_ELMC is not set
+# CONFIG_ELMC_II is not set
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_TULIP is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_DGRS is not set
+# CONFIG_DM9102 is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=y
+# CONFIG_LNE390 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+CONFIG_NE2K_PCI=y
+# CONFIG_FORCEDETH is not set
+# CONFIG_NE3210 is not set
+# CONFIG_ES3210 is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_SUNDANCE_MMIO is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_MYRI_SBUS is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+CONFIG_TIGON3=y
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+# CONFIG_NET_FC is not set
+# CONFIG_RCPCI is not set
+# CONFIG_SHAPER is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# IrDA (infrared) support
+#
+# CONFIG_IRDA is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Input core support
+#
+# CONFIG_INPUT is not set
+# CONFIG_INPUT_KEYBDEV is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_UINPUT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+# CONFIG_SERIAL is not set
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+# CONFIG_MK712_MOUSE is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_GAMEPORT is not set
+# CONFIG_QIC02_TAPE is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_IPMI_PANIC_EVENT is not set
+# CONFIG_IPMI_DEVICE_INTERFACE is not set
+# CONFIG_IPMI_KCS is not set
+# CONFIG_IPMI_WATCHDOG is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_SCx200 is not set
+# CONFIG_SCx200_GPIO is not set
+# CONFIG_AMD_RNG is not set
+# CONFIG_INTEL_RNG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_AMD_PM768 is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+
+#
+# Direct Rendering Manager (XFree86 DRI support)
+#
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_OBMOUSE is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_QFMT_V2 is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_BFS_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_UMSDOS_FS=y
+CONFIG_VFAT_FS=y
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+# CONFIG_JFS_FS is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_XFS_QUOTA is not set
+# CONFIG_XFS_RT is not set
+# CONFIG_XFS_TRACE is not set
+# CONFIG_XFS_DEBUG is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_TCP is not set
+CONFIG_SUNRPC=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+# CONFIG_SMB_FS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_NCPFS_PACKET_SIGNING is not set
+# CONFIG_NCPFS_IOCTL_LOCKING is not set
+# CONFIG_NCPFS_STRONG is not set
+# CONFIG_NCPFS_NFS_NS is not set
+# CONFIG_NCPFS_OS2_NS is not set
+# CONFIG_NCPFS_SMALLDOS is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_NCPFS_EXTRAS is not set
+CONFIG_ZISOFS_FS=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SMB_NLS is not set
+CONFIG_NLS=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Console drivers
+#
+CONFIG_XEN_CONSOLE=y
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_VIDEO_SELECT is not set
+# CONFIG_MDA_CONSOLE is not set
+
+#
+# Frame-buffer support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+
+#
+# Support for USB gadgets
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# Bluetooth support
+#
+# CONFIG_BLUEZ is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_IOVIRT is not set
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_KALLSYMS=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_LOG_BUF_SHIFT=0
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+# CONFIG_FW_LOADER is not set
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/include/asm-xen/highmem.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/highmem.h Fri Jul 15 13:39:50 2005
@@ -0,0 +1,132 @@
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *                   Gerhard.Wichert@xxxxxxxxxxxxxx
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with 
+ * up to 16 Terabyte physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/kmap_types.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define HIGHMEM_DEBUG 1
+#else
+#define HIGHMEM_DEBUG 0
+#endif
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
+extern void kmap_init(void) __init;
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily; subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23))
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+#define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
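+/*
+ * Worked example: with 4KB pages, virt == PKMAP_BASE + 0x3000 gives
+ * PKMAP_NR(virt) == 3, and PKMAP_ADDR(3) maps back to the same address.
+ */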
+
+extern void * FASTCALL(kmap_high(struct page *page, int nonblocking));
+extern void FASTCALL(kunmap_high(struct page *page));
+
+#define kmap(page) __kmap(page, 0)
+#define kmap_nonblock(page) __kmap(page, 1)
+
+static inline void *__kmap(struct page *page, int nonblocking)
+{
+       if (in_interrupt())
+               out_of_line_bug();
+       if (page < highmem_start_page)
+               return page_address(page);
+       return kmap_high(page, nonblocking);
+}
+
+static inline void kunmap(struct page *page)
+{
+       if (in_interrupt())
+               out_of_line_bug();
+       if (page < highmem_start_page)
+               return;
+       kunmap_high(page);
+}
+
+/*
+ * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
+ * gives a more generic (and caching) interface. But kmap_atomic can
+ * be used in IRQ contexts, so in some (very limited) cases we need
+ * it.
+ */
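+/*
+ * Minimal usage sketch (hypothetical caller, not part of this patch):
+ * zero one highmem page from a context that must not sleep:
+ *
+ *     void *p = kmap_atomic(page, KM_USER0);
+ *     memset(p, 0, PAGE_SIZE);
+ *     kunmap_atomic(p, KM_USER0);
+ */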
+static inline void *kmap_atomic(struct page *page, enum km_type type)
+{
+       enum fixed_addresses idx;
+       unsigned long vaddr;
+
+       if (page < highmem_start_page)
+               return page_address(page);
+
+       idx = type + KM_TYPE_NR*smp_processor_id();
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#if HIGHMEM_DEBUG
+       if (!pte_none(*(kmap_pte-idx)))
+               out_of_line_bug();
+#endif
+       set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+       __flush_tlb_one(vaddr);
+
+       return (void*) vaddr;
+}
+
+static inline void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+       unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+       enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+       if (vaddr < FIXADDR_START) // FIXME
+               return;
+
+       if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+               out_of_line_bug();
+
+       /*
+        * force other mappings to Oops if they try to access
+        * this pte without first remapping it
+        */
+       pte_clear(kmap_pte-idx);
+       __flush_tlb_one(vaddr);
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_HIGHMEM_H */
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU   Fri Jul 15 13:39:50 2005
@@ -0,0 +1,562 @@
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_XEN=y
+CONFIG_X86=y
+CONFIG_ISA=y
+# CONFIG_SBUS is not set
+CONFIG_UID16=y
+
+#
+# Xen
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+# CONFIG_XEN_PHYSDEV_ACCESS is not set
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_GRANT=y
+# CONFIG_XEN_USB_FRONTEND is not set
+CONFIG_NO_IDLE_HZ=y
+# CONFIG_FOREIGN_PAGES is not set
+CONFIG_NETDEVICES=y
+# CONFIG_VT is not set
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# Processor type and features
+#
+CONFIG_M686=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MVIAC3_2 is not set
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_PGE=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=11
+
+#
+# General setup
+#
+CONFIG_NET=y
+CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+CONFIG_BINFMT_AOUT=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_OOM_KILLER is not set
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+
+#
+#   IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=y
+CONFIG_IP_NF_FTP=y
+# CONFIG_IP_NF_AMANDA is not set
+CONFIG_IP_NF_TFTP=y
+CONFIG_IP_NF_IRC=y
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=y
+# CONFIG_IP_NF_MATCH_LIMIT is not set
+# CONFIG_IP_NF_MATCH_MAC is not set
+# CONFIG_IP_NF_MATCH_PKTTYPE is not set
+# CONFIG_IP_NF_MATCH_MARK is not set
+# CONFIG_IP_NF_MATCH_MULTIPORT is not set
+# CONFIG_IP_NF_MATCH_TOS is not set
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+# CONFIG_IP_NF_MATCH_LENGTH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+# CONFIG_IP_NF_MATCH_TCPMSS is not set
+# CONFIG_IP_NF_MATCH_HELPER is not set
+CONFIG_IP_NF_MATCH_STATE=y
+CONFIG_IP_NF_MATCH_CONNTRACK=y
+# CONFIG_IP_NF_MATCH_UNCLEAN is not set
+# CONFIG_IP_NF_MATCH_OWNER is not set
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+# CONFIG_IP_NF_TARGET_MIRROR is not set
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=y
+CONFIG_IP_NF_NAT_FTP=y
+CONFIG_IP_NF_NAT_TFTP=y
+# CONFIG_IP_NF_MANGLE is not set
+CONFIG_IP_NF_TARGET_LOG=y
+CONFIG_IP_NF_TARGET_ULOG=y
+# CONFIG_IP_NF_TARGET_TCPMSS is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+#   IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+# CONFIG_KHTTPD is not set
+
+#
+#    SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+CONFIG_VLAN_8021Q=y
+
+#
+#  
+#
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_DECNET is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+
+#
+# SCSI support
+#
+CONFIG_SCSI=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_SD_EXTRA_DEVS=40
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_DEBUG_QUEUES is not set
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AHA1740 is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_AM53C974 is not set
+# CONFIG_SCSI_MEGARAID is not set
+# CONFIG_SCSI_MEGARAID2 is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+# CONFIG_SCSI_ATA_PIIX is not set
+# CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_SATA_SX4 is not set
+# CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+# CONFIG_SCSI_SATA_VIA is not set
+# CONFIG_SCSI_SATA_VITESSE is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_DMA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_PPA is not set
+# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_NCR53C7xx is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_SEAGATE is not set
+# CONFIG_SCSI_SIM710 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_BLK_STATS is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL is not set
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+# CONFIG_PRINTER is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+# CONFIG_MK712_MOUSE is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_GAMEPORT is not set
+# CONFIG_INPUT_NS558 is not set
+# CONFIG_INPUT_LIGHTNING is not set
+# CONFIG_INPUT_PCIGAME is not set
+# CONFIG_INPUT_CS461X is not set
+# CONFIG_INPUT_EMU10K1 is not set
+# CONFIG_INPUT_SERIO is not set
+# CONFIG_INPUT_SERPORT is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_ANALOG is not set
+# CONFIG_INPUT_A3D is not set
+# CONFIG_INPUT_ADI is not set
+# CONFIG_INPUT_COBRA is not set
+# CONFIG_INPUT_GF2K is not set
+# CONFIG_INPUT_GRIP is not set
+# CONFIG_INPUT_INTERACT is not set
+# CONFIG_INPUT_TMDC is not set
+# CONFIG_INPUT_SIDEWINDER is not set
+# CONFIG_INPUT_IFORCE_USB is not set
+# CONFIG_INPUT_IFORCE_232 is not set
+# CONFIG_INPUT_WARRIOR is not set
+# CONFIG_INPUT_MAGELLAN is not set
+# CONFIG_INPUT_SPACEORB is not set
+# CONFIG_INPUT_SPACEBALL is not set
+# CONFIG_INPUT_STINGER is not set
+# CONFIG_INPUT_DB9 is not set
+# CONFIG_INPUT_GAMECON is not set
+# CONFIG_INPUT_TURBOGRAFX is not set
+# CONFIG_QIC02_TAPE is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_IPMI_PANIC_EVENT is not set
+# CONFIG_IPMI_DEVICE_INTERFACE is not set
+# CONFIG_IPMI_KCS is not set
+# CONFIG_IPMI_WATCHDOG is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_SCx200 is not set
+# CONFIG_SCx200_GPIO is not set
+# CONFIG_AMD_RNG is not set
+# CONFIG_INTEL_RNG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_AMD_PM768 is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+
+#
+# Direct Rendering Manager (XFree86 DRI support)
+#
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_OBMOUSE is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_QFMT_V2 is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_BFS_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_UMSDOS_FS=y
+CONFIG_VFAT_FS=y
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+# CONFIG_JFS_FS is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_XFS_QUOTA is not set
+# CONFIG_XFS_RT is not set
+# CONFIG_XFS_TRACE is not set
+# CONFIG_XFS_DEBUG is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_TCP is not set
+CONFIG_SUNRPC=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+# CONFIG_SMB_FS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_NCPFS_PACKET_SIGNING is not set
+# CONFIG_NCPFS_IOCTL_LOCKING is not set
+# CONFIG_NCPFS_STRONG is not set
+# CONFIG_NCPFS_NFS_NS is not set
+# CONFIG_NCPFS_OS2_NS is not set
+# CONFIG_NCPFS_SMALLDOS is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_NCPFS_EXTRAS is not set
+CONFIG_ZISOFS_FS=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SMB_NLS is not set
+CONFIG_NLS=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS_DEFAULT="iso8559-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Console drivers
+#
+CONFIG_XEN_CONSOLE=y
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_IOVIRT is not set
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_KALLSYMS=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_LOG_BUF_SHIFT=0
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+CONFIG_ZLIB_INFLATE=y
+# CONFIG_ZLIB_DEFLATE is not set
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,31 @@
+#
+# Makefile for the linux x86_64-specific parts of the memory manager.
+#
+
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+CFLAGS += -Iarch/$(XENARCH)/mm
+
+obj-y  := init.o fault.o ioremap.o pageattr.o
+c-obj-y        := extable.o
+
+i386-obj-y := hypervisor.o
+
+#obj-y  := init.o fault.o ioremap.o extable.o pageattr.o
+#c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+c-obj-$(CONFIG_DISCONTIGMEM) += numa.o
+c-obj-$(CONFIG_K8_NUMA) += k8topology.o
+
+hugetlbpage-y = ../../../i386/mm/hugetlbpage.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/x86_64/mm/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(i386-obj-y)):
+       ln -fsn $(srctree)/arch/xen/i386/mm/$(notdir $@) $@
+
+obj-y  += $(c-obj-y) $(i386-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link) $(i386-obj-y))
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/mm/fault.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/mm/fault.c       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,302 @@
+/*
+ *  linux/arch/i386/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>             /* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+pgd_t *cur_pgd;
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out (timerlist_lock is acquired through the
+ * console unblank code)
+ */
+void bust_spinlocks(int yes)
+{
+       spin_lock_init(&timerlist_lock);
+       if (yes) {
+               oops_in_progress = 1;
+       } else {
+               int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+               unblank_screen();
+#endif
+               oops_in_progress = 0;
+               /*
+                * OK, the message is on the console.  Now we call printk()
+                * without oops_in_progress set so that printk will give klogd
+                * a poke.  Hold onto your hats...
+                */
+               console_loglevel = 15;          /* NMI oopser may have shut the console up */
+               printk(" ");
+               console_loglevel = loglevel_save;
+       }
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ *     bit 0 == 0 means no page found, 1 means protection fault
+ *     bit 1 == 0 means read, 1 means write
+ *     bit 2 == 0 means kernel, 1 means user-mode
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, 
+                              unsigned long error_code,
+                              unsigned long address)
+{
+       struct task_struct *tsk = current;
+       struct mm_struct *mm;
+       struct vm_area_struct * vma;
+       unsigned long page;
+       unsigned long fixup;
+       int write;
+       siginfo_t info;
+
+        /* Set the "privileged fault" bit to something sane. */
+        error_code &= 3;
+        error_code |= (regs->xcs & 2) << 1;
+
+       /*
+        * We fault-in kernel-space virtual memory on-demand. The
+        * 'reference' page table is init_mm.pgd.
+        *
+        * NOTE! We MUST NOT take any locks for this case. We may
+        * be in an interrupt or a critical region, and should
+        * only copy the information from the master page table,
+        * nothing more.
+        *
+        * This verifies that the fault happens in kernel space
+        * (error_code & 4) == 0, and that the fault was not a
+        * protection error (error_code & 1) == 0.
+        */
+       if (address >= TASK_SIZE && !(error_code & 5))
+               goto vmalloc_fault;
+
+       mm = tsk->mm;
+       info.si_code = SEGV_MAPERR;
+
+       /*
+        * If we're in an interrupt or have no user
+        * context, we must not take the fault..
+        */
+       if (in_interrupt() || !mm)
+               goto no_context;
+
+       down_read(&mm->mmap_sem);
+
+       vma = find_vma(mm, address);
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start <= address)
+               goto good_area;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (error_code & 4) {
+               /*
+                * accessing the stack below %esp is always a bug.
+                * The "+ 32" is there due to some instructions (like
+                * pusha) doing post-decrement on the stack and that
+                * doesn't show up until later..
+                */
+               if (address + 32 < regs->esp)
+                       goto bad_area;
+       }
+       if (expand_stack(vma, address))
+               goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+       info.si_code = SEGV_ACCERR;
+       write = 0;
+       switch (error_code & 3) {
+               default:        /* 3: write, present */
+                       /* fall through */
+               case 2:         /* write, not present */
+                       if (!(vma->vm_flags & VM_WRITE))
+                               goto bad_area;
+                       write++;
+                       break;
+               case 1:         /* read, present */
+                       goto bad_area;
+               case 0:         /* read, not present */
+                       if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                               goto bad_area;
+       }
+
+ survive:
+       /*
+        * If for any reason at all we couldn't handle the fault,
+        * make sure we exit gracefully rather than endlessly redo
+        * the fault.
+        */
+       switch (handle_mm_fault(mm, vma, address, write)) {
+       case 1:
+               tsk->min_flt++;
+               break;
+       case 2:
+               tsk->maj_flt++;
+               break;
+       case 0:
+               goto do_sigbus;
+       default:
+               goto out_of_memory;
+       }
+
+       up_read(&mm->mmap_sem);
+       return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+       up_read(&mm->mmap_sem);
+
+       /* User mode accesses just cause a SIGSEGV */
+       if (error_code & 4) {
+               tsk->thread.cr2 = address;
+               /* Kernel addresses are always protection faults */
+               tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+               tsk->thread.trap_no = 14;
+               info.si_signo = SIGSEGV;
+               info.si_errno = 0;
+               /* info.si_code has been set above */
+               info.si_addr = (void *)address;
+               force_sig_info(SIGSEGV, &info, tsk);
+               return;
+       }
+
+no_context:
+       /* Are we prepared to handle this kernel fault?  */
+       if ((fixup = search_exception_table(regs->eip)) != 0) {
+               regs->eip = fixup;
+               return;
+       }
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+       bust_spinlocks(1);
+
+       if (address < PAGE_SIZE)
+               printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+       else
+               printk(KERN_ALERT "Unable to handle kernel paging request");
+       printk(" at virtual address %08lx\n",address);
+       printk(" printing eip:\n");
+       printk("%08lx\n", regs->eip);
+        page = ((unsigned long *) cur_pgd)[address >> 22];
+        printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page));
+       if (page & 1) {
+               page &= PAGE_MASK;
+               address &= 0x003ff000;
+                page = machine_to_phys(page);
+               page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+                printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, 
+                       machine_to_phys(page));
+       }
+       die("Oops", regs, error_code);
+       bust_spinlocks(0);
+       do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+       if (tsk->pid == 1) {
+               yield();
+               goto survive;
+       }
+       up_read(&mm->mmap_sem);
+       printk("VM: killing process %s\n", tsk->comm);
+       if (error_code & 4)
+               do_exit(SIGKILL);
+       goto no_context;
+
+do_sigbus:
+       up_read(&mm->mmap_sem);
+
+       /*
+        * Send a sigbus, regardless of whether we were in kernel
+        * or user mode.
+        */
+       tsk->thread.cr2 = address;
+       tsk->thread.error_code = error_code;
+       tsk->thread.trap_no = 14;
+       info.si_signo = SIGBUS;
+       info.si_errno = 0;
+       info.si_code = BUS_ADRERR;
+       info.si_addr = (void *)address;
+       force_sig_info(SIGBUS, &info, tsk);
+
+       /* Kernel mode? Handle exceptions or die */
+       if (!(error_code & 4))
+               goto no_context;
+       return;
+
+vmalloc_fault:
+       {
+               /*
+                * Synchronize this task's top level page-table
+                * with the 'reference' page table.
+                *
+                * Do _not_ use "tsk" here. We might be inside
+                * an interrupt in the middle of a task switch..
+                */
+               int offset = __pgd_offset(address);
+               pgd_t *pgd, *pgd_k;
+               pmd_t *pmd, *pmd_k;
+               pte_t *pte_k;
+
+               pgd = offset + cur_pgd;
+               pgd_k = init_mm.pgd + offset;
+
+               if (!pgd_present(*pgd_k))
+                       goto no_context;
+               set_pgd(pgd, *pgd_k);
+               
+               pmd = pmd_offset(pgd, address);
+               pmd_k = pmd_offset(pgd_k, address);
+               if (!pmd_present(*pmd_k))
+                       goto no_context;
+               set_pmd(pmd, *pmd_k);
+
+               pte_k = pte_offset(pmd_k, address);
+               if (!pte_present(*pte_k))
+                       goto no_context;
+               return;
+       }
+}
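
The hunk above deserves a gloss: do_page_fault() decodes the hardware error
code described in the comment near the top of the file (bit 0 = protection
fault, bit 1 = write, bit 2 = user mode), and this Xen variant first rebuilds
bit 2 from the saved code segment, since the fault is delivered through the
hypervisor. A minimal sketch of that decoding, with an invented helper name:

    /* Sketch only: decode the x86 page-fault error code the way
     * do_page_fault() above interprets it. The helper name is
     * made up for illustration. */
    static void decode_fault_code(unsigned long error_code)
    {
            int prot  = error_code & 1;         /* 0: not present, 1: protection */
            int write = (error_code >> 1) & 1;  /* 0: read, 1: write */
            int user  = (error_code >> 2) & 1;  /* 0: kernel, 1: user mode */

            printk("fault: %s %s access from %s mode\n",
                   prot ? "protection" : "not-present",
                   write ? "write" : "read",
                   user ? "user" : "kernel");
    }
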
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/drivers/netif/frontend/Makefile  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/char/mem.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/char/mem.c        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,733 @@
+/*
+ *  linux/drivers/char/mem.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Added devfs support. 
+ *    Jan-11-1998, C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>
+ *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@xxxxxxx>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/raw.h>
+#include <linux/tty.h>
+#include <linux/capability.h>
+#include <linux/smp_lock.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/ptrace.h>
+#include <linux/device.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_IA64
+# include <linux/efi.h>
+#endif
+
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+extern void tapechar_init(void);
+#endif
+
+/*
+ * Architectures vary in how they handle caching for addresses
+ * outside of main memory.
+ *
+ */
+static inline int uncached_access(struct file *file, unsigned long addr)
+{
+#if defined(__i386__)
+       /*
+        * On the PPro and successors, the MTRRs are used to set
+        * memory types for physical addresses outside main memory,
+        * so blindly setting PCD or PWT on those pages is wrong.
+        * For Pentiums and earlier, the surround logic should disable
+        * caching for the high addresses through the KEN pin, but
+        * we maintain the tradition of paranoia in this code.
+        */
+       if (file->f_flags & O_SYNC)
+               return 1;
+       return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
+                 test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
+                 test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
+                 test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
+         && addr >= __pa(high_memory);
+#elif defined(__x86_64__)
+       /* 
+        * This is broken because it can generate memory type aliases,
+        * which can cause cache corruptions
+        * But it is only available for root and we have to be bug-to-bug
+        * compatible with i386.
+        */
+       if (file->f_flags & O_SYNC)
+               return 1;
+       /* same behaviour as i386. PAT always set to cached and MTRRs control the
+          caching behaviour. 
+          Hopefully a full PAT implementation will fix that soon. */      
+       return 0;
+#elif defined(CONFIG_IA64)
+       /*
+        * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
+        */
+       return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
+#elif defined(CONFIG_PPC64)
+       /* On PPC64, we always do non-cacheable access to the IO hole and
+        * cacheable elsewhere. Cache paradox can checkstop the CPU and
+        * the high_memory heuristic below is wrong on machines with memory
+        * above the IO hole... Ah, and of course, XFree86 doesn't pass
+        * O_SYNC when mapping us to tap IO space. Surprised ?
+        */
+       return !page_is_ram(addr >> PAGE_SHIFT);
+#else
+       /*
+        * Accessing memory above the top the kernel knows about or through a file pointer
+        * that was marked O_SYNC will be done non-cached.
+        */
+       if (file->f_flags & O_SYNC)
+               return 1;
+       return addr >= __pa(high_memory);
+#endif
+}
+
+#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
+static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
+{
+       unsigned long end_mem;
+
+       end_mem = __pa(high_memory);
+       if (addr >= end_mem)
+               return 0;
+
+       if (*count > end_mem - addr)
+               *count = end_mem - addr;
+
+       return 1;
+}
+#endif
+
+static ssize_t do_write_mem(void *p, unsigned long realp,
+                           const char __user * buf, size_t count, loff_t *ppos)
+{
+       ssize_t written;
+       unsigned long copied;
+
+       written = 0;
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (realp < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE-realp;
+               if (sz > count) sz = count; 
+               /* Hmm. Do something? */
+               buf+=sz;
+               p+=sz;
+               count-=sz;
+               written+=sz;
+       }
+#endif
+       copied = copy_from_user(p, buf, count);
+       if (copied) {
+               ssize_t ret = written + (count - copied);
+
+               if (ret)
+                       return ret;
+               return -EFAULT;
+       }
+       written += count;
+       *ppos += written;
+       return written;
+}
+
+#ifndef ARCH_HAS_DEV_MEM
+/*
+ * This function reads the *physical* memory. The f_pos points directly to the 
+ * memory location. 
+ */
+static ssize_t read_mem(struct file * file, char __user * buf,
+                       size_t count, loff_t *ppos)
+{
+       unsigned long p = *ppos;
+       ssize_t read;
+
+       if (!valid_phys_addr_range(p, &count))
+               return -EFAULT;
+       read = 0;
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (p < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE-p;
+               if (sz > count) 
+                       sz = count; 
+               if (sz > 0) {
+                       if (clear_user(buf, sz))
+                               return -EFAULT;
+                       buf += sz; 
+                       p += sz; 
+                       count -= sz; 
+                       read += sz; 
+               }
+       }
+#endif
+       if (copy_to_user(buf, __va(p), count))
+               return -EFAULT;
+       read += count;
+       *ppos += read;
+       return read;
+}
+
+static ssize_t write_mem(struct file * file, const char __user * buf, 
+                        size_t count, loff_t *ppos)
+{
+       unsigned long p = *ppos;
+
+       if (!valid_phys_addr_range(p, &count))
+               return -EFAULT;
+       return do_write_mem(__va(p), p, buf, count, ppos);
+}
+#endif
+
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+#ifdef pgprot_noncached
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+       int uncached;
+
+       uncached = uncached_access(file, offset);
+       if (uncached)
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+#endif
+
+       /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+       if (remap_pfn_range(vma,
+                           vma->vm_start,
+                           vma->vm_pgoff,
+                           vma->vm_end-vma->vm_start,
+                           vma->vm_page_prot))
+               return -EAGAIN;
+       return 0;
+}
+
+extern long vread(char *buf, char *addr, unsigned long count);
+extern long vwrite(char *buf, char *addr, unsigned long count);
+
+/*
+ * This function reads the *virtual* memory as seen by the kernel.
+ */
+static ssize_t read_kmem(struct file *file, char __user *buf, 
+                        size_t count, loff_t *ppos)
+{
+       unsigned long p = *ppos;
+       ssize_t read = 0;
+       ssize_t virtr = 0;
+       char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
+               
+       if (p < (unsigned long) high_memory) {
+               read = count;
+               if (count > (unsigned long) high_memory - p)
+                       read = (unsigned long) high_memory - p;
+
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+               /* we don't have page 0 mapped on sparc and m68k.. */
+               if (p < PAGE_SIZE && read > 0) {
+                       size_t tmp = PAGE_SIZE - p;
+                       if (tmp > read) tmp = read;
+                       if (clear_user(buf, tmp))
+                               return -EFAULT;
+                       buf += tmp;
+                       p += tmp;
+                       read -= tmp;
+                       count -= tmp;
+               }
+#endif
+               if (copy_to_user(buf, (char *)p, read))
+                       return -EFAULT;
+               p += read;
+               buf += read;
+               count -= read;
+       }
+
+       if (count > 0) {
+               kbuf = (char *)__get_free_page(GFP_KERNEL);
+               if (!kbuf)
+                       return -ENOMEM;
+               while (count > 0) {
+                       int len = count;
+
+                       if (len > PAGE_SIZE)
+                               len = PAGE_SIZE;
+                       len = vread(kbuf, (char *)p, len);
+                       if (!len)
+                               break;
+                       if (copy_to_user(buf, kbuf, len)) {
+                               free_page((unsigned long)kbuf);
+                               return -EFAULT;
+                       }
+                       count -= len;
+                       buf += len;
+                       virtr += len;
+                       p += len;
+               }
+               free_page((unsigned long)kbuf);
+       }
+       *ppos = p;
+       return virtr + read;
+}
+
+/*
+ * This function writes to the *virtual* memory as seen by the kernel.
+ */
+static ssize_t write_kmem(struct file * file, const char __user * buf, 
+                         size_t count, loff_t *ppos)
+{
+       unsigned long p = *ppos;
+       ssize_t wrote = 0;
+       ssize_t virtr = 0;
+       ssize_t written;
+       char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+
+       if (p < (unsigned long) high_memory) {
+
+               wrote = count;
+               if (count > (unsigned long) high_memory - p)
+                       wrote = (unsigned long) high_memory - p;
+
+               written = do_write_mem((void*)p, p, buf, wrote, ppos);
+               if (written != wrote)
+                       return written;
+               wrote = written;
+               p += wrote;
+               buf += wrote;
+               count -= wrote;
+       }
+
+       if (count > 0) {
+               kbuf = (char *)__get_free_page(GFP_KERNEL);
+               if (!kbuf)
+                       return wrote ? wrote : -ENOMEM;
+               while (count > 0) {
+                       int len = count;
+
+                       if (len > PAGE_SIZE)
+                               len = PAGE_SIZE;
+                       if (len) {
+                               written = copy_from_user(kbuf, buf, len);
+                               if (written) {
+                                       ssize_t ret;
+
+                                       free_page((unsigned long)kbuf);
+                                       ret = wrote + virtr + (len - written);
+                                       return ret ? ret : -EFAULT;
+                               }
+                       }
+                       len = vwrite(kbuf, (char *)p, len);
+                       count -= len;
+                       buf += len;
+                       virtr += len;
+                       p += len;
+               }
+               free_page((unsigned long)kbuf);
+       }
+
+       *ppos = p;
+       return virtr + wrote;
+}
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static ssize_t read_port(struct file * file, char __user * buf,
+                        size_t count, loff_t *ppos)
+{
+       unsigned long i = *ppos;
+       char __user *tmp = buf;
+
+       if (verify_area(VERIFY_WRITE,buf,count))
+               return -EFAULT; 
+       while (count-- > 0 && i < 65536) {
+               if (__put_user(inb(i),tmp) < 0) 
+                       return -EFAULT;  
+               i++;
+               tmp++;
+       }
+       *ppos = i;
+       return tmp-buf;
+}
+
+static ssize_t write_port(struct file * file, const char __user * buf,
+                         size_t count, loff_t *ppos)
+{
+       unsigned long i = *ppos;
+       const char __user * tmp = buf;
+
+       if (verify_area(VERIFY_READ,buf,count))
+               return -EFAULT;
+       while (count-- > 0 && i < 65536) {
+               char c;
+               if (__get_user(c, tmp)) 
+                       return -EFAULT; 
+               outb(c,i);
+               i++;
+               tmp++;
+       }
+       *ppos = i;
+       return tmp-buf;
+}
+#endif
+
+static ssize_t read_null(struct file * file, char __user * buf,
+                        size_t count, loff_t *ppos)
+{
+       return 0;
+}
+
+static ssize_t write_null(struct file * file, const char __user * buf,
+                         size_t count, loff_t *ppos)
+{
+       return count;
+}
+
+#ifdef CONFIG_MMU
+/*
+ * For fun, we are using the MMU for this.
+ */
+static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
+{
+       struct mm_struct *mm;
+       struct vm_area_struct * vma;
+       unsigned long addr=(unsigned long)buf;
+
+       mm = current->mm;
+       /* Oops, this was forgotten before. -ben */
+       down_read(&mm->mmap_sem);
+
+       /* For private mappings, just map in zero pages. */
+       for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+               unsigned long count;
+
+               if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
+                       goto out_up;
+               if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
+                       break;
+               count = vma->vm_end - addr;
+               if (count > size)
+                       count = size;
+
+               zap_page_range(vma, addr, count, NULL);
+               zeromap_page_range(vma, addr, count, PAGE_COPY);
+
+               size -= count;
+               buf += count;
+               addr += count;
+               if (size == 0)
+                       goto out_up;
+       }
+
+       up_read(&mm->mmap_sem);
+       
+       /* The shared case is hard. Let's do the conventional zeroing. */ 
+       do {
+               unsigned long unwritten = clear_user(buf, PAGE_SIZE);
+               if (unwritten)
+                       return size + unwritten - PAGE_SIZE;
+               cond_resched();
+               buf += PAGE_SIZE;
+               size -= PAGE_SIZE;
+       } while (size);
+
+       return size;
+out_up:
+       up_read(&mm->mmap_sem);
+       return size;
+}
+
+static ssize_t read_zero(struct file * file, char __user * buf, 
+                        size_t count, loff_t *ppos)
+{
+       unsigned long left, unwritten, written = 0;
+
+       if (!count)
+               return 0;
+
+       if (!access_ok(VERIFY_WRITE, buf, count))
+               return -EFAULT;
+
+       left = count;
+
+       /* do we want to be clever? Arbitrary cut-off */
+       if (count >= PAGE_SIZE*4) {
+               unsigned long partial;
+
+               /* How much left of the page? */
+               partial = (PAGE_SIZE-1) & -(unsigned long) buf;
+               unwritten = clear_user(buf, partial);
+               written = partial - unwritten;
+               if (unwritten)
+                       goto out;
+               left -= partial;
+               buf += partial;
+               unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
+               written += (left & PAGE_MASK) - unwritten;
+               if (unwritten)
+                       goto out;
+               buf += left & PAGE_MASK;
+               left &= ~PAGE_MASK;
+       }
+       unwritten = clear_user(buf, left);
+       written += left - unwritten;
+out:
+       return written ? written : -EFAULT;
+}
+
+static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+{
+       if (vma->vm_flags & VM_SHARED)
+               return shmem_zero_setup(vma);
+       if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+               return -EAGAIN;
+       return 0;
+}
+#else /* CONFIG_MMU */
+static ssize_t read_zero(struct file * file, char * buf, 
+                        size_t count, loff_t *ppos)
+{
+       size_t todo = count;
+
+       while (todo) {
+               size_t chunk = todo;
+
+               if (chunk > 4096)
+                       chunk = 4096;   /* Just for latency reasons */
+               if (clear_user(buf, chunk))
+                       return -EFAULT;
+               buf += chunk;
+               todo -= chunk;
+               cond_resched();
+       }
+       return count;
+}
+
+static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+{
+       return -ENOSYS;
+}
+#endif /* CONFIG_MMU */
+
+static ssize_t write_full(struct file * file, const char __user * buf,
+                         size_t count, loff_t *ppos)
+{
+       return -ENOSPC;
+}
+
+/*
+ * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
+ * can fopen() both devices with "a" now.  This was previously impossible.
+ * -- SRB.
+ */
+
+static loff_t null_lseek(struct file * file, loff_t offset, int orig)
+{
+       return file->f_pos = 0;
+}
+
+/*
+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
+ * check against negative addresses: they are ok. The return value is weird,
+ * though, in that case (0).
+ *
+ * also note that seeking relative to the "end of file" isn't supported:
+ * it has no meaning, so it returns -EINVAL.
+ */
+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+{
+       loff_t ret;
+
+       down(&file->f_dentry->d_inode->i_sem);
+       switch (orig) {
+               case 0:
+                       file->f_pos = offset;
+                       ret = file->f_pos;
+                       force_successful_syscall_return();
+                       break;
+               case 1:
+                       file->f_pos += offset;
+                       ret = file->f_pos;
+                       force_successful_syscall_return();
+                       break;
+               default:
+                       ret = -EINVAL;
+       }
+       up(&file->f_dentry->d_inode->i_sem);
+       return ret;
+}
+
+static int open_port(struct inode * inode, struct file * filp)
+{
+       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+#define mmap_mem       mmap_kmem
+#define zero_lseek     null_lseek
+#define full_lseek      null_lseek
+#define write_zero     write_null
+#define read_full       read_zero
+#define open_mem       open_port
+#define open_kmem      open_mem
+
+#ifndef ARCH_HAS_DEV_MEM
+static struct file_operations mem_fops = {
+       .llseek         = memory_lseek,
+       .read           = read_mem,
+       .write          = write_mem,
+       .mmap           = mmap_mem,
+       .open           = open_mem,
+};
+#else
+extern struct file_operations mem_fops;
+#endif
+
+static struct file_operations kmem_fops = {
+       .llseek         = memory_lseek,
+       .read           = read_kmem,
+       .write          = write_kmem,
+       .mmap           = mmap_kmem,
+       .open           = open_kmem,
+};
+
+static struct file_operations null_fops = {
+       .llseek         = null_lseek,
+       .read           = read_null,
+       .write          = write_null,
+};
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static struct file_operations port_fops = {
+       .llseek         = memory_lseek,
+       .read           = read_port,
+       .write          = write_port,
+       .open           = open_port,
+};
+#endif
+
+static struct file_operations zero_fops = {
+       .llseek         = zero_lseek,
+       .read           = read_zero,
+       .write          = write_zero,
+       .mmap           = mmap_zero,
+};
+
+static struct file_operations full_fops = {
+       .llseek         = full_lseek,
+       .read           = read_full,
+       .write          = write_full,
+};
+
+static ssize_t kmsg_write(struct file * file, const char __user * buf,
+                         size_t count, loff_t *ppos)
+{
+       char *tmp;
+       int ret;
+
+       tmp = kmalloc(count + 1, GFP_KERNEL);
+       if (tmp == NULL)
+               return -ENOMEM;
+       ret = -EFAULT;
+       if (!copy_from_user(tmp, buf, count)) {
+               tmp[count] = 0;
+               ret = printk("%s", tmp);
+       }
+       kfree(tmp);
+       return ret;
+}
+
+static struct file_operations kmsg_fops = {
+       .write =        kmsg_write,
+};
+
+static int memory_open(struct inode * inode, struct file * filp)
+{
+       switch (iminor(inode)) {
+               case 1:
+                       filp->f_op = &mem_fops;
+                       break;
+               case 2:
+                       filp->f_op = &kmem_fops;
+                       break;
+               case 3:
+                       filp->f_op = &null_fops;
+                       break;
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+               case 4:
+                       filp->f_op = &port_fops;
+                       break;
+#endif
+               case 5:
+                       filp->f_op = &zero_fops;
+                       break;
+               case 7:
+                       filp->f_op = &full_fops;
+                       break;
+               case 8:
+                       filp->f_op = &random_fops;
+                       break;
+               case 9:
+                       filp->f_op = &urandom_fops;
+                       break;
+               case 11:
+                       filp->f_op = &kmsg_fops;
+                       break;
+               default:
+                       return -ENXIO;
+       }
+       if (filp->f_op && filp->f_op->open)
+               return filp->f_op->open(inode,filp);
+       return 0;
+}
+
+static struct file_operations memory_fops = {
+       .open           = memory_open,  /* just a selector for the real open */
+};
+
+static const struct {
+       unsigned int            minor;
+       char                    *name;
+       umode_t                 mode;
+       struct file_operations  *fops;
+} devlist[] = { /* list of minor devices */
+       {1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
+       {2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
+       {3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+       {4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
+#endif
+       {5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
+       {7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
+       {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
+       {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
+       {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
+};
+
+static struct class_simple *mem_class;
+
+static int __init chr_dev_init(void)
+{
+       int i;
+
+       if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
+               printk("unable to get major %d for memory devs\n", MEM_MAJOR);
+
+       mem_class = class_simple_create(THIS_MODULE, "mem");
+       for (i = 0; i < ARRAY_SIZE(devlist); i++) {
+               class_simple_device_add(mem_class,
+                                       MKDEV(MEM_MAJOR, devlist[i].minor),
+                                       NULL, devlist[i].name);
+               devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
+                               S_IFCHR | devlist[i].mode, devlist[i].name);
+       }
+       
+       return 0;
+}
+
+fs_initcall(chr_dev_init);
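
As a usage note for memory_open() above: the minor number selects the device
personality, e.g. minor 5 is /dev/zero, whose mmap_zero() backs shared
mappings with shmem and private ones with zero pages. A small userspace
sketch (plain POSIX, nothing Xen-specific) that exercises that path:

    /* Userspace sketch: map a zero-filled page through /dev/zero,
     * which lands in mmap_zero() above. Error handling kept minimal. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/zero", O_RDWR);
            if (fd < 0)
                    return 1;
            char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE, fd, 0);
            close(fd);
            if (p == MAP_FAILED)
                    return 1;
            printf("first byte: %d\n", p[0]);  /* always 0 */
            munmap(p, 4096);
            return 0;
    }
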
diff -r 35b74976f598 -r a83ac0806d6b patches/linux-2.6.11/rcu-nohz.patch
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/patches/linux-2.6.11/rcu-nohz.patch       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,16 @@
+diff -ur linux-2.6.11/kernel/rcupdate.c linux-2.6.11-rcu/kernel/rcupdate.c
+--- linux-2.6.11/kernel/rcupdate.c     2005-05-30 10:51:41 +01:00
++++ linux-2.6.11-rcu/kernel/rcupdate.c 2005-05-30 10:53:53 +01:00
+@@ -202,8 +202,11 @@
+  */
+ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+ {
++      cpumask_t mask;
++
+       cpu_clear(cpu, rsp->cpumask);
+-      if (cpus_empty(rsp->cpumask)) {
++      cpus_andnot(mask, rsp->cpumask, nohz_cpu_mask);
++      if (cpus_empty(mask)) {
+               /* batch completed ! */
+               rcp->completed = rcp->cur;
+               rcu_start_batch(rcp, rsp, 0);
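
The point of the rcu-nohz patch above: a CPU sitting in nohz idle has its
tick stopped and will never report a quiescent state, so the grace-period
check must not wait for it. The patch therefore tests whether the pending
mask is empty once the nohz CPUs are masked out. Restated on a plain
bitmask (names invented for the example):

    /* Illustration only: the completion test from the patch. */
    static int batch_complete(unsigned long pending_cpus,
                              unsigned long nohz_cpus)
    {
            /* cpus_andnot(mask, rsp->cpumask, nohz_cpu_mask);
               cpus_empty(mask);                                */
            return (pending_cpus & ~nohz_cpus) == 0;
    }
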
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,225 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/syscalls.h>
+#include <linux/tty.h>
+#include <linux/ioctl32.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/nmi.h>
+#include <asm/kdebug.h>
+#include <asm/unistd.h>
+#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+
+extern spinlock_t rtc_lock;
+
+#ifdef CONFIG_SMP
+extern void __write_lock_failed(rwlock_t *rw);
+extern void __read_lock_failed(rwlock_t *rw);
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+//EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(disable_irq_nosync);
+EXPORT_SYMBOL(probe_irq_mask);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+#ifdef CONFIG_ACPI_BOOT
+EXPORT_SYMBOL(pm_power_off);
+#endif
+EXPORT_SYMBOL(get_cmos_time);
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+EXPORT_SYMBOL(ip_compute_csum);
+/* Delay loops */
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(copy_user_generic);
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_in_user);
+EXPORT_SYMBOL(strnlen_user);
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_alloc_consistent);
+EXPORT_SYMBOL(pci_free_consistent);
+#endif
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pcibios_penalize_isa_irq);
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+EXPORT_SYMBOL(copy_page);
+EXPORT_SYMBOL(clear_page);
+
+EXPORT_SYMBOL(cpu_pda);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(__write_lock_failed);
+EXPORT_SYMBOL(__read_lock_failed);
+
+EXPORT_SYMBOL(synchronize_irq);
+EXPORT_SYMBOL(smp_call_function);
+EXPORT_SYMBOL(cpu_callout_map);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+/* EXPORT_SYMBOL_GPL(set_nmi_callback);
+   EXPORT_SYMBOL_GPL(unset_nmi_callback); */
+
+/* Export string functions. We normally rely on gcc builtin for most of these,
+   but gcc sometimes decides not to inline them. */    
+#undef memcpy
+#undef memset
+#undef memmove
+#undef memchr
+#undef strlen
+#undef strcpy
+#undef strncmp
+#undef strncpy
+#undef strchr  
+#undef strcmp 
+#undef strcpy 
+#undef strcat
+#undef memcmp
+
+extern void * memset(void *,int,__kernel_size_t);
+extern size_t strlen(const char *);
+extern void * memmove(void * dest,const void *src,size_t count);
+extern char * strcpy(char * dest,const char *src);
+extern int strcmp(const char * cs,const char * ct);
+extern void *memchr(const void *s, int c, size_t n);
+extern void * memcpy(void *,const void *,__kernel_size_t);
+extern void * __memcpy(void *,const void *,__kernel_size_t);
+extern char * strcat(char *, const char *);
+extern int memcmp(const void * cs,const void * ct,size_t count);
+
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strncat);
+EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(memscan);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memcmp);
+
+#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
+/* prototypes are wrong, these are assembly with custom calling functions */
+extern void rwsem_down_read_failed_thunk(void);
+extern void rwsem_wake_thunk(void);
+extern void rwsem_downgrade_thunk(void);
+extern void rwsem_down_write_failed_thunk(void);
+EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
+EXPORT_SYMBOL(rwsem_wake_thunk);
+EXPORT_SYMBOL(rwsem_downgrade_thunk);
+EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
+#endif
+
+EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif
+
+EXPORT_SYMBOL(die_chain);
+EXPORT_SYMBOL(register_die_notifier);
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(smp_num_siblings);
+#endif
+
+extern void do_softirq_thunk(void);
+EXPORT_SYMBOL(do_softirq_thunk);
+
+void out_of_line_bug(void);
+EXPORT_SYMBOL(out_of_line_bug);
+
+EXPORT_SYMBOL(init_level4_pgt);
+
+extern unsigned long __supported_pte_mask;
+EXPORT_SYMBOL(__supported_pte_mask);
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(flush_tlb_page);
+EXPORT_SYMBOL_GPL(flush_tlb_all);
+#endif
+
+EXPORT_SYMBOL(cpu_khz);
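
x8664_ksyms.c exists only to make the listed symbols visible to loadable
modules. As a reminder of the mechanism (generic 2.6 module boilerplate,
not part of this changeset), any module can then reference an exported
symbol directly:

    /* Sketch: a trivial module using a symbol exported above. */
    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/module.h>

    extern unsigned int cpu_khz;  /* EXPORT_SYMBOL(cpu_khz) above */

    static int __init demo_init(void)
    {
            printk(KERN_INFO "cpu_khz = %u\n", cpu_khz);
            return 0;
    }

    static void __exit demo_exit(void)
    {
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
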
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/mm/mmap.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/mm/mmap.c Fri Jul 15 13:39:50 2005
@@ -0,0 +1,2108 @@
+/*
+ * mm/mmap.c
+ *
+ * Written by obz.
+ *
+ * Address space accounting code       <alan@xxxxxxxxxx>
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/personality.h>
+#include <linux/security.h>
+#include <linux/hugetlb.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/acct.h>
+#include <linux/mount.h>
+#include <linux/mempolicy.h>
+#include <linux/rmap.h>
+
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/tlb.h>
+
+/*
+ * WARNING: the debugging will use recursive algorithms so never enable this
+ * unless you know what you are doing.
+ */
+#undef DEBUG_MM_RB
+
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware.  The expected
+ * behavior is in parens:
+ *
+ * map_type    prot
+ *             PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
+ * MAP_SHARED  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
+ *             w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
+ *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
+ *             
+ * MAP_PRIVATE r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
+ *             w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
+ *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
+ *
+ */
+pgprot_t protection_map[16] = {
+       __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
+       __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+};
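
The table above is indexed by the low four vm_flags bits (VM_READ, VM_WRITE,
VM_EXEC, VM_SHARED), which is how a mmap prot/map_type pair becomes a
hardware page protection; copy-on-write falls out of the private write
entries mapping to read-only protections. A one-line sketch of the lookup
as the kernel performs it:

    /* Sketch: deriving a vma's page protection from its flags. */
    pgprot_t prot = protection_map[vm_flags &
                       (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
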
+
+int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
+int sysctl_overcommit_ratio = 50;      /* default is 50% */
+int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+atomic_t vm_committed_space = ATOMIC_INIT(0);
+
+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
+ *
+ * Note this is a helper function intended to be used by LSMs which
+ * wish to use this logic.
+ */
+int __vm_enough_memory(long pages, int cap_sys_admin)
+{
+       unsigned long free, allowed;
+
+       vm_acct_memory(pages);
+
+       /*
+        * Sometimes we want to use more memory than we have
+        */
+       if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+               return 0;
+
+       if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
+               unsigned long n;
+
+               free = get_page_cache_size();
+               free += nr_swap_pages;
+
+               /*
+                * Any slabs which are created with the
+                * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+                * which are reclaimable, under pressure.  The dentry
+                * cache and most inode caches should fall into this
+                */
+               free += atomic_read(&slab_reclaim_pages);
+
+               /*
+                * Leave the last 3% for root
+                */
+               if (!cap_sys_admin)
+                       free -= free / 32;
+
+               if (free > pages)
+                       return 0;
+
+               /*
+                * nr_free_pages() is very expensive on large systems,
+                * only call if we're about to fail.
+                */
+               n = nr_free_pages();
+               if (!cap_sys_admin)
+                       n -= n / 32;
+               free += n;
+
+               if (free > pages)
+                       return 0;
+               vm_unacct_memory(pages);
+               return -ENOMEM;
+       }
+
+       allowed = (totalram_pages - hugetlb_total_pages())
+               * sysctl_overcommit_ratio / 100;
+       /*
+        * Leave the last 3% for root
+        */
+       if (!cap_sys_admin)
+               allowed -= allowed / 32;
+       allowed += total_swap_pages;
+
+       /* Don't let a single process grow too big:
+          leave 3% of the size of this process for other processes */
+       allowed -= current->mm->total_vm / 32;
+
+       if (atomic_read(&vm_committed_space) < allowed)
+               return 0;
+
+       vm_unacct_memory(pages);
+
+       return -ENOMEM;
+}
+
+EXPORT_SYMBOL(sysctl_overcommit_memory);
+EXPORT_SYMBOL(sysctl_overcommit_ratio);
+EXPORT_SYMBOL(sysctl_max_map_count);
+EXPORT_SYMBOL(vm_committed_space);
+EXPORT_SYMBOL(__vm_enough_memory);
+
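
To make the strict-overcommit arithmetic in __vm_enough_memory() concrete:
the commit limit is (RAM - hugetlb pages) * overcommit_ratio / 100 plus
swap, less a roughly 3% root reserve and 3% of the calling process's own
size. A worked sketch with invented numbers:

    /* Worked example (all numbers invented): 1 GiB RAM, 512 MiB swap,
     * 4 KiB pages, overcommit_ratio = 50; the per-process 3% term is
     * omitted for brevity. */
    static unsigned long commit_limit_example(void)
    {
            unsigned long totalram = 262144;              /* pages of RAM  */
            unsigned long swap     = 131072;              /* pages of swap */
            unsigned long allowed  = totalram * 50 / 100; /* 131072        */

            allowed -= allowed / 32;  /* ~3% root reserve -> 126976        */
            allowed += swap;          /* -> 258048 pages, about 1008 MiB   */
            return allowed;
    }
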
+/*
+ * Requires inode->i_mapping->i_mmap_lock
+ */
+static void __remove_shared_vm_struct(struct vm_area_struct *vma,
+               struct file *file, struct address_space *mapping)
+{
+       if (vma->vm_flags & VM_DENYWRITE)
+               atomic_inc(&file->f_dentry->d_inode->i_writecount);
+       if (vma->vm_flags & VM_SHARED)
+               mapping->i_mmap_writable--;
+
+       flush_dcache_mmap_lock(mapping);
+       if (unlikely(vma->vm_flags & VM_NONLINEAR))
+               list_del_init(&vma->shared.vm_set.list);
+       else
+               vma_prio_tree_remove(vma, &mapping->i_mmap);
+       flush_dcache_mmap_unlock(mapping);
+}
+
+/*
+ * Remove one vm structure and free it.
+ */
+static void remove_vm_struct(struct vm_area_struct *vma)
+{
+       struct file *file = vma->vm_file;
+
+       might_sleep();
+       if (file) {
+               struct address_space *mapping = file->f_mapping;
+               spin_lock(&mapping->i_mmap_lock);
+               __remove_shared_vm_struct(vma, file, mapping);
+               spin_unlock(&mapping->i_mmap_lock);
+       }
+       if (vma->vm_ops && vma->vm_ops->close)
+               vma->vm_ops->close(vma);
+       if (file)
+               fput(file);
+       anon_vma_unlink(vma);
+       mpol_free(vma_policy(vma));
+       kmem_cache_free(vm_area_cachep, vma);
+}
+
+/*
+ *  sys_brk() for the most part doesn't need the global kernel
+ *  lock, except when an application is doing something nasty
+ *  like trying to un-brk an area that has already been mapped
+ *  to a regular file.  in this case, the unmapping will need
+ *  to invoke file system routines that need the global lock.
+ */
+asmlinkage unsigned long sys_brk(unsigned long brk)
+{
+       unsigned long rlim, retval;
+       unsigned long newbrk, oldbrk;
+       struct mm_struct *mm = current->mm;
+
+       down_write(&mm->mmap_sem);
+
+       if (brk < mm->end_code)
+               goto out;
+       newbrk = PAGE_ALIGN(brk);
+       oldbrk = PAGE_ALIGN(mm->brk);
+       if (oldbrk == newbrk)
+               goto set_brk;
+
+       /* Always allow shrinking brk. */
+       if (brk <= mm->brk) {
+               if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+                       goto set_brk;
+               goto out;
+       }
+
+       /* Check against rlimit.. */
+       rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+       if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+               goto out;
+
+       /* Check against existing mmap mappings. */
+       if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+               goto out;
+
+       /* Ok, looks good - let it rip. */
+       if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+               goto out;
+set_brk:
+       mm->brk = brk;
+out:
+       retval = mm->brk;
+       up_write(&mm->mmap_sem);
+       return retval;
+}
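+
+/*
+ * Usage sketch (editor's illustration, not part of this patch):
+ * sys_brk() reports failure by returning the old break rather than
+ * an errno, so a caller of the raw syscall typically does:
+ *
+ *	unsigned long cur = brk(0);           (query current break)
+ *	if (brk(cur + PAGE_SIZE) == cur)      (try to grow one page)
+ *		expansion refused: rlimit, overlap or accounting
+ */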
+
+#ifdef DEBUG_MM_RB
+static int browse_rb(struct rb_root *root)
+{
+       int i = 0, j;
+       struct rb_node *nd, *pn = NULL;
+       unsigned long prev = 0, pend = 0;
+
+       for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+               struct vm_area_struct *vma;
+               vma = rb_entry(nd, struct vm_area_struct, vm_rb);
+               if (vma->vm_start < prev)
+                       printk("vm_start %lx prev %lx\n", vma->vm_start, prev), 
i = -1;
+               if (vma->vm_start < pend)
+                       printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+               if (vma->vm_start > vma->vm_end)
+                       printk("vm_end %lx < vm_start %lx\n", vma->vm_end, 
vma->vm_start);
+               i++;
+               pn = nd;
+       }
+       j = 0;
+       for (nd = pn; nd; nd = rb_prev(nd)) {
+               j++;
+       }
+       if (i != j)
+               printk("backwards %d, forwards %d\n", j, i), i = 0;
+       return i;
+}
+
+void validate_mm(struct mm_struct *mm)
+{
+       int bug = 0;
+       int i = 0;
+       struct vm_area_struct *tmp = mm->mmap;
+       while (tmp) {
+               tmp = tmp->vm_next;
+               i++;
+       }
+       if (i != mm->map_count)
+               printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
+       i = browse_rb(&mm->mm_rb);
+       if (i != mm->map_count)
+               printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
+       if (bug)
+               BUG();
+}
+#else
+#define validate_mm(mm) do { } while (0)
+#endif
+
+static struct vm_area_struct *
+find_vma_prepare(struct mm_struct *mm, unsigned long addr,
+               struct vm_area_struct **pprev, struct rb_node ***rb_link,
+               struct rb_node ** rb_parent)
+{
+       struct vm_area_struct * vma;
+       struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
+
+       __rb_link = &mm->mm_rb.rb_node;
+       rb_prev = __rb_parent = NULL;
+       vma = NULL;
+
+       while (*__rb_link) {
+               struct vm_area_struct *vma_tmp;
+
+               __rb_parent = *__rb_link;
+               vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
+
+               if (vma_tmp->vm_end > addr) {
+                       vma = vma_tmp;
+                       if (vma_tmp->vm_start <= addr)
+                               return vma;
+                       __rb_link = &__rb_parent->rb_left;
+               } else {
+                       rb_prev = __rb_parent;
+                       __rb_link = &__rb_parent->rb_right;
+               }
+       }
+
+       *pprev = NULL;
+       if (rb_prev)
+               *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
+       *rb_link = __rb_link;
+       *rb_parent = __rb_parent;
+       return vma;
+}
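+
+/*
+ * Usage sketch (editor's illustration): do_mmap_pgoff and do_brk
+ * below use the triple returned here to detect overlap and then to
+ * link a new vma without a second tree walk; new_vma is hypothetical:
+ *
+ *	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ *	if (vma && vma->vm_start < addr + len)
+ *		do_munmap(mm, addr, len) and rescan;
+ *	else
+ *		vma_link(mm, new_vma, prev, rb_link, rb_parent);
+ */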
+
+static inline void
+__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+               struct vm_area_struct *prev, struct rb_node *rb_parent)
+{
+       if (prev) {
+               vma->vm_next = prev->vm_next;
+               prev->vm_next = vma;
+       } else {
+               mm->mmap = vma;
+               if (rb_parent)
+                       vma->vm_next = rb_entry(rb_parent,
+                                       struct vm_area_struct, vm_rb);
+               else
+                       vma->vm_next = NULL;
+       }
+}
+
+void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
+               struct rb_node **rb_link, struct rb_node *rb_parent)
+{
+       rb_link_node(&vma->vm_rb, rb_parent, rb_link);
+       rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+}
+
+static inline void __vma_link_file(struct vm_area_struct *vma)
+{
+       struct file * file;
+
+       file = vma->vm_file;
+       if (file) {
+               struct address_space *mapping = file->f_mapping;
+
+               if (vma->vm_flags & VM_DENYWRITE)
+                       atomic_dec(&file->f_dentry->d_inode->i_writecount);
+               if (vma->vm_flags & VM_SHARED)
+                       mapping->i_mmap_writable++;
+
+               flush_dcache_mmap_lock(mapping);
+               if (unlikely(vma->vm_flags & VM_NONLINEAR))
+                       vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
+               else
+                       vma_prio_tree_insert(vma, &mapping->i_mmap);
+               flush_dcache_mmap_unlock(mapping);
+       }
+}
+
+static void
+__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
+       struct vm_area_struct *prev, struct rb_node **rb_link,
+       struct rb_node *rb_parent)
+{
+       __vma_link_list(mm, vma, prev, rb_parent);
+       __vma_link_rb(mm, vma, rb_link, rb_parent);
+       __anon_vma_link(vma);
+}
+
+static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
+                       struct vm_area_struct *prev, struct rb_node **rb_link,
+                       struct rb_node *rb_parent)
+{
+       struct address_space *mapping = NULL;
+
+       if (vma->vm_file)
+               mapping = vma->vm_file->f_mapping;
+
+       if (mapping) {
+               spin_lock(&mapping->i_mmap_lock);
+               vma->vm_truncate_count = mapping->truncate_count;
+       }
+       anon_vma_lock(vma);
+
+       __vma_link(mm, vma, prev, rb_link, rb_parent);
+       __vma_link_file(vma);
+
+       anon_vma_unlock(vma);
+       if (mapping)
+               spin_unlock(&mapping->i_mmap_lock);
+
+       mm->map_count++;
+       validate_mm(mm);
+}
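+
+/*
+ * Editor's note: observe the lock ordering here, i_mmap_lock taken
+ * before the anon_vma lock; vma_adjust below acquires them in the
+ * same order when it has to touch both.
+ */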
+
+/*
+ * Helper for vma_adjust in the split_vma insert case:
+ * insert vm structure into list and rbtree and anon_vma,
+ * but it has already been inserted into prio_tree earlier.
+ */
+static void
+__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+       struct vm_area_struct * __vma, * prev;
+       struct rb_node ** rb_link, * rb_parent;
+
+       __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
+       if (__vma && __vma->vm_start < vma->vm_end)
+               BUG();
+       __vma_link(mm, vma, prev, rb_link, rb_parent);
+       mm->map_count++;
+}
+
+static inline void
+__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
+               struct vm_area_struct *prev)
+{
+       prev->vm_next = vma->vm_next;
+       rb_erase(&vma->vm_rb, &mm->mm_rb);
+       if (mm->mmap_cache == vma)
+               mm->mmap_cache = prev;
+}
+
+/*
+ * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
+ * is already present in an i_mmap tree without adjusting the tree.
+ * The following helper function should be used when such adjustments
+ * are necessary.  The "insert" vma (if any) is to be inserted
+ * before we drop the necessary locks.
+ */
+void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+       unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *next = vma->vm_next;
+       struct vm_area_struct *importer = NULL;
+       struct address_space *mapping = NULL;
+       struct prio_tree_root *root = NULL;
+       struct file *file = vma->vm_file;
+       struct anon_vma *anon_vma = NULL;
+       long adjust_next = 0;
+       int remove_next = 0;
+
+       if (next && !insert) {
+               if (end >= next->vm_end) {
+                       /*
+                        * vma expands, overlapping all the next, and
+                        * perhaps the one after too (mprotect case 6).
+                        */
+again:                 remove_next = 1 + (end > next->vm_end);
+                       end = next->vm_end;
+                       anon_vma = next->anon_vma;
+                       importer = vma;
+               } else if (end > next->vm_start) {
+                       /*
+                        * vma expands, overlapping part of the next:
+                        * mprotect case 5 shifting the boundary up.
+                        */
+                       adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
+                       anon_vma = next->anon_vma;
+                       importer = vma;
+               } else if (end < vma->vm_end) {
+                       /*
+                        * vma shrinks, and !insert tells it's not
+                        * split_vma inserting another: so it must be
+                        * mprotect case 4 shifting the boundary down.
+                        */
+                       adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
+                       anon_vma = next->anon_vma;
+                       importer = next;
+               }
+       }
+
+       if (file) {
+               mapping = file->f_mapping;
+               if (!(vma->vm_flags & VM_NONLINEAR))
+                       root = &mapping->i_mmap;
+               spin_lock(&mapping->i_mmap_lock);
+               if (importer &&
+                   vma->vm_truncate_count != next->vm_truncate_count) {
+                       /*
+                        * unmap_mapping_range might be in progress:
+                        * ensure that the expanding vma is rescanned.
+                        */
+                       importer->vm_truncate_count = 0;
+               }
+               if (insert) {
+                       insert->vm_truncate_count = vma->vm_truncate_count;
+                       /*
+                        * Put into prio_tree now, so instantiated pages
+                        * are visible to arm/parisc __flush_dcache_page
+                        * throughout; but we cannot insert into address
+                        * space until vma start or end is updated.
+                        */
+                       __vma_link_file(insert);
+               }
+       }
+
+       /*
+        * When changing only vma->vm_end, we don't really need
+        * anon_vma lock: but is that case worth optimizing out?
+        */
+       if (vma->anon_vma)
+               anon_vma = vma->anon_vma;
+       if (anon_vma) {
+               spin_lock(&anon_vma->lock);
+               /*
+                * Easily overlooked: when mprotect shifts the boundary,
+                * make sure the expanding vma has anon_vma set if the
+                * shrinking vma had, to cover any anon pages imported.
+                */
+               if (importer && !importer->anon_vma) {
+                       importer->anon_vma = anon_vma;
+                       __anon_vma_link(importer);
+               }
+       }
+
+       if (root) {
+               flush_dcache_mmap_lock(mapping);
+               vma_prio_tree_remove(vma, root);
+               if (adjust_next)
+                       vma_prio_tree_remove(next, root);
+       }
+
+       vma->vm_start = start;
+       vma->vm_end = end;
+       vma->vm_pgoff = pgoff;
+       if (adjust_next) {
+               next->vm_start += adjust_next << PAGE_SHIFT;
+               next->vm_pgoff += adjust_next;
+       }
+
+       if (root) {
+               if (adjust_next)
+                       vma_prio_tree_insert(next, root);
+               vma_prio_tree_insert(vma, root);
+               flush_dcache_mmap_unlock(mapping);
+       }
+
+       if (remove_next) {
+               /*
+                * vma_merge has merged next into vma, and needs
+                * us to remove next before dropping the locks.
+                */
+               __vma_unlink(mm, next, vma);
+               if (file)
+                       __remove_shared_vm_struct(next, file, mapping);
+               if (next->anon_vma)
+                       __anon_vma_merge(vma, next);
+       } else if (insert) {
+               /*
+                * split_vma has split insert from vma, and needs
+                * us to insert it before dropping the locks
+                * (it may either follow vma or precede it).
+                */
+               __insert_vm_struct(mm, insert);
+       }
+
+       if (anon_vma)
+               spin_unlock(&anon_vma->lock);
+       if (mapping)
+               spin_unlock(&mapping->i_mmap_lock);
+
+       if (remove_next) {
+               if (file)
+                       fput(file);
+               mm->map_count--;
+               mpol_free(vma_policy(next));
+               kmem_cache_free(vm_area_cachep, next);
+               /*
+                * In mprotect's case 6 (see comments on vma_merge),
+                * we must remove another next too. It would clutter
+                * up the code too much to do both in one go.
+                */
+               if (remove_next == 2) {
+                       next = vma->vm_next;
+                       goto again;
+               }
+       }
+
+       validate_mm(mm);
+}
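+
+/*
+ * Worked example (editor's sketch): in mprotect case 6 (see the
+ * diagram above vma_merge below), vma swallows both NNNN and XXXX.
+ * The first pass sets remove_next == 2 and frees NNNN, then jumps
+ * back to "again", where remove_next becomes 1 and XXXX is unlinked
+ * and freed the same way.
+ */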
+
+/*
+ * If the vma has a ->close operation then the driver probably needs to release
+ * per-vma resources, so we don't attempt to merge those.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
+
+static inline int is_mergeable_vma(struct vm_area_struct *vma,
+                       struct file *file, unsigned long vm_flags)
+{
+       if (vma->vm_flags != vm_flags)
+               return 0;
+       if (vma->vm_file != file)
+               return 0;
+       if (vma->vm_ops && vma->vm_ops->close)
+               return 0;
+       return 1;
+}
+
+static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
+                                       struct anon_vma *anon_vma2)
+{
+       return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
+}
+
+/*
+ * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * in front of (at a lower virtual address and file offset than) the vma.
+ *
+ * We cannot merge two vmas if they have differently assigned (non-NULL)
+ * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
+ *
+ * We don't check here for the merged mmap wrapping around the end of pagecache
+ * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * wrap, nor mmaps which cover the final page at index -1UL.
+ */
+static int
+can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
+       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+{
+       if (is_mergeable_vma(vma, file, vm_flags) &&
+           is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+               if (vma->vm_pgoff == vm_pgoff)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * beyond (at a higher virtual address and file offset than) the vma.
+ *
+ * We cannot merge two vmas if they have differently assigned (non-NULL)
+ * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
+ */
+static int
+can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
+       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+{
+       if (is_mergeable_vma(vma, file, vm_flags) &&
+           is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+               pgoff_t vm_pglen;
+               vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+               if (vma->vm_pgoff + vm_pglen == vm_pgoff)
+                       return 1;
+       }
+       return 0;
+}
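+
+/*
+ * Worked example (editor's sketch): a vma mapping file pages [4,8)
+ * has vm_pgoff == 4 and vm_pglen == 4, so it merges "after" only
+ * with a request whose vm_pgoff is exactly 8; anything else would
+ * make the combined mapping non-linear in the file.
+ */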
+
+/*
+ * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
+ * whether that can be merged with its predecessor or its successor.
+ * Or both (it neatly fills a hole).
+ *
+ * In most cases - when called for mmap, brk or mremap - [addr,end) is
+ * certain not to be mapped by the time vma_merge is called; but when
+ * called for mprotect, it is certain to be already mapped (either at
+ * an offset within prev, or at the start of next), and the flags of
+ * this area are about to be changed to vm_flags - and the no-change
+ * case has already been eliminated.
+ *
+ * The following mprotect cases have to be considered, where AAAA is
+ * the area passed down from mprotect_fixup, never extending beyond one
+ * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
+ *
+ *     AAAA             AAAA                AAAA          AAAA
+ *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
+ *    cannot merge    might become    might become    might become
+ *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
+ *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
+ *    mremap move:                                    PPPPNNNNNNNN 8
+ *        AAAA
+ *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
+ *    might become    case 1 below    case 2 below    case 3 below
+ *
+ * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
+ * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
+ */
+struct vm_area_struct *vma_merge(struct mm_struct *mm,
+                       struct vm_area_struct *prev, unsigned long addr,
+                       unsigned long end, unsigned long vm_flags,
+                       struct anon_vma *anon_vma, struct file *file,
+                       pgoff_t pgoff, struct mempolicy *policy)
+{
+       pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
+       struct vm_area_struct *area, *next;
+
+       /*
+        * We later require that vma->vm_flags == vm_flags,
+        * so this tests vma->vm_flags & VM_SPECIAL, too.
+        */
+       if (vm_flags & VM_SPECIAL)
+               return NULL;
+
+       if (prev)
+               next = prev->vm_next;
+       else
+               next = mm->mmap;
+       area = next;
+       if (next && next->vm_end == end)                /* cases 6, 7, 8 */
+               next = next->vm_next;
+
+       /*
+        * Can it merge with the predecessor?
+        */
+       if (prev && prev->vm_end == addr &&
+                       mpol_equal(vma_policy(prev), policy) &&
+                       can_vma_merge_after(prev, vm_flags,
+                                               anon_vma, file, pgoff)) {
+               /*
+                * OK, it can.  Can we now merge in the successor as well?
+                */
+               if (next && end == next->vm_start &&
+                               mpol_equal(policy, vma_policy(next)) &&
+                               can_vma_merge_before(next, vm_flags,
+                                       anon_vma, file, pgoff+pglen) &&
+                               is_mergeable_anon_vma(prev->anon_vma,
+                                                     next->anon_vma)) {
+                                                       /* cases 1, 6 */
+                       vma_adjust(prev, prev->vm_start,
+                               next->vm_end, prev->vm_pgoff, NULL);
+               } else                                  /* cases 2, 5, 7 */
+                       vma_adjust(prev, prev->vm_start,
+                               end, prev->vm_pgoff, NULL);
+               return prev;
+       }
+
+       /*
+        * Can this new request be merged in front of next?
+        */
+       if (next && end == next->vm_start &&
+                       mpol_equal(policy, vma_policy(next)) &&
+                       can_vma_merge_before(next, vm_flags,
+                                       anon_vma, file, pgoff+pglen)) {
+               if (prev && addr < prev->vm_end)        /* case 4 */
+                       vma_adjust(prev, prev->vm_start,
+                               addr, prev->vm_pgoff, NULL);
+               else                                    /* cases 3, 8 */
+                       vma_adjust(area, addr, next->vm_end,
+                               next->vm_pgoff - pglen, NULL);
+               return area;
+       }
+
+       return NULL;
+}
+
+/*
+ * find_mergeable_anon_vma is used by anon_vma_prepare, to check
+ * neighbouring vmas for a suitable anon_vma, before it goes off
+ * to allocate a new anon_vma.  It checks because a repetitive
+ * sequence of mprotects and faults may otherwise lead to distinct
+ * anon_vmas being allocated, preventing vma merge in subsequent
+ * mprotect.
+ */
+struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
+{
+       struct vm_area_struct *near;
+       unsigned long vm_flags;
+
+       near = vma->vm_next;
+       if (!near)
+               goto try_prev;
+
+       /*
+        * Since only mprotect tries to remerge vmas, match flags
+        * which might be mprotected into each other later on.
+        * Neither mlock nor madvise tries to remerge at present,
+        * so leave their flags as obstructing a merge.
+        */
+       vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+       vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+       if (near->anon_vma && vma->vm_end == near->vm_start &&
+                       mpol_equal(vma_policy(vma), vma_policy(near)) &&
+                       can_vma_merge_before(near, vm_flags,
+                               NULL, vma->vm_file, vma->vm_pgoff +
+                               ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
+               return near->anon_vma;
+try_prev:
+       /*
+        * It is potentially slow to have to call find_vma_prev here.
+        * But it's only on the first write fault on the vma, not
+        * every time, and we could devise a way to avoid it later
+        * (e.g. stash info in next's anon_vma_node when assigning
+        * an anon_vma, or when trying vma_merge).  Another time.
+        */
+       if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
+               BUG();
+       if (!near)
+               goto none;
+
+       vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+       vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+       if (near->anon_vma && near->vm_end == vma->vm_start &&
+                       mpol_equal(vma_policy(near), vma_policy(vma)) &&
+                       can_vma_merge_after(near, vm_flags,
+                               NULL, vma->vm_file, vma->vm_pgoff))
+               return near->anon_vma;
+none:
+       /*
+        * There's no absolute need to look only at touching neighbours:
+        * we could search further afield for "compatible" anon_vmas.
+        * But it would probably just be a waste of time searching,
+        * or lead to too many vmas hanging off the same anon_vma.
+        * We're trying to allow mprotect remerging later on,
+        * not trying to minimize memory used for anon_vmas.
+        */
+       return NULL;
+}
+
+#ifdef CONFIG_PROC_FS
+void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
+                                               struct file *file, long pages)
+{
+       const unsigned long stack_flags
+               = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
+
+#ifdef CONFIG_HUGETLB
+       if (flags & VM_HUGETLB) {
+               if (!(flags & VM_DONTCOPY))
+                       mm->shared_vm += pages;
+               return;
+       }
+#endif /* CONFIG_HUGETLB */
+
+       if (file) {
+               mm->shared_vm += pages;
+               if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
+                       mm->exec_vm += pages;
+       } else if (flags & stack_flags)
+               mm->stack_vm += pages;
+       if (flags & (VM_RESERVED|VM_IO))
+               mm->reserved_vm += pages;
+}
+#endif /* CONFIG_PROC_FS */
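+
+/*
+ * Illustrative accounting (editor's note): a file-backed text
+ * mapping (VM_EXEC set, VM_WRITE clear) is counted in both
+ * shared_vm and exec_vm, while an anonymous mapping with a stack
+ * flag (typically VM_GROWSDOWN) is counted in stack_vm instead.
+ */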
+
+/*
+ * The caller must hold down_write(current->mm->mmap_sem).
+ */
+
+unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+                       unsigned long len, unsigned long prot,
+                       unsigned long flags, unsigned long pgoff)
+{
+       struct mm_struct * mm = current->mm;
+       struct vm_area_struct * vma, * prev;
+       struct inode *inode;
+       unsigned int vm_flags;
+       int correct_wcount = 0;
+       int error;
+       struct rb_node ** rb_link, * rb_parent;
+       int accountable = 1;
+       unsigned long charged = 0;
+
+       if (file) {
+               if (is_file_hugepages(file))
+                       accountable = 0;
+
+               if (!file->f_op || !file->f_op->mmap)
+                       return -ENODEV;
+
+               if ((prot & PROT_EXEC) &&
+                   (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
+                       return -EPERM;
+       }
+       /*
+        * Does the application expect PROT_READ to imply PROT_EXEC?
+        *
+        * (the exception is when the underlying filesystem is noexec
+        *  mounted, in which case we don't add PROT_EXEC.)
+        */
+       if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
+               if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
+                       prot |= PROT_EXEC;
+
+       if (!len)
+               return addr;
+
+       /* Careful about overflows.. */
+       len = PAGE_ALIGN(len);
+       if (!len || len > TASK_SIZE)
+               return -EINVAL;
+
+       /* offset overflow? */
+       if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
+               return -EINVAL;
+
+       /* Too many mappings? */
+       if (mm->map_count > sysctl_max_map_count)
+               return -ENOMEM;
+
+       /* Obtain the address to map to.  We verify (or select) it and ensure
+        * that it represents a valid section of the address space.
+        */
+       addr = get_unmapped_area(file, addr, len, pgoff, flags);
+       if (addr & ~PAGE_MASK)
+               return addr;
+
+       /* Do simple checking here so the lower-level routines won't have
+        * to.  We assume access permissions have been handled by the open
+        * of the memory object, so we don't do any here.
+        */
+       vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+                       mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+       if (flags & MAP_LOCKED) {
+               if (!can_do_mlock())
+                       return -EPERM;
+               vm_flags |= VM_LOCKED;
+       }
+       /* mlock MCL_FUTURE? */
+       if (vm_flags & VM_LOCKED) {
+               unsigned long locked, lock_limit;
+               locked = mm->locked_vm << PAGE_SHIFT;
+               lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+               locked += len;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       return -EAGAIN;
+       }
+
+       inode = file ? file->f_dentry->d_inode : NULL;
+
+       if (file) {
+               switch (flags & MAP_TYPE) {
+               case MAP_SHARED:
+                       if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
+                               return -EACCES;
+
+                       /*
+                        * Make sure we don't allow writing to an append-only
+                        * file..
+                        */
+                       if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+                               return -EACCES;
+
+                       /*
+                        * Make sure there are no mandatory locks on the file.
+                        */
+                       if (locks_verify_locked(inode))
+                               return -EAGAIN;
+
+                       vm_flags |= VM_SHARED | VM_MAYSHARE;
+                       if (!(file->f_mode & FMODE_WRITE))
+                               vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
+
+                       /* fall through */
+               case MAP_PRIVATE:
+                       if (!(file->f_mode & FMODE_READ))
+                               return -EACCES;
+                       break;
+
+               default:
+                       return -EINVAL;
+               }
+       } else {
+               switch (flags & MAP_TYPE) {
+               case MAP_SHARED:
+                       vm_flags |= VM_SHARED | VM_MAYSHARE;
+                       break;
+               case MAP_PRIVATE:
+                       /*
+                        * Set pgoff according to addr for anon_vma.
+                        */
+                       pgoff = addr >> PAGE_SHIFT;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       error = security_file_mmap(file, prot, flags);
+       if (error)
+               return error;
+               
+       /* Clear old maps */
+       error = -ENOMEM;
+munmap_back:
+       vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+       if (vma && vma->vm_start < addr + len) {
+               if (do_munmap(mm, addr, len))
+                       return -ENOMEM;
+               goto munmap_back;
+       }
+
+       /* Check against address space limit. */
+       if ((mm->total_vm << PAGE_SHIFT) + len
+           > current->signal->rlim[RLIMIT_AS].rlim_cur)
+               return -ENOMEM;
+
+       if (accountable && (!(flags & MAP_NORESERVE) ||
+                           sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
+               if (vm_flags & VM_SHARED) {
+                       /* Check memory availability in shmem_file_setup? */
+                       vm_flags |= VM_ACCOUNT;
+               } else if (vm_flags & VM_WRITE) {
+                       /*
+                        * Private writable mapping: check memory availability
+                        */
+                       charged = len >> PAGE_SHIFT;
+                       if (security_vm_enough_memory(charged))
+                               return -ENOMEM;
+                       vm_flags |= VM_ACCOUNT;
+               }
+       }
+
+       /*
+        * Can we just expand an old private anonymous mapping?
+        * The VM_SHARED test is necessary because shmem_zero_setup
+        * will create the file object for a shared anonymous map below.
+        */
+       if (!file && !(vm_flags & VM_SHARED) &&
+           vma_merge(mm, prev, addr, addr + len, vm_flags,
+                                       NULL, NULL, pgoff, NULL))
+               goto out;
+
+       /*
+        * Determine the object being mapped and call the appropriate
+        * specific mapper.  The address has already been validated, and
+        * any old mappings overlapping the range have already been
+        * unmapped and removed from the list above.
+        */
+       vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       if (!vma) {
+               error = -ENOMEM;
+               goto unacct_error;
+       }
+       memset(vma, 0, sizeof(*vma));
+
+       vma->vm_mm = mm;
+       vma->vm_start = addr;
+       vma->vm_end = addr + len;
+       vma->vm_flags = vm_flags;
+       vma->vm_page_prot = protection_map[vm_flags & 0x0f];
+       vma->vm_pgoff = pgoff;
+
+       if (file) {
+               error = -EINVAL;
+               if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                       goto free_vma;
+               if (vm_flags & VM_DENYWRITE) {
+                       error = deny_write_access(file);
+                       if (error)
+                               goto free_vma;
+                       correct_wcount = 1;
+               }
+               vma->vm_file = file;
+               get_file(file);
+               error = file->f_op->mmap(file, vma);
+               if (error)
+                       goto unmap_and_free_vma;
+       } else if (vm_flags & VM_SHARED) {
+               error = shmem_zero_setup(vma);
+               if (error)
+                       goto free_vma;
+       }
+
+       /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
+        * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
+        * that memory reservation must be checked; but that reservation
+        * belongs to shared memory object, not to vma: so now clear it.
+        */
+       if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
+               vma->vm_flags &= ~VM_ACCOUNT;
+
+       /* Can addr have changed??
+        *
+        * Answer: Yes, several device drivers can do it in their
+        *         f_op->mmap method. -DaveM
+        */
+       addr = vma->vm_start;
+       pgoff = vma->vm_pgoff;
+       vm_flags = vma->vm_flags;
+
+       if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
+                       vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
+               file = vma->vm_file;
+               vma_link(mm, vma, prev, rb_link, rb_parent);
+               if (correct_wcount)
+                       atomic_inc(&inode->i_writecount);
+       } else {
+               if (file) {
+                       if (correct_wcount)
+                               atomic_inc(&inode->i_writecount);
+                       fput(file);
+               }
+               mpol_free(vma_policy(vma));
+               kmem_cache_free(vm_area_cachep, vma);
+       }
+out:   
+       mm->total_vm += len >> PAGE_SHIFT;
+       __vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+       if (vm_flags & VM_LOCKED) {
+               mm->locked_vm += len >> PAGE_SHIFT;
+               make_pages_present(addr, addr + len);
+       }
+       if (flags & MAP_POPULATE) {
+               up_write(&mm->mmap_sem);
+               sys_remap_file_pages(addr, len, 0,
+                                       pgoff, flags & MAP_NONBLOCK);
+               down_write(&mm->mmap_sem);
+       }
+       acct_update_integrals();
+       update_mem_hiwater();
+       return addr;
+
+unmap_and_free_vma:
+       if (correct_wcount)
+               atomic_inc(&inode->i_writecount);
+       vma->vm_file = NULL;
+       fput(file);
+
+       /* Undo any partial mapping done by a device driver. */
+       zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+free_vma:
+       kmem_cache_free(vm_area_cachep, vma);
+unacct_error:
+       if (charged)
+               vm_unacct_memory(charged);
+       return error;
+}
+
+EXPORT_SYMBOL(do_mmap_pgoff);
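+
+/*
+ * Usage sketch (editor's illustration): the mmap syscall paths boil
+ * down to this call, always made with the semaphore held for write,
+ * as the comment above requires:
+ *
+ *	down_write(&current->mm->mmap_sem);
+ *	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ *	up_write(&current->mm->mmap_sem);
+ */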
+
+/* Get an address range which is currently unmapped.
+ * For shmat() with addr=0.
+ *
+ * Ugly calling convention alert:
+ * Return value with the low bits set means error value,
+ * i.e.
+ *     if (ret & ~PAGE_MASK)
+ *             error = ret;
+ *
+ * This function "knows" that -ENOMEM has the bits set.
+ */
+#ifndef HAVE_ARCH_UNMAPPED_AREA
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+               unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long start_addr;
+
+       if (len > TASK_SIZE)
+               return -ENOMEM;
+
+       if (addr) {
+               addr = PAGE_ALIGN(addr);
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+                   (!vma || addr + len <= vma->vm_start))
+                       return addr;
+       }
+       start_addr = addr = mm->free_area_cache;
+
+full_search:
+       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+               /* At this point:  (!vma || addr < vma->vm_end). */
+               if (TASK_SIZE - len < addr) {
+                       /*
+                        * Start a new search - just in case we missed
+                        * some holes.
+                        */
+                       if (start_addr != TASK_UNMAPPED_BASE) {
+                               start_addr = addr = TASK_UNMAPPED_BASE;
+                               goto full_search;
+                       }
+                       return -ENOMEM;
+               }
+               if (!vma || addr + len <= vma->vm_start) {
+                       /*
+                        * Remember the place where we stopped the search:
+                        */
+                       mm->free_area_cache = addr + len;
+                       return addr;
+               }
+               addr = vma->vm_end;
+       }
+}
+#endif 
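+
+/*
+ * Editor's note: the default allocator above is a simple first-fit
+ * scan.  free_area_cache only remembers where the last search ended;
+ * if the scan from that hint exhausts the address space, it restarts
+ * once from TASK_UNMAPPED_BASE before returning -ENOMEM.
+ */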
+
+void arch_unmap_area(struct vm_area_struct *area)
+{
+       /*
+        * Is this a new hole at the lowest possible address?
+        */
+       if (area->vm_start >= TASK_UNMAPPED_BASE &&
+                       area->vm_start < area->vm_mm->free_area_cache)
+               area->vm_mm->free_area_cache = area->vm_start;
+}
+
+/*
+ * This mmap-allocator allocates new areas top-down from below the
+ * stack's low limit (the base):
+ */
+#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+                         const unsigned long len, const unsigned long pgoff,
+                         const unsigned long flags)
+{
+       struct vm_area_struct *vma, *prev_vma;
+       struct mm_struct *mm = current->mm;
+       unsigned long base = mm->mmap_base, addr = addr0;
+       int first_time = 1;
+
+       /* requested length too big for entire address space */
+       if (len > TASK_SIZE)
+               return -ENOMEM;
+
+       /* don't allow allocations above current base */
+       if (mm->free_area_cache > base)
+               mm->free_area_cache = base;
+
+       /* requesting a specific address */
+       if (addr) {
+               addr = PAGE_ALIGN(addr);
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+                               (!vma || addr + len <= vma->vm_start))
+                       return addr;
+       }
+
+try_again:
+       /* make sure it can fit in the remaining address space */
+       if (mm->free_area_cache < len)
+               goto fail;
+
+       /* either no address requested, or can't fit in requested address hole */
+       addr = (mm->free_area_cache - len) & PAGE_MASK;
+       do {
+               /*
+                * Lookup failure means no vma is above this address,
+                * i.e. return with success:
+                */
+               if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
+                       return addr;
+
+               /*
+                * new region fits between prev_vma->vm_end and
+                * vma->vm_start, use it:
+                */
+               if (addr+len <= vma->vm_start &&
+                               (!prev_vma || (addr >= prev_vma->vm_end)))
+                       /* remember the address as a hint for next time */
+                       return (mm->free_area_cache = addr);
+               else
+                       /* pull free_area_cache down to the first hole */
+                       if (mm->free_area_cache == vma->vm_end)
+                               mm->free_area_cache = vma->vm_start;
+
+               /* try just below the current vma->vm_start */
+               addr = vma->vm_start-len;
+       } while (len <= vma->vm_start);
+
+fail:
+       /*
+        * if hint left us with no space for the requested
+        * mapping then try again:
+        */
+       if (first_time) {
+               mm->free_area_cache = base;
+               first_time = 0;
+               goto try_again;
+       }
+       /*
+        * A failed mmap() very likely causes application failure,
+        * so fall back to the bottom-up function here. This scenario
+        * can happen with large stack limits and large mmap()
+        * allocations.
+        */
+       mm->free_area_cache = TASK_UNMAPPED_BASE;
+       addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+       /*
+        * Restore the topdown base:
+        */
+       mm->free_area_cache = base;
+
+       return addr;
+}
+#endif
+
+void arch_unmap_area_topdown(struct vm_area_struct *area)
+{
+       /*
+        * Is this a new hole at the highest possible address?
+        */
+       if (area->vm_end > area->vm_mm->free_area_cache)
+               area->vm_mm->free_area_cache = area->vm_end;
+}
+
+unsigned long
+get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+               unsigned long pgoff, unsigned long flags)
+{
+       if (flags & MAP_FIXED) {
+               unsigned long ret;
+
+               if (addr > TASK_SIZE - len)
+                       return -ENOMEM;
+               if (addr & ~PAGE_MASK)
+                       return -EINVAL;
+               if (file && is_file_hugepages(file))  {
+                       /*
+                        * Check if the given range is hugepage aligned, and
+                        * can be made suitable for hugepages.
+                        */
+                       ret = prepare_hugepage_range(addr, len);
+               } else {
+                       /*
+                        * Ensure that a normal request is not falling in a
+                        * reserved hugepage range.  For some archs like IA-64,
+                        * there is a separate region for hugepages.
+                        */
+                       ret = is_hugepage_only_range(addr, len);
+               }
+               if (ret)
+                       return -EINVAL;
+               return addr;
+       }
+
+       if (file && file->f_op && file->f_op->get_unmapped_area)
+               return file->f_op->get_unmapped_area(file, addr, len,
+                                               pgoff, flags);
+
+       return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
+{
+       struct vm_area_struct *vma = NULL;
+
+       if (mm) {
+               /* Check the cache first. */
+               /* (Cache hit rate is typically around 35%.) */
+               vma = mm->mmap_cache;
+               if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
+                       struct rb_node * rb_node;
+
+                       rb_node = mm->mm_rb.rb_node;
+                       vma = NULL;
+
+                       while (rb_node) {
+                               struct vm_area_struct * vma_tmp;
+
+                               vma_tmp = rb_entry(rb_node,
+                                               struct vm_area_struct, vm_rb);
+
+                               if (vma_tmp->vm_end > addr) {
+                                       vma = vma_tmp;
+                                       if (vma_tmp->vm_start <= addr)
+                                               break;
+                                       rb_node = rb_node->rb_left;
+                               } else
+                                       rb_node = rb_node->rb_right;
+                       }
+                       if (vma)
+                               mm->mmap_cache = vma;
+               }
+       }
+       return vma;
+}
+
+EXPORT_SYMBOL(find_vma);
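+
+/*
+ * Editor's note: find_vma() returns the vma containing addr, or else
+ * the first vma above it, so callers that need containment must also
+ * check vma->vm_start <= addr, as find_extend_vma() below does.
+ */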
+
+/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
+struct vm_area_struct *
+find_vma_prev(struct mm_struct *mm, unsigned long addr,
+                       struct vm_area_struct **pprev)
+{
+       struct vm_area_struct *vma = NULL, *prev = NULL;
+       struct rb_node * rb_node;
+       if (!mm)
+               goto out;
+
+       /* Guard against addr being lower than the first VMA */
+       vma = mm->mmap;
+
+       /* Go through the RB tree quickly. */
+       rb_node = mm->mm_rb.rb_node;
+
+       while (rb_node) {
+               struct vm_area_struct *vma_tmp;
+               vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+               if (addr < vma_tmp->vm_end) {
+                       rb_node = rb_node->rb_left;
+               } else {
+                       prev = vma_tmp;
+                       if (!prev->vm_next || (addr < prev->vm_next->vm_end))
+                               break;
+                       rb_node = rb_node->rb_right;
+               }
+       }
+
+out:
+       *pprev = prev;
+       return prev ? prev->vm_next : vma;
+}
+
+/*
+ * Verify that the stack growth is acceptable and
+ * update accounting. This is shared with both the
+ * grow-up and grow-down cases.
+ */
+static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct rlimit *rlim = current->signal->rlim;
+
+       /* address space limit tests */
+       if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
+               return -ENOMEM;
+
+       /* Stack limit test */
+       if (size > rlim[RLIMIT_STACK].rlim_cur)
+               return -ENOMEM;
+
+       /* mlock limit tests */
+       if (vma->vm_flags & VM_LOCKED) {
+               unsigned long locked;
+               unsigned long limit;
+               locked = mm->locked_vm + grow;
+               limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+               if (locked > limit && !capable(CAP_IPC_LOCK))
+                       return -ENOMEM;
+       }
+
+       /*
+        * Overcommit..  This must be the final test, as it will
+        * update security statistics.
+        */
+       if (security_vm_enough_memory(grow))
+               return -ENOMEM;
+
+       /* Ok, everything looks good - let it rip */
+       mm->total_vm += grow;
+       if (vma->vm_flags & VM_LOCKED)
+               mm->locked_vm += grow;
+       __vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+       acct_update_integrals();
+       update_mem_hiwater();
+       return 0;
+}
+
+#ifdef CONFIG_STACK_GROWSUP
+/*
+ * vma is the first one with address > vma->vm_end.  Have to extend vma.
+ */
+int expand_stack(struct vm_area_struct * vma, unsigned long address)
+{
+       int error;
+
+       if (!(vma->vm_flags & VM_GROWSUP))
+               return -EFAULT;
+
+       /*
+        * We must make sure the anon_vma is allocated
+        * so that the anon_vma locking is not a noop.
+        */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
+       anon_vma_lock(vma);
+
+       /*
+        * vma->vm_start/vm_end cannot change under us because the caller
+        * is required to hold the mmap_sem in read mode.  We need the
+        * anon_vma lock to serialize against concurrent expand_stacks.
+        */
+       address += 4 + PAGE_SIZE - 1;
+       address &= PAGE_MASK;
+       error = 0;
+
+       /* Somebody else might have raced and expanded it already */
+       if (address > vma->vm_end) {
+               unsigned long size, grow;
+
+               size = address - vma->vm_start;
+               grow = (address - vma->vm_end) >> PAGE_SHIFT;
+
+               error = acct_stack_growth(vma, size, grow);
+               if (!error)
+                       vma->vm_end = address;
+       }
+       anon_vma_unlock(vma);
+       return error;
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+       struct vm_area_struct *vma, *prev;
+
+       addr &= PAGE_MASK;
+       vma = find_vma_prev(mm, addr, &prev);
+       if (vma && (vma->vm_start <= addr))
+               return vma;
+       if (!prev || expand_stack(prev, addr))
+               return NULL;
+       if (prev->vm_flags & VM_LOCKED) {
+               make_pages_present(addr, prev->vm_end);
+       }
+       return prev;
+}
+#else
+/*
+ * vma is the first one with address < vma->vm_start.  Have to extend vma.
+ */
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+       int error;
+
+       /*
+        * We must make sure the anon_vma is allocated
+        * so that the anon_vma locking is not a noop.
+        */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
+       anon_vma_lock(vma);
+
+       /*
+        * vma->vm_start/vm_end cannot change under us because the caller
+        * is required to hold the mmap_sem in read mode.  We need the
+        * anon_vma lock to serialize against concurrent expand_stacks.
+        */
+       address &= PAGE_MASK;
+       error = 0;
+
+       /* Somebody else might have raced and expanded it already */
+       if (address < vma->vm_start) {
+               unsigned long size, grow;
+
+               size = vma->vm_end - address;
+               grow = (vma->vm_start - address) >> PAGE_SHIFT;
+
+               error = acct_stack_growth(vma, size, grow);
+               if (!error) {
+                       vma->vm_start = address;
+                       vma->vm_pgoff -= grow;
+               }
+       }
+       anon_vma_unlock(vma);
+       return error;
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct * mm, unsigned long addr)
+{
+       struct vm_area_struct * vma;
+       unsigned long start;
+
+       addr &= PAGE_MASK;
+       vma = find_vma(mm,addr);
+       if (!vma)
+               return NULL;
+       if (vma->vm_start <= addr)
+               return vma;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               return NULL;
+       start = vma->vm_start;
+       if (expand_stack(vma, addr))
+               return NULL;
+       if (vma->vm_flags & VM_LOCKED) {
+               make_pages_present(addr, start);
+       }
+       return vma;
+}
+#endif
+
+/*
+ * Try to free as many page directory entries as we can,
+ * without having to work very hard at actually scanning
+ * the page tables themselves.
+ *
+ * Right now we try to free page tables if we have a nice
+ * PGDIR-aligned area that got free'd up. We could be more
+ * granular if we want to, but this is fast and simple,
+ * and covers the bad cases.
+ *
+ * "prev", if it exists, points to a vma before the one
+ * we just free'd - but there's no telling how much before.
+ */
+static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
+       unsigned long start, unsigned long end)
+{
+       unsigned long first = start & PGDIR_MASK;
+       unsigned long last = end + PGDIR_SIZE - 1;
+       struct mm_struct *mm = tlb->mm;
+
+       if (last > MM_VM_SIZE(mm) || last < end)
+               last = MM_VM_SIZE(mm);
+
+       if (!prev) {
+               prev = mm->mmap;
+               if (!prev)
+                       goto no_mmaps;
+               if (prev->vm_end > start) {
+                       if (last > prev->vm_start)
+                               last = prev->vm_start;
+                       goto no_mmaps;
+               }
+       }
+       for (;;) {
+               struct vm_area_struct *next = prev->vm_next;
+
+               if (next) {
+                       if (next->vm_start < start) {
+                               prev = next;
+                               continue;
+                       }
+                       if (last > next->vm_start)
+                               last = next->vm_start;
+               }
+               if (prev->vm_end > first)
+                       first = prev->vm_end;
+               break;
+       }
+no_mmaps:
+       if (last < first)       /* for arches with discontiguous pgd indices */
+               return;
+       if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
+               first = FIRST_USER_PGD_NR * PGDIR_SIZE;
+       /* No point trying to free anything if we're in the same pte page */
+       if ((first & PMD_MASK) < (last & PMD_MASK)) {
+               clear_page_range(tlb, first, last);
+               flush_tlb_pgtables(mm, first, last);
+       }
+}
+
+/* Normal function to fix up a mapping
+ * This function is the default for when an area has no specific
+ * function.  This may be used as part of a more specific routine.
+ *
+ * By the time this function is called, the area struct has been
+ * removed from the process mapping list.
+ */
+static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
+{
+       size_t len = area->vm_end - area->vm_start;
+
+       area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+       if (area->vm_flags & VM_LOCKED)
+               area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+       vm_stat_unaccount(area);
+       area->vm_mm->unmap_area(area);
+       remove_vm_struct(area);
+}
+
+/*
+ * Update the VMA and inode share lists.
+ *
+ * Ok - we have the memory areas we should free on the 'free' list,
+ * so release them, and do the vma updates.
+ */
+static void unmap_vma_list(struct mm_struct *mm,
+       struct vm_area_struct *mpnt)
+{
+       do {
+               struct vm_area_struct *next = mpnt->vm_next;
+               unmap_vma(mm, mpnt);
+               mpnt = next;
+       } while (mpnt != NULL);
+       validate_mm(mm);
+}
+
+/*
+ * Get rid of page table information in the indicated region.
+ *
+ * Called with the page table lock held.
+ */
+static void unmap_region(struct mm_struct *mm,
+       struct vm_area_struct *vma,
+       struct vm_area_struct *prev,
+       unsigned long start,
+       unsigned long end)
+{
+       struct mmu_gather *tlb;
+       unsigned long nr_accounted = 0;
+
+       lru_add_drain();
+       tlb = tlb_gather_mmu(mm, 0);
+       unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
+       vm_unacct_memory(nr_accounted);
+
+       if (is_hugepage_only_range(start, end - start))
+               hugetlb_free_pgtables(tlb, prev, start, end);
+       else
+               free_pgtables(tlb, prev, start, end);
+       tlb_finish_mmu(tlb, start, end);
+}
+
+/*
+ * Create a list of vma's touched by the unmap, removing them from the mm's
+ * vma list as we go..
+ */
+static void
+detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
+       struct vm_area_struct *prev, unsigned long end)
+{
+       struct vm_area_struct **insertion_point;
+       struct vm_area_struct *tail_vma = NULL;
+
+       insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+       do {
+               rb_erase(&vma->vm_rb, &mm->mm_rb);
+               mm->map_count--;
+               tail_vma = vma;
+               vma = vma->vm_next;
+       } while (vma && vma->vm_start < end);
+       *insertion_point = vma;
+       tail_vma->vm_next = NULL;
+       mm->mmap_cache = NULL;          /* Kill the cache. */
+}
+
+/*
+ * Split a vma into two pieces at address 'addr'; a new vma is allocated
+ * either for the first part or for the tail.
+ */
+int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+             unsigned long addr, int new_below)
+{
+       struct mempolicy *pol;
+       struct vm_area_struct *new;
+
+       if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+               return -EINVAL;
+
+       if (mm->map_count >= sysctl_max_map_count)
+               return -ENOMEM;
+
+       new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       if (!new)
+               return -ENOMEM;
+
+       /* most fields are the same, copy all, and then fixup */
+       *new = *vma;
+
+       if (new_below)
+               new->vm_end = addr;
+       else {
+               new->vm_start = addr;
+               new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
+       }
+
+       pol = mpol_copy(vma_policy(vma));
+       if (IS_ERR(pol)) {
+               kmem_cache_free(vm_area_cachep, new);
+               return PTR_ERR(pol);
+       }
+       vma_set_policy(new, pol);
+
+       if (new->vm_file)
+               get_file(new->vm_file);
+
+       if (new->vm_ops && new->vm_ops->open)
+               new->vm_ops->open(new);
+
+       if (new_below)
+               vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+                       ((addr - new->vm_start) >> PAGE_SHIFT), new);
+       else
+               vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
+       return 0;
+}
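+
+/*
+ * Worked example (editor's sketch): splitting a vma covering
+ * [0x8000, 0x10000) at addr 0xc000 with new_below == 0 shrinks the
+ * original to [0x8000, 0xc000) and creates [0xc000, 0x10000), whose
+ * vm_pgoff is advanced by 4 pages (with 4K pages) so file offsets
+ * stay in step with virtual addresses.
+ */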
+
+/* Munmap is split into 2 main parts -- this part which finds
+ * what needs doing, and the areas themselves, which do the
+ * work.  This now handles partial unmappings.
+ * Jeremy Fitzhardinge <jeremy@xxxxxxxx>
+ */
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+{
+       unsigned long end;
+       struct vm_area_struct *mpnt, *prev, *last;
+
+       if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+               return -EINVAL;
+
+       if ((len = PAGE_ALIGN(len)) == 0)
+               return -EINVAL;
+
+       /* Find the first overlapping VMA */
+       mpnt = find_vma_prev(mm, start, &prev);
+       if (!mpnt)
+               return 0;
+       /* we have  start < mpnt->vm_end  */
+
+       /* if it doesn't overlap, we have nothing.. */
+       end = start + len;
+       if (mpnt->vm_start >= end)
+               return 0;
+
+       /*
+        * If we need to split any vma, do it now to save pain later.
+        *
+        * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
+        * unmapped vm_area_struct will remain in use: so lower split_vma
+        * places tmp vma above, and higher split_vma places tmp vma below.
+        */
+       if (start > mpnt->vm_start) {
+               int error = split_vma(mm, mpnt, start, 0);
+               if (error)
+                       return error;
+               prev = mpnt;
+       }
+
+       /* Does it split the last one? */
+       last = find_vma(mm, end);
+       if (last && end > last->vm_start) {
+               int error = split_vma(mm, last, end, 1);
+               if (error)
+                       return error;
+       }
+       mpnt = prev? prev->vm_next: mm->mmap;
+
+       /*
+        * Remove the vma's, and unmap the actual pages
+        */
+       detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
+       spin_lock(&mm->page_table_lock);
+       unmap_region(mm, mpnt, prev, start, end);
+       spin_unlock(&mm->page_table_lock);
+
+       /* Fix up all other VM information */
+       unmap_vma_list(mm, mpnt);
+
+       return 0;
+}
+
+EXPORT_SYMBOL(do_munmap);
+
+asmlinkage long sys_munmap(unsigned long addr, size_t len)
+{
+       int ret;
+       struct mm_struct *mm = current->mm;
+
+       profile_munmap(addr);
+
+       down_write(&mm->mmap_sem);
+       ret = do_munmap(mm, addr, len);
+       up_write(&mm->mmap_sem);
+       return ret;
+}
+
+static inline void verify_mm_writelocked(struct mm_struct *mm)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+       if (unlikely(down_read_trylock(&mm->mmap_sem))) {
+               WARN_ON(1);
+               up_read(&mm->mmap_sem);
+       }
+#endif
+}
+
+/*
+ *  this is really a simplified "do_mmap".  it only handles
+ *  anonymous maps.  eventually we may be able to do some
+ *  brk-specific accounting here.
+ */
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+       struct mm_struct * mm = current->mm;
+       struct vm_area_struct * vma, * prev;
+       unsigned long flags;
+       struct rb_node ** rb_link, * rb_parent;
+       pgoff_t pgoff = addr >> PAGE_SHIFT;
+
+       len = PAGE_ALIGN(len);
+       if (!len)
+               return addr;
+
+       if ((addr + len) > TASK_SIZE || (addr + len) < addr)
+               return -EINVAL;
+
+       /*
+        * mlock MCL_FUTURE?
+        */
+       if (mm->def_flags & VM_LOCKED) {
+               unsigned long locked, lock_limit;
+               locked = mm->locked_vm << PAGE_SHIFT;
+               lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+               locked += len;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       return -EAGAIN;
+       }
+
+       /*
+        * mm->mmap_sem is required to protect against another thread
+        * changing the mappings in case we sleep.
+        */
+       verify_mm_writelocked(mm);
+
+       /*
+        * Clear old maps.  this also does some error checking for us
+        */
+ munmap_back:
+       vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+       if (vma && vma->vm_start < addr + len) {
+               if (do_munmap(mm, addr, len))
+                       return -ENOMEM;
+               goto munmap_back;
+       }
+
+       /* Check against address space limits *after* clearing old maps... */
+       if ((mm->total_vm << PAGE_SHIFT) + len
+           > current->signal->rlim[RLIMIT_AS].rlim_cur)
+               return -ENOMEM;
+
+       if (mm->map_count > sysctl_max_map_count)
+               return -ENOMEM;
+
+       if (security_vm_enough_memory(len >> PAGE_SHIFT))
+               return -ENOMEM;
+
+       flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
+       /* Can we just expand an old private anonymous mapping? */
+       if (vma_merge(mm, prev, addr, addr + len, flags,
+                                       NULL, NULL, pgoff, NULL))
+               goto out;
+
+       /*
+        * create a vma struct for an anonymous mapping
+        */
+       vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       if (!vma) {
+               vm_unacct_memory(len >> PAGE_SHIFT);
+               return -ENOMEM;
+       }
+       memset(vma, 0, sizeof(*vma));
+
+       vma->vm_mm = mm;
+       vma->vm_start = addr;
+       vma->vm_end = addr + len;
+       vma->vm_pgoff = pgoff;
+       vma->vm_flags = flags;
+       vma->vm_page_prot = protection_map[flags & 0x0f];
+       vma_link(mm, vma, prev, rb_link, rb_parent);
+out:
+       mm->total_vm += len >> PAGE_SHIFT;
+       if (flags & VM_LOCKED) {
+               mm->locked_vm += len >> PAGE_SHIFT;
+               make_pages_present(addr, addr + len);
+       }
+       acct_update_integrals();
+       update_mem_hiwater();
+       return addr;
+}
+
+EXPORT_SYMBOL(do_brk);
+
+/* Release all mmaps. */
+void exit_mmap(struct mm_struct *mm)
+{
+       struct mmu_gather *tlb;
+       struct vm_area_struct *vma;
+       unsigned long nr_accounted = 0;
+
+#ifdef arch_exit_mmap
+       arch_exit_mmap(mm);
+#endif
+
+       lru_add_drain();
+
+       spin_lock(&mm->page_table_lock);
+
+       tlb = tlb_gather_mmu(mm, 1);
+       flush_cache_mm(mm);
+       /* Use ~0UL here to ensure all VMAs in the mm are unmapped */
+       mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
+                                       ~0UL, &nr_accounted, NULL);
+       vm_unacct_memory(nr_accounted);
+       BUG_ON(mm->map_count);  /* This is just debugging */
+       clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
+       
+       tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
+
+       vma = mm->mmap;
+       mm->mmap = mm->mmap_cache = NULL;
+       mm->mm_rb = RB_ROOT;
+       mm->rss = 0;
+       mm->total_vm = 0;
+       mm->locked_vm = 0;
+
+       spin_unlock(&mm->page_table_lock);
+
+       /*
+        * Walk the list again, actually closing and freeing it
+        * without holding any MM locks.
+        */
+       while (vma) {
+               struct vm_area_struct *next = vma->vm_next;
+               remove_vm_struct(vma);
+               vma = next;
+       }
+}
+
+/* Insert vm structure into process list sorted by address
+ * and into the inode's i_mmap tree.  If vm_file is non-NULL
+ * then i_mmap_lock is taken here.
+ */
+int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+       struct vm_area_struct * __vma, * prev;
+       struct rb_node ** rb_link, * rb_parent;
+
+       /*
+        * The vm_pgoff of a purely anonymous vma should be irrelevant
+        * until its first write fault, when page's anon_vma and index
+        * are set.  But now set the vm_pgoff it will almost certainly
+        * end up with (unless mremap moves it elsewhere before that
+        * first wfault), so /proc/pid/maps tells a consistent story.
+        *
+        * By setting it to reflect the virtual start address of the
+        * vma, merges and splits can happen in a seamless way, just
+        * using the existing file pgoff checks and manipulations.
+        * Similarly in do_mmap_pgoff and in do_brk.
+        */
+       if (!vma->vm_file) {
+               BUG_ON(vma->anon_vma);
+               vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
+       }
+       __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
+       if (__vma && __vma->vm_start < vma->vm_end)
+               return -ENOMEM;
+       vma_link(mm, vma, prev, rb_link, rb_parent);
+       return 0;
+}
+
+/*
+ * Copy the vma structure to a new location in the same mm,
+ * prior to moving page table entries, to effect an mremap move.
+ */
+struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
+       unsigned long addr, unsigned long len, pgoff_t pgoff)
+{
+       struct vm_area_struct *vma = *vmap;
+       unsigned long vma_start = vma->vm_start;
+       struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *new_vma, *prev;
+       struct rb_node **rb_link, *rb_parent;
+       struct mempolicy *pol;
+
+       /*
+        * If anonymous vma has not yet been faulted, update new pgoff
+        * to match new location, to increase its chance of merging.
+        */
+       if (!vma->vm_file && !vma->anon_vma)
+               pgoff = addr >> PAGE_SHIFT;
+
+       find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+       new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
+                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+       if (new_vma) {
+               /*
+                * Source vma may have been merged into new_vma
+                */
+               if (vma_start >= new_vma->vm_start &&
+                   vma_start < new_vma->vm_end)
+                       *vmap = new_vma;
+       } else {
+               new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+               if (new_vma) {
+                       *new_vma = *vma;
+                       pol = mpol_copy(vma_policy(vma));
+                       if (IS_ERR(pol)) {
+                               kmem_cache_free(vm_area_cachep, new_vma);
+                               return NULL;
+                       }
+                       vma_set_policy(new_vma, pol);
+                       new_vma->vm_start = addr;
+                       new_vma->vm_end = addr + len;
+                       new_vma->vm_pgoff = pgoff;
+                       if (new_vma->vm_file)
+                               get_file(new_vma->vm_file);
+                       if (new_vma->vm_ops && new_vma->vm_ops->open)
+                               new_vma->vm_ops->open(new_vma);
+                       vma_link(mm, new_vma, prev, rb_link, rb_parent);
+               }
+       }
+       return new_vma;
+}
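
The mmap.c code above implements partial unmapping: do_munmap() calls split_vma() once for each boundary that lands inside an existing VMA, then detach_vmas_to_be_unmapped() unlinks the pieces in between. A minimal user-space sketch of the effect (plain POSIX mmap/munmap, not part of this patch; the three-page layout is just an example):

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
    size_t page = (size_t)sysconf(_SC_PAGESIZE);

    /* One anonymous three-page mapping: a single VMA in the kernel. */
    char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    /* Unmap only the middle page: do_munmap() splits the VMA at both
     * boundaries (two split_vma() calls) and detaches the middle piece. */
    if (munmap(p + page, page) != 0)
        return 1;

    printf("surviving pieces at %p and %p\n",
           (void *)p, (void *)(p + 2 * page));
    return 0;
}

After the munmap() call the kernel is left holding two disjoint VMAs where there was one, which is exactly the split path implemented above.
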
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c Fri Jul 15 13:39:50 2005
@@ -0,0 +1,295 @@
+/******************************************************************************
+ * blkback/vbd.c
+ * 
+ * Routines for managing virtual block devices (VBDs).
+ * 
+ * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups 
+ * in vbd_translate.  All other lookups are implicitly protected because the 
+ * only caller (the control message dispatch routine) serializes the calls.
+ * 
+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+struct vbd { 
+    blkif_vdev_t   vdevice;     /* what the domain refers to this vbd as */
+    unsigned char  readonly;    /* Non-zero -> read-only */
+    unsigned char  type;        /* VDISK_xxx */
+    blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
+    struct block_device *bdev;
+    rb_node_t      rb;          /* for linking into R-B tree lookup struct */
+}; 
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static inline dev_t vbd_map_devnum(blkif_pdev_t cookie)
+{ return MKDEV(cookie>>8, cookie&0xff); }
+#define vbd_sz(_v)   ((_v)->bdev->bd_part ? \
+    (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
+#define bdev_put(_b) blkdev_put(_b)
+#else
+#define vbd_sz(_v)   (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2)
+#define bdev_put(_b) ((void)0)
+#define bdev_hardsect_size(_b) 512
+#endif
+
+void vbd_create(blkif_be_vbd_create_t *create) 
+{
+    struct vbd  *vbd; 
+    rb_node_t  **rb_p, *rb_parent = NULL;
+    blkif_t     *blkif;
+    blkif_vdev_t vdevice = create->vdevice;
+
+    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n", 
+                create->domid, create->blkif_handle); 
+        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    rb_p = &blkif->vbd_rb.rb_node;
+    while ( *rb_p != NULL )
+    {
+        rb_parent = *rb_p;
+        vbd = rb_entry(rb_parent, struct vbd, rb);
+        if ( vdevice < vbd->vdevice )
+        {
+            rb_p = &rb_parent->rb_left;
+        }
+        else if ( vdevice > vbd->vdevice )
+        {
+            rb_p = &rb_parent->rb_right;
+        }
+        else
+        {
+            DPRINTK("vbd_create attempted for already existing vbd\n");
+            create->status = BLKIF_BE_STATUS_VBD_EXISTS;
+            return;
+        }
+    }
+
+    if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
+    {
+        DPRINTK("vbd_create: out of memory\n");
+        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    vbd->vdevice  = vdevice; 
+    vbd->readonly = create->readonly;
+    vbd->type     = 0;
+
+    /* Mask to 16-bit for compatibility with old tools */
+    vbd->pdevice  = create->pdevice & 0xffff;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+    vbd->bdev = open_by_devnum(
+        vbd_map_devnum(vbd->pdevice),
+        vbd->readonly ? FMODE_READ : FMODE_WRITE);
+    if ( IS_ERR(vbd->bdev) )
+    {
+        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
+        return;
+    }
+
+    if ( (vbd->bdev->bd_disk == NULL) )
+    {
+        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
+        bdev_put(vbd->bdev);
+        return;
+    }
+
+    if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
+        vbd->type |= VDISK_CDROM;
+    if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
+        vbd->type |= VDISK_REMOVABLE;
+
+#else
+    if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
+    {
+        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
+        return;
+    }
+#endif
+
+    spin_lock(&blkif->vbd_lock);
+    rb_link_node(&vbd->rb, rb_parent, rb_p);
+    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
+    spin_unlock(&blkif->vbd_lock);
+
+    DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
+            vdevice, create->domid);
+    create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+
+void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 
+{
+    blkif_t           *blkif;
+    struct vbd        *vbd;
+    rb_node_t         *rb;
+    blkif_vdev_t       vdevice = destroy->vdevice;
+
+    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
+                destroy->domid, destroy->blkif_handle); 
+        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    rb = blkif->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, struct vbd, rb);
+        if ( vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            goto found;
+    }
+
+    destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+    return;
+
+ found:
+    spin_lock(&blkif->vbd_lock);
+    rb_erase(rb, &blkif->vbd_rb);
+    spin_unlock(&blkif->vbd_lock);
+    bdev_put(vbd->bdev);
+    kfree(vbd);
+}
+
+
+void destroy_all_vbds(blkif_t *blkif)
+{
+    struct vbd *vbd;
+    rb_node_t  *rb;
+
+    spin_lock(&blkif->vbd_lock);
+
+    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
+    {
+        vbd = rb_entry(rb, struct vbd, rb);
+        rb_erase(rb, &blkif->vbd_rb);
+        spin_unlock(&blkif->vbd_lock);
+        bdev_put(vbd->bdev);
+        kfree(vbd);
+        spin_lock(&blkif->vbd_lock);
+    }
+
+    spin_unlock(&blkif->vbd_lock);
+}
+
+
+static void vbd_probe_single(
+    blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd)
+{
+    vbd_info->device      = vbd->vdevice; 
+    vbd_info->info        = vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
+    vbd_info->capacity    = vbd_sz(vbd);
+    vbd_info->sector_size = bdev_hardsect_size(vbd->bdev);
+}
+
+
+int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
+{
+    int        rc = 0, nr_vbds = 0;
+    rb_node_t *rb;
+
+    spin_lock(&blkif->vbd_lock);
+
+    if ( (rb = blkif->vbd_rb.rb_node) == NULL )
+        goto out;
+
+ new_subtree:
+    /* STEP 1. Find least node (it'll be left-most). */
+    while ( rb->rb_left != NULL )
+        rb = rb->rb_left;
+
+    for ( ; ; )
+    {
+        /* STEP 2. Dealt with left subtree. Now process current node. */
+        vbd_probe_single(blkif, &vbd_info[nr_vbds],
+                         rb_entry(rb, struct vbd, rb));
+        if ( ++nr_vbds == max_vbds )
+            goto out;
+
+        /* STEP 3. Process right subtree, if any. */
+        if ( rb->rb_right != NULL )
+        {
+            rb = rb->rb_right;
+            goto new_subtree;
+        }
+
+        /* STEP 4. Done both subtrees. Head back through ancestors. */
+        for ( ; ; ) 
+        {
+            /* We're done when we get back to the root node. */
+            if ( rb->rb_parent == NULL )
+                goto out;
+            /* If we are left of parent, then parent is next to process. */
+            if ( rb->rb_parent->rb_left == rb )
+                break;
+            /* If we are right of parent, then we climb to grandparent. */
+            rb = rb->rb_parent;
+        }
+
+        rb = rb->rb_parent;
+    }
+
+ out:
+    spin_unlock(&blkif->vbd_lock);
+    return (rc == 0) ? nr_vbds : rc;  
+}
+
+
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
+{
+    struct vbd *vbd;
+    rb_node_t  *rb;
+    int         rc = -EACCES;
+
+    /* Take the vbd_lock because another thread could be updating the tree. */
+    spin_lock(&blkif->vbd_lock);
+
+    rb = blkif->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, struct vbd, rb);
+        if ( req->dev < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( req->dev > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            goto found;
+    }
+
+    DPRINTK("vbd_translate; domain %u attempted to access "
+            "non-existent VBD.\n", blkif->domid);
+    rc = -ENODEV;
+    goto out;
+
+ found:
+
+    if ( (operation == WRITE) && vbd->readonly )
+        goto out;
+
+    if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) )
+        goto out;
+
+    req->dev  = vbd->pdevice;
+    req->bdev = vbd->bdev;
+    rc = 0;
+
+ out:
+    spin_unlock(&blkif->vbd_lock);
+    return rc;
+}
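
vbd_create(), vbd_destroy() and vbd_translate() above all walk the per-blkif red-black tree the same way, ordered on vdevice. A self-contained sketch of that descend loop, using a toy binary-tree node rather than the kernel's rb_node_t (names and values below are illustrative only):

#include <stdio.h>
#include <stddef.h>

struct node {
    unsigned int key;                 /* plays the role of vbd->vdevice */
    struct node *left, *right;        /* plays the role of rb_left/rb_right */
};

static struct node *lookup(struct node *root, unsigned int key)
{
    while (root != NULL) {
        if (key < root->key)
            root = root->left;        /* cf. rb = rb->rb_left */
        else if (key > root->key)
            root = root->right;       /* cf. rb = rb->rb_right */
        else
            return root;              /* exact match, cf. 'goto found' */
    }
    return NULL;                      /* cf. BLKIF_BE_STATUS_VBD_NOT_FOUND */
}

int main(void)
{
    struct node leaf = { 769, NULL, NULL };      /* e.g. vdevice 0x301 */
    struct node root = { 770, &leaf, NULL };
    printf("found key %u\n", lookup(&root, 769)->key);
    return 0;
}
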
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,147 @@
+#ifndef _ASM_GENERIC_PGTABLE_H
+#define _ASM_GENERIC_PGTABLE_H
+
+#ifndef __HAVE_ARCH_PTEP_ESTABLISH
+/*
+ * Establish a new mapping:
+ *  - flush the old one
+ *  - update the page tables
+ *  - inform the TLB about the new one
+ *
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock.
+ *
+ * Note: the old pte is known to not be writable, so we don't need to
+ * worry about dirty bits etc getting lost.
+ */
+#ifndef __HAVE_ARCH_SET_PTE_ATOMIC
+#define ptep_establish(__vma, __address, __ptep, __entry)              \
+do {                                                                   \
+       set_pte(__ptep, __entry);                                       \
+       flush_tlb_page(__vma, __address);                               \
+} while (0)
+#else /* __HAVE_ARCH_SET_PTE_ATOMIC */
+#define ptep_establish(__vma, __address, __ptep, __entry)              \
+do {                                                                   \
+       set_pte_atomic(__ptep, __entry);                                \
+       flush_tlb_page(__vma, __address);                               \
+} while (0)
+#endif /* __HAVE_ARCH_SET_PTE_ATOMIC */
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+/*
+ * Largely same as above, but only sets the access flags (dirty,
+ * accessed, and writable). Furthermore, we know it always gets set
+ * to a "more permissive" setting, which allows most architectures
+ * to optimize this.
+ */
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+do {                                                                     \
+       set_pte(__ptep, __entry);                                         \
+       flush_tlb_page(__vma, __address);                                 \
+} while (0)
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_ESTABLISH_NEW
+/*
+ * Establish a mapping where none previously existed
+ */
+#define ptep_establish_new(__vma, __address, __ptep, __entry)          \
+do {                                                                   \
+       set_pte(__ptep, __entry);                                       \
+} while (0)
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+       pte_t pte = *ptep;
+       if (!pte_young(pte))
+               return 0;
+       set_pte(ptep, pte_mkold(pte));
+       return 1;
+}
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep)               \
+({                                                                     \
+       int __young = ptep_test_and_clear_young(__ptep);                \
+       if (__young)                                                    \
+               flush_tlb_page(__vma, __address);                       \
+       __young;                                                        \
+})
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+       pte_t pte = *ptep;
+       if (!pte_dirty(pte))
+               return 0;
+       set_pte(ptep, pte_mkclean(pte));
+       return 1;
+}
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
+#define ptep_clear_flush_dirty(__vma, __address, __ptep)               \
+({                                                                     \
+       int __dirty = ptep_test_and_clear_dirty(__ptep);                \
+       if (__dirty)                                                    \
+               flush_tlb_page(__vma, __address);                       \
+       __dirty;                                                        \
+})
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(pte_t *ptep)
+{
+       pte_t pte = *ptep;
+       pte_clear(ptep);
+       return pte;
+}
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
+#define ptep_clear_flush(__vma, __address, __ptep)                     \
+({                                                                     \
+       pte_t __pte = ptep_get_and_clear(__ptep);                       \
+       flush_tlb_page(__vma, __address);                               \
+       __pte;                                                          \
+})
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+       pte_t old_pte = *ptep;
+       set_pte(ptep, pte_wrprotect(old_pte));
+}
+#endif
+
+#ifndef __HAVE_ARCH_PTEP_MKDIRTY
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+       pte_t old_pte = *ptep;
+       set_pte(ptep, pte_mkdirty(old_pte));
+}
+#endif
+
+#ifndef __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)  (pte_val(A) == pte_val(B))
+#endif
+
+#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
+#define page_test_and_clear_dirty(page) (0)
+#endif
+
+#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
+#define page_test_and_clear_young(page) (0)
+#endif
+
+#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
+#define pgd_offset_gate(mm, addr)      pgd_offset(mm, addr)
+#endif
+
+#endif /* _ASM_GENERIC_PGTABLE_H */
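
Every helper in this header sits behind an #ifndef __HAVE_ARCH_* guard, so an architecture that can do better defines the guard symbol and supplies its own version before the generic one is seen. A self-contained toy sketch of the idiom (dummy pte_t, user-space code; a real port would do this in its asm/pgtable.h, e.g. replacing the read-then-clear with a single atomic exchange):

#include <stdio.h>

typedef struct { unsigned long val; } pte_t;   /* toy stand-in, not the kernel type */

/* "Arch header": claims the helper, so the generic fallback compiles out. */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static pte_t ptep_get_and_clear(pte_t *ptep)
{
    pte_t pte = *ptep;                 /* a real arch might use an atomic xchg */
    ptep->val = 0;
    return pte;
}

/* "Generic header": defines the helper only if no arch version exists,
 * exactly like the #ifndef guards in asm-generic/pgtable.h above. */
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static pte_t ptep_get_and_clear(pte_t *ptep)
{
    pte_t pte = *ptep;
    ptep->val = 0;
    return pte;
}
#endif

int main(void)
{
    pte_t e = { 0x42 };
    pte_t old = ptep_get_and_clear(&e);
    printf("old pte %#lx, pte now %#lx\n", old.val, e.val);
    return 0;
}
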
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,756 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/main.c
+ * 
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/blkif/frontend
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
+ */
+
+#include "common.h"
+#include <asm-xen/evtchn.h>
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#endif
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ * 
+ * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+
+static unsigned long mmap_vstart;
+#define MMAP_PAGES                                              \
+    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg)                                   \
+    (mmap_vstart +                                              \
+     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
+     ((_seg) * PAGE_SIZE))
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a 
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
+ * the pendcnt towards zero. When it hits zero, the specified domain has a 
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+    blkif_t       *blkif;
+    unsigned long  id;
+    int            nr_pages;
+    atomic_t       pendcnt;
+    unsigned short operation;
+    int            status;
+} pending_req_t;
+
+/*
+ * We can't allocate pending_req's in order, since they may complete out of 
+ * order. We therefore maintain an allocation ring. This ring also indicates 
+ * when enough work has been passed down -- at that point the allocation ring 
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+/* NB. We use a different index type to differentiate from shared blk rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+static kmem_cache_t *buffer_head_cachep;
+#else
+static request_queue_t *plugged_queue;
+static inline void flush_plugged_queue(void)
+{
+    request_queue_t *q = plugged_queue;
+    if ( q != NULL )
+    {
+        if ( q->unplug_fn != NULL )
+            q->unplug_fn(q);
+        blk_put_queue(q);
+        plugged_queue = NULL;
+    }
+}
+#endif
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+static u16 pending_grant_handles[MMAP_PAGES];
+#define pending_handle(_idx, _i) \
+    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+#endif
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+/*
+ * If the tap driver is used, we may get pages belonging to either the tap
+ * or (more likely) the real frontend.  The backend must specify which domain
+ * a given page belongs to in update_va_mapping though.  For the moment, 
+ * the tap rewrites the ID field of the request to contain the request index
+ * and the id of the real front end domain.
+ */
+#define BLKTAP_COOKIE 0xbeadfeed
+static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
+#endif
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, int st);
+
+static void fast_flush_area(int idx, int nr_pages)
+{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    unsigned int i, invcount = 0;
+    u16 handle;
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
+        {
+            unmap[i].host_virt_addr = MMAP_VADDR(idx, i);
+            unmap[i].dev_bus_addr   = 0;
+            unmap[i].handle         = handle;
+            pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+            invcount++;
+        }
+    }
+    if ( unlikely(HYPERVISOR_grant_table_op(
+                    GNTTABOP_unmap_grant_ref, unmap, invcount)))
+        BUG();
+#else
+
+    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int               i;
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+       MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
+                               __pte(0), 0);
+    }
+
+    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+        BUG();
+#endif
+}
+
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+    {
+        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int blkio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    blkif_t          *blkif;
+    struct list_head *ent;
+
+    daemonize(
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+        "xenblkd"
+#endif
+        );
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&blkio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
+             list_empty(&blkio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&blkio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+                !list_empty(&blkio_schedule_list) )
+        {
+            ent = blkio_schedule_list.next;
+            blkif = list_entry(ent, blkif_t, blkdev_list);
+            blkif_get(blkif);
+            remove_from_blkdev_list(blkif);
+            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(blkif);
+            blkif_put(blkif);
+        }
+
+        /* Push the batch through to disc. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+        run_task_queue(&tq_disk);
+#else
+        flush_plugged_queue();
+#endif
+    }
+}
+
+static void maybe_trigger_blkio_schedule(void)
+{
+    /*
+     * Needed so that two processes, which together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+         !list_empty(&blkio_schedule_list) )
+        wake_up(&blkio_schedule_wait);
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+{
+    unsigned long flags;
+
+    /* An error fails the entire request. */
+    if ( !uptodate )
+    {
+        DPRINTK("Buffer not up-to-date at end of operation\n");
+        pending_req->status = BLKIF_RSP_ERROR;
+    }
+
+    if ( atomic_dec_and_test(&pending_req->pendcnt) )
+    {
+        int pending_idx = pending_req - pending_reqs;
+        fast_flush_area(pending_idx, pending_req->nr_pages);
+        make_response(pending_req->blkif, pending_req->id,
+                      pending_req->operation, pending_req->status);
+        blkif_put(pending_req->blkif);
+        spin_lock_irqsave(&pend_prod_lock, flags);
+        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+        spin_unlock_irqrestore(&pend_prod_lock, flags);
+        maybe_trigger_blkio_schedule();
+    }
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
+{
+    __end_block_io_op(bh->b_private, uptodate);
+    kmem_cache_free(buffer_head_cachep, bh);
+}
+#else
+static int end_block_io_op(struct bio *bio, unsigned int done, int error)
+{
+    if ( bio->bi_size != 0 )
+        return 1;
+    __end_block_io_op(bio->bi_private, !error);
+    bio_put(bio);
+    return error;
+}
+#endif
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_blkio_schedule();
+    return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+    blkif_request_t *req;
+    RING_IDX i, rp;
+    int more_to_do = 0;
+
+    rp = blk_ring->sring->req_prod;
+    rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    for ( i = blk_ring->req_cons; 
+         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
+          i++ )
+    {
+        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+        {
+            more_to_do = 1;
+            break;
+        }
+        
+        req = RING_GET_REQUEST(blk_ring, i);
+        switch ( req->operation )
+        {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+            dispatch_rw_block_io(blkif, req);
+            break;
+
+        case BLKIF_OP_PROBE:
+            dispatch_probe(blkif, req);
+            break;
+
+        default:
+            DPRINTK("error: unknown block io operation [%d]\n",
+                    req->operation);
+            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+            break;
+        }
+    }
+
+    blk_ring->req_cons = i;
+    return more_to_do;
+}
+
+static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
+{
+    int rsp = BLKIF_RSP_ERROR;
+    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+    /* We expect one buffer only. */
+    if ( unlikely(req->nr_segments != 1) )
+        goto out;
+
+    /* Make sure the buffer is page-sized. */
+    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
+         (blkif_last_sect(req->frame_and_sects[0]) != 7) )
+        goto out;
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    {
+        struct gnttab_map_grant_ref map;
+
+        map.host_virt_addr = MMAP_VADDR(pending_idx, 0);
+        map.flags = GNTMAP_host_map;
+        map.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
+        map.dom = blkif->domid;
+
+        if ( unlikely(HYPERVISOR_grant_table_op(
+                        GNTTABOP_map_grant_ref, &map, 1)))
+            BUG();
+
+        if ( map.handle < 0 )
+            goto out;
+
+        pending_handle(pending_idx, 0) = map.handle;
+    }
+#else /* else CONFIG_XEN_BLKDEV_GRANT */
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+    /* Grab the real frontend out of the probe message. */
+    if (req->frame_and_sects[1] == BLKTAP_COOKIE) 
+        blkif->is_blktap = 1;
+#endif
+
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+    if ( HYPERVISOR_update_va_mapping_otherdomain(
+        MMAP_VADDR(pending_idx, 0),
+        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
+        0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
+        
+        goto out;
+#else
+    if ( HYPERVISOR_update_va_mapping_otherdomain(
+        MMAP_VADDR(pending_idx, 0),
+        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
+        0, blkif->domid) ) 
+        
+        goto out;
+#endif
+#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
+   
+    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
+                    PAGE_SIZE / sizeof(vdisk_t));
+
+ out:
+    fast_flush_area(pending_idx, 1);
+    make_response(blkif, req->id, req->operation, rsp);
+}
+
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+{
+    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+    unsigned long fas = 0;
+    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+    pending_req_t *pending_req;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#else
+    unsigned long remap_prot;
+    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#endif
+    struct phys_req preq;
+    struct { 
+        unsigned long buf; unsigned int nsec;
+    } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    unsigned int nseg;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+    struct buffer_head *bh;
+#else
+    struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int nbio = 0;
+    request_queue_t *q;
+#endif
+
+    /* Check that number of segments is sane. */
+    nseg = req->nr_segments;
+    if ( unlikely(nseg == 0) || 
+         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
+    {
+        DPRINTK("Bad number of segments in request (%d)\n", nseg);
+        goto bad_descriptor;
+    }
+
+    preq.dev           = req->device;
+    preq.sector_number = req->sector_number;
+    preq.nr_sects      = 0;
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    for ( i = 0; i < nseg; i++ )
+    {
+        fas         = req->frame_and_sects[i];
+        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+
+        if ( seg[i].nsec <= 0 )
+            goto bad_descriptor;
+        preq.nr_sects += seg[i].nsec;
+
+        map[i].host_virt_addr = MMAP_VADDR(pending_idx, i);
+        map[i].dom = blkif->domid;
+        map[i].ref = blkif_gref_from_fas(fas);
+        map[i].flags = GNTMAP_host_map;
+        if ( operation == WRITE )
+            map[i].flags |= GNTMAP_readonly;
+    }
+
+    if ( unlikely(HYPERVISOR_grant_table_op(
+                    GNTTABOP_map_grant_ref, map, nseg)))
+        BUG();
+
+    for ( i = 0; i < nseg; i++ )
+    {
+        if ( unlikely(map[i].handle < 0) )
+        {
+            DPRINTK("invalid buffer -- could not remap it\n");
+            fast_flush_area(pending_idx, nseg);
+            goto bad_descriptor;
+        }
+
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            FOREIGN_FRAME(map[i].dev_bus_addr);
+
+        pending_handle(pending_idx, i) = map[i].handle;
+    }
+#endif
+
+    for ( i = 0; i < nseg; i++ )
+    {
+        fas         = req->frame_and_sects[i];
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+        seg[i].buf  = (map[i].dev_bus_addr << PAGE_SHIFT) |
+                      (blkif_first_sect(fas) << 9);
+#else
+        seg[i].buf  = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
+        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+        if ( seg[i].nsec <= 0 )
+            goto bad_descriptor;
+        preq.nr_sects += seg[i].nsec;
+#endif
+    }
+
+    if ( vbd_translate(&preq, blkif, operation) != 0 )
+    {
+        DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
+                operation == READ ? "read" : "write", preq.sector_number,
+                preq.sector_number + preq.nr_sects, preq.dev); 
+        goto bad_descriptor;
+    }
+
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+    if ( operation == READ )
+        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+    else
+        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
+
+    for ( i = 0; i < nseg; i++ )
+    {
+       MULTI_update_va_mapping_otherdomain(
+           mcl+i, MMAP_VADDR(pending_idx, i),
+           pfn_pte_ma(seg[i].buf >> PAGE_SHIFT, remap_prot),
+           0, blkif->domid);
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+        if ( blkif->is_blktap )
+            mcl[i].args[3] = ID_TO_DOM(req->id);
+#endif
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
+    }
+
+    BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0);
+
+    for ( i = 0; i < nseg; i++ )
+    {
+        if ( unlikely(mcl[i].result != 0) )
+        {
+            DPRINTK("invalid buffer -- could not remap it\n");
+            fast_flush_area(pending_idx, nseg);
+            goto bad_descriptor;
+        }
+    }
+#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
+
+    pending_req = &pending_reqs[pending_idx];
+    pending_req->blkif     = blkif;
+    pending_req->id        = req->id;
+    pending_req->operation = operation;
+    pending_req->status    = BLKIF_RSP_OKAY;
+    pending_req->nr_pages  = nseg;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+    atomic_set(&pending_req->pendcnt, nseg);
+    pending_cons++;
+    blkif_get(blkif);
+
+    for ( i = 0; i < nseg; i++ )
+    {
+        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+        if ( unlikely(bh == NULL) )
+        {
+            __end_block_io_op(pending_req, 0);
+            continue;
+        }
+
+        memset(bh, 0, sizeof (struct buffer_head));
+
+        init_waitqueue_head(&bh->b_wait);
+        bh->b_size          = seg[i].nsec << 9;
+        bh->b_dev           = preq.dev;
+        bh->b_rdev          = preq.dev;
+        bh->b_rsector       = (unsigned long)preq.sector_number;
+        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
+            (seg[i].buf & ~PAGE_MASK);
+        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
+        bh->b_end_io        = end_block_io_op;
+        bh->b_private       = pending_req;
+
+        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
+            (1 << BH_Req) | (1 << BH_Launder);
+        if ( operation == WRITE )
+            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
+
+        atomic_set(&bh->b_count, 1);
+
+        /* Dispatch a single request. We'll flush it to disc later. */
+        generic_make_request(operation, bh);
+
+        preq.sector_number += seg[i].nsec;
+    }
+
+#else
+
+    for ( i = 0; i < nseg; i++ )
+    {
+        if ( ((int)preq.sector_number|(int)seg[i].nsec) &
+             ((bdev_hardsect_size(preq.bdev) >> 9) - 1) )
+        {
+            DPRINTK("Misaligned I/O request from domain %d", blkif->domid);
+            goto cleanup_and_fail;
+        }
+
+        while ( (bio == NULL) ||
+                (bio_add_page(bio,
+                              virt_to_page(MMAP_VADDR(pending_idx, i)),
+                              seg[i].nsec << 9,
+                              seg[i].buf & ~PAGE_MASK) == 0) )
+        {
+            bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
+            if ( unlikely(bio == NULL) )
+            {
+            cleanup_and_fail:
+                for ( i = 0; i < (nbio-1); i++ )
+                    bio_put(biolist[i]);
+                fast_flush_area(pending_idx, nseg);
+                goto bad_descriptor;
+            }
+                
+            bio->bi_bdev    = preq.bdev;
+            bio->bi_private = pending_req;
+            bio->bi_end_io  = end_block_io_op;
+            bio->bi_sector  = preq.sector_number;
+        }
+
+        preq.sector_number += seg[i].nsec;
+    }
+
+    if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
+    {
+        flush_plugged_queue();
+        blk_get_queue(q);
+        plugged_queue = q;
+    }
+
+    atomic_set(&pending_req->pendcnt, nbio);
+    pending_cons++;
+    blkif_get(blkif);
+
+    for ( i = 0; i < nbio; i++ )
+        submit_bio(operation, biolist[i]);
+
+#endif
+
+    return;
+
+ bad_descriptor:
+    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, int st)
+{
+    blkif_response_t *resp;
+    unsigned long     flags;
+    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+
+    /* Place on the response ring for the relevant domain. */ 
+    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+    resp->id        = id;
+    resp->operation = op;
+    resp->status    = st;
+    wmb(); /* Ensure other side can see the response fields. */
+    blk_ring->rsp_prod_pvt++;
+    RING_PUSH_RESPONSES(blk_ring);
+    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+    /* Kick the relevant domain. */
+    notify_via_evtchn(blkif->evtchn);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+    remove_from_blkdev_list(blkif);
+}
+
+static int __init blkif_init(void)
+{
+    int i;
+
+    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
+         !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
+        return 0;
+
+    blkif_interface_init();
+
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+        BUG();
+
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    memset(pending_reqs, 0, sizeof(pending_reqs));
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+    
+    spin_lock_init(&blkio_schedule_list_lock);
+    INIT_LIST_HEAD(&blkio_schedule_list);
+
+    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+    buffer_head_cachep = kmem_cache_create(
+        "buffer_head_cache", sizeof(struct buffer_head),
+        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+#endif
+
+    blkif_ctrlif_init();
+    
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    memset( pending_grant_handles,  BLKBACK_INVALID_HANDLE, MMAP_PAGES );
+    printk(KERN_ALERT "Blkif backend is using grant tables.\n");
+#endif
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+    printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
+#endif
+
+    return 0;
+}
+
+__initcall(blkif_init);
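
The pending-request accounting above is compact but subtle: free slot indices circulate through pending_ring[], pending_prod/pending_cons run freely and are only masked on access, and NR_PENDING_REQS falls out of their difference. A stand-alone toy model of just that bookkeeping (not driver code; the asserts mirror the expected fill levels):

#include <assert.h>
#include <stdio.h>

#define MAX_PENDING_REQS 64                      /* must stay a power of two */
#define MASK_PEND_IDX(i) ((i) & (MAX_PENDING_REQS - 1))

static unsigned char pending_ring[MAX_PENDING_REQS];
static unsigned int pending_prod, pending_cons;  /* free-running, masked on use */
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

int main(void)
{
    unsigned int i, idx;

    /* Mirrors blkif_init(): every slot index starts out free. */
    pending_prod = MAX_PENDING_REQS;
    pending_cons = 0;
    for (i = 0; i < MAX_PENDING_REQS; i++)
        pending_ring[i] = (unsigned char)i;

    /* dispatch_rw_block_io(): consume a free slot index for a new request. */
    idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
    assert(NR_PENDING_REQS == 1);

    /* __end_block_io_op(): return the slot once the request completes. */
    pending_ring[MASK_PEND_IDX(pending_prod++)] = (unsigned char)idx;
    assert(NR_PENDING_REQS == 0);

    printf("slot %u allocated and recycled\n", idx);
    return 0;
}
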
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Fri Jul 15 13:39:50 2005
@@ -0,0 +1,190 @@
+/*
+ *  linux/arch/x86_64/kernel/vsyscall.c
+ *
+ *  Copyright (C) 2001 Andrea Arcangeli <andrea@xxxxxxx> SuSE
+ *  Copyright 2003 Andi Kleen, SuSE Labs.
+ *
+ *  Thanks to hpa@xxxxxxxxxxxxx for some useful hints.
+ *  Special thanks to Ingo Molnar for his early experience with
+ *  a different vsyscall implementation for Linux/IA32 and for the name.
+ *
+ *  vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
+ *  at virtual address -10Mbyte+1024bytes etc... There are at max 8192
+ *  vsyscalls. One vsyscall can reserve more than 1 slot to avoid
+ *  jumping out of line if necessary.
+ *
+ *  Note: the concept clashes with user mode linux. If you use UML just
+ *  set the kernel.vsyscall sysctl to 0.
+ */
+
+/*
+ * TODO 2001-03-20:
+ *
+ * 1) make the page fault handler detect faults on page1-page-last of the vsyscall
+ *    virtual space, and make it increase %rip and write -ENOSYS in %rax (so
+ *    we'll be able to upgrade to a new glibc without upgrading the kernel after
+ *    we add more vsyscalls).
+ * 2) Possibly we need a fixmap table for the vsyscalls too if we want
+ *    to avoid SIGSEGV and we want to return -EFAULT from the vsyscalls as well.
+ *    Can we segfault inside a "syscall"? We can fix this anytime and those fixes
+ *    won't be visible for userspace. Not fixing this is a noop for correct programs;
+ *    broken programs will segfault and there's no security risk until we choose to
+ *    fix it.
+ *
+ * These are not urgent things; we need to address them only before shipping the
+ * first production binary kernels.
+ */
+
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/seqlock.h>
+#include <linux/jiffies.h>
+
+#include <asm/vsyscall.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/fixmap.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+
+#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define force_inline __attribute__((always_inline)) inline
+
+int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
+seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+
+#include <asm/unistd.h>
+
+static force_inline void timeval_normalize(struct timeval * tv)
+{
+       time_t __sec;
+
+       __sec = tv->tv_usec / 1000000;
+       if (__sec)
+       {
+               tv->tv_usec %= 1000000;
+               tv->tv_sec += __sec;
+       }
+}
+
+static force_inline void do_vgettimeofday(struct timeval * tv)
+{
+       long sequence, t;
+       unsigned long sec, usec;
+
+       do {
+               sequence = read_seqbegin(&__xtime_lock);
+               
+               sec = __xtime.tv_sec;
+               usec = (__xtime.tv_nsec / 1000) +
+                       (__jiffies - __wall_jiffies) * (1000000 / HZ);
+
+               if (__vxtime.mode == VXTIME_TSC) {
+                       sync_core();
+                       rdtscll(t);
+                       if (t < __vxtime.last_tsc) t = __vxtime.last_tsc;
+                       usec += ((t - __vxtime.last_tsc) *
+                                __vxtime.tsc_quot) >> 32;
+                       /* See comment in x86_64 do_gettimeofday. */ 
+               } else {
+                       usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+                                 __vxtime.last) * __vxtime.quot) >> 32;
+               }
+       } while (read_seqretry(&__xtime_lock, sequence));
+
+       tv->tv_sec = sec + usec / 1000000;
+       tv->tv_usec = usec % 1000000;
+}
+
+/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
+static force_inline void do_get_tz(struct timezone * tz)
+{
+               *tz = __sys_tz;
+}
+
+
+static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       int ret;
+       asm volatile("syscall" 
+               : "=a" (ret)
+               : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber 
);
+       return ret;
+}
+
+static force_inline long time_syscall(long *t)
+{
+       long secs;
+       asm volatile("syscall" 
+               : "=a" (secs)
+               : "0" (__NR_time),"D" (t) : __syscall_clobber);
+       return secs;
+}
+
+static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+{
+       if (unlikely(!__sysctl_vsyscall))
+               return gettimeofday(tv,tz);
+       if (tv)
+               do_vgettimeofday(tv);
+       if (tz)
+               do_get_tz(tz);
+       return 0;
+}
+
+/* This will break when the xtime seconds get inaccurate, but that is
+ * unlikely */
+static time_t __vsyscall(1) vtime(time_t *t)
+{
+       if (unlikely(!__sysctl_vsyscall))
+               return time_syscall(t);
+       else if (t)
+               *t = __xtime.tv_sec;            
+       return __xtime.tv_sec;
+}
+
+static long __vsyscall(2) venosys_0(void)
+{
+       return -ENOSYS;
+}
+
+static long __vsyscall(3) venosys_1(void)
+{
+       return -ENOSYS;
+
+}
+
+static void __init map_vsyscall(void)
+{
+       extern char __vsyscall_0;
+       unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+
+       __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+}
+
+extern void __set_fixmap_user (enum fixed_addresses, unsigned long, pgprot_t);
+
+static void __init map_vsyscall_user(void)
+{
+       extern char __vsyscall_0;
+       unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+
+       __set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+}
+
+static int __init vsyscall_init(void)
+{
+        BUG_ON(((unsigned long) &vgettimeofday != 
+                     VSYSCALL_ADDR(__NR_vgettimeofday)));
+       BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
+       BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+       map_vsyscall();
+        map_vsyscall_user();    /* establish translation for user address space */
+       sysctl_vsyscall = 0;    /* TBD */
+
+       return 0;
+}
+
+__initcall(vsyscall_init);
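
do_vgettimeofday() above uses the classic seqlock read protocol: sample the sequence count, read the shared time fields, and retry if the count changed underneath. A minimal stand-alone sketch of the same protocol using C11 atomics rather than the kernel's read_seqbegin()/read_seqretry() (single writer assumed; all ordering left at seq_cst for simplicity):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int seq;
static long shared_sec, shared_usec;        /* stand-ins for the __xtime fields */

static void reader(long *sec, long *usec)
{
    unsigned int start;
    do {
        /* Wait for an even count: odd means a writer is mid-update. */
        do {
            start = atomic_load(&seq);
        } while (start & 1);
        *sec  = shared_sec;
        *usec = shared_usec;
    } while (atomic_load(&seq) != start);   /* cf. read_seqretry() */
}

static void writer(long sec, long usec)
{
    atomic_fetch_add(&seq, 1);              /* count goes odd: write begins */
    shared_sec  = sec;
    shared_usec = usec;
    atomic_fetch_add(&seq, 1);              /* count even again: consistent */
}

int main(void)
{
    long s, u;
    writer(1121431190, 500000);             /* arbitrary example values */
    reader(&s, &u);
    printf("%ld.%06ld\n", s, u);
    return 0;
}
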
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,17 @@
+#
+# Makefile for x86 timers
+#
+
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+obj-y :=       timer_tsc.o
+c-obj-y :=
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/kernel/timers/$(notdir $@) $@
+
+obj-y  += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r 35b74976f598 -r a83ac0806d6b patches/linux-2.6.11/iomap.patch
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/patches/linux-2.6.11/iomap.patch  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,120 @@
+diff -ur linux-2.6.11/drivers/char/agp/frontend.c linux-2.6.11-io/drivers/char/agp/frontend.c
+--- linux-2.6.11/drivers/char/agp/frontend.c   2005-03-02 07:37:49.000000000 +0000
++++ linux-2.6.11-io/drivers/char/agp/frontend.c        2005-03-15 17:38:30.000000000 +0000
+@@ -627,7 +627,7 @@
+               DBG("client vm_ops=%p", kerninfo.vm_ops);
+               if (kerninfo.vm_ops) {
+                       vma->vm_ops = kerninfo.vm_ops;
+-              } else if (remap_pfn_range(vma, vma->vm_start,
++              } else if (io_remap_pfn_range(vma, vma->vm_start,
+                               (kerninfo.aper_base + offset) >> PAGE_SHIFT,
+                                           size, vma->vm_page_prot)) {
+                       goto out_again;
+@@ -643,7 +643,7 @@
+               DBG("controller vm_ops=%p", kerninfo.vm_ops);
+               if (kerninfo.vm_ops) {
+                       vma->vm_ops = kerninfo.vm_ops;
+-              } else if (remap_pfn_range(vma, vma->vm_start,
++              } else if (io_remap_pfn_range(vma, vma->vm_start,
+                                           kerninfo.aper_base >> PAGE_SHIFT,
+                                           size, vma->vm_page_prot)) {
+                       goto out_again;
+diff -ur linux-2.6.11/drivers/char/drm/drm_vm.c linux-2.6.11-io/drivers/char/drm/drm_vm.c
+--- linux-2.6.11/drivers/char/drm/drm_vm.c     2005-03-02 07:38:33.000000000 +0000
++++ linux-2.6.11-io/drivers/char/drm/drm_vm.c  2005-03-15 17:43:26.000000000 +0000
+@@ -630,7 +630,7 @@
+                                       vma->vm_end - vma->vm_start,
+                                       vma->vm_page_prot, 0))
+ #else
+-              if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
++              if (io_remap_pfn_range(vma, vma->vm_start,
+                                    (VM_OFFSET(vma) + offset) >> PAGE_SHIFT,
+                                    vma->vm_end - vma->vm_start,
+                                    vma->vm_page_prot))
+diff -ur linux-2.6.11/drivers/char/drm/i810_dma.c linux-2.6.11-io/drivers/char/drm/i810_dma.c
+--- linux-2.6.11/drivers/char/drm/i810_dma.c   2005-03-02 07:37:55.000000000 +0000
++++ linux-2.6.11-io/drivers/char/drm/i810_dma.c        2005-03-15 17:53:36.000000000 +0000
+@@ -139,7 +139,7 @@
+       buf_priv->currently_mapped = I810_BUF_MAPPED;
+       unlock_kernel();
+ 
+-      if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
++      if (io_remap_pfn_range(vma, vma->vm_start,
+                            VM_OFFSET(vma) >> PAGE_SHIFT,
+                            vma->vm_end - vma->vm_start,
+                            vma->vm_page_prot)) return -EAGAIN;
+diff -ur linux-2.6.11/drivers/char/drm/i830_dma.c linux-2.6.11-io/drivers/char/drm/i830_dma.c
+--- linux-2.6.11/drivers/char/drm/i830_dma.c   2005-03-02 07:37:48.000000000 +0000
++++ linux-2.6.11-io/drivers/char/drm/i830_dma.c        2005-03-15 17:53:46.000000000 +0000
+@@ -157,7 +157,7 @@
+       buf_priv->currently_mapped = I830_BUF_MAPPED;
+       unlock_kernel();
+ 
+-      if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
++      if (io_remap_pfn_range(vma, vma->vm_start,
+                            VM_OFFSET(vma) >> PAGE_SHIFT,
+                            vma->vm_end - vma->vm_start,
+                            vma->vm_page_prot)) return -EAGAIN;
+diff -ur linux-2.6.11/drivers/char/hpet.c linux-2.6.11-io/drivers/char/hpet.c
+--- linux-2.6.11/drivers/char/hpet.c   2005-03-02 07:38:10.000000000 +0000
++++ linux-2.6.11-io/drivers/char/hpet.c        2005-03-15 17:37:22.000000000 +0000
+@@ -76,6 +76,7 @@
+ struct hpets {
+       struct hpets *hp_next;
+       struct hpet __iomem *hp_hpet;
++      unsigned long hp_hpet_phys;
+       struct time_interpolator *hp_interpolator;
+       unsigned long hp_period;
+       unsigned long hp_delta;
+@@ -265,7 +266,7 @@
+               return -EINVAL;
+ 
+       devp = file->private_data;
+-      addr = (unsigned long)devp->hd_hpet;
++      addr = devp->hd_hpets->hp_hpet_phys;
+ 
+       if (addr & (PAGE_SIZE - 1))
+               return -ENOSYS;
+@@ -274,7 +275,7 @@
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       addr = __pa(addr);
+ 
+-      if (remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
++      if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
+                                       PAGE_SIZE, vma->vm_page_prot)) {
+               printk(KERN_ERR "remap_pfn_range failed in hpet.c\n");
+               return -EAGAIN;
+@@ -795,6 +796,7 @@
+ 
+       hpetp->hp_which = hpet_nhpet++;
+       hpetp->hp_hpet = hdp->hd_address;
++      hpetp->hp_hpet_phys = hdp->hd_phys_address;
+ 
+       hpetp->hp_ntimer = hdp->hd_nirqs;
+ 
+diff -ur linux-2.6.11/drivers/sbus/char/flash.c linux-2.6.11-io/drivers/sbus/char/flash.c
+--- linux-2.6.11/drivers/sbus/char/flash.c     2005-03-02 07:38:10.000000000 +0000
++++ linux-2.6.11-io/drivers/sbus/char/flash.c  2005-03-15 17:20:22.000000000 +0000
+@@ -75,7 +75,7 @@
+       pgprot_val(vma->vm_page_prot) |= _PAGE_E;
+       vma->vm_flags |= (VM_SHM | VM_LOCKED);
+ 
+-      if (remap_pfn_range(vma, vma->vm_start, addr, size, vma->vm_page_prot))
++      if (io_remap_pfn_range(vma, vma->vm_start, addr, size, vma->vm_page_prot))
+               return -EAGAIN;
+               
+       return 0;
+diff -ur linux-2.6.11/include/linux/mm.h linux-2.6.11-io/include/linux/mm.h
+--- linux-2.6.11/include/linux/mm.h    2005-03-02 07:37:47.000000000 +0000
++++ linux-2.6.11-io/include/linux/mm.h 2005-03-15 17:03:46.000000000 +0000
+@@ -815,6 +815,10 @@
+ extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
+ int remap_pfn_range(struct vm_area_struct *, unsigned long,
+               unsigned long, unsigned long, pgprot_t);
++/* Allow arch override for mapping of device and I/O (non-RAM) pages. */
++#ifndef io_remap_pfn_range
++#define io_remap_pfn_range remap_pfn_range
++#endif
+ 
+ #ifdef CONFIG_PROC_FS
+ void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
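The patch above is mechanical: each driver mmap handler switches from
remap_pfn_range() to io_remap_pfn_range(), which the new mm.h stanza defaults
to plain remap_pfn_range() unless the architecture overrides it for device and
I/O (non-RAM) pages. A hedged sketch of the call pattern the patched drivers
share; example_mmap and device_phys_base are illustrative names, not symbols
from this tree:

    static int example_mmap(struct file *file, struct vm_area_struct *vma)
    {
            unsigned long size = vma->vm_end - vma->vm_start;

            vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
            /* device_phys_base stands in for the real aperture address */
            if (io_remap_pfn_range(vma, vma->vm_start,
                                   device_phys_base >> PAGE_SHIFT,
                                   size, vma->vm_page_prot))
                    return -EAGAIN;
            return 0;
    }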
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h Fri Jul 15 13:39:50 2005
@@ -0,0 +1,371 @@
+#ifndef _I386_PGTABLE_H
+#define _I386_PGTABLE_H
+
+#include <linux/config.h>
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * the i386, we use that, but "fold" the mid level into the top-level page
+ * table, so that we physically have the same two-level page table as the
+ * i386 mmu expects.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the i386 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/hypervisor.h>
+#include <linux/threads.h>
+#include <asm/fixmap.h>
+
+#ifndef _I386_BITOPS_H
+#include <asm/bitops.h>
+#endif
+
+#define swapper_pg_dir 0
+extern void paging_init(void);
+
+/* Caches aren't brain-dead on the intel. */
+#define flush_cache_all()                      do { } while (0)
+#define flush_cache_mm(mm)                     do { } while (0)
+#define flush_cache_range(mm, start, end)      do { } while (0)
+#define flush_cache_page(vma, vmaddr)          do { } while (0)
+#define flush_page_to_ram(page)                        do { } while (0)
+#define flush_dcache_page(page)                        do { } while (0)
+#define flush_icache_range(start, end)         do { } while (0)
+#define flush_icache_page(vma,pg)              do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len)        do { } while (0)
+
+extern unsigned long pgkern_mask;
+
+#define __flush_tlb() xen_tlb_flush()
+#define __flush_tlb_global() __flush_tlb()
+#define __flush_tlb_all() __flush_tlb_global()
+#define __flush_tlb_one(addr) xen_invlpg(addr)
+#define __flush_tlb_single(addr) xen_invlpg(addr)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifndef __ASSEMBLY__
+#if CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+
+/*
+ * Need to initialise the X86 PAE caches
+ */
+extern void pgtable_cache_init(void);
+
+#else
+# include <asm/pgtable-2level.h>
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()   do { } while (0)
+
+#endif
+#endif
+
+#define PMD_SIZE       (1UL << PMD_SHIFT)
+#define PMD_MASK       (~(PMD_SIZE-1))
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR      0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT   22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+
+
+#ifndef __ASSEMBLY__
+/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */
+#define VMALLOC_OFFSET (4*1024*1024)
+extern void * high_memory;
+#define VMALLOC_START  (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
+                                               ~(VMALLOC_OFFSET-1))
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#if CONFIG_HIGHMEM
+# define VMALLOC_END   (PKMAP_BASE-2*PAGE_SIZE)
+#else
+# define VMALLOC_END   (FIXADDR_START-2*PAGE_SIZE)
+#endif
+
+#define _PAGE_BIT_PRESENT      0
+#define _PAGE_BIT_RW           1
+#define _PAGE_BIT_USER         2
+#define _PAGE_BIT_PWT          3
+#define _PAGE_BIT_PCD          4
+#define _PAGE_BIT_ACCESSED     5
+#define _PAGE_BIT_DIRTY                6
+#define _PAGE_BIT_PSE          7       /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_BIT_GLOBAL       8       /* Global TLB entry PPro+ */
+
+#define _PAGE_PRESENT  0x001
+#define _PAGE_RW       0x002
+#define _PAGE_USER     0x004
+#define _PAGE_PWT      0x008
+#define _PAGE_PCD      0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY    0x040
+#define _PAGE_PSE      0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_GLOBAL   0x100   /* Global TLB entry PPro+ */
+
+#define _PAGE_PROTNONE 0x080   /* If not present */
+
+#define _PAGE_TABLE    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE      __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY      __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define __PAGE_KERNEL \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_NOCACHE \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_RO \
+       (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#if 0
+#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
+#else
+#define MAKE_GLOBAL(x) __pgprot(x)
+#endif
+
+#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+
+/*
+ * The i386 can't do page protection for execute, and treats it the
+ * same as read. Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY
+#define __P101 PAGE_READONLY
+#define __P110 PAGE_COPY
+#define __P111 PAGE_COPY
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY
+#define __S101 PAGE_READONLY
+#define __S110 PAGE_SHARED
+#define __S111 PAGE_SHARED
+
+#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp)  do { set_pte(xp, __pte(0)); } while (0)
+
+#define pmd_none(x)    (!pmd_val(x))
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since writable
+   page tables (wr.p.t.) can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x)     ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+
+
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)          { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_exec(pte_t pte)          { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_dirty(pte_t pte)         { return (pte).pte_low & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)         { return (pte).pte_low & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)         { return (pte).pte_low & _PAGE_RW; }
+
+static inline pte_t pte_rdprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_exprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_mkclean(pte_t pte)     { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)       { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkread(pte_t pte)      { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte)      { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)     { (pte).pte_low |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)     { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)     { (pte).pte_low |= _PAGE_RW; return pte; }
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+    if (!pte_dirty(*ptep))
+        return 0;
+    return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+    if (!pte_young(*ptep))
+        return 0;
+    return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
+}
+
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+    if (pte_write(*ptep))
+        clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+}
+
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+    if (!pte_dirty(*ptep))
+        set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)   __mk_pte((page) - mem_map, (pgprot))
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot)  __mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+       pte.pte_low &= _PAGE_CHG_MASK;
+       pte.pte_low |= pgprot_val(newprot);
+       return pte;
+}
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_page(pmd) \
+((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+#define __pgd_offset(address) pgd_index(address)
+
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#define __pmd_offset(address) \
+               (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the third-level page table.. */
+#define __pte_offset(address) \
+               ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+                       __pte_offset(address))
+
+/*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+
+/* Encode and de-code a swap entry */
+#define SWP_TYPE(x)                    (((x).val >> 1) & 0x3f)
+#define SWP_OFFSET(x)                  ((x).val >> 8)
+#define SWP_ENTRY(type, offset)                ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
+#define pte_to_swp_entry(pte)          ((swp_entry_t) { (pte).pte_low })
+#define swp_entry_to_pte(x)            ((pte_t) { (x).val })
+
+struct page;
+int change_page_attr(struct page *, int, pgprot_t prot);
+
+static inline void __make_page_readonly(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_wrprotect(*pte));
+}
+
+static inline void __make_page_writable(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_mkwrite(*pte));
+}
+
+static inline void make_page_readonly(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_wrprotect(*pte));
+    if ( (unsigned long)va >= VMALLOC_START )
+        __make_page_readonly(machine_to_virt(
+            *(unsigned long *)pte&PAGE_MASK));
+}
+
+static inline void make_page_writable(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_mkwrite(*pte));
+    if ( (unsigned long)va >= VMALLOC_START )
+        __make_page_writable(machine_to_virt(
+            *(unsigned long *)pte&PAGE_MASK));
+}
+
+static inline void make_pages_readonly(void *va, unsigned int nr)
+{
+    while ( nr-- != 0 )
+    {
+        make_page_readonly(va);
+        va = (void *)((unsigned long)va + PAGE_SIZE);
+    }
+}
+
+static inline void make_pages_writable(void *va, unsigned int nr)
+{
+    while ( nr-- != 0 )
+    {
+        make_page_writable(va);
+        va = (void *)((unsigned long)va + PAGE_SIZE);
+    }
+}
+
+static inline unsigned long arbitrary_virt_to_machine(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
+    return pa | ((unsigned long)va & (PAGE_SIZE-1));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define PageSkip(page)         (0)
+#define kern_addr_valid(addr)  (1)
+
+#define io_remap_page_range remap_page_range
+
+#endif /* _I386_PGTABLE_H */
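The make_page_readonly()/make_page_writable() helpers above exist because the
hypervisor refuses to pin a page as a page table while any writable mapping of
it remains; arbitrary_virt_to_machine() supplies the machine address that the
hypercall interface expects. A hedged sketch of how a caller might combine
them; example_prepare_pagetable is an illustrative name and the pin hypercall
itself is elided:

    static void example_prepare_pagetable(void *pt_page)
    {
            unsigned long maddr;

            /* Drop the writable mapping first, or the pin would be refused. */
            make_page_readonly(pt_page);
            maddr = arbitrary_virt_to_machine(pt_page);
            /* ... pass maddr to the MMUEXT pin operation (not shown) ... */
    }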
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,390 @@
+/******************************************************************************
+ * gnttab.c
+ * 
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ * 
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/uaccess.h>
+#include <asm-xen/xen_proc.h>
+#include <asm-xen/linux-public/privcmd.h>
+#include <asm-xen/gnttab.h>
+#include <asm-xen/synch_bitops.h>
+
+#if 1
+#define ASSERT(_p) \
+    if ( !(_p) ) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \
+    #_p , __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_grant: " fmt, ##args)
+
+
+EXPORT_SYMBOL(gnttab_grant_foreign_access);
+EXPORT_SYMBOL(gnttab_end_foreign_access);
+EXPORT_SYMBOL(gnttab_query_foreign_access);
+EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
+EXPORT_SYMBOL(gnttab_end_foreign_transfer);
+EXPORT_SYMBOL(gnttab_alloc_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_claim_grant_reference);
+EXPORT_SYMBOL(gnttab_release_grant_reference);
+EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
+EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
+
+static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+static grant_ref_t gnttab_free_head;
+
+static grant_entry_t *shared;
+
+/*
+ * Lock-free grant-entry allocator
+ */
+
+static inline int
+get_free_entry(
+    void)
+{
+    grant_ref_t fh, nfh = gnttab_free_head;
+    do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
+    while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
+                                    gnttab_free_list[fh])) != fh) );
+    return fh;
+}
+
+static inline void
+put_free_entry(
+    grant_ref_t ref)
+{
+    grant_ref_t fh, nfh = gnttab_free_head;
+    do { gnttab_free_list[ref] = fh = nfh; wmb(); }
+    while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+}
+
+/*
+ * Public grant-issuing interface functions
+ */
+
+int
+gnttab_grant_foreign_access(
+    domid_t domid, unsigned long frame, int readonly)
+{
+    int ref;
+    
+    if ( unlikely((ref = get_free_entry()) == -1) )
+        return -ENOSPC;
+
+    shared[ref].frame = frame;
+    shared[ref].domid = domid;
+    wmb();
+    shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+
+    return ref;
+}
+
+void
+gnttab_grant_foreign_access_ref(
+    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+{
+    shared[ref].frame = frame;
+    shared[ref].domid = domid;
+    wmb();
+    shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+}
+
+
+int
+gnttab_query_foreign_access( grant_ref_t ref )
+{
+    u16 nflags;
+
+    nflags = shared[ref].flags;
+
+    return ( nflags & (GTF_reading|GTF_writing) );
+}
+
+void
+gnttab_end_foreign_access( grant_ref_t ref, int readonly )
+{
+    u16 flags, nflags;
+
+    nflags = shared[ref].flags;
+    do {
+        if ( (flags = nflags) & (GTF_reading|GTF_writing) )
+            printk(KERN_ALERT "WARNING: g.e. still in use!\n");
+    }
+    while ( (nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags );
+
+    put_free_entry(ref);
+}
+
+int
+gnttab_grant_foreign_transfer(
+    domid_t domid, unsigned long pfn )
+{
+    int ref;
+
+    if ( unlikely((ref = get_free_entry()) == -1) )
+        return -ENOSPC;
+
+    shared[ref].frame = pfn;
+    shared[ref].domid = domid;
+    wmb();
+    shared[ref].flags = GTF_accept_transfer;
+
+    return ref;
+}
+
+void
+gnttab_grant_foreign_transfer_ref(
+    grant_ref_t ref, domid_t domid, unsigned long pfn )
+{
+    shared[ref].frame = pfn;
+    shared[ref].domid = domid;
+    wmb();
+    shared[ref].flags = GTF_accept_transfer;
+}
+
+unsigned long
+gnttab_end_foreign_transfer(
+    grant_ref_t ref)
+{
+    unsigned long frame = 0;
+    u16           flags;
+
+    flags = shared[ref].flags;
+    ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
+
+    /*
+     * If a transfer is committed then wait for the frame address to appear.
+     * Otherwise invalidate the grant entry against future use.
+     */
+    if ( likely(flags != GTF_accept_transfer) ||
+         (synch_cmpxchg(&shared[ref].flags, flags, 0) != GTF_accept_transfer) )
+        while ( unlikely((frame = shared[ref].frame) == 0) )
+            cpu_relax();
+
+    put_free_entry(ref);
+
+    return frame;
+}
+
+void
+gnttab_free_grant_references( u16 count, grant_ref_t head )
+{
+    /* TODO: O(N)...? */
+    grant_ref_t to_die = 0, next = head;
+    int i;
+
+    for ( i = 0; i < count; i++ )
+    {
+        to_die = next;
+        next = gnttab_free_list[next];
+        put_free_entry( to_die );
+    }
+}
+
+int
+gnttab_alloc_grant_references( u16 count,
+                               grant_ref_t *head,
+                               grant_ref_t *terminal )
+{
+    int i;
+    grant_ref_t h = gnttab_free_head;
+
+    for ( i = 0; i < count; i++ )
+        if ( unlikely(get_free_entry() == -1) )
+            goto not_enough_refs;
+
+    *head = h;
+    *terminal = gnttab_free_head;
+
+    return 0;
+
+not_enough_refs:
+    gnttab_free_head = h;
+    return -ENOSPC;
+}
+
+int
+gnttab_claim_grant_reference( grant_ref_t *private_head,
+                              grant_ref_t  terminal )
+{
+    grant_ref_t g;
+    if ( unlikely((g = *private_head) == terminal) )
+        return -ENOSPC;
+    *private_head = gnttab_free_list[g];
+    return g;
+}
+
+void
+gnttab_release_grant_reference( grant_ref_t *private_head,
+                                grant_ref_t  release )
+{
+    gnttab_free_list[release] = *private_head;
+    *private_head = release;
+}
+
+/*
+ * ProcFS operations
+ */
+
+#ifdef CONFIG_PROC_FS
+
+static struct proc_dir_entry *grant_pde;
+
+static int grant_ioctl(struct inode *inode, struct file *file,
+                       unsigned int cmd, unsigned long data)
+{
+    int                     ret;
+    privcmd_hypercall_t     hypercall;
+
+    /* XXX Need safety checks here if using for anything other
+     *     than debugging */
+    return -ENOSYS;
+
+    if ( cmd != IOCTL_PRIVCMD_HYPERCALL )
+        return -ENOSYS;
+
+    if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
+        return -EFAULT;
+
+    if ( hypercall.op != __HYPERVISOR_grant_table_op )
+        return -ENOSYS;
+
+    /* hypercall-invoking asm taken from privcmd.c */
+    __asm__ __volatile__ (
+        "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
+        "movl  4(%%eax),%%ebx ;"
+        "movl  8(%%eax),%%ecx ;"
+        "movl 12(%%eax),%%edx ;"
+        "movl 16(%%eax),%%esi ;"
+        "movl 20(%%eax),%%edi ;"
+        "movl   (%%eax),%%eax ;"
+        TRAP_INSTR "; "
+        "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
+        : "=a" (ret) : "0" (&hypercall) : "memory" );
+
+    return ret;
+}
+
+static struct file_operations grant_file_ops = {
+    ioctl:  grant_ioctl,
+};
+
+static int grant_read(char *page, char **start, off_t off,
+                      int count, int *eof, void *data)
+{
+    int             len;
+    unsigned int    i;
+    grant_entry_t  *gt;
+
+    gt = (grant_entry_t *)shared;
+    len = 0;
+
+    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+    {
+        /* TODO: safety catch here until this can handle >PAGE_SIZE output */
+        if ( len > (PAGE_SIZE - 200) )
+        {
+            len += sprintf( page + len, "Truncated.\n");
+            break;
+        }
+
+        if ( gt[i].flags )
+            len += sprintf( page + len,
+                    "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n",
+                    i,
+                    gt[i].flags,
+                    gt[i].domid,
+                    gt[i].frame );
+    }
+
+    *eof = 1;
+    return len;
+}
+
+static int grant_write(struct file *file, const char __user *buffer,
+                       unsigned long count, void *data)
+{
+    /* TODO: implement this */
+    return -ENOSYS;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+int gnttab_resume(void)
+{
+    gnttab_setup_table_t setup;
+    unsigned long        frames[NR_GRANT_FRAMES];
+    int                  i;
+
+    setup.dom        = DOMID_SELF;
+    setup.nr_frames  = NR_GRANT_FRAMES;
+    setup.frame_list = frames;
+
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0);
+    BUG_ON(setup.status != 0);
+
+    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+        set_fixmap(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
+
+    return 0;
+}
+
+int gnttab_suspend(void)
+{
+    int i;
+
+    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+       clear_fixmap(FIX_GNTTAB_END - i);
+
+    return 0;
+}
+
+static int __init gnttab_init(void)
+{
+    int i;
+
+    BUG_ON(gnttab_resume());
+
+    shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
+
+    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+        gnttab_free_list[i] = i + 1;
+    
+#ifdef CONFIG_PROC_FS
+    /*
+     *  /proc/xen/grant : used by libxc to access grant tables
+     */
+    if ( (grant_pde = create_xen_proc_entry("grant", 0600)) == NULL )
+    {
+        WPRINTK("Unable to create grant xen proc entry\n");
+        return -1;
+    }
+
+    grant_file_ops.read   = grant_pde->proc_fops->read;
+    grant_file_ops.write  = grant_pde->proc_fops->write;
+
+    grant_pde->proc_fops  = &grant_file_ops;
+
+    grant_pde->read_proc  = &grant_read;
+    grant_pde->write_proc = &grant_write;
+#endif
+
+    printk("Grant table initialized\n");
+    return 0;
+}
+
+__initcall(gnttab_init);
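The exported calls above give a split driver the whole grant lifecycle: the
frontend grants one of its frames to the backend domain, passes the small
grant reference over the shared ring, and revokes access once I/O completes
(gnttab_end_foreign_access() warns if the peer is still reading or writing).
A hedged sketch of that lifecycle; example_share_page is an illustrative
name, and virt_to_mfn() is assumed to yield the machine frame number of a
kernel virtual address:

    static int example_share_page(domid_t backend, void *page)
    {
            int ref = gnttab_grant_foreign_access(backend, virt_to_mfn(page),
                                                  0 /* writable */);
            if (ref < 0)
                    return ref;     /* -ENOSPC: no free grant entries */
            /* ... publish ref in a ring request; wait for the response ... */
            gnttab_end_foreign_access(ref, 0);
            return 0;
    }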
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/net/core/skbuff.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/net/core/skbuff.c Fri Jul 15 13:39:50 2005
@@ -0,0 +1,1523 @@
+/*
+ *     Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ *     Authors:        Alan Cox <iiitac@xxxxxxxxxxxxxx>
+ *                     Florian La Roche <rzsfl@xxxxxxxxxxxx>
+ *
+ *     Version:        $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
+ *
+ *     Fixes:
+ *             Alan Cox        :       Fixed the worst of the load
+ *                                     balancer bugs.
+ *             Dave Platt      :       Interrupt stacking fix.
+ *     Richard Kooijman        :       Timestamp fixes.
+ *             Alan Cox        :       Changed buffer format.
+ *             Alan Cox        :       destructor hook for AF_UNIX etc.
+ *             Linus Torvalds  :       Better skb_clone.
+ *             Alan Cox        :       Added skb_copy.
+ *             Alan Cox        :       Added all the changed routines Linus
+ *                                     only put in the headers
+ *             Ray VanTassle   :       Fixed --skb->lock in free
+ *             Alan Cox        :       skb_copy copy arp field
+ *             Andi Kleen      :       slabified it.
+ *             Robert Olsson   :       Removed skb_head_pool
+ *
+ *     NOTE:
+ *             The __skb_ routines should be called with interrupts
+ *     disabled, or you better be *real* sure that the operation is atomic
+ *     with respect to whatever list is being frobbed (e.g. via lock_sock()
+ *     or via disabling bottom half handlers, etc).
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *     The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_NET_CLS_ACT
+#include <net/pkt_sched.h>
+#endif
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static kmem_cache_t *skbuff_head_cache;
+
+/*
+ *     Keep out-of-line to prevent kernel bloat.
+ *     __builtin_return_address is not used because it is not always
+ *     reliable.
+ */
+
+/**
+ *     skb_over_panic  -       private function
+ *     @skb: buffer
+ *     @sz: size
+ *     @here: address
+ *
+ *     Out of line support code for skb_put(). Not user callable.
+ */
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+       printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
+               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+       BUG();
+}
+
+/**
+ *     skb_under_panic -       private function
+ *     @skb: buffer
+ *     @sz: size
+ *     @here: address
+ *
+ *     Out of line support code for skb_push(). Not user callable.
+ */
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+       printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
+               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+       BUG();
+}
+
+/*     Allocate a new skbuff. We do this ourselves so we can fill in a few
+ *     'private' fields and also do memory statistics to find all the
+ *     [BEEP] leaks.
+ *
+ */
+
+/**
+ *     alloc_skb       -       allocate a network buffer
+ *     @size: size to allocate
+ *     @gfp_mask: allocation mask
+ *
+ *     Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *     tail room of size bytes. The object has a reference count of one.
+ *     The return is the buffer. On a failure the return is %NULL.
+ *
+ *     Buffers may only be allocated from interrupts using a @gfp_mask of
+ *     %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+{
+       struct sk_buff *skb;
+       u8 *data;
+
+       /* Get the HEAD */
+       skb = kmem_cache_alloc(skbuff_head_cache,
+                              gfp_mask & ~__GFP_DMA);
+       if (!skb)
+               goto out;
+
+       /* Get the DATA. Size must match skb_add_mtu(). */
+       size = SKB_DATA_ALIGN(size);
+       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+       if (!data)
+               goto nodata;
+
+       memset(skb, 0, offsetof(struct sk_buff, truesize));
+       skb->truesize = size + sizeof(struct sk_buff);
+       atomic_set(&skb->users, 1);
+       skb->head = data;
+       skb->data = data;
+       skb->tail = data;
+       skb->end  = data + size;
+
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags  = 0;
+       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->tso_segs = 0;
+       skb_shinfo(skb)->frag_list = NULL;
+out:
+       return skb;
+nodata:
+       kmem_cache_free(skbuff_head_cache, skb);
+       skb = NULL;
+       goto out;
+}
+
+/**
+ *     alloc_skb_from_cache    -       allocate a network buffer
+ *     @cp: kmem_cache from which to allocate the data area
+ *           (object size must be big enough for @size bytes + skb overheads)
+ *     @size: size to allocate
+ *     @gfp_mask: allocation mask
+ *
+ *     Allocate a new &sk_buff. The returned buffer has no headroom and
+ *     tail room of size bytes. The object has a reference count of one.
+ *     The return is the buffer. On a failure the return is %NULL.
+ *
+ *     Buffers may only be allocated from interrupts using a @gfp_mask of
+ *     %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+                                    unsigned int size, int gfp_mask)
+{
+       struct sk_buff *skb;
+       u8 *data;
+
+       /* Get the HEAD */
+       skb = kmem_cache_alloc(skbuff_head_cache,
+                              gfp_mask & ~__GFP_DMA);
+       if (!skb)
+               goto out;
+
+       /* Get the DATA. */
+       size = SKB_DATA_ALIGN(size);
+       data = kmem_cache_alloc(cp, gfp_mask);
+       if (!data)
+               goto nodata;
+
+       memset(skb, 0, offsetof(struct sk_buff, truesize));
+       skb->truesize = size + sizeof(struct sk_buff);
+       atomic_set(&skb->users, 1);
+       skb->head = data;
+       skb->data = data;
+       skb->tail = data;
+       skb->end  = data + size;
+
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags  = 0;
+       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->tso_segs = 0;
+       skb_shinfo(skb)->frag_list = NULL;
+out:
+       return skb;
+nodata:
+       kmem_cache_free(skbuff_head_cache, skb);
+       skb = NULL;
+       goto out;
+}
+
+
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+       struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+       skb_shinfo(skb)->frag_list = NULL;
+
+       do {
+               struct sk_buff *this = list;
+               list = list->next;
+               kfree_skb(this);
+       } while (list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+       struct sk_buff *list;
+
+       for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
+               skb_get(list);
+}
+
+void skb_release_data(struct sk_buff *skb)
+{
+       if (!skb->cloned ||
+           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+                               put_page(skb_shinfo(skb)->frags[i].page);
+               }
+
+               if (skb_shinfo(skb)->frag_list)
+                       skb_drop_fraglist(skb);
+
+               kfree(skb->head);
+       }
+}
+
+/*
+ *     Free an skbuff by memory without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+       skb_release_data(skb);
+       kmem_cache_free(skbuff_head_cache, skb);
+}
+
+/**
+ *     __kfree_skb - private function
+ *     @skb: buffer
+ *
+ *     Free an sk_buff. Release anything attached to the buffer.
+ *     Clean the state. This is an internal helper function. Users should
+ *     always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+       if (skb->list) {
+               printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+                      "on a list (from %p).\n", NET_CALLER(skb));
+               BUG();
+       }
+
+       dst_release(skb->dst);
+#ifdef CONFIG_XFRM
+       secpath_put(skb->sp);
+#endif
+       if(skb->destructor) {
+               if (in_irq())
+                       printk(KERN_WARNING "Warning: kfree_skb on "
+                                           "hard IRQ %p\n", NET_CALLER(skb));
+               skb->destructor(skb);
+       }
+#ifdef CONFIG_NETFILTER
+       nf_conntrack_put(skb->nfct);
+#ifdef CONFIG_BRIDGE_NETFILTER
+       nf_bridge_put(skb->nf_bridge);
+#endif
+#endif
+/* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+       skb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+       skb->tc_verd = 0;
+       skb->tc_classid = 0;
+#endif
+#endif
+
+       kfree_skbmem(skb);
+}
+
+/**
+ *     skb_clone       -       duplicate an sk_buff
+ *     @skb: buffer to clone
+ *     @gfp_mask: allocation priority
+ *
+ *     Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ *     copies share the same packet data but not structure. The new
+ *     buffer has a reference count of 1. If the allocation fails the
+ *     function returns %NULL otherwise the new buffer is returned.
+ *
+ *     If this function is called from an interrupt, @gfp_mask must be
+ *     %GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+       struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+
+       if (!n) 
+               return NULL;
+
+#define C(x) n->x = skb->x
+
+       n->next = n->prev = NULL;
+       n->list = NULL;
+       n->sk = NULL;
+       C(stamp);
+       C(dev);
+       C(real_dev);
+       C(h);
+       C(nh);
+       C(mac);
+       C(dst);
+       dst_clone(skb->dst);
+       C(sp);
+#ifdef CONFIG_INET
+       secpath_get(skb->sp);
+#endif
+       memcpy(n->cb, skb->cb, sizeof(skb->cb));
+       C(len);
+       C(data_len);
+       C(csum);
+       C(local_df);
+       n->cloned = 1;
+       C(proto_csum_valid);
+       C(proto_csum_blank);
+       C(pkt_type);
+       C(ip_summed);
+       C(priority);
+       C(protocol);
+       C(security);
+       n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+       C(nfmark);
+       C(nfcache);
+       C(nfct);
+       nf_conntrack_get(skb->nfct);
+       C(nfctinfo);
+#ifdef CONFIG_NETFILTER_DEBUG
+       C(nf_debug);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+       C(nf_bridge);
+       nf_bridge_get(skb->nf_bridge);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+       C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+       C(tc_index);
+#ifdef CONFIG_NET_CLS_ACT
+       n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
+       n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
+       n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+       C(input_dev);
+       C(tc_classid);
+#endif
+
+#endif
+       C(truesize);
+       atomic_set(&n->users, 1);
+       C(head);
+       C(data);
+       C(tail);
+       C(end);
+
+       atomic_inc(&(skb_shinfo(skb)->dataref));
+       skb->cloned = 1;
+
+       return n;
+}
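+/*
+ * Illustrative sketch (not part of this patch): skb_clone() shares the
+ * packet data between both buffers -- it only bumps shinfo->dataref and
+ * sets ->cloned -- so the clone is cheap but must be treated as read-only.
+ * The usual choice between the copy routines, assuming softirq context:
+ *
+ * static struct sk_buff *example_private_copy(struct sk_buff *skb, int hdr_only)
+ * {
+ *     if (hdr_only)
+ *             return pskb_copy(skb, GFP_ATOMIC); // private head, shared frags
+ *     return skb_copy(skb, GFP_ATOMIC);          // fully private, linearized
+ * }
+ */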
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+       /*
+        *      Shift between the two data areas in bytes
+        */
+       unsigned long offset = new->data - old->data;
+
+       new->list       = NULL;
+       new->sk         = NULL;
+       new->dev        = old->dev;
+       new->real_dev   = old->real_dev;
+       new->priority   = old->priority;
+       new->protocol   = old->protocol;
+       new->dst        = dst_clone(old->dst);
+#ifdef CONFIG_INET
+       new->sp         = secpath_get(old->sp);
+#endif
+       new->h.raw      = old->h.raw + offset;
+       new->nh.raw     = old->nh.raw + offset;
+       new->mac.raw    = old->mac.raw + offset;
+       memcpy(new->cb, old->cb, sizeof(old->cb));
+       new->local_df   = old->local_df;
+       new->pkt_type   = old->pkt_type;
+       new->stamp      = old->stamp;
+       new->destructor = NULL;
+       new->security   = old->security;
+#ifdef CONFIG_NETFILTER
+       new->nfmark     = old->nfmark;
+       new->nfcache    = old->nfcache;
+       new->nfct       = old->nfct;
+       nf_conntrack_get(old->nfct);
+       new->nfctinfo   = old->nfctinfo;
+#ifdef CONFIG_NETFILTER_DEBUG
+       new->nf_debug   = old->nf_debug;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+       new->nf_bridge  = old->nf_bridge;
+       nf_bridge_get(old->nf_bridge);
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+#ifdef CONFIG_NET_CLS_ACT
+       new->tc_verd = old->tc_verd;
+#endif
+       new->tc_index   = old->tc_index;
+#endif
+       atomic_set(&new->users, 1);
+       skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
+       skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+}
+
+/**
+ *     skb_copy        -       create private copy of an sk_buff
+ *     @skb: buffer to copy
+ *     @gfp_mask: allocation priority
+ *
+ *     Make a copy of both an &sk_buff and its data. This is used when the
+ *     caller wishes to modify the data and needs a private copy of the
+ *     data to alter. Returns %NULL on failure or the pointer to the buffer
+ *     on success. The returned buffer has a reference count of 1.
+ *
+ *     As a by-product this function converts a non-linear &sk_buff into a
+ *     linear one, so the &sk_buff becomes completely private and the caller
+ *     is allowed to modify all the data in the returned buffer. This means
+ *     this function is not recommended when only the header is going to be
+ *     modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+       int headerlen = skb->data - skb->head;
+       /*
+        *      Allocate the copy buffer
+        */
+       struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
+                                     gfp_mask);
+       if (!n)
+               return NULL;
+
+       /* Set the data pointer */
+       skb_reserve(n, headerlen);
+       /* Set the tail pointer and length */
+       skb_put(n, skb->len);
+       n->csum      = skb->csum;
+       n->ip_summed = skb->ip_summed;
+
+       if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
+               BUG();
+
+       copy_skb_header(n, skb);
+       return n;
+}
+
+
+/**
+ *     pskb_copy       -       create copy of an sk_buff with private head.
+ *     @skb: buffer to copy
+ *     @gfp_mask: allocation priority
+ *
+ *     Make a copy of both an &sk_buff and part of its data, located
+ *     in header. Fragmented data remain shared. This is used when
+ *     the caller wishes to modify only header of &sk_buff and needs
+ *     private copy of the header to alter. Returns %NULL on failure
+ *     or the pointer to the buffer on success.
+ *     The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+       /*
+        *      Allocate the copy buffer
+        */
+       struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
+
+       if (!n)
+               goto out;
+
+       /* Set the data pointer */
+       skb_reserve(n, skb->data - skb->head);
+       /* Set the tail pointer and length */
+       skb_put(n, skb_headlen(skb));
+       /* Copy the bytes */
+       memcpy(n->data, skb->data, n->len);
+       n->csum      = skb->csum;
+       n->ip_summed = skb->ip_summed;
+
+       n->data_len  = skb->data_len;
+       n->len       = skb->len;
+
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+                       get_page(skb_shinfo(n)->frags[i].page);
+               }
+               skb_shinfo(n)->nr_frags = i;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+               skb_clone_fraglist(n);
+       }
+
+       copy_skb_header(n, skb);
+out:
+       return n;
+}
+
+/**
+ *     pskb_expand_head - reallocate header of &sk_buff
+ *     @skb: buffer to reallocate
+ *     @nhead: room to add at head
+ *     @ntail: room to add at tail
+ *     @gfp_mask: allocation priority
+ *
+ *     Expands (or creates an identical copy, if &nhead and &ntail are zero)
+ *     the header of the skb. The &sk_buff itself is not changed and MUST have
+ *     a reference count of 1. Returns zero on success or a negative error if
+ *     expansion failed; in the latter case the &sk_buff is left unchanged.
+ *
+ *     All the pointers pointing into skb header may change and must be
+ *     reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+       int i;
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
+
+       if (skb_shared(skb))
+               BUG();
+
+       size = SKB_DATA_ALIGN(size);
+
+       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+       if (!data)
+               goto nodata;
+
+       /* Copy only real data... and, alas, header. This should be
+        * optimized for the cases when header is void. */
+       memcpy(data + nhead, skb->head, skb->tail - skb->head);
+       memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+               get_page(skb_shinfo(skb)->frags[i].page);
+
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
+
+       skb_release_data(skb);
+
+       off = (data + nhead) - skb->head;
+
+       skb->head     = data;
+       skb->end      = data + size;
+       skb->data    += off;
+       skb->tail    += off;
+       skb->mac.raw += off;
+       skb->h.raw   += off;
+       skb->nh.raw  += off;
+       skb->cloned   = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
+       return 0;
+
+nodata:
+       return -ENOMEM;
+}
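+/*
+ * Illustrative sketch (not part of this patch): callers typically use
+ * pskb_expand_head() to guarantee headroom before pushing a new header,
+ * much as skb_realloc_headroom() below does for the cloned case. A
+ * minimal in-place variant, assuming the skb is not shared; remember
+ * that all cached pointers into the skb must be reloaded afterwards:
+ *
+ * static int example_ensure_headroom(struct sk_buff *skb, int needed)
+ * {
+ *     if (skb_headroom(skb) >= needed && !skb_cloned(skb))
+ *             return 0;
+ *     return pskb_expand_head(skb, SKB_DATA_ALIGN(needed), 0, GFP_ATOMIC);
+ * }
+ */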
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+       struct sk_buff *skb2;
+       int delta = headroom - skb_headroom(skb);
+
+       if (delta <= 0)
+               skb2 = pskb_copy(skb, GFP_ATOMIC);
+       else {
+               skb2 = skb_clone(skb, GFP_ATOMIC);
+               if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
+                                            GFP_ATOMIC)) {
+                       kfree_skb(skb2);
+                       skb2 = NULL;
+               }
+       }
+       return skb2;
+}
+
+
+/**
+ *     skb_copy_expand -       copy and expand sk_buff
+ *     @skb: buffer to copy
+ *     @newheadroom: new free bytes at head
+ *     @newtailroom: new free bytes at tail
+ *     @gfp_mask: allocation priority
+ *
+ *     Make a copy of both an &sk_buff and its data and while doing so
+ *     allocate additional space.
+ *
+ *     This is used when the caller wishes to modify the data and needs a
+ *     private copy of the data to alter as well as more space for new fields.
+ *     Returns %NULL on failure or the pointer to the buffer
+ *     on success. The returned buffer has a reference count of 1.
+ *
+ *     You must pass %GFP_ATOMIC as the allocation priority if this function
+ *     is called from an interrupt.
+ *
+ *     BUG ALERT: ip_summed is not copied. Why does this work? Is it used
+ *     only by netfilter in the cases when checksum is recalculated? --ANK
+ */
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+                               int newheadroom, int newtailroom, int gfp_mask)
+{
+       /*
+        *      Allocate the copy buffer
+        */
+       struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
+                                     gfp_mask);
+       int head_copy_len, head_copy_off;
+
+       if (!n)
+               return NULL;
+
+       skb_reserve(n, newheadroom);
+
+       /* Set the tail pointer and length */
+       skb_put(n, skb->len);
+
+       head_copy_len = skb_headroom(skb);
+       head_copy_off = 0;
+       if (newheadroom <= head_copy_len)
+               head_copy_len = newheadroom;
+       else
+               head_copy_off = newheadroom - head_copy_len;
+
+       /* Copy the linear header and data. */
+       if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+                         skb->len + head_copy_len))
+               BUG();
+
+       copy_skb_header(n, skb);
+
+       return n;
+}
+
+/**
+ *     skb_pad                 -       zero pad the tail of an skb
+ *     @skb: buffer to pad
+ *     @pad: space to pad
+ *
+ *     Ensure that a buffer is followed by a padding area that is zero
+ *     filled. Used by network drivers which may DMA or transfer data
+ *     beyond the buffer end onto the wire.
+ *
+ *     May return NULL in out of memory cases.
+ */
+ 
+struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+{
+       struct sk_buff *nskb;
+       
+       /* If the skbuff is non linear tailroom is always zero.. */
+       if (skb_tailroom(skb) >= pad) {
+               memset(skb->data+skb->len, 0, pad);
+               return skb;
+       }
+       
+       nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+       kfree_skb(skb);
+       if (nskb)
+               memset(nskb->data+nskb->len, 0, pad);
+       return nskb;
+}      
+ 
+/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without changing data,
+ * it is a BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+       int offset = skb_headlen(skb);
+       int nfrags = skb_shinfo(skb)->nr_frags;
+       int i;
+
+       for (i = 0; i < nfrags; i++) {
+               int end = offset + skb_shinfo(skb)->frags[i].size;
+               if (end > len) {
+                       if (skb_cloned(skb)) {
+                               if (!realloc)
+                                       BUG();
+                               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+                                       return -ENOMEM;
+                       }
+                       if (len <= offset) {
+                               put_page(skb_shinfo(skb)->frags[i].page);
+                               skb_shinfo(skb)->nr_frags--;
+                       } else {
+                               skb_shinfo(skb)->frags[i].size = len - offset;
+                       }
+               }
+               offset = end;
+       }
+
+       if (offset < len) {
+               skb->data_len -= skb->len - len;
+               skb->len       = len;
+       } else {
+               if (len <= skb_headlen(skb)) {
+                       skb->len      = len;
+                       skb->data_len = 0;
+                       skb->tail     = skb->data + len;
+                       if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+                               skb_drop_fraglist(skb);
+               } else {
+                       skb->data_len -= skb->len - len;
+                       skb->len       = len;
+               }
+       }
+
+       return 0;
+}
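+/*
+ * Illustrative sketch (not part of this patch): trimming is how receive
+ * paths drop padding or excess bytes beyond the true packet length; with
+ * realloc == 1 it is safe even on cloned skbs:
+ *
+ * static int example_trim_to(struct sk_buff *skb, unsigned int len)
+ * {
+ *     if (skb->len <= len)
+ *             return 0;
+ *     return ___pskb_trim(skb, len, 1); // realloc: may change skb pointers
+ * }
+ */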
+
+/**
+ *     __pskb_pull_tail - advance tail of skb header
+ *     @skb: buffer to reallocate
+ *     @delta: number of bytes to advance tail
+ *
+ *     The function makes sense only on a fragmented &sk_buff:
+ *     it expands the header, moving its tail forward and copying the
+ *     necessary data from the fragmented part.
+ *
+ *     &sk_buff MUST have reference count of 1.
+ *
+ *     Returns %NULL (and &sk_buff does not change) if pull failed
+ *     or value of new tail of skb in the case of success.
+ *
+ *     All the pointers pointing into skb header may change and must be
+ *     reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+       /* If the skb does not have enough free space at the tail, get a new
+        * one plus 128 bytes for future expansions. If we have enough room
+        * at the tail, reallocate without expansion only if the skb is cloned.
+        */
+       int i, k, eat = (skb->tail + delta) - skb->end;
+
+       if (eat > 0 || skb_cloned(skb)) {
+               if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
+                                    GFP_ATOMIC))
+                       return NULL;
+       }
+
+       if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+               BUG();
+
+       /* Optimization: no fragments, no reasons to preestimate
+        * size of pulled pages. Superb.
+        */
+       if (!skb_shinfo(skb)->frag_list)
+               goto pull_pages;
+
+       /* Estimate size of pulled pages. */
+       eat = delta;
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               if (skb_shinfo(skb)->frags[i].size >= eat)
+                       goto pull_pages;
+               eat -= skb_shinfo(skb)->frags[i].size;
+       }
+
+       /* If we need to update the frag list, we are in trouble.
+        * Certainly, it is possible to add an offset to the skb data,
+        * but, taking into account that pulling is expected to
+        * be a very rare operation, it is worth fighting against
+        * further bloating of the skb head and crucifying ourselves here instead.
+        * Pure masochism, indeed. 8)8)
+        */
+       if (eat) {
+               struct sk_buff *list = skb_shinfo(skb)->frag_list;
+               struct sk_buff *clone = NULL;
+               struct sk_buff *insp = NULL;
+
+               do {
+                       if (!list)
+                               BUG();
+
+                       if (list->len <= eat) {
+                               /* Eaten as whole. */
+                               eat -= list->len;
+                               list = list->next;
+                               insp = list;
+                       } else {
+                               /* Eaten partially. */
+
+                               if (skb_shared(list)) {
+                                       /* Sucks! We need to fork list. :-( */
+                                       clone = skb_clone(list, GFP_ATOMIC);
+                                       if (!clone)
+                                               return NULL;
+                                       insp = list->next;
+                                       list = clone;
+                               } else {
+                                       /* This may be pulled without
+                                        * problems. */
+                                       insp = list;
+                               }
+                               if (!pskb_pull(list, eat)) {
+                                       if (clone)
+                                               kfree_skb(clone);
+                                       return NULL;
+                               }
+                               break;
+                       }
+               } while (eat);
+
+               /* Free pulled out fragments. */
+               while ((list = skb_shinfo(skb)->frag_list) != insp) {
+                       skb_shinfo(skb)->frag_list = list->next;
+                       kfree_skb(list);
+               }
+               /* And insert new clone at head. */
+               if (clone) {
+                       clone->next = list;
+                       skb_shinfo(skb)->frag_list = clone;
+               }
+       }
+       /* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+       eat = delta;
+       k = 0;
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               if (skb_shinfo(skb)->frags[i].size <= eat) {
+                       put_page(skb_shinfo(skb)->frags[i].page);
+                       eat -= skb_shinfo(skb)->frags[i].size;
+               } else {
+                       skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+                       if (eat) {
+                               skb_shinfo(skb)->frags[k].page_offset += eat;
+                               skb_shinfo(skb)->frags[k].size -= eat;
+                               eat = 0;
+                       }
+                       k++;
+               }
+       }
+       skb_shinfo(skb)->nr_frags = k;
+
+       skb->tail     += delta;
+       skb->data_len -= delta;
+
+       return skb->tail;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+       int i, copy;
+       int start = skb_headlen(skb);
+
+       if (offset > (int)skb->len - len)
+               goto fault;
+
+       /* Copy header. */
+       if ((copy = start - offset) > 0) {
+               if (copy > len)
+                       copy = len;
+               memcpy(to, skb->data + offset, copy);
+               if ((len -= copy) == 0)
+                       return 0;
+               offset += copy;
+               to     += copy;
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               BUG_TRAP(start <= offset + len);
+
+               end = start + skb_shinfo(skb)->frags[i].size;
+               if ((copy = end - offset) > 0) {
+                       u8 *vaddr;
+
+                       if (copy > len)
+                               copy = len;
+
+                       vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+                       memcpy(to,
+                              vaddr + skb_shinfo(skb)->frags[i].page_offset+
+                              offset - start, copy);
+                       kunmap_skb_frag(vaddr);
+
+                       if ((len -= copy) == 0)
+                               return 0;
+                       offset += copy;
+                       to     += copy;
+               }
+               start = end;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+               for (; list; list = list->next) {
+                       int end;
+
+                       BUG_TRAP(start <= offset + len);
+
+                       end = start + list->len;
+                       if ((copy = end - offset) > 0) {
+                               if (copy > len)
+                                       copy = len;
+                               if (skb_copy_bits(list, offset - start,
+                                                 to, copy))
+                                       goto fault;
+                               if ((len -= copy) == 0)
+                                       return 0;
+                               offset += copy;
+                               to     += copy;
+                       }
+                       start = end;
+               }
+       }
+       if (!len)
+               return 0;
+
+fault:
+       return -EFAULT;
+}
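A minimal caller sketch; the helper name and the 64-byte size are illustrative assumptions, not part of this changeset:

        /* Hypothetical: copy the first 64 bytes of a possibly
         * non-linear skb into a stack buffer for parsing. */
        static int example_peek_header(const struct sk_buff *skb)
        {
                u8 hdr[64];

                if (skb->len < sizeof(hdr))
                        return -EINVAL;
                /* skb_copy_bits() walks the linear head, the page
                 * frags and the frag_list for us. */
                if (skb_copy_bits(skb, 0, hdr, sizeof(hdr)) < 0)
                        return -EFAULT;
                /* ... parse hdr ... */
                return 0;
        }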
+
+/* Keep iterating until skb_iter_next returns false. */
+void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i)
+{
+       i->len = skb_headlen(skb);
+       i->data = (unsigned char *)skb->data;
+       i->nextfrag = 0;
+       i->fraglist = NULL;
+}
+
+int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i)
+{
+       /* Unmap previous, if not head fragment. */
+       if (i->nextfrag)
+               kunmap_skb_frag(i->data);
+
+       if (i->fraglist) {
+       fraglist:
+               /* We're iterating through fraglist. */
+               if (i->nextfrag < skb_shinfo(i->fraglist)->nr_frags) {
+                       i->data = kmap_skb_frag(&skb_shinfo(i->fraglist)
+                                               ->frags[i->nextfrag]);
+                       i->len = skb_shinfo(i->fraglist)->frags[i->nextfrag]
+                               .size;
+                       i->nextfrag++;
+                       return 1;
+               }
+               /* Fragments with fragments?  Too hard! */
+               BUG_ON(skb_shinfo(i->fraglist)->frag_list);
+               i->fraglist = i->fraglist->next;
+               if (!i->fraglist)
+                       goto end;
+
+               i->len = skb_headlen(i->fraglist);
+               i->data = i->fraglist->data;
+               i->nextfrag = 0;
+               return 1;
+       }
+
+       if (i->nextfrag < skb_shinfo(skb)->nr_frags) {
+               i->data = kmap_skb_frag(&skb_shinfo(skb)->frags[i->nextfrag]);
+               i->len = skb_shinfo(skb)->frags[i->nextfrag].size;
+               i->nextfrag++;
+               return 1;
+       }
+
+       i->fraglist = skb_shinfo(skb)->frag_list;
+       if (i->fraglist)
+               goto fraglist;
+
+end:
+       /* Bug trap for callers */
+       i->data = NULL;
+       return 0;
+}
+
+void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i)
+{
+       /* Unmap previous, if not head fragment. */
+       if (i->data && i->nextfrag)
+               kunmap_skb_frag(i->data);
+       /* Bug trap for callers */
+       i->data = NULL;
+}
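A minimal sketch of the iterator contract implied by the code above: skb_iter_first() exposes the linear head immediately, each skb_iter_next() call unmaps the previous chunk and maps the next, and skb_iter_abort() is only needed when bailing out early.

        struct skb_iter it;
        unsigned int bytes = 0;

        skb_iter_first(skb, &it);
        do {
                bytes += it.len;        /* it.data points at this chunk */
        } while (skb_iter_next(skb, &it));
        /* Ran to completion, so no skb_iter_abort() is needed here; a
         * loop that breaks early must call skb_iter_abort(skb, &it). */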
+
+/* Checksum skb data. */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+                         int len, unsigned int csum)
+{
+       int start = skb_headlen(skb);
+       int i, copy = start - offset;
+       int pos = 0;
+
+       /* Checksum header. */
+       if (copy > 0) {
+               if (copy > len)
+                       copy = len;
+               csum = csum_partial(skb->data + offset, copy, csum);
+               if ((len -= copy) == 0)
+                       return csum;
+               offset += copy;
+               pos     = copy;
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               BUG_TRAP(start <= offset + len);
+
+               end = start + skb_shinfo(skb)->frags[i].size;
+               if ((copy = end - offset) > 0) {
+                       unsigned int csum2;
+                       u8 *vaddr;
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+                       if (copy > len)
+                               copy = len;
+                       vaddr = kmap_skb_frag(frag);
+                       csum2 = csum_partial(vaddr + frag->page_offset +
+                                            offset - start, copy, 0);
+                       kunmap_skb_frag(vaddr);
+                       csum = csum_block_add(csum, csum2, pos);
+                       if (!(len -= copy))
+                               return csum;
+                       offset += copy;
+                       pos    += copy;
+               }
+               start = end;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+               for (; list; list = list->next) {
+                       int end;
+
+                       BUG_TRAP(start <= offset + len);
+
+                       end = start + list->len;
+                       if ((copy = end - offset) > 0) {
+                               unsigned int csum2;
+                               if (copy > len)
+                                       copy = len;
+                               csum2 = skb_checksum(list, offset - start,
+                                                    copy, 0);
+                               csum = csum_block_add(csum, csum2, pos);
+                               if ((len -= copy) == 0)
+                                       return csum;
+                               offset += copy;
+                               pos    += copy;
+                       }
+                       start = end;
+               }
+       }
+       if (len)
+               BUG();
+
+       return csum;
+}
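A minimal sketch, assuming a receive path that must verify a checksum in software (e.g. when the hardware did not compute one):

        /* Checksum the whole packet and fold to 16 bits; a folded
         * result of zero means the embedded checksum verified. */
        if (csum_fold(skb_checksum(skb, 0, skb->len, 0)))
                goto csum_error;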
+
+/* Both of the above in one bottle. */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+                                   u8 *to, int len, unsigned int csum)
+{
+       int start = skb_headlen(skb);
+       int i, copy = start - offset;
+       int pos = 0;
+
+       /* Copy header. */
+       if (copy > 0) {
+               if (copy > len)
+                       copy = len;
+               csum = csum_partial_copy_nocheck(skb->data + offset, to,
+                                                copy, csum);
+               if ((len -= copy) == 0)
+                       return csum;
+               offset += copy;
+               to     += copy;
+               pos     = copy;
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               BUG_TRAP(start <= offset + len);
+
+               end = start + skb_shinfo(skb)->frags[i].size;
+               if ((copy = end - offset) > 0) {
+                       unsigned int csum2;
+                       u8 *vaddr;
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+                       if (copy > len)
+                               copy = len;
+                       vaddr = kmap_skb_frag(frag);
+                       csum2 = csum_partial_copy_nocheck(vaddr +
+                                                         frag->page_offset +
+                                                         offset - start, to,
+                                                         copy, 0);
+                       kunmap_skb_frag(vaddr);
+                       csum = csum_block_add(csum, csum2, pos);
+                       if (!(len -= copy))
+                               return csum;
+                       offset += copy;
+                       to     += copy;
+                       pos    += copy;
+               }
+               start = end;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+               for (; list; list = list->next) {
+                       unsigned int csum2;
+                       int end;
+
+                       BUG_TRAP(start <= offset + len);
+
+                       end = start + list->len;
+                       if ((copy = end - offset) > 0) {
+                               if (copy > len)
+                                       copy = len;
+                               csum2 = skb_copy_and_csum_bits(list,
+                                                              offset - start,
+                                                              to, copy, 0);
+                               csum = csum_block_add(csum, csum2, pos);
+                               if ((len -= copy) == 0)
+                                       return csum;
+                               offset += copy;
+                               to     += copy;
+                               pos    += copy;
+                       }
+                       start = end;
+               }
+       }
+       if (len)
+               BUG();
+       return csum;
+}
+
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+       unsigned int csum;
+       long csstart;
+
+       if (skb->ip_summed == CHECKSUM_HW)
+               csstart = skb->h.raw - skb->data;
+       else
+               csstart = skb_headlen(skb);
+
+       if (csstart > skb_headlen(skb))
+               BUG();
+
+       memcpy(to, skb->data, csstart);
+
+       csum = 0;
+       if (csstart != skb->len)
+               csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
+                                             skb->len - csstart, 0);
+
+       if (skb->ip_summed == CHECKSUM_HW) {
+               long csstuff = csstart + skb->csum;
+
+               *((unsigned short *)(to + csstuff)) = csum_fold(csum);
+       }
+}
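A minimal sketch of the intended consumer, a copy-based network driver's transmit routine; example_get_tx_slot() and example_kick_tx() are hypothetical driver internals:

        static int example_start_xmit(struct sk_buff *skb, struct net_device *dev)
        {
                u8 *slot = example_get_tx_slot(dev);    /* hypothetical */

                /* Copies the frame and, when skb->ip_summed == CHECKSUM_HW,
                 * computes and stores the checksum at h.raw + skb->csum. */
                skb_copy_and_csum_dev(skb, slot);
                example_kick_tx(dev, slot, skb->len);   /* hypothetical */
                dev_kfree_skb(skb);
                return 0;
        }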
+
+/**
+ *     skb_dequeue - remove from the head of the queue
+ *     @list: list to dequeue from
+ *
+ *     Remove the head of the list. The list lock is taken so the function
+ *     may be used safely with other locking list functions. The head item is
+ *     returned or %NULL if the list is empty.
+ */
+
+struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+       unsigned long flags;
+       struct sk_buff *result;
+
+       spin_lock_irqsave(&list->lock, flags);
+       result = __skb_dequeue(list);
+       spin_unlock_irqrestore(&list->lock, flags);
+       return result;
+}
+
+/**
+ *     skb_dequeue_tail - remove from the tail of the queue
+ *     @list: list to dequeue from
+ *
+ *     Remove the tail of the list. The list lock is taken so the function
+ *     may be used safely with other locking list functions. The tail item is
+ *     returned or %NULL if the list is empty.
+ */
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+       unsigned long flags;
+       struct sk_buff *result;
+
+       spin_lock_irqsave(&list->lock, flags);
+       result = __skb_dequeue_tail(list);
+       spin_unlock_irqrestore(&list->lock, flags);
+       return result;
+}
+
+/**
+ *     skb_queue_purge - empty a list
+ *     @list: list to empty
+ *
+ *     Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *     the list and one reference dropped. This function takes the list
+ *     lock and is atomic with respect to other list locking functions.
+ */
+void skb_queue_purge(struct sk_buff_head *list)
+{
+       struct sk_buff *skb;
+       while ((skb = skb_dequeue(list)) != NULL)
+               kfree_skb(skb);
+}
+
+/**
+ *     skb_queue_head - queue a buffer at the list head
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the start of the list. This function takes the
+ *     list lock and can be used safely with other locking &sk_buff
+ *     functions.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_queue_head(list, newsk);
+       spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ *     skb_queue_tail - queue a buffer at the list tail
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the tail of the list. This function takes the
+ *     list lock and can be used safely with other locking &sk_buff
+ *     functions.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_queue_tail(list, newsk);
+       spin_unlock_irqrestore(&list->lock, flags);
+}
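A minimal usage sketch for the locked queue API as a whole; because every call takes list->lock with IRQs disabled, producer and consumer may safely run in interrupt and process context respectively (newsk stands for a buffer obtained elsewhere):

        struct sk_buff_head rxq;
        struct sk_buff *skb;

        skb_queue_head_init(&rxq);      /* initialise list head and lock */
        skb_queue_tail(&rxq, newsk);    /* producer side, e.g. from an ISR */

        while ((skb = skb_dequeue(&rxq)) != NULL)
                netif_rx(skb);          /* consumer side */

        skb_queue_purge(&rxq);          /* teardown: free anything left */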
+/**
+ *     skb_unlink      -       remove a buffer from a list
+ *     @skb: buffer to remove
+ *
+ *     Remove a packet from a list. The list locks are taken and this
+ *     function is atomic with respect to other list locked calls.
+ *
+ *     Works even without knowing the list it is sitting on, which can be
+ *     handy at times. It also means that THE LIST MUST EXIST when you
+ *     unlink. Thus a list must have its contents unlinked before it is
+ *     destroyed.
+ */
+void skb_unlink(struct sk_buff *skb)
+{
+       struct sk_buff_head *list = skb->list;
+
+       if (list) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&list->lock, flags);
+               if (skb->list == list)
+                       __skb_unlink(skb, skb->list);
+               spin_unlock_irqrestore(&list->lock, flags);
+       }
+}
+
+
+/**
+ *     skb_append      -       append a buffer
+ *     @old: buffer to insert after
+ *     @newsk: buffer to insert
+ *
+ *     Place a packet after a given packet in a list. The list locks are taken
+ *     and this function is atomic with respect to other list locked calls.
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&old->list->lock, flags);
+       __skb_append(old, newsk);
+       spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+
+/**
+ *     skb_insert      -       insert a buffer
+ *     @old: buffer to insert before
+ *     @newsk: buffer to insert
+ *
+ *     Place a packet before a given packet in a list. The list locks are taken
+ *     and this function is atomic with respect to other list locked calls.
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&old->list->lock, flags);
+       __skb_insert(newsk, old->prev, old, old->list);
+       spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+#if 0
+/*
+ *     Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+       /* Must match allocation in alloc_skb */
+       mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+       kmem_add_cache_size(mtu);
+}
+#endif
+
+static inline void skb_split_inside_header(struct sk_buff *skb,
+                                          struct sk_buff* skb1,
+                                          const u32 len, const int pos)
+{
+       int i;
+
+       memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
+
+       /* And move data appendix as is. */
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+               skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
+
+       skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+       skb_shinfo(skb)->nr_frags  = 0;
+       skb1->data_len             = skb->data_len;
+       skb1->len                  += skb1->data_len;
+       skb->data_len              = 0;
+       skb->len                   = len;
+       skb->tail                  = skb->data + len;
+}
+
+static inline void skb_split_no_header(struct sk_buff *skb,
+                                      struct sk_buff* skb1,
+                                      const u32 len, int pos)
+{
+       int i, k = 0;
+       const int nfrags = skb_shinfo(skb)->nr_frags;
+
+       skb_shinfo(skb)->nr_frags = 0;
+       skb1->len                 = skb1->data_len = skb->len - len;
+       skb->len                  = len;
+       skb->data_len             = len - pos;
+
+       for (i = 0; i < nfrags; i++) {
+               int size = skb_shinfo(skb)->frags[i].size;
+
+               if (pos + size > len) {
+                       skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+                       if (pos < len) {
+                               /* Split frag.
+                                * We have two variants in this case:
+                                * 1. Move the whole frag to the second
+                                *    part, if it is possible. E.g.
+                                *    this approach is mandatory for TUX,
+                                *    where splitting is expensive.
+                                * 2. Split it accurately. We do the
+                                *    latter here.
+                                */
+                               get_page(skb_shinfo(skb)->frags[i].page);
+                               skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+                               skb_shinfo(skb1)->frags[0].size -= len - pos;
+                               skb_shinfo(skb)->frags[i].size  = len - pos;
+                               skb_shinfo(skb)->nr_frags++;
+                       }
+                       k++;
+               } else
+                       skb_shinfo(skb)->nr_frags++;
+               pos += size;
+       }
+       skb_shinfo(skb1)->nr_frags = k;
+}
+
+/**
+ * skb_split - split a fragmented skb into two parts at length @len
+ * @skb: buffer to split; retains the first @len bytes
+ * @skb1: buffer that receives the remainder
+ * @len: split point in bytes
+ */
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
+{
+       int pos = skb_headlen(skb);
+
+       if (len < pos)  /* Split line is inside header. */
+               skb_split_inside_header(skb, skb1, len, pos);
+       else            /* Second chunk has no header, nothing to copy. */
+               skb_split_no_header(skb, skb1, len, pos);
+}
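A minimal sketch, assuming a segmentation-style caller that keeps the first len bytes in skb and moves the remainder into a freshly allocated skb1 (which must have room for any header bytes past the split point):

        struct sk_buff *skb1 = alloc_skb(skb_headlen(skb), GFP_ATOMIC);

        if (skb1) {
                skb_split(skb, skb1, len);      /* skb keeps [0, len) */
                /* skb1 now holds bytes [len, old skb->len) */
        }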
+
+void __init skb_init(void)
+{
+       skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+                                             sizeof(struct sk_buff),
+                                             0,
+                                             SLAB_HWCACHE_ALIGN,
+                                             NULL, NULL);
+       if (!skbuff_head_cache)
+               panic("cannot create skbuff cache");
+}
+
+EXPORT_SYMBOL(___pskb_trim);
+EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(__pskb_pull_tail);
+EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(pskb_expand_head);
+EXPORT_SYMBOL(skb_checksum);
+EXPORT_SYMBOL(skb_clone);
+EXPORT_SYMBOL(skb_clone_fraglist);
+EXPORT_SYMBOL(skb_copy);
+EXPORT_SYMBOL(skb_copy_and_csum_bits);
+EXPORT_SYMBOL(skb_copy_and_csum_dev);
+EXPORT_SYMBOL(skb_copy_bits);
+EXPORT_SYMBOL(skb_copy_expand);
+EXPORT_SYMBOL(skb_over_panic);
+EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(skb_realloc_headroom);
+EXPORT_SYMBOL(skb_under_panic);
+EXPORT_SYMBOL(skb_dequeue);
+EXPORT_SYMBOL(skb_dequeue_tail);
+EXPORT_SYMBOL(skb_insert);
+EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_head);
+EXPORT_SYMBOL(skb_queue_tail);
+EXPORT_SYMBOL(skb_unlink);
+EXPORT_SYMBOL(skb_append);
+EXPORT_SYMBOL(skb_split);
+EXPORT_SYMBOL(skb_iter_first);
+EXPORT_SYMBOL(skb_iter_next);
+EXPORT_SYMBOL(skb_iter_abort);
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,5 @@
+/* Hook to call BIOS initialisation function */
+
+#define ARCH_SETUP machine_specific_arch_setup();
+
+static void __init machine_specific_arch_setup(void);
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,32 @@
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+CFLAGS += -Iarch/$(XENARCH)/pci
+
+c-obj-y                                := i386.o
+
+c-obj-$(CONFIG_PCI_BIOS)               += pcbios.o
+c-obj-$(CONFIG_PCI_MMCONFIG)   += mmconfig.o
+c-obj-$(CONFIG_PCI_DIRECT)     += direct.o
+
+c-pci-y                                := fixup.o
+c-pci-$(CONFIG_ACPI_PCI)       += acpi.o
+c-pci-y                                += legacy.o
+# Make sure irq.o gets linked in after legacy.o
+l-pci-y                                += irq.o
+
+c-pci-$(CONFIG_X86_VISWS)      := visws.o fixup.o
+pci-$(CONFIG_X86_VISWS)                :=
+c-pci-$(CONFIG_X86_NUMAQ)      := numa.o
+pci-$(CONFIG_X86_NUMAQ)                := irq.o
+
+obj-y                          += $(pci-y)
+c-obj-y                                += $(c-pci-y) common.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
+
+obj-y  += $(c-obj-y) $(l-pci-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c   Fri Jul 15 13:39:50 2005
@@ -0,0 +1,619 @@
+/*
+ *  linux/arch/i386/traps.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+
+#include <asm/hypervisor.h>
+
+#include <linux/irq.h>
+#include <linux/module.h>
+
+asmlinkage int system_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void fixup_4gb_segment(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 24;
+
+
+/*
+ * If the address is either in the .text section of the
+ * kernel, or in the vmalloc'ed module regions, it *may* 
+ * be the address of a calling routine
+ */
+
+#ifdef CONFIG_MODULES
+
+extern struct module *module_list;
+extern struct module kernel_module;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+       int retval = 0;
+       struct module *mod;
+
+       if (addr >= (unsigned long) &_stext &&
+           addr <= (unsigned long) &_etext)
+               return 1;
+
+       for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+               /* mod_bound tests for addr being inside the vmalloc'ed
+                * module area. Of course it'd be better to test only
+                * for the .text subset... */
+               if (mod_bound(addr, 0, mod)) {
+                       retval = 1;
+                       break;
+               }
+       }
+
+       return retval;
+}
+
+#else
+
+static inline int kernel_text_address(unsigned long addr)
+{
+       return (addr >= (unsigned long) &_stext &&
+               addr <= (unsigned long) &_etext);
+}
+
+#endif
+
+void show_trace(unsigned long * stack)
+{
+       int i;
+       unsigned long addr;
+
+       if (!stack)
+               stack = (unsigned long*)&stack;
+
+       printk("Call Trace: ");
+       i = 1;
+       while (((long) stack & (THREAD_SIZE-1)) != 0) {
+               addr = *stack++;
+               if (kernel_text_address(addr)) {
+                       if (i && ((i % 6) == 0))
+                               printk("\n   ");
+                       printk("[<%08lx>] ", addr);
+                       i++;
+               }
+       }
+       printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+       unsigned long esp = tsk->thread.esp;
+
+       /* User space on another CPU? */
+       if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
+               return;
+       show_trace((unsigned long *)esp);
+}
+
+void show_stack(unsigned long * esp)
+{
+       unsigned long *stack;
+       int i;
+
+       // debugging aid: "show_stack(NULL);" prints the
+       // back trace for this cpu.
+
+       if(esp==NULL)
+               esp=(unsigned long*)&esp;
+
+       stack = esp;
+       for(i=0; i < kstack_depth_to_print; i++) {
+               if (((long) stack & (THREAD_SIZE-1)) == 0)
+                       break;
+               if (i && ((i % 8) == 0))
+                       printk("\n       ");
+               printk("%08lx ", *stack++);
+       }
+       printk("\n");
+       show_trace(esp);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+       int in_kernel = 1;
+       unsigned long esp;
+       unsigned short ss;
+
+       esp = (unsigned long) (&regs->esp);
+       ss = __KERNEL_DS;
+       if (regs->xcs & 2) {
+               in_kernel = 0;
+               esp = regs->esp;
+               ss = regs->xss & 0xffff;
+       }
+       printk(KERN_ALERT "CPU:    %d\n", smp_processor_id() );
+       printk(KERN_ALERT "EIP:    %04x:[<%08lx>]    %s\n",
+              0xffff & regs->xcs, regs->eip, print_tainted());
+       printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags);
+       printk(KERN_ALERT "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+               regs->eax, regs->ebx, regs->ecx, regs->edx);
+       printk(KERN_ALERT "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+               regs->esi, regs->edi, regs->ebp, esp);
+       printk(KERN_ALERT "ds: %04x   es: %04x   ss: %04x\n",
+               regs->xds & 0xffff, regs->xes & 0xffff, ss);
+       printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)",
+               current->comm, current->pid, 4096+(unsigned long)current);
+       /*
+        * When in-kernel, we also print out the stack and code at the
+        * time of the fault..
+        */
+       if (in_kernel) {
+
+               printk(KERN_ALERT "\nStack: ");
+               show_stack((unsigned long*)esp);
+
+#if 0
+                {
+                        int i;
+                       printk(KERN_ALERT "\nCode: ");
+                       if(regs->eip < PAGE_OFFSET)
+                               goto bad;
+
+                       for(i=0;i<20;i++)
+                       {
+                               unsigned char c;
+                               if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
+bad:
+                                       printk(KERN_ALERT " Bad EIP value.");
+                                       break;
+                               }
+                               printk("%02x ", c);
+                       }
+               }
+#endif
+       }
+       printk(KERN_ALERT "\n");
+}      
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+       console_verbose();
+       spin_lock_irq(&die_lock);
+       bust_spinlocks(1);
+       printk("%s: %04lx\n", str, err & 0xffff);
+       show_registers(regs);
+       bust_spinlocks(0);
+       spin_unlock_irq(&die_lock);
+       do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+       if (!(2 & regs->xcs))
+               die(str, regs, err);
+}
+
+
+static inline void do_trap(int trapnr, int signr, char *str,
+                          struct pt_regs * regs, long error_code,
+                           siginfo_t *info)
+{
+       if (!(regs->xcs & 2))
+               goto kernel_trap;
+
+       /*trap_signal:*/ {
+               struct task_struct *tsk = current;
+               tsk->thread.error_code = error_code;
+               tsk->thread.trap_no = trapnr;
+               if (info)
+                       force_sig_info(signr, info, tsk);
+               else
+                       force_sig(signr, tsk);
+               return;
+       }
+
+       kernel_trap: {
+               unsigned long fixup = search_exception_table(regs->eip);
+               if (fixup)
+                       regs->eip = fixup;
+               else    
+                       die(str, regs, error_code);
+               return;
+       }
+}
+
+#define DO_ERROR(trapnr, signr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       do_trap(trapnr, signr, str, regs, error_code, NULL); \
+}
+
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       siginfo_t info; \
+       info.si_signo = signr; \
+       info.si_errno = 0; \
+       info.si_code = sicode; \
+       info.si_addr = (void *)siaddr; \
+       do_trap(trapnr, signr, str, regs, error_code, &info); \
+}
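Since these macros generate one handler per trap, it may help to see the expansion of a single invocation from the list below; DO_ERROR( 3, SIGTRAP, "int3", int3) expands to exactly:

        asmlinkage void do_int3(struct pt_regs * regs, long error_code)
        {
                do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
        }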
+
+DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+DO_ERROR( 3, SIGTRAP, "int3", int3)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR(18, SIGBUS, "machine check", machine_check)
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+       /*
+        * If we trapped on an LDT access then ensure that the default_ldt is
+        * loaded, if nothing else. We load default_ldt lazily because LDT
+        * switching costs time and many applications don't need it.
+        */
+       if ( unlikely((error_code & 6) == 4) )
+       {
+               unsigned long ldt;
+               __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
+               if ( ldt == 0 )
+               {
+                    xen_set_ldt((unsigned long)&default_ldt[0], 5);
+                   return;
+               }
+       }
+
+       if (!(regs->xcs & 2))
+               goto gp_in_kernel;
+
+       current->thread.error_code = error_code;
+       current->thread.trap_no = 13;
+       force_sig(SIGSEGV, current);
+       return;
+
+gp_in_kernel:
+       {
+               unsigned long fixup;
+               fixup = search_exception_table(regs->eip);
+               if (fixup) {
+                       regs->eip = fixup;
+                       return;
+               }
+               die("general protection fault", regs, error_code);
+       }
+}
+
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+    unsigned int condition;
+    struct task_struct *tsk = current;
+    siginfo_t info;
+
+    condition = HYPERVISOR_get_debugreg(6);
+
+    /* Mask out spurious debug traps due to lazy DR7 setting */
+    if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+        if (!tsk->thread.debugreg[7])
+            goto clear_dr7;
+    }
+
+    /* Save debug status register where ptrace can see it */
+    tsk->thread.debugreg[6] = condition;
+
+    /* Mask out spurious TF errors due to lazy TF clearing */
+    if (condition & DR_STEP) {
+        /*
+         * The TF error should be masked out only if the current
+         * process is not traced and if the TRAP flag has been set
+         * previously by a tracing process (condition detected by
+         * the PT_DTRACE flag); remember that the i386 TRAP flag
+         * can be modified by the process itself in user mode,
+         * allowing programs to debug themselves without the ptrace()
+         * interface.
+         */
+        if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
+            goto clear_TF;
+    }
+
+    /* Ok, finally something we can handle */
+    tsk->thread.trap_no = 1;
+    tsk->thread.error_code = error_code;
+    info.si_signo = SIGTRAP;
+    info.si_errno = 0;
+    info.si_code = TRAP_BRKPT;
+        
+    /* If this is a kernel mode trap, save the user PC on entry to 
+     * the kernel, that's what the debugger can make sense of.
+     */
+    info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : 
+                                            (void *)regs->eip;
+    force_sig_info(SIGTRAP, &info, tsk);
+
+    /* Disable additional traps. They'll be re-enabled when
+     * the signal is delivered.
+     */
+ clear_dr7:
+    HYPERVISOR_set_debugreg(7, 0);
+    return;
+
+ clear_TF:
+    regs->eflags &= ~TF_MASK;
+    return;
+}
+
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+void math_error(void *eip)
+{
+       struct task_struct * task;
+       siginfo_t info;
+       unsigned short cwd, swd;
+
+       /*
+        * Save the info for the exception handler and clear the error.
+        */
+       task = current;
+       save_init_fpu(task);
+       task->thread.trap_no = 16;
+       task->thread.error_code = 0;
+       info.si_signo = SIGFPE;
+       info.si_errno = 0;
+       info.si_code = __SI_FAULT;
+       info.si_addr = eip;
+       /*
+        * (~cwd & swd) will mask out exceptions that are not set to unmasked
+        * status.  0x3f is the exception bits in these regs, 0x200 is the
+        * C1 reg you need in case of a stack fault, 0x040 is the stack
+        * fault bit.  We should only be taking one exception at a time,
+        * so if this combination doesn't produce any single exception,
+        * then we have a bad program that isn't synchronizing its FPU usage
+        * and it will suffer the consequences, since we won't be able to
+        * fully reproduce the context of the exception.
+        */
+       cwd = get_fpu_cwd(task);
+       swd = get_fpu_swd(task);
+       switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+               case 0x000:
+               default:
+                       break;
+               case 0x001: /* Invalid Op */
+               case 0x041: /* Stack Fault */
+               case 0x241: /* Stack Fault | Direction */
+                       info.si_code = FPE_FLTINV;
+                       break;
+               case 0x002: /* Denormalize */
+               case 0x010: /* Underflow */
+                       info.si_code = FPE_FLTUND;
+                       break;
+               case 0x004: /* Zero Divide */
+                       info.si_code = FPE_FLTDIV;
+                       break;
+               case 0x008: /* Overflow */
+                       info.si_code = FPE_FLTOVF;
+                       break;
+               case 0x020: /* Precision */
+                       info.si_code = FPE_FLTRES;
+                       break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+}
+
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+       ignore_irq13 = 1;
+       math_error((void *)regs->eip);
+}
+
+void simd_math_error(void *eip)
+{
+       struct task_struct * task;
+       siginfo_t info;
+       unsigned short mxcsr;
+
+       /*
+        * Save the info for the exception handler and clear the error.
+        */
+       task = current;
+       save_init_fpu(task);
+       task->thread.trap_no = 19;
+       task->thread.error_code = 0;
+       info.si_signo = SIGFPE;
+       info.si_errno = 0;
+       info.si_code = __SI_FAULT;
+       info.si_addr = eip;
+       /*
+        * The SIMD FPU exceptions are handled a little differently, as there
+        * is only a single status/control register.  Thus, to determine which
+        * unmasked exception was caught we must mask the exception mask bits
+        * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+        */
+       mxcsr = get_fpu_mxcsr(task);
+       switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+               case 0x000:
+               default:
+                       break;
+               case 0x001: /* Invalid Op */
+                       info.si_code = FPE_FLTINV;
+                       break;
+               case 0x002: /* Denormalize */
+               case 0x010: /* Underflow */
+                       info.si_code = FPE_FLTUND;
+                       break;
+               case 0x004: /* Zero Divide */
+                       info.si_code = FPE_FLTDIV;
+                       break;
+               case 0x008: /* Overflow */
+                       info.si_code = FPE_FLTOVF;
+                       break;
+               case 0x020: /* Precision */
+                       info.si_code = FPE_FLTRES;
+                       break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+}
+
+asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
+                                         long error_code)
+{
+       if (cpu_has_xmm) {
+               /* Handle SIMD FPU exceptions on PIII+ processors. */
+               ignore_irq13 = 1;
+               simd_math_error((void *)regs->eip);
+       } else {
+               die_if_kernel("cache flush denied", regs, error_code);
+               current->thread.trap_no = 19;
+               current->thread.error_code = error_code;
+               force_sig(SIGSEGV, current);
+       }
+}
+
+/*
+ *  'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(struct pt_regs regs)
+{
+       /*
+        * A trap in kernel mode can be ignored. It'll be the fast XOR or
+        * copying libraries, which will correctly save/restore state and
+        * reset the TS bit in CR0.
+        */
+       if ( (regs.xcs & 2) == 0 )
+               return;
+
+       if (current->used_math) {
+               restore_fpu(current);
+       } else {
+               init_fpu();
+       }
+       current->flags |= PF_USEDFPU;   /* So we fnsave on switch_to() */
+}
+
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+  int __d0, __d1; \
+  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+       "movw %4,%%dx\n\t" \
+       "movl %%eax,%0\n\t" \
+       "movl %%edx,%1" \
+       :"=m" (*((long *) (gate_addr))), \
+        "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+       :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+        "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \
+} while (0)
+
+static void __init set_call_gate(void *a, void *addr)
+{
+       _set_gate(a,12,3,addr);
+}
+
+
+/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */
+static trap_info_t trap_table[] = {
+    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
+    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
+    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
+    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
+    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
+    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
+    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
+    {  8, 0, __KERNEL_CS, (unsigned long)double_fault                },
+    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
+    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
+    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
+    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
+    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
+    { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment           },
+    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
+    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
+    { 18, 0, __KERNEL_CS, (unsigned long)machine_check               },
+    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
+    { SYSCALL_VECTOR, 
+          3, __KERNEL_CS, (unsigned long)system_call                 },
+    {  0, 0,           0, 0                           }
+};
+
+
+void __init trap_init(void)
+{
+    HYPERVISOR_set_trap_table(trap_table);    
+
+    /*
+     * The default LDT is a single-entry callgate to lcall7 for iBCS and a
+     * callgate to lcall27 for Solaris/x86 binaries.
+     */
+    clear_page(&default_ldt[0]);
+    set_call_gate(&default_ldt[0],lcall7);
+    set_call_gate(&default_ldt[4],lcall27);
+    __make_page_readonly(&default_ldt[0]);
+
+    cpu_init();
+}
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,5 @@
+/* Hook to call BIOS initialisation function */
+
+#define ARCH_SETUP machine_specific_arch_setup();
+
+static void __init machine_specific_arch_setup(void);
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,3 @@
+
+obj-y  := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
+
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/mm/init.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/mm/init.c        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,482 @@
+/*
+ *  linux/arch/i386/mm/init.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+
+/* XEN: We *cannot* use mmx_clear_page() this early. Force dumb memset(). */
+#undef clear_page
+#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE)
+
+mmu_gather_t mmu_gathers[NR_CPUS];
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages;
+static unsigned long totalhigh_pages;
+
+int do_check_pgt_cache(int low, int high)
+{
+    int freed = 0;
+    if(pgtable_cache_size > high) {
+        do {
+            if (!QUICKLIST_EMPTY(pgd_quicklist)) {
+                free_pgd_slow(get_pgd_fast());
+                freed++;
+            }
+            if (!QUICKLIST_EMPTY(pte_quicklist)) {
+                pte_free_slow(pte_alloc_one_fast(NULL, 0));
+                freed++;
+            }
+        } while(pgtable_cache_size > low);
+    }
+    return freed;
+}
+ 
+/*
+ * NOTE: pagetable_init() allocates all the fixmap pagetables contiguously
+ * in physical space, so we can cache the location of the first one and move
+ * around without checking the pgd every time.
+ */
+
+#if CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr) \
+    pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
+{
+    unsigned long kmap_vstart;
+
+    /* cache the first kmap pte */
+    kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+    kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+    kmap_prot = PAGE_KERNEL;
+}
+#endif /* CONFIG_HIGHMEM */
+
+void show_mem(void)
+{
+    int i, total = 0, reserved = 0;
+    int shared = 0, cached = 0;
+    int highmem = 0;
+
+    printk("Mem-info:\n");
+    show_free_areas();
+    printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+    i = max_mapnr;
+    while (i-- > 0) {
+        total++;
+        if (PageHighMem(mem_map+i))
+            highmem++;
+        if (PageReserved(mem_map+i))
+            reserved++;
+        else if (PageSwapCache(mem_map+i))
+            cached++;
+        else if (page_count(mem_map+i))
+            shared += page_count(mem_map+i) - 1;
+    }
+    printk("%d pages of RAM\n", total);
+    printk("%d pages of HIGHMEM\n",highmem);
+    printk("%d reserved pages\n",reserved);
+    printk("%d pages shared\n",shared);
+    printk("%d pages swap cached\n",cached);
+    printk("%ld pages in page table cache\n",pgtable_cache_size);
+    show_buffers();
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+static inline void set_pte_phys (unsigned long vaddr,
+                                 unsigned long phys, pgprot_t prot)
+{
+    pgd_t *pgd;
+    pmd_t *pmd;
+    pte_t *pte;
+
+    pgd = init_mm.pgd + __pgd_offset(vaddr);
+    if (pgd_none(*pgd)) {
+        printk("PAE BUG #00!\n");
+        return;
+    }
+    pmd = pmd_offset(pgd, vaddr);
+    if (pmd_none(*pmd)) {
+        printk("PAE BUG #01!\n");
+        return;
+    }
+    pte = pte_offset(pmd, vaddr);
+
+    set_pte(pte, (pte_t) { phys | pgprot_val(prot) });
+
+    /*
+     * It's enough to flush this one mapping.
+     * (PGE mappings get flushed as well)
+     */
+    __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, 
+                  pgprot_t flags)
+{
+    unsigned long address = __fix_to_virt(idx);
+
+    if (idx >= __end_of_fixed_addresses) {
+        printk("Invalid __set_fixmap\n");
+        return;
+    }
+    set_pte_phys(address, phys, flags);
+}
+
+void clear_fixmap(enum fixed_addresses idx)
+{
+    set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0));
+}
+
+static void __init fixrange_init (unsigned long start, 
+                                  unsigned long end, pgd_t *pgd_base)
+{
+    pgd_t *pgd, *kpgd;
+    pmd_t *pmd, *kpmd;
+    pte_t *pte, *kpte;
+    int i, j;
+    unsigned long vaddr;
+
+    vaddr = start;
+    i = __pgd_offset(vaddr);
+    j = __pmd_offset(vaddr);
+    pgd = pgd_base + i;
+
+    for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+#if CONFIG_X86_PAE
+        if (pgd_none(*pgd)) {
+            pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
+            if (pmd != pmd_offset(pgd, 0))
+                printk("PAE BUG #02!\n");
+        }
+        pmd = pmd_offset(pgd, vaddr);
+#else
+        pmd = (pmd_t *)pgd;
+#endif
+        for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+            if (pmd_none(*pmd)) {
+                pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+                clear_page(pte);
+                kpgd = pgd_offset_k((unsigned long)pte);
+                kpmd = pmd_offset(kpgd, (unsigned long)pte);
+                kpte = pte_offset(kpmd, (unsigned long)pte);
+                set_pte(kpte, pte_wrprotect(*kpte));
+                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
+            }
+            vaddr += PMD_SIZE;
+        }
+        j = 0;
+    }
+}
+
+
+static void __init pagetable_init (void)
+{
+    unsigned long vaddr, end, ram_end;
+    pgd_t *kpgd, *pgd, *pgd_base;
+    int i, j, k;
+    pmd_t *kpmd, *pmd;
+    pte_t *kpte, *pte, *pte_base;
+
+    ram_end = end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
+    if ( xen_start_info.nr_pages < max_low_pfn )
+        ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
+
+    pgd_base = init_mm.pgd;
+    i = __pgd_offset(PAGE_OFFSET);
+    pgd = pgd_base + i;
+
+    for (; i < PTRS_PER_PGD; pgd++, i++) {
+        vaddr = i*PGDIR_SIZE;
+        if (vaddr >= end)
+            break;
+        pmd = (pmd_t *)pgd;
+        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+            if (vaddr >= end)
+                break;
+
+            /* Filled in for us already? */
+            if ( pmd_val(*pmd) & _PAGE_PRESENT )
+                continue;
+
+            pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            clear_page(pte_base);
+
+            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
+                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+                if (vaddr >= ram_end)
+                    break;
+                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
+            }
+            kpgd = pgd_offset_k((unsigned long)pte_base);
+            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
+            kpte = pte_offset(kpmd, (unsigned long)pte_base);
+            set_pte(kpte, pte_wrprotect(*kpte));
+            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
+        }
+    }
+
+    /*
+     * Fixed mappings, only the page table structure has to be
+     * created - mappings will be set by set_fixmap():
+     */
+    vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+    fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
+
+#if CONFIG_HIGHMEM
+    /*
+     * Permanent kmaps:
+     */
+    vaddr = PKMAP_BASE;
+    fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, init_mm.pgd);
+
+    pgd = init_mm.pgd + __pgd_offset(vaddr);
+    pmd = pmd_offset(pgd, vaddr);
+    pte = pte_offset(pmd, vaddr);
+    pkmap_page_table = pte;
+#endif
+}
+
+static void __init zone_sizes_init(void)
+{
+    unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+    unsigned int max_dma, high, low;
+
+    max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+    low = max_low_pfn;
+    high = highend_pfn;
+
+    if (low < max_dma)
+        zones_size[ZONE_DMA] = low;
+    else {
+        zones_size[ZONE_DMA] = max_dma;
+        zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+        zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+    }
+    free_area_init(zones_size);
+}
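As a worked example with illustrative numbers: with 4 KB pages and MAX_DMA_ADDRESS at the usual 16 MB ISA limit, max_dma comes out as 4096 pages; a 256 MB guest without highmem then has low = 65536 pages, so zones_size ends up as ZONE_DMA = 4096 and ZONE_NORMAL = 65536 - 4096 = 61440 pages, with ZONE_HIGHMEM left at zero.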
+
+void __init paging_init(void)
+{
+    pagetable_init();
+
+    zone_sizes_init();
+
+    /* Switch to the real shared_info page, and clear the dummy page. */
+    set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+    HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+    memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+#ifdef CONFIG_HIGHMEM
+    kmap_init();
+#endif
+}
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+    return 1;
+}
+
+#ifdef CONFIG_HIGHMEM
+void __init one_highpage_init(struct page *page, int free_page)
+{
+    ClearPageReserved(page);
+    set_bit(PG_highmem, &page->flags);
+    atomic_set(&page->count, 1);
+    if ( free_page )
+        __free_page(page);
+    totalhigh_pages++;
+}
+#endif /* CONFIG_HIGHMEM */
+
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+    highmem_start_page = mem_map + highstart_pfn;
+    max_mapnr = num_physpages = highend_pfn;
+    num_mappedpages = max_low_pfn;
+#else
+    max_mapnr = num_mappedpages = num_physpages = max_low_pfn;
+#endif
+}
+
+static int __init free_pages_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+    int bad_ppro = 0;
+#endif
+    int reservedpages, pfn;
+
+    /* Add only boot_pfn pages of low memory to the free list.
+     * max_low_pfn may be sized for
+     * pages yet to be allocated from the hypervisor, or it may be set
+     * to override the xen_start_info amount of memory.
+     */
+    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
+
+    /* this will put all low memory onto the freelists */
+    totalram_pages += free_all_bootmem();
+    /* XEN: init and count low-mem pages outside initial allocation. */
+    for (pfn = boot_pfn; pfn < max_low_pfn; pfn++) {
+        ClearPageReserved(&mem_map[pfn]);
+        atomic_set(&mem_map[pfn].count, 1);
+        totalram_pages++;
+    }
+
+    reservedpages = 0;
+    for (pfn = 0; pfn < boot_pfn ; pfn++) {
+        /*
+         * Only count reserved RAM pages
+         */
+        if (page_is_ram(pfn) && PageReserved(mem_map+pfn))
+            reservedpages++;
+    }
+#ifdef CONFIG_HIGHMEM
+    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+        one_highpage_init((struct page *) (mem_map + pfn),
+                          (pfn < xen_start_info.nr_pages));
+    totalram_pages += totalhigh_pages;
+#endif
+    return reservedpages;
+}
+
+void __init mem_init(void)
+{
+    int codesize, reservedpages, datasize, initsize;
+
+    if (!mem_map)
+        BUG();
+
+#ifdef CONFIG_HIGHMEM
+    /* check that fixmap and pkmap do not overlap */
+    if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+       printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
+       printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
+              PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
+       BUG();
+    }
+#endif
+
+    set_max_mapnr_init();
+
+    high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+    /* clear the zero-page */
+    memset(empty_zero_page, 0, PAGE_SIZE);
+
+    reservedpages = free_pages_init();
+
+    codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+    datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+    initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+    printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk 
reserved, %dk data, %dk init, %ldk highmem)\n",
+           (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+           max_mapnr << (PAGE_SHIFT-10),
+           codesize >> 10,
+           reservedpages << (PAGE_SHIFT-10),
+           datasize >> 10,
+           initsize >> 10,
+           (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+        );
+
+    boot_cpu_data.wp_works_ok = 1;
+}
+
+void free_initmem(void)
+{
+    unsigned long addr;
+
+    addr = (unsigned long)(&__init_begin);
+    for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+        ClearPageReserved(virt_to_page(addr));
+        set_page_count(virt_to_page(addr), 1);
+        free_page(addr);
+        totalram_pages++;
+    }
+    printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", 
(&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+    if (start < end)
+        printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) 
>> 10);
+    for (; start < end; start += PAGE_SIZE) {
+        ClearPageReserved(virt_to_page(start));
+        set_page_count(virt_to_page(start), 1);
+        free_page(start);
+        totalram_pages++;
+    }
+}
+#endif
+
+void si_meminfo(struct sysinfo *val)
+{
+    val->totalram = max_pfn;
+    val->sharedram = 0;
+    val->freeram = nr_free_pages();
+    val->bufferram = atomic_read(&buffermem_pages);
+    val->totalhigh = max_pfn-max_low_pfn;
+    val->freehigh = nr_free_highpages();
+    val->mem_unit = PAGE_SIZE;
+    return;
+}
+
+#if defined(CONFIG_X86_PAE)
+struct kmem_cache_s *pae_pgd_cachep;
+void __init pgtable_cache_init(void)
+{
+    /*
+     * PAE pgds must be 16-byte aligned:
+     */
+    pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
+                                       SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
+    if (!pae_pgd_cachep)
+        panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
+}
+#endif /* CONFIG_X86_PAE */
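
A note on the sizing above (illustration, not part of the patch): a PAE
top-level pgd holds 4 entries of 8 bytes, hence the 32-byte object size,
and SLAB_HWCACHE_ALIGN/SLAB_MUST_HWCACHE_ALIGN round the allocation up
toward the L1 cache line, so every object comes out at least 16-byte
(here in fact 32-byte) aligned, as the comment requires.
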
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,897 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.11.10-xenU
+# Mon May 23 15:07:58 2005
+#
+CONFIG_XEN=y
+CONFIG_ARCH_XEN=y
+CONFIG_NO_IDLE_HZ=y
+
+#
+# XEN
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+# CONFIG_XEN_PHYSDEV_ACCESS is not set
+CONFIG_XEN_BLKDEV_GRANT=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SHADOW_MODE is not set
+CONFIG_XEN_SCRUB_PAGES=y
+# CONFIG_XEN_X86 is not set
+CONFIG_XEN_X86_64=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+CONFIG_SYSCTL=y
+CONFIG_AUDIT=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_XENARCH="x86_64"
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_GOOD_APIC=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_SMP is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_MICROCODE is not set
+CONFIG_X86_CPUID=y
+# CONFIG_NUMA is not set
+# CONFIG_MTRR is not set
+# CONFIG_X86_LOCAL_APIC is not set
+# CONFIG_X86_IO_APIC is not set
+# CONFIG_PCI is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+
+#
+# X86_64 processor configuration
+#
+CONFIG_X86_64=y
+CONFIG_64BIT=y
+
+#
+# Processor type and features
+#
+CONFIG_MPSC=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=128
+# CONFIG_X86_TSC is not set
+# CONFIG_X86_MSR is not set
+CONFIG_DUMMY_IOMMU=y
+# CONFIG_X86_MCE is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI etc.)
+#
+# CONFIG_UNORDERED_IO is not set
+
+#
+# Executable file formats / Emulations
+#
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_AOUT is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=m
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_LBD=y
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=m
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_TUNNEL=m
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+
+#
+# IP: Virtual Server Configuration
+#
+CONFIG_IP_VS=m
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=12
+
+#
+# IPVS transport protocol load balancing support
+#
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_CT_ACCT=y
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+CONFIG_IP_NF_CT_PROTO_SCTP=m
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_MATCH_PHYSDEV=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_REALM=m
+CONFIG_IP_NF_MATCH_SCTP=m
+CONFIG_IP_NF_MATCH_COMMENT=m
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_CLASSIFY=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_NOTRACK=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+
+#
+# IPv6: Netfilter Configuration
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_PHYSDEV=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+CONFIG_IP6_NF_RAW=m
+
+#
+# Bridge: Netfilter Configuration
+#
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+# CONFIG_BRIDGE_EBT_ULOG is not set
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=y
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+CONFIG_IP_SCTP=m
+# CONFIG_SCTP_DBG_MSG is not set
+# CONFIG_SCTP_DBG_OBJCNT is not set
+# CONFIG_SCTP_HMAC_NONE is not set
+# CONFIG_SCTP_HMAC_SHA1 is not set
+CONFIG_SCTP_HMAC_MD5=y
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+# CONFIG_ATM_CLIP_NO_ICMP is not set
+CONFIG_ATM_LANE=m
+# CONFIG_ATM_MPOA is not set
+CONFIG_ATM_BR2684=m
+# CONFIG_ATM_BR2684_IPFILTER is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+# CONFIG_DECNET is not set
+CONFIG_LLC=m
+# CONFIG_LLC2 is not set
+CONFIG_IPX=m
+# CONFIG_IPX_INTERN is not set
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=y
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+CONFIG_NET_DIVERT=y
+# CONFIG_ECONET is not set
+CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CLK_JIFFIES=y
+# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
+# CONFIG_NET_SCH_CLK_CPU is not set
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_QOS=y
+CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_NET_CLS_IND=y
+# CONFIG_CLS_U32_MARK is not set
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_CLS_POLICE=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
+# CONFIG_HAMRADIO is not set
+CONFIG_IRDA=m
+
+#
+# IrDA protocols
+#
+CONFIG_IRLAN=m
+CONFIG_IRNET=m
+CONFIG_IRCOMM=m
+# CONFIG_IRDA_ULTRA is not set
+
+#
+# IrDA options
+#
+CONFIG_IRDA_CACHE_LAST_LSAP=y
+CONFIG_IRDA_FAST_RR=y
+# CONFIG_IRDA_DEBUG is not set
+
+#
+# Infrared-port device drivers
+#
+
+#
+# SIR device drivers
+#
+CONFIG_IRTTY_SIR=m
+
+#
+# Dongle support
+#
+CONFIG_DONGLE=y
+CONFIG_ESI_DONGLE=m
+CONFIG_ACTISYS_DONGLE=m
+CONFIG_TEKRAM_DONGLE=m
+CONFIG_LITELINK_DONGLE=m
+CONFIG_MA600_DONGLE=m
+CONFIG_GIRBIL_DONGLE=m
+CONFIG_MCP2120_DONGLE=m
+CONFIG_OLD_BELKIN_DONGLE=m
+CONFIG_ACT200L_DONGLE=m
+
+#
+# Old SIR device drivers
+#
+# CONFIG_IRPORT_SIR is not set
+
+#
+# Old Serial dongle support
+#
+
+#
+# FIR device drivers
+#
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+CONFIG_BT_SCO=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
+CONFIG_BT_HCIVHCI=m
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_ETHERTAP=m
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+
+#
+# Ethernet (1000 Mbit)
+#
+
+#
+# Ethernet (10000 Mbit)
+#
+
+#
+# Token Ring devices
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+CONFIG_NET_RADIO=y
+
+#
+# Obsolete Wireless cards support (pre-802.11)
+#
+# CONFIG_STRIP is not set
+CONFIG_ATMEL=m
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# ATM drivers
+#
+CONFIG_ATM_TCP=m
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+# CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPPOE=m
+CONFIG_PPPOATM=m
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=m
+CONFIG_INPUT=m
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+
+#
+# Character devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=m
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+# CONFIG_JFS_SECURITY is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+CONFIG_FS_POSIX_ACL=y
+
+#
+# XFS support
+#
+CONFIG_XFS_FS=m
+CONFIG_XFS_EXPORT=y
+# CONFIG_XFS_RT is not set
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_SECURITY=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_QUOTA=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_ZISOFS_FS=y
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+CONFIG_DEVPTS_FS_XATTR=y
+CONFIG_DEVPTS_FS_SECURITY=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_SECURITY=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+CONFIG_AFFS_FS=m
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_VXFS_FS=m
+# CONFIG_HPFS_FS is not set
+CONFIG_QNX4FS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_RPCSEC_GSS_KRB5=m
+CONFIG_RPCSEC_GSS_SPKM3=m
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_EXPERIMENTAL is not set
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_SMALLDOS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_OSF_PARTITION=y
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+CONFIG_SUN_PARTITION=y
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_CAPABILITIES=y
+# CONFIG_SECURITY_SECLVL is not set
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_SELINUX_DEVELOP=y
+CONFIG_SECURITY_SELINUX_AVC_STATS=y
+# CONFIG_SECURITY_SELINUX_MLS is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+# CONFIG_CRYPTO_AES is not set
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_KHAZAD=m
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/lib/delay.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/lib/delay.c      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,52 @@
+/*
+ *     Precise Delay Loops for i386
+ *
+ *     Copyright (C) 1993 Linus Torvalds
+ *     Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *     The __delay function must _NOT_ be inlined as its execution time
+ *     depends wildly on alignment on many x86 processors. The additional
+ *     jump magic is needed to get the timing stable on all the CPU's
+ *     we have to worry about.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_SMP
+#include <asm/smp.h>
+#endif
+
+void __delay(unsigned long loops)
+{
+       unsigned long bclock, now;
+       
+       rdtscl(bclock);
+       do
+       {
+               rep_nop();
+               rdtscl(now);
+       } while ((now-bclock) < loops);
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+       int d0;
+       __asm__("mull %0"
+               :"=d" (xloops), "=&a" (d0)
+               :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy));
+        __delay(xloops * HZ);
+}
+
+void __udelay(unsigned long usecs)
+{
+       __const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
+}
+
+void __ndelay(unsigned long nsecs)
+{
+       __const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
+}
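
The magic multipliers above are fixed-point reciprocals: multiplying by
2^32/d and keeping only the high 32 bits of the 64-bit product -- which
is exactly what `mull` leaves in %edx -- divides by d without a divide
instruction. A standalone sketch of the arithmetic (illustration only;
the loops_per_jiffy and HZ values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t usec_mult = (1ULL << 32) / 1000000;        /* 0x10c6, as in __udelay() */
	uint32_t nsec_mult = (1ULL << 32) / 1000000000 + 1; /* 0x5, rounded up, as in __ndelay() */

	unsigned long loops_per_jiffy = 2500000; /* made-up calibration result */
	unsigned long hz = 100;                  /* made-up HZ */
	unsigned long usecs = 10;

	/* High half of the 64-bit product, i.e. what `mull` leaves in %edx. */
	uint64_t prod = (uint64_t)(usecs * usec_mult) * loops_per_jiffy;
	unsigned long loops = (unsigned long)(prod >> 32) * hz;

	/* ~ usecs * loops_per_jiffy * HZ / 10^6 = 2500; prints 2400 here
	 * because the high half is truncated before scaling by HZ. */
	printf("mult=%#x/%#x -> %lu delay loops\n",
	       (unsigned)usec_mult, (unsigned)nsec_mult, loops);
	return 0;
}
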
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/drivers/char/tty_io.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/drivers/char/tty_io.c     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,2891 @@
+/*
+ *  linux/drivers/char/tty_io.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures.  Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time.  Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c).  This
+ * makes for cleaner and more compact code.  -TYT, 9/17/92 
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling.  No delays, but all
+ * other bits should be there.
+ *     -- Nick Holloway <alfie@xxxxxxxxxxxxxxxxx>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ *     -- julian@xxxxxxxxxxxxxxxxxxxxxx (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ *     -- ctm@xxxxxxxx, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ *     -- mj@xxxxxxxxxxxxxxxxx, 19-Nov-95
+ * 
+ * Restrict vt switching via ioctl()
+ *      -- grif@xxxxxxxxxx, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ *     -- Marko Kohtala <Marko.Kohtala@xxxxxx>, March 97
+ *
+ * Rewrote init_dev and release_dev to eliminate races.
+ *     -- Bill Hawes <whawes@xxxxxxxx>, June 97
+ *
+ * Added devfs support.
+ *      -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ *      -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ *      -- Russell King <rmk@xxxxxxxxxxxxxxxx>
+ *
+ * Move do_SAK() into process context.  Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@xxxxxxxxxx> 17Mar01
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <linux/devpts_fs.h>
+#include <linux/file.h>
+#include <linux/console.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/kd.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+
+#include <linux/kbd_kern.h>
+#include <linux/vt_kern.h>
+#include <linux/selection.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/kmod.h>
+
+#ifdef CONFIG_XEN_CONSOLE
+extern void xen_console_init(void);
+#endif
+
+#ifdef CONFIG_VT
+extern void con_init_devfs (void);
+#endif
+
+extern void disable_early_printk(void);
+
+#define CONSOLE_DEV MKDEV(TTY_MAJOR,0)
+#define TTY_DEV MKDEV(TTYAUX_MAJOR,0)
+#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1)
+#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2)
+
+#undef TTY_DEBUG_HANGUP
+
+#define TTY_PARANOIA_CHECK 1
+#define CHECK_TTY_COUNT 1
+
+struct termios tty_std_termios;                /* for the benefit of tty drivers */
+struct tty_driver *tty_drivers;                /* linked list of tty drivers */
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver ptm_driver[]; /* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver pts_driver[]; /* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+static void initialize_tty_struct(struct tty_struct *tty);
+
+static ssize_t tty_read(struct file *, char *, size_t, loff_t *);
+static ssize_t tty_write(struct file *, const char *, size_t, loff_t *);
+static unsigned int tty_poll(struct file *, poll_table *);
+static int tty_open(struct inode *, struct file *);
+static int tty_release(struct inode *, struct file *);
+int tty_ioctl(struct inode * inode, struct file * file,
+             unsigned int cmd, unsigned long arg);
+static int tty_fasync(int fd, struct file * filp, int on);
+extern int vme_scc_init (void);
+extern long vme_scc_console_init(void);
+extern int serial167_init(void);
+extern long serial167_console_init(void);
+extern void console_8xx_init(void);
+extern void au1x00_serial_console_init(void);
+extern int rs_8xx_init(void);
+extern void mac_scc_console_init(void);
+extern void hwc_console_init(void);
+extern void hwc_tty_init(void);
+extern void con3215_init(void);
+extern void tty3215_init(void);
+extern void tub3270_con_init(void);
+extern void tub3270_init(void);
+extern void rs285_console_init(void);
+extern void sa1100_rs_console_init(void);
+extern void sgi_serial_console_init(void);
+extern void sn_sal_serial_console_init(void);
+extern void sci_console_init(void);
+extern void dec_serial_console_init(void);
+extern void tx3912_console_init(void);
+extern void tx3912_rs_init(void);
+extern void txx927_console_init(void);
+extern void txx9_rs_init(void);
+extern void txx9_serial_console_init(void);
+extern void sb1250_serial_console_init(void);
+extern void arc_console_init(void);
+extern int hvc_console_init(void);
+
+#ifndef MIN
+#define MIN(a,b)       ((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a,b)       ((a) < (b) ? (b) : (a))
+#endif
+
+static struct tty_struct *alloc_tty_struct(void)
+{
+       struct tty_struct *tty;
+
+       tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+       if (tty)
+               memset(tty, 0, sizeof(struct tty_struct));
+       return tty;
+}
+
+static inline void free_tty_struct(struct tty_struct *tty)
+{
+       kfree(tty);
+}
+
+/*
+ * This routine returns the name of tty.
+ */
+static char *
+_tty_make_name(struct tty_struct *tty, const char *name, char *buf)
+{
+       int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0;
+
+       if (!tty) /* Hmm.  NULL pointer.  That's fun. */
+               strcpy(buf, "NULL tty");
+       else
+               sprintf(buf, name,
+                       idx + tty->driver.name_base);
+               
+       return buf;
+}
+
+#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \
+                        (tty)->driver.name_base)
+
+char *tty_name(struct tty_struct *tty, char *buf)
+{
+       return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf);
+}
+
+inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
+                             const char *routine)
+{
+#ifdef TTY_PARANOIA_CHECK
+       static const char badmagic[] = KERN_WARNING
+               "Warning: bad magic number for tty struct (%s) in %s\n";
+       static const char badtty[] = KERN_WARNING
+               "Warning: null TTY for (%s) in %s\n";
+
+       if (!tty) {
+               printk(badtty, kdevname(device), routine);
+               return 1;
+       }
+       if (tty->magic != TTY_MAGIC) {
+               printk(badmagic, kdevname(device), routine);
+               return 1;
+       }
+#endif
+       return 0;
+}
+
+static int check_tty_count(struct tty_struct *tty, const char *routine)
+{
+#ifdef CHECK_TTY_COUNT
+       struct list_head *p;
+       int count = 0;
+       
+       file_list_lock();
+       for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+               if(list_entry(p, struct file, f_list)->private_data == tty)
+                       count++;
+       }
+       file_list_unlock();
+       if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver.subtype == PTY_TYPE_SLAVE &&
+           tty->link && tty->link->count)
+               count++;
+       if (tty->count != count) {
+               printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+                                   "!= #fd's(%d) in %s\n",
+                      kdevname(tty->device), tty->count, count, routine);
+               return count;
+       }       
+#endif
+       return 0;
+}
+
+/*
+ *     This is probably overkill for real world processors but
+ *     they are not on hot paths so a little discipline won't do
+ *     any harm.
+ */    
+
+static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
+{
+       down(&tty->termios_sem);
+       tty->termios->c_line = num;
+       up(&tty->termios_sem);
+}
+
+/*
+ *     This guards the refcounted line discipline lists. The lock
+ *     must be taken with irqs off because there are hangup path
+ *     callers who will do ldisc lookups and cannot sleep.
+ */
+
+spinlock_t tty_ldisc_lock = SPIN_LOCK_UNLOCKED;
+DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait);
+struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table     */
+
+int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+{
+
+       unsigned long flags;
+       int ret = 0;
+
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               return -EINVAL;
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       if (new_ldisc) {
+               tty_ldiscs[disc] = *new_ldisc;
+               tty_ldiscs[disc].num = disc;
+               tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
+               tty_ldiscs[disc].refcount = 0;
+       } else {
+               if(tty_ldiscs[disc].refcount)
+                       ret = -EBUSY;
+               else
+                       tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED;
+        }
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       
+       return ret;
+
+}
+
+
+EXPORT_SYMBOL(tty_register_ldisc);
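
A sketch of how a module would use this interface (hypothetical names;
N_MOUSE merely stands in for whichever discipline number the module
claims, and a real discipline would fill in the full set of methods):

static struct tty_ldisc example_ldisc = {
	magic:	TTY_LDISC_MAGIC,
	name:	"example",
	/* open/close/read/write/... supplied by a real discipline */
};

static int __init example_ldisc_init(void)
{
	return tty_register_ldisc(N_MOUSE, &example_ldisc);
}

static void __exit example_ldisc_exit(void)
{
	/* Passing NULL unregisters; this fails with -EBUSY while any
	   tty still holds a reference to the discipline. */
	tty_register_ldisc(N_MOUSE, NULL);
}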
+
+struct tty_ldisc *tty_ldisc_get(int disc)
+{
+       unsigned long flags;
+       struct tty_ldisc *ld;
+
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               return NULL;
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+
+       ld = &tty_ldiscs[disc];
+       /* Check the entry is defined */
+       if(ld->flags & LDISC_FLAG_DEFINED)
+               ld->refcount++;
+       else
+               ld = NULL;
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       return ld;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_get);
+
+void tty_ldisc_put(int disc)
+{
+       struct tty_ldisc *ld;
+       unsigned long flags;
+
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               BUG();
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       ld = &tty_ldiscs[disc];
+       if(ld->refcount <= 0)
+               BUG();
+       ld->refcount--;
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+      
+EXPORT_SYMBOL_GPL(tty_ldisc_put);
+
+void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+       tty->ldisc = *ld;
+       tty->ldisc.refcount = 0;
+}
+
+/**
+ *     tty_ldisc_try           -       internal helper
+ *     @tty: the tty
+ *
+ *     Make a single attempt to grab and bump the refcount on
+ *     the tty ldisc. Return 0 on failure or 1 on success. This is
+ *     used to implement both the waiting and non waiting versions
+ *     of tty_ldisc_ref
+ */
+
+static int tty_ldisc_try(struct tty_struct *tty)
+{
+       unsigned long flags;
+       struct tty_ldisc *ld;
+       int ret = 0;
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       ld = &tty->ldisc;
+       if(test_bit(TTY_LDISC, &tty->flags))
+       {
+               ld->refcount++;
+               ret = 1;
+       }
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       return ret;
+}
+
+/**
+ *     tty_ldisc_ref_wait      -       wait for the tty ldisc
+ *     @tty: tty device
+ *
+ *     Dereference the line discipline for the terminal and take a
+ *     reference to it. If the line discipline is in flux then
+ *     wait patiently until it changes.
+ *
+ *     Note: Must not be called from an IRQ/timer context. The caller
+ *     must also be careful not to hold other locks that will deadlock
+ *     against a discipline change, such as an existing ldisc reference
+ *     (which we check for)
+ */
+
+struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
+{
+       /* wait_event is a macro */
+       wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
+       return &tty->ldisc;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
+
+/**
+ *     tty_ldisc_ref           -       get the tty ldisc
+ *     @tty: tty device
+ *
+ *     Dereference the line discipline for the terminal and take a
+ *     reference to it. If the line discipline is in flux then
+ *     return NULL. Can be called from IRQ and timer functions.
+ */
+
+struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
+{
+       if(tty_ldisc_try(tty))
+               return &tty->ldisc;
+       return NULL;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_ref);
+
+
+void tty_ldisc_deref(struct tty_ldisc *ld)
+{
+
+       unsigned long flags;
+
+       if(ld == NULL)
+               BUG();
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       if(ld->refcount == 0)
+               printk(KERN_EMERG "tty_ldisc_deref: no references.\n");
+       else
+               ld->refcount--;
+       if(ld->refcount == 0)
+               wake_up(&tty_ldisc_wait);
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_deref);
+
+/**
+  *     tty_ldisc_enable        -       allow ldisc use
+  *     @tty: terminal to activate ldisc on
+  *
+  *     Set the TTY_LDISC flag when the line discipline can be called
+  *     again. Do necessary wakeups for existing sleepers.
+  *
+  *     Note: nobody should set this bit except via this function. Clearing
+  *     directly is allowed.
+  */
+
+static void tty_ldisc_enable(struct tty_struct *tty)
+{
+       set_bit(TTY_LDISC, &tty->flags);
+       wake_up(&tty_ldisc_wait);
+}
+
+/**
+ *     tty_set_ldisc           -       set line discipline
+ *     @tty: the terminal to set
+ *     @ldisc: the line discipline
+ *
+ *     Set the discipline of a tty line. Must be called from a process
+ *     context.
+ */
+
+static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
+{
+       int     retval = 0;
+       struct  tty_ldisc o_ldisc;
+       char buf[64];
+       unsigned long flags;
+       struct tty_ldisc *ld;
+
+       if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS))
+               return -EINVAL;
+
+restart:
+
+       if (tty->ldisc.num == ldisc)
+               return 0; /* We are already in the desired discipline */
+
+       ld = tty_ldisc_get(ldisc);
+       /* Eduardo Blanco <ejbs@xxxxxxxxxxxx> */
+       /* Cyrus Durgin <cider@xxxxxxxxxxxxx> */
+       if (ld == NULL)
+       {
+               char modname [20];
+                sprintf(modname, "tty-ldisc-%d", ldisc);
+                request_module (modname);
+               ld = tty_ldisc_get(ldisc);
+       }
+       
+       if (ld == NULL)
+               return -EINVAL;
+
+       
+       o_ldisc = tty->ldisc;
+       tty_wait_until_sent(tty, 0);
+
+       /*
+        *      Make sure we don't change while someone holds a
+        *      reference to the line discipline. The TTY_LDISC bit
+        *      prevents anyone taking a reference once it is clear.
+        *      We need the lock to avoid racing reference takers.
+        */
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       if(tty->ldisc.refcount)
+       {
+               /* Free the new ldisc we grabbed. Must drop the lock
+                  first. */
+               spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+               tty_ldisc_put(ldisc);
+               /*
+                * There are several reasons we may be busy, including
+                * random momentary I/O traffic. We must therefore
+                * retry. We could distinguish between blocking ops
+                * and retries if we made tty_ldisc_wait() smarter. That
+                * is up for discussion.
+                */
+               if(wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
+                       return -ERESTARTSYS;
+               goto restart;
+       }
+       clear_bit(TTY_LDISC, &tty->flags);
+       clear_bit(TTY_DONT_FLIP, &tty->flags);
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+       /*
+        *      From this point on we know nobody has an ldisc
+        *      usage reference, nor can they obtain one until
+        *      we say so later on.
+        */
+
+       /*
+        * Wait for ->hangup_work and ->flip.work handlers to terminate
+        */
+       run_task_queue(&tq_timer);
+       flush_scheduled_tasks();
+       
+       /* Shutdown the current discipline. */
+       if (tty->ldisc.close)
+               (tty->ldisc.close)(tty);
+
+       /* Now set up the new line discipline. */
+       tty_ldisc_assign(tty, ld);
+       tty_set_termios_ldisc(tty, ldisc);
+       if (tty->ldisc.open)
+               retval = (tty->ldisc.open)(tty);
+       if (retval < 0) {
+               tty_ldisc_put(ldisc);
+               /* There is an outstanding reference here so this is safe */
+               tty_ldisc_assign(tty, tty_ldisc_get(o_ldisc.num));
+               tty_set_termios_ldisc(tty, tty->ldisc.num);
+               if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) {
+                       tty_ldisc_put(o_ldisc.num);
+                       /* This driver is always present */
+                       tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+                       tty_set_termios_ldisc(tty, N_TTY);
+                       if (tty->ldisc.open) {
+                               int r = tty->ldisc.open(tty);
+
+                               if (r < 0)
+                                       panic("Couldn't open N_TTY ldisc for "
+                                             "%s --- error %d.",
+                                             tty_name(tty, buf), r);
+                       }
+               }
+       }
+       /* At this point we hold a reference to the new ldisc and a
+           reference to the old ldisc. If we ended up flipping back
+           to the existing ldisc we have two references to it */
+
+       if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc)
+               tty->driver.set_ldisc(tty);
+
+       tty_ldisc_put(o_ldisc.num);
+
+       /*
+        *      Allow ldisc referencing to occur as soon as the driver
+        *      ldisc callback completes.
+        */
+       tty_ldisc_enable(tty);
+
+       return retval;
+}
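
In outline, the switch above proceeds as follows: (1) tty_ldisc_get()
pins the new discipline, loading its module if necessary; (2) under
tty_ldisc_lock, wait until the current discipline's refcount drops to
zero, then clear TTY_LDISC and TTY_DONT_FLIP so no new references can
be taken; (3) flush pending timer and scheduled work, then call the old
discipline's close(); (4) install the new discipline and call its
open(), falling back first to the old discipline and finally to N_TTY
if open() fails; (5) notify the driver via driver.set_ldisc() if the
discipline number changed, and tty_ldisc_enable() sets TTY_LDISC again,
waking anyone blocked in tty_ldisc_ref_wait().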
+
+/*
+ * This routine returns a tty driver structure, given a device number
+ */
+struct tty_driver *get_tty_driver(kdev_t device)
+{
+       int     major, minor;
+       struct tty_driver *p;
+       
+       minor = MINOR(device);
+       major = MAJOR(device);
+
+       for (p = tty_drivers; p; p = p->next) {
+               if (p->major != major)
+                       continue;
+               if (minor < p->minor_start)
+                       continue;
+               if (minor >= p->minor_start + p->num)
+                       continue;
+               return p;
+       }
+       return NULL;
+}
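
For example (a made-up registration): a driver registered with
major = 4, minor_start = 64 and num = 32 owns minors 64..95 of major 4,
so MKDEV(4, 70) resolves to it while MKDEV(4, 96) falls through to the
next entry on the tty_drivers list.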
+
+/*
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU.  If the signal is blocked or
+ * ignored, go ahead and perform the operation.  (POSIX 7.2)
+ */
+int tty_check_change(struct tty_struct * tty)
+{
+       if (current->tty != tty)
+               return 0;
+       if (tty->pgrp <= 0) {
+               printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n");
+               return 0;
+       }
+       if (current->pgrp == tty->pgrp)
+               return 0;
+       if (is_ignored(SIGTTOU))
+               return 0;
+       if (is_orphaned_pgrp(current->pgrp))
+               return -EIO;
+       (void) kill_pg(current->pgrp,SIGTTOU,1);
+       return -ERESTARTSYS;
+}
+
+static ssize_t hung_up_tty_read(struct file * file, char * buf,
+                               size_t count, loff_t *ppos)
+{
+       /* Can't seek (pread) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+       return 0;
+}
+
+static ssize_t hung_up_tty_write(struct file * file, const char * buf,
+                                size_t count, loff_t *ppos)
+{
+       /* Can't seek (pwrite) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+       return -EIO;
+}
+
+/* No kernel lock held - none needed ;) */
+static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait)
+{
+       return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+}
+
+static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
+                            unsigned int cmd, unsigned long arg)
+{
+       return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+}
+
+static struct file_operations tty_fops = {
+       llseek:         no_llseek,
+       read:           tty_read,
+       write:          tty_write,
+       poll:           tty_poll,
+       ioctl:          tty_ioctl,
+       open:           tty_open,
+       release:        tty_release,
+       fasync:         tty_fasync,
+};
+
+static struct file_operations hung_up_tty_fops = {
+       llseek:         no_llseek,
+       read:           hung_up_tty_read,
+       write:          hung_up_tty_write,
+       poll:           hung_up_tty_poll,
+       ioctl:          hung_up_tty_ioctl,
+       release:        tty_release,
+};
+
+static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED;
+static struct file *redirect;
+
+/**
+  *     tty_wakeup      -       request more data
+  *     @tty: terminal
+  *
+  *     Internal and external helper for wakeups of tty. This function
+  *     informs the line discipline if present that the driver is ready
+  *     to receive more output data.
+  */
+
+void tty_wakeup(struct tty_struct *tty)
+{
+       struct tty_ldisc *ld;
+
+       if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
+               ld = tty_ldisc_ref(tty);
+               if(ld) {
+                       if(ld->write_wakeup)
+                               ld->write_wakeup(tty);
+                       tty_ldisc_deref(ld);
+               }
+       }
+       wake_up_interruptible(&tty->write_wait);
+}
+
+/*
+ * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do 
+ * that in 2.4 for modutils compat reasons.
+ */
+EXPORT_SYMBOL(tty_wakeup);
+
+
+void tty_ldisc_flush(struct tty_struct *tty)
+{
+       struct tty_ldisc *ld = tty_ldisc_ref(tty);
+       if(ld) {
+               if(ld->flush_buffer)
+                       ld->flush_buffer(tty);
+               tty_ldisc_deref(ld);
+       }
+}
+
+
+/*
+ * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do 
+ * that in 2.4 for modutils compat reasons.
+ */
+EXPORT_SYMBOL(tty_ldisc_flush);
+
+void do_tty_hangup(void *data)
+{
+       struct tty_struct *tty = (struct tty_struct *) data;
+       struct file * cons_filp = NULL;
+       struct file *f = NULL;
+       struct task_struct *p;
+       struct list_head *l;
+       struct tty_ldisc *ld;
+       int    closecount = 0, n;
+
+       if (!tty)
+               return;
+
+       /* inuse_filps is protected by the single kernel lock */
+       lock_kernel();
+
+       spin_lock(&redirect_lock);
+       if (redirect && redirect->private_data == tty) {
+               f = redirect;
+               redirect = NULL;
+       }
+       spin_unlock(&redirect_lock);
+       
+       check_tty_count(tty, "do_tty_hangup");
+       file_list_lock();
+       for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+               struct file * filp = list_entry(l, struct file, f_list);
+               if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
+                   filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
+                       cons_filp = filp;
+                       continue;
+               }
+               if (filp->f_op != &tty_fops)
+                       continue;
+               closecount++;
+               tty_fasync(-1, filp, 0);        /* can't block */
+               filp->f_op = &hung_up_tty_fops;
+       }
+       file_list_unlock();
+       
+       /* FIXME! What are the locking issues here? This may be overdoing things... */
+       ld = tty_ldisc_ref(tty);
+       if(ld != NULL)  
+       {
+               if (ld->flush_buffer)
+                       ld->flush_buffer(tty);
+               if (tty->driver.flush_buffer)
+                       tty->driver.flush_buffer(tty);
+               if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->write_wakeup)
+                       ld->write_wakeup(tty);
+               if (ld->hangup)
+                       ld->hangup(tty);
+       }
+
+       /* FIXME: Once we trust the LDISC code better we can wait here for
+          ldisc completion and fix the driver call race */
+
+       wake_up_interruptible(&tty->write_wait);
+       wake_up_interruptible(&tty->read_wait);
+
+       /*
+        * Shutdown the current line discipline, and reset it to
+        * N_TTY.
+        */
+
+       if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS)
+       {
+               down(&tty->termios_sem);
+                *tty->termios = tty->driver.init_termios;
+               up(&tty->termios_sem);
+       }
+
+       /* Defer ldisc switch */
+       /* tty_deferred_ldisc_switch(N_TTY)
+          This should get done automatically when the port closes and
+          tty_release is called */
+
+       read_lock(&tasklist_lock);
+       for_each_task(p) {
+               if ((tty->session > 0) && (p->session == tty->session) &&
+                   p->leader) {
+                       send_sig(SIGHUP,p,1);
+                       send_sig(SIGCONT,p,1);
+                       if (tty->pgrp > 0)
+                               p->tty_old_pgrp = tty->pgrp;
+               }
+               if (p->tty == tty)
+                       p->tty = NULL;
+       }
+       read_unlock(&tasklist_lock);
+
+       tty->flags = 0;
+       tty->session = 0;
+       tty->pgrp = -1;
+       tty->ctrl_status = 0;
+       /*
+        *      If one of the devices matches a console pointer, we
+        *      cannot just call hangup() because that will cause
+        *      tty->count and state->count to go out of sync.
+        *      So we just call close() the right number of times.
+        */
+       if (cons_filp) {
+               if (tty->driver.close)
+                       for (n = 0; n < closecount; n++)
+                               tty->driver.close(tty, cons_filp);
+       } else if (tty->driver.hangup)
+               (tty->driver.hangup)(tty);
+
+       /* We don't want to have driver/ldisc interactions beyond
+          the ones we did here. The driver layer expects no
+          calls after ->hangup() from the ldisc side. However we
+          can't yet guarantee all that */
+
+       set_bit(TTY_HUPPED, &tty->flags);
+       if(ld) {
+               tty_ldisc_enable(tty);
+               tty_ldisc_deref(ld);
+       }
+       unlock_kernel();
+       if (f)
+               fput(f);
+}
+
+void tty_hangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+       char    buf[64];
+       
+       printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+#endif
+       schedule_task(&tty->tq_hangup);
+}
+
+void tty_vhangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+       char    buf[64];
+
+       printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+#endif
+       do_tty_hangup((void *) tty);
+}
+
+int tty_hung_up_p(struct file * filp)
+{
+       return (filp->f_op == &hung_up_tty_fops);
+}
+
+/*
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
+ *
+ * It performs the following functions:
+ *     (1)  Sends a SIGHUP and SIGCONT to the foreground process group
+ *     (2)  Clears the tty from being controlling the session
+ *     (3)  Clears the controlling tty for all processes in the
+ *             session group.
+ *
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ */
+void disassociate_ctty(int on_exit)
+{
+       struct tty_struct *tty = current->tty;
+       struct task_struct *p;
+       int tty_pgrp = -1;
+
+       if (tty) {
+               tty_pgrp = tty->pgrp;
+               if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)
+                       tty_vhangup(tty);
+       } else {
+               if (current->tty_old_pgrp) {
+                       kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);
+                       kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
+               }
+               return;
+       }
+       if (tty_pgrp > 0) {
+               kill_pg(tty_pgrp, SIGHUP, on_exit);
+               if (!on_exit)
+                       kill_pg(tty_pgrp, SIGCONT, on_exit);
+       }
+
+       current->tty_old_pgrp = 0;
+       tty->session = 0;
+       tty->pgrp = -1;
+
+       read_lock(&tasklist_lock);
+       for_each_task(p)
+               if (p->session == current->session)
+                       p->tty = NULL;
+       read_unlock(&tasklist_lock);
+}
+
+void stop_tty(struct tty_struct *tty)
+{
+       if (tty->stopped)
+               return;
+       tty->stopped = 1;
+       if (tty->link && tty->link->packet) {
+               tty->ctrl_status &= ~TIOCPKT_START;
+               tty->ctrl_status |= TIOCPKT_STOP;
+               wake_up_interruptible(&tty->link->read_wait);
+       }
+       if (tty->driver.stop)
+               (tty->driver.stop)(tty);
+}
+
+void start_tty(struct tty_struct *tty)
+{
+       if (!tty->stopped || tty->flow_stopped)
+               return;
+       tty->stopped = 0;
+       if (tty->link && tty->link->packet) {
+               tty->ctrl_status &= ~TIOCPKT_STOP;
+               tty->ctrl_status |= TIOCPKT_START;
+               wake_up_interruptible(&tty->link->read_wait);
+       }
+       if (tty->driver.start)
+               (tty->driver.start)(tty);
+       /* If we have a running line discipline it may need kicking */
+       tty_wakeup(tty);
+}
+
+static ssize_t tty_read(struct file * file, char * buf, size_t count, 
+                       loff_t *ppos)
+{
+       int i;
+       struct tty_struct * tty;
+       struct inode *inode;
+       struct tty_ldisc *ld;
+
+       /* Can't seek (pread) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+
+       tty = (struct tty_struct *)file->private_data;
+       inode = file->f_dentry->d_inode;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_read"))
+               return -EIO;
+       if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+               return -EIO;
+
+       /* This check not only needs to be done before reading, but also
+          whenever read_chan() gets woken up after sleeping, so I've
+          moved it to there.  This should only be done for the N_TTY
+          line discipline, anyway.  Same goes for write_chan(). -- jlc. */
+#if 0
+       if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */
+           (tty->pgrp > 0) &&
+           (current->tty == tty) &&
+           (tty->pgrp != current->pgrp))
+               if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp))
+                       return -EIO;
+               else {
+                       (void) kill_pg(current->pgrp, SIGTTIN, 1);
+                       return -ERESTARTSYS;
+               }
+#endif
+       /* We want to wait for the line discipline to sort out in this
+          situation */
+       ld = tty_ldisc_ref_wait(tty);
+       lock_kernel();
+       if (ld->read)
+               i = (ld->read)(tty,file,buf,count);
+       else
+               i = -EIO;
+       tty_ldisc_deref(ld);
+       unlock_kernel();
+       if (i > 0)
+               inode->i_atime = CURRENT_TIME;
+       return i;
+}
+
+/*
+ * Split writes up in sane blocksizes to avoid
+ * denial-of-service type attacks
+ */
+static inline ssize_t do_tty_write(
+       ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+       struct tty_struct *tty,
+       struct file *file,
+       const unsigned char *buf,
+       size_t count)
+{
+       ssize_t ret = 0, written = 0;
+       
+       if (file->f_flags & O_NONBLOCK) {
+               if (down_trylock(&tty->atomic_write))
+                       return -EAGAIN;
+       }
+       else {
+               if (down_interruptible(&tty->atomic_write))
+                       return -ERESTARTSYS;
+       }
+       if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) {
+               lock_kernel();
+               written = write(tty, file, buf, count);
+               unlock_kernel();
+       } else {
+               for (;;) {
+                       unsigned long size = MAX(PAGE_SIZE*2,16384);
+                       if (size > count)
+                               size = count;
+                       lock_kernel();
+                       ret = write(tty, file, buf, size);
+                       unlock_kernel();
+                       if (ret <= 0)
+                               break;
+                       written += ret;
+                       buf += ret;
+                       count -= ret;
+                       if (!count)
+                               break;
+                       ret = -ERESTARTSYS;
+                       if (signal_pending(current))
+                               break;
+                       if (current->need_resched)
+                               schedule();
+               }
+       }
+       if (written) {
+               file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+               ret = written;
+       }
+       up(&tty->atomic_write);
+       return ret;
+}
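
Concretely (made-up numbers): with 4K pages the chunk size is
MAX(8192, 16384) = 16384 bytes, so a 100000-byte write to a tty without
TTY_NO_WRITE_SPLIT reaches the driver as six 16384-byte chunks plus a
final 1696-byte one, with signal and reschedule checks between chunks.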
+
+
+static ssize_t tty_write(struct file * file, const char * buf, size_t count,
+                        loff_t *ppos)
+{
+       int is_console;
+       struct tty_struct * tty;
+       struct inode *inode = file->f_dentry->d_inode;
+       ssize_t ret;
+       struct tty_ldisc *ld;
+
+       /* Can't seek (pwrite) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+
+       /*
+        *      For now, we redirect writes from /dev/console as
+        *      well as /dev/tty0.
+        */
+       inode = file->f_dentry->d_inode;
+       is_console = (inode->i_rdev == SYSCONS_DEV ||
+                     inode->i_rdev == CONSOLE_DEV);
+
+       if (is_console) {
+               struct file *p = NULL;
+
+               spin_lock(&redirect_lock);
+               if (redirect) {
+                       get_file(redirect);
+                       p = redirect;
+               }
+               spin_unlock(&redirect_lock);
+
+               if (p) {
+                       ssize_t res = p->f_op->write(p, buf, count, &p->f_pos);
+                       fput(p);
+                       return res;
+               }
+       }
+
+       tty = (struct tty_struct *)file->private_data;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_write"))
+               return -EIO;
+       if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags)))
+               return -EIO;
+#if 0
+       if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) &&
+           (current->tty == tty) && (tty->pgrp != current->pgrp)) {
+               if (is_orphaned_pgrp(current->pgrp))
+                       return -EIO;
+               if (!is_ignored(SIGTTOU)) {
+                       (void) kill_pg(current->pgrp, SIGTTOU, 1);
+                       return -ERESTARTSYS;
+               }
+       }
+#endif
+
+       ld = tty_ldisc_ref_wait(tty);
+       if (!ld->write)
+               ret = -EIO;
+       else
+               ret = do_tty_write(ld->write, tty, file,
+                               (const unsigned char __user *)buf, count);
+       tty_ldisc_deref(ld);
+       return ret;
+}
+
+/* Semaphore to protect creating and releasing a tty. This is shared with
+   vt.c for deeply disgusting hack reasons */
+static DECLARE_MUTEX(tty_sem);
+
+static void down_tty_sem(int index)
+{
+       down(&tty_sem);
+}
+
+static void up_tty_sem(int index)
+{
+       up(&tty_sem);
+}
+
+static void release_mem(struct tty_struct *tty, int idx);
+
+/*
+ * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+ * failed open.  The new code protects the open with a semaphore, so it's
+ * really quite straightforward.  The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+static int init_dev(kdev_t device, struct tty_struct **ret_tty)
+{
+       struct tty_struct *tty, *o_tty;
+       struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+       struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+       struct tty_driver *driver;      
+       int retval=0;
+       int idx;
+
+       driver = get_tty_driver(device);
+       if (!driver)
+               return -ENODEV;
+
+       idx = MINOR(device) - driver->minor_start;
+
+       /* 
+        * Check whether we need to acquire the tty semaphore to avoid
+        * race conditions.  For now, play it safe.
+        */
+       down_tty_sem(idx);
+
+       /* check whether we're reopening an existing tty */
+       tty = driver->table[idx];
+       if (tty) goto fast_track;
+
+       /*
+        * First time open is complex, especially for PTY devices.
+        * This code guarantees that either everything succeeds and the
+        * TTY is ready for operation, or else the table slots are vacated
+        * and the allocated memory released.  (Except that the termios 
+        * and locked termios may be retained.)
+        */
+
+       o_tty = NULL;
+       tp = o_tp = NULL;
+       ltp = o_ltp = NULL;
+
+       tty = alloc_tty_struct();
+       if(!tty)
+               goto fail_no_mem;
+       initialize_tty_struct(tty);
+       tty->device = device;
+       tty->driver = *driver;
+
+       tp_loc = &driver->termios[idx];
+       if (!*tp_loc) {
+               tp = (struct termios *) kmalloc(sizeof(struct termios),
+                                               GFP_KERNEL);
+               if (!tp)
+                       goto free_mem_out;
+               *tp = driver->init_termios;
+       }
+
+       ltp_loc = &driver->termios_locked[idx];
+       if (!*ltp_loc) {
+               ltp = (struct termios *) kmalloc(sizeof(struct termios),
+                                                GFP_KERNEL);
+               if (!ltp)
+                       goto free_mem_out;
+               memset(ltp, 0, sizeof(struct termios));
+       }
+
+       if (driver->type == TTY_DRIVER_TYPE_PTY) {
+               o_tty = alloc_tty_struct();
+               if (!o_tty)
+                       goto free_mem_out;
+               initialize_tty_struct(o_tty);
+               o_tty->device = (kdev_t) MKDEV(driver->other->major,
+                                       driver->other->minor_start + idx);
+               o_tty->driver = *driver->other;
+
+               o_tp_loc  = &driver->other->termios[idx];
+               if (!*o_tp_loc) {
+                       o_tp = (struct termios *)
+                               kmalloc(sizeof(struct termios), GFP_KERNEL);
+                       if (!o_tp)
+                               goto free_mem_out;
+                       *o_tp = driver->other->init_termios;
+               }
+
+               o_ltp_loc = &driver->other->termios_locked[idx];
+               if (!*o_ltp_loc) {
+                       o_ltp = (struct termios *)
+                               kmalloc(sizeof(struct termios), GFP_KERNEL);
+                       if (!o_ltp)
+                               goto free_mem_out;
+                       memset(o_ltp, 0, sizeof(struct termios));
+               }
+
+               /*
+                * Everything allocated ... set up the o_tty structure.
+                */
+               driver->other->table[idx] = o_tty;
+               if (!*o_tp_loc)
+                       *o_tp_loc = o_tp;
+               if (!*o_ltp_loc)
+                       *o_ltp_loc = o_ltp;
+               o_tty->termios = *o_tp_loc;
+               o_tty->termios_locked = *o_ltp_loc;
+               (*driver->other->refcount)++;
+               if (driver->subtype == PTY_TYPE_MASTER)
+                       o_tty->count++;
+
+               /* Establish the links in both directions */
+               tty->link   = o_tty;
+               o_tty->link = tty;
+       }
+
+       /* 
+        * All structures have been allocated, so now we install them.
+        * Failures after this point use release_mem to clean up, so 
+        * there's no need to null out the local pointers.
+        */
+       driver->table[idx] = tty;
+       
+       if (!*tp_loc)
+               *tp_loc = tp;
+       if (!*ltp_loc)
+               *ltp_loc = ltp;
+       tty->termios = *tp_loc;
+       tty->termios_locked = *ltp_loc;
+       (*driver->refcount)++;
+       tty->count++;
+
+       /* 
+        * Structures all installed ... call the ldisc open routines.
+        * If we fail here just call release_mem to clean up.  No need
+        * to decrement the use counts, as release_mem doesn't care.
+        */
+       if (tty->ldisc.open) {
+               retval = (tty->ldisc.open)(tty);
+               if (retval)
+                       goto release_mem_out;
+       }
+       if (o_tty && o_tty->ldisc.open) {
+               retval = (o_tty->ldisc.open)(o_tty);
+               if (retval) {
+                       if (tty->ldisc.close)
+                               (tty->ldisc.close)(tty);
+                       goto release_mem_out;
+               }
+               set_bit(TTY_LDISC, &o_tty->flags);
+               tty_ldisc_enable(o_tty);
+       }
+       tty_ldisc_enable(tty);
+       goto success;
+
+       /*
+        * This fast open can be used if the tty is already open.
+        * No memory is allocated, and the only failures are from
+        * attempting to open a closing tty or attempting multiple
+        * opens on a pty master.
+        */
+fast_track:
+       if (test_bit(TTY_CLOSING, &tty->flags)) {
+               retval = -EIO;
+               goto end_init;
+       }
+       if (driver->type == TTY_DRIVER_TYPE_PTY &&
+           driver->subtype == PTY_TYPE_MASTER) {
+               /*
+                * special case for PTY masters: only one open permitted, 
+                * and the slave side open count is incremented as well.
+                */
+               if (tty->count) {
+                       retval = -EIO;
+                       goto end_init;
+               }
+               tty->link->count++;
+       }
+       tty->count++;
+       tty->driver = *driver; /* N.B. why do this every time?? */
+       /* FIXME */
+       if(!test_bit(TTY_LDISC, &tty->flags))
+               printk(KERN_ERR "init_dev but no ldisc\n");
+success:
+       *ret_tty = tty;
+       
+       /* All paths come through here to release the semaphore */
+end_init:
+       up_tty_sem(idx);
+       return retval;
+
+       /* Release locally allocated memory ... nothing placed in slots */
+free_mem_out:
+       if (o_tp)
+               kfree(o_tp);
+       if (o_tty)
+               free_tty_struct(o_tty);
+       if (ltp)
+               kfree(ltp);
+       if (tp)
+               kfree(tp);
+       free_tty_struct(tty);
+
+fail_no_mem:
+       retval = -ENOMEM;
+       goto end_init;
+
+       /* call the tty release_mem routine to clean out this slot */
+release_mem_out:
+       printk(KERN_INFO "init_dev: ldisc open failed, "
+                        "clearing slot %d\n", idx);
+       release_mem(tty, idx);
+       goto end_init;
+}
+
+/*
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots.
+ */
+static void release_mem(struct tty_struct *tty, int idx)
+{
+       struct tty_struct *o_tty;
+       struct termios *tp;
+
+       if ((o_tty = tty->link) != NULL) {
+               o_tty->driver.table[idx] = NULL;
+               if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+                       tp = o_tty->driver.termios[idx];
+                       o_tty->driver.termios[idx] = NULL;
+                       kfree(tp);
+               }
+               o_tty->magic = 0;
+               (*o_tty->driver.refcount)--;
+               list_del_init(&o_tty->tty_files);
+               free_tty_struct(o_tty);
+       }
+
+       tty->driver.table[idx] = NULL;
+       if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+               tp = tty->driver.termios[idx];
+               tty->driver.termios[idx] = NULL;
+               kfree(tp);
+       }
+       tty->magic = 0;
+       (*tty->driver.refcount)--;
+       list_del_init(&tty->tty_files);
+       free_tty_struct(tty);
+}
+
+/*
+ * Even releasing the tty structures is a tricky business.. We have
+ * to be very careful that the structures are all released at the
+ * same time, as interrupts might otherwise get the wrong pointers.
+ *
+ * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+ * lead to double frees or releasing memory still in use.
+ */
+static void release_dev(struct file * filp)
+{
+       struct tty_struct *tty, *o_tty;
+       int     pty_master, tty_closing, o_tty_closing, do_sleep;
+       int     idx;
+       char    buf[64];
+       unsigned long flags;
+       
+       tty = (struct tty_struct *)filp->private_data;
+       if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev"))
+               return;
+
+       check_tty_count(tty, "release_dev");
+
+       tty_fasync(-1, filp, 0);
+
+       idx = MINOR(tty->device) - tty->driver.minor_start;
+       pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+                     tty->driver.subtype == PTY_TYPE_MASTER);
+       o_tty = tty->link;
+
+#ifdef TTY_PARANOIA_CHECK
+       if (idx < 0 || idx >= tty->driver.num) {
+               printk(KERN_DEBUG "release_dev: bad idx when trying to "
+                                 "free (%s)\n", kdevname(tty->device));
+               return;
+       }
+       if (tty != tty->driver.table[idx]) {
+               printk(KERN_DEBUG "release_dev: driver.table[%d] not tty "
+                                 "for (%s)\n", idx, kdevname(tty->device));
+               return;
+       }
+       if (tty->termios != tty->driver.termios[idx]) {
+               printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios "
+                      "for (%s)\n",
+                      idx, kdevname(tty->device));
+               return;
+       }
+       if (tty->termios_locked != tty->driver.termios_locked[idx]) {
+               printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not "
+                      "termios_locked for (%s)\n",
+                      idx, kdevname(tty->device));
+               return;
+       }
+#endif
+
+#ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "release_dev of %s (tty count=%d)...",
+              tty_name(tty, buf), tty->count);
+#endif
+
+#ifdef TTY_PARANOIA_CHECK
+       if (tty->driver.other) {
+               if (o_tty != tty->driver.other->table[idx]) {
+                       printk(KERN_DEBUG "release_dev: other->table[%d] "
+                                         "not o_tty for (%s)\n",
+                              idx, kdevname(tty->device));
+                       return;
+               }
+               if (o_tty->termios != tty->driver.other->termios[idx]) {
+                       printk(KERN_DEBUG "release_dev: other->termios[%d] "
+                                         "not o_termios for (%s)\n",
+                              idx, kdevname(tty->device));
+                       return;
+               }
+               if (o_tty->termios_locked != 
+                     tty->driver.other->termios_locked[idx]) {
+                       printk(KERN_DEBUG "release_dev: other->termios_locked["
+                                         "%d] not o_termios_locked for (%s)\n",
+                              idx, kdevname(tty->device));
+                       return;
+               }
+               if (o_tty->link != tty) {
+                       printk(KERN_DEBUG "release_dev: bad pty pointers\n");
+                       return;
+               }
+       }
+#endif
+
+       if (tty->driver.close)
+               tty->driver.close(tty, filp);
+
+       /*
+        * Sanity check: if tty->count is going to zero, there shouldn't be
+        * any waiters on tty->read_wait or tty->write_wait.  We test the
+        * wait queues and kick everyone out _before_ actually starting to
+        * close.  This ensures that we won't block while releasing the tty
+        * structure.
+        *
+        * The test for the o_tty closing is necessary, since the master and
+        * slave sides may close in any order.  If the slave side closes out
+        * first, its count will be one, since the master side holds an open.
+        * Thus this test wouldn't be triggered at the time the slave closes,
+        * so we do it now.
+        *
+        * Note that it's possible for the tty to be opened again while we're
+        * flushing out waiters.  By recalculating the closing flags before
+        * each iteration we avoid any problems.
+        */
+       while (1) {
+               tty_closing = tty->count <= 1;
+               o_tty_closing = o_tty &&
+                       (o_tty->count <= (pty_master ? 1 : 0));
+               do_sleep = 0;
+
+               if (tty_closing) {
+                       if (waitqueue_active(&tty->read_wait)) {
+                               wake_up(&tty->read_wait);
+                               do_sleep++;
+                       }
+                       if (waitqueue_active(&tty->write_wait)) {
+                               wake_up(&tty->write_wait);
+                               do_sleep++;
+                       }
+               }
+               if (o_tty_closing) {
+                       if (waitqueue_active(&o_tty->read_wait)) {
+                               wake_up(&o_tty->read_wait);
+                               do_sleep++;
+                       }
+                       if (waitqueue_active(&o_tty->write_wait)) {
+                               wake_up(&o_tty->write_wait);
+                               do_sleep++;
+                       }
+               }
+               if (!do_sleep)
+                       break;
+
+               printk(KERN_WARNING "release_dev: %s: read/write wait queue "
+                                   "active!\n", tty_name(tty, buf));
+               schedule();
+       }       
+
+       /*
+        * The closing flags are now consistent with the open counts on 
+        * both sides, and we've completed the last operation that could 
+        * block, so it's safe to proceed with closing.
+        */
+       if (pty_master) {
+               if (--o_tty->count < 0) {
+                       printk(KERN_WARNING "release_dev: bad pty slave count "
+                                           "(%d) for %s\n",
+                              o_tty->count, tty_name(o_tty, buf));
+                       o_tty->count = 0;
+               }
+       }
+       if (--tty->count < 0) {
+               printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n",
+                      tty->count, tty_name(tty, buf));
+               tty->count = 0;
+       }
+
+       /*
+        * We've decremented tty->count, so we should zero out
+        * filp->private_data, to break the link between the tty and
+        * the file descriptor.  Otherwise if filp_close() blocks before
+        * the file descriptor is removed from the inuse_filp
+        * list, check_tty_count() could observe a discrepancy and
+        * printk a warning message to the user.
+        */
+       filp->private_data = 0;
+
+       /*
+        * Perform some housekeeping before deciding whether to return.
+        *
+        * Set the TTY_CLOSING flag if this was the last open.  In the
+        * case of a pty we may have to wait around for the other side
+        * to close, and TTY_CLOSING makes sure we can't be reopened.
+        */
+       if(tty_closing)
+               set_bit(TTY_CLOSING, &tty->flags);
+       if(o_tty_closing)
+               set_bit(TTY_CLOSING, &o_tty->flags);
+
+       /*
+        * If _either_ side is closing, make sure there aren't any
+        * processes that still think tty or o_tty is their controlling
+        * tty.
+        */
+       if (tty_closing || o_tty_closing) {
+               struct task_struct *p;
+
+               read_lock(&tasklist_lock);
+               for_each_task(p) {
+                       if (p->tty == tty || (o_tty && p->tty == o_tty))
+                               p->tty = NULL;
+               }
+               read_unlock(&tasklist_lock);
+       }
+
+       /* check whether both sides are closing ... */
+       if (!tty_closing || (o_tty && !o_tty_closing))
+               return;
+       
+#ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "freeing tty structure...");
+#endif
+
+       /*
+        * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
+        * kill any delayed work. As this is the final close it does not
+        * race with the set_ldisc code path.
+        */
+       clear_bit(TTY_LDISC, &tty->flags);
+       clear_bit(TTY_DONT_FLIP, &tty->flags);
+
+       /*
+        * Wait for ->hangup_work and ->flip.work handlers to terminate
+        */
+
+       run_task_queue(&tq_timer);
+       flush_scheduled_tasks();
+
+       /*
+        * Wait for any short-term users (we know they are just driver-
+        * side waiters, as the file is closing, so the user count on the
+        * file side is zero).
+        */
+
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       while(tty->ldisc.refcount)
+       {
+               spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+               wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
+               spin_lock_irqsave(&tty_ldisc_lock, flags);
+       }
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+       /*
+        * Shutdown the current line discipline, and reset it to N_TTY.
+        * N.B. why reset ldisc when we're releasing the memory??
+        * FIXME: this MUST get fixed for the new reflocking
+        */
+       if (tty->ldisc.close)
+               (tty->ldisc.close)(tty);
+       tty_ldisc_put(tty->ldisc.num);
+
+       /*
+        *      Switch the line discipline back
+        */
+       tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+       tty_set_termios_ldisc(tty,N_TTY); 
+
+       if (o_tty) {
+               /* FIXME: could o_tty be in setldisc here ? */
+               clear_bit(TTY_LDISC, &o_tty->flags);
+               if (o_tty->ldisc.close)
+                       (o_tty->ldisc.close)(o_tty);
+               tty_ldisc_put(o_tty->ldisc.num);
+               tty_ldisc_assign(o_tty, tty_ldisc_get(N_TTY));
+               tty_set_termios_ldisc(o_tty,N_TTY);
+       }
+
+       /* 
+        * The release_mem function takes care of the details of clearing
+        * the slots and preserving the termios structure.
+        */
+       release_mem(tty, idx);
+}
+
+/*
+ * tty_open and tty_release keep up the tty count that contains the
+ * number of opens done on a tty. We cannot use the inode-count, as
+ * different inodes might point to the same tty.
+ *
+ * Open-counting is needed for pty masters, as well as for keeping
+ * track of serial lines: DTR is dropped when the last close happens.
+ * (This is not done solely through tty->count, now.  - Ted 1/27/92)
+ *
+ * The termios state of a pty is reset on first open so that
+ * settings don't persist across reuse.
+ */
+static int tty_open(struct inode * inode, struct file * filp)
+{
+       struct tty_struct *tty;
+       int noctty, retval;
+       kdev_t device;
+       unsigned short saved_flags;
+       char    buf[64];
+
+       saved_flags = filp->f_flags;
+retry_open:
+       noctty = filp->f_flags & O_NOCTTY;
+       device = inode->i_rdev;
+       if (device == TTY_DEV) {
+               if (!current->tty)
+                       return -ENXIO;
+               device = current->tty->device;
+               filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+               /* noctty = 1; */
+       }
+#ifdef CONFIG_VT
+       if (device == CONSOLE_DEV) {
+               extern int fg_console;
+               device = MKDEV(TTY_MAJOR, fg_console + 1);
+               noctty = 1;
+       }
+#endif
+       if (device == SYSCONS_DEV) {
+               struct console *c = console_drivers;
+               while(c && !c->device)
+                       c = c->next;
+               if (!c)
+                        return -ENODEV;
+                device = c->device(c);
+               filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */
+               noctty = 1;
+       }
+
+       if (device == PTMX_DEV) {
+#ifdef CONFIG_UNIX98_PTYS
+
+               /* find a free pty. */
+               int major, minor;
+               struct tty_driver *driver;
+
+               /* find a device that is not in use. */
+               retval = -1;
+               for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) {
+                       driver = &ptm_driver[major];
+                       for (minor = driver->minor_start ;
+                            minor < driver->minor_start + driver->num ;
+                            minor++) {
+                               device = MKDEV(driver->major, minor);
+                               if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */
+                       }
+               }
+               return -EIO; /* no free ptys */
+       ptmx_found:
+               set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+               minor -= driver->minor_start;
+               devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start));
+               tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT,
+                                  pts_driver[major].minor_start + minor);
+               noctty = 1;
+               goto init_dev_done;
+
+#else   /* CONFIG_UNIX98_PTYS */
+
+               return -ENODEV;
+
+#endif  /* CONFIG_UNIX98_PTYS */
+       }
+
+       retval = init_dev(device, &tty);
+       if (retval)
+               return retval;
+
+#ifdef CONFIG_UNIX98_PTYS
+init_dev_done:
+#endif
+       filp->private_data = tty;
+       file_move(filp, &tty->tty_files);
+       check_tty_count(tty, "tty_open");
+       if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver.subtype == PTY_TYPE_MASTER)
+               noctty = 1;
+#ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "opening %s...", tty_name(tty, buf));
+#endif
+       if (tty->driver.open)
+               retval = tty->driver.open(tty, filp);
+       else
+               retval = -ENODEV;
+       filp->f_flags = saved_flags;
+
+       if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser())
+               retval = -EBUSY;
+
+       if (retval) {
+#ifdef TTY_DEBUG_HANGUP
+               printk(KERN_DEBUG "error %d in opening %s...", retval,
+                      tty_name(tty, buf));
+#endif
+
+               release_dev(filp);
+               if (retval != -ERESTARTSYS)
+                       return retval;
+               if (signal_pending(current))
+                       return retval;
+               schedule();
+               /*
+                * Need to reset f_op in case a hangup happened.
+                */
+               filp->f_op = &tty_fops;
+               goto retry_open;
+       }
+       if (!noctty &&
+           current->leader &&
+           !current->tty &&
+           tty->session == 0) {
+               task_lock(current);
+               current->tty = tty;
+               task_unlock(current);
+               current->tty_old_pgrp = 0;
+               tty->session = current->session;
+               tty->pgrp = current->pgrp;
+       }
+       if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) &&
+           (tty->driver.subtype == SERIAL_TYPE_CALLOUT) &&
+           (tty->count == 1)) {
+               static int nr_warns;
+               if (nr_warns < 5) {
+                       printk(KERN_WARNING "tty_io.c: "
+                               "process %d (%s) used obsolete /dev/%s - "
+                               "update software to use /dev/ttyS%d\n",
+                               current->pid, current->comm,
+                               tty_name(tty, buf), TTY_NUMBER(tty));
+                       nr_warns++;
+               }
+       }
+       return 0;
+}
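+
+/*
+ * Illustration (editorial sketch, not part of this changeset): the
+ * PTMX_DEV path above is what user space reaches through the usual
+ * libc pty helpers.  All names below are hypothetical.
+ */
+#if 0
+#define _XOPEN_SOURCE 600
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int open_pty_master(char **slave_name)
+{
+       int fd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
+
+       if (fd < 0)
+               return -1;
+       if (grantpt(fd) < 0 || unlockpt(fd) < 0) {
+               close(fd);
+               return -1;
+       }
+       *slave_name = ptsname(fd);      /* e.g. "/dev/pts/0" */
+       return fd;
+}
+#endif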
+
+static int tty_release(struct inode * inode, struct file * filp)
+{
+       lock_kernel();
+       release_dev(filp);
+       unlock_kernel();
+       return 0;
+}
+
+/* No kernel lock held - fine */
+static unsigned int tty_poll(struct file * filp, poll_table * wait)
+{
+       struct tty_struct * tty;
+       struct tty_ldisc *ld;
+       int ret = 0;
+
+       tty = (struct tty_struct *)filp->private_data;
+       if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll"))
+               return 0;
+
+       ld = tty_ldisc_ref_wait(tty);
+       if (ld->poll)
+               ret = (ld->poll)(tty, filp, wait);
+       tty_ldisc_deref(ld);
+       return ret;
+}
+
+static int tty_fasync(int fd, struct file * filp, int on)
+{
+       struct tty_struct * tty;
+       int retval;
+
+       tty = (struct tty_struct *)filp->private_data;
+       if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync"))
+               return 0;
+       
+       retval = fasync_helper(fd, filp, on, &tty->fasync);
+       if (retval <= 0)
+               return retval;
+
+       if (on) {
+               if (!waitqueue_active(&tty->read_wait))
+                       tty->minimum_to_wake = 1;
+               if (filp->f_owner.pid == 0) {
+                       filp->f_owner.pid = (-tty->pgrp) ? : current->pid;
+                       filp->f_owner.uid = current->uid;
+                       filp->f_owner.euid = current->euid;
+               }
+       } else {
+               if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+                       tty->minimum_to_wake = N_TTY_BUF_SIZE;
+       }
+       return 0;
+}
+
+static int tiocsti(struct tty_struct *tty, char * arg)
+{
+       char ch, mbz = 0;
+       struct tty_ldisc *ld;
+
+       if ((current->tty != tty) && !suser())
+               return -EPERM;
+       if (get_user(ch, arg))
+               return -EFAULT;
+       ld = tty_ldisc_ref_wait(tty);
+       ld->receive_buf(tty, &ch, &mbz, 1);
+       tty_ldisc_deref(ld);
+       return 0;
+}
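+
+/*
+ * Illustration (editorial sketch, not part of this changeset): TIOCSTI
+ * "types" one character into the tty input queue as if it came from the
+ * keyboard; the check above restricts it to the owning tty or the
+ * superuser.
+ */
+#if 0
+#include <sys/ioctl.h>
+
+int fake_input(int fd, char c)
+{
+       return ioctl(fd, TIOCSTI, &c);
+}
+#endif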
+
+static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg)
+{
+       if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
+               return -EFAULT;
+       return 0;
+}
+
+static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
+       struct winsize * arg)
+{
+       struct winsize tmp_ws;
+
+       if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
+               return -EFAULT;
+       if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
+               return 0;
+       if (tty->pgrp > 0)
+               kill_pg(tty->pgrp, SIGWINCH, 1);
+       if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
+               kill_pg(real_tty->pgrp, SIGWINCH, 1);
+       tty->winsize = tmp_ws;
+       real_tty->winsize = tmp_ws;
+       return 0;
+}
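+
+/*
+ * Illustration (editorial sketch, not part of this changeset): a
+ * hypothetical caller reads the window size, changes it, and writes it
+ * back; per the code above, SIGWINCH is only sent when the size really
+ * changes.
+ */
+#if 0
+#include <sys/ioctl.h>
+
+int grow_window(int fd)
+{
+       struct winsize ws;
+
+       if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+               return -1;
+       ws.ws_row += 1;
+       return ioctl(fd, TIOCSWINSZ, &ws);
+}
+#endif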
+
+static int tioccons(struct inode *inode, struct file *file)
+{
+       if (inode->i_rdev == SYSCONS_DEV ||
+           inode->i_rdev == CONSOLE_DEV) {
+               struct file *f;
+               if (!suser())
+                       return -EPERM;
+               spin_lock(&redirect_lock);
+               f = redirect;
+               redirect = NULL;
+               spin_unlock(&redirect_lock);
+               if (f)
+                       fput(f);
+               return 0;
+       }
+       spin_lock(&redirect_lock);
+       if (redirect) {
+               spin_unlock(&redirect_lock);
+               return -EBUSY;
+       }
+       get_file(file);
+       redirect = file;
+       spin_unlock(&redirect_lock);
+       return 0;
+}
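+
+/*
+ * Illustration (editorial sketch, not part of this changeset): TIOCCONS
+ * on an ordinary tty installs it as the console redirect consumed by
+ * tty_write() above; issued on /dev/console or /dev/tty0 (superuser
+ * only) it clears an existing redirect.
+ */
+#if 0
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+int redirect_console_to(const char *path)
+{
+       int fd = open(path, O_WRONLY);
+
+       if (fd < 0)
+               return -1;
+       return ioctl(fd, TIOCCONS, 0);  /* fails with EBUSY if already set */
+}
+#endif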
+
+
+static int fionbio(struct file *file, int *arg)
+{
+       int nonblock;
+
+       if (get_user(nonblock, arg))
+               return -EFAULT;
+
+       if (nonblock)
+               file->f_flags |= O_NONBLOCK;
+       else
+               file->f_flags &= ~O_NONBLOCK;
+       return 0;
+}
+
+static int tiocsctty(struct tty_struct *tty, int arg)
+{
+       if (current->leader &&
+           (current->session == tty->session))
+               return 0;
+       /*
+        * The process must be a session leader and
+        * not have a controlling tty already.
+        */
+       if (!current->leader || current->tty)
+               return -EPERM;
+       if (tty->session > 0) {
+               /*
+                * This tty is already the controlling
+                * tty for another session group!
+                */
+               if ((arg == 1) && suser()) {
+                       /*
+                        * Steal it away
+                        */
+                       struct task_struct *p;
+
+                       read_lock(&tasklist_lock);
+                       for_each_task(p)
+                               if (p->tty == tty)
+                                       p->tty = NULL;
+                       read_unlock(&tasklist_lock);
+               } else
+                       return -EPERM;
+       }
+       task_lock(current);
+       current->tty = tty;
+       task_unlock(current);
+       current->tty_old_pgrp = 0;
+       tty->session = current->session;
+       tty->pgrp = current->pgrp;
+       return 0;
+}
+
+static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+       /*
+        * (tty == real_tty) is a cheap way of
+        * testing if the tty is NOT a master pty.
+        */
+       if (tty == real_tty && current->tty != real_tty)
+               return -ENOTTY;
+       return put_user(real_tty->pgrp, arg);
+}
+
+static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+       pid_t pgrp;
+       int retval = tty_check_change(real_tty);
+
+       if (retval == -EIO)
+               return -ENOTTY;
+       if (retval)
+               return retval;
+       if (!current->tty ||
+           (current->tty != real_tty) ||
+           (real_tty->session != current->session))
+               return -ENOTTY;
+       if (get_user(pgrp, (pid_t *) arg))
+               return -EFAULT;
+       if (pgrp < 0)
+               return -EINVAL;
+       if (session_of_pgrp(pgrp) != current->session)
+               return -EPERM;
+       real_tty->pgrp = pgrp;
+       return 0;
+}
+
+static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+       /*
+        * (tty == real_tty) is a cheap way of
+        * testing if the tty is NOT a master pty.
+        */
+       if (tty == real_tty && current->tty != real_tty)
+               return -ENOTTY;
+       if (real_tty->session <= 0)
+               return -ENOTTY;
+       return put_user(real_tty->session, arg);
+}
+
+static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg)
+{
+       if (copy_to_user(arg, tty, sizeof(*arg)))
+               return -EFAULT;
+       return 0;
+}
+
+static int tiocsetd(struct tty_struct *tty, int *arg)
+{
+       int ldisc;
+
+       if (get_user(ldisc, arg))
+               return -EFAULT;
+       return tty_set_ldisc(tty, ldisc);
+}
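+
+/*
+ * Illustration (editorial sketch, not part of this changeset): TIOCSETD
+ * takes a pointer to the discipline number (matching the get_user()
+ * above); TIOCGETD reads the current one back the same way.
+ */
+#if 0
+#include <sys/ioctl.h>
+
+int set_ldisc(int fd, int ldisc)
+{
+       return ioctl(fd, TIOCSETD, &ldisc);
+}
+#endif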
+
+static int send_break(struct tty_struct *tty, int duration)
+{
+       tty->driver.break_ctl(tty, -1);
+       if (!signal_pending(current)) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(duration);
+       }
+       tty->driver.break_ctl(tty, 0);
+       if (signal_pending(current))
+               return -EINTR;
+       return 0;
+}
+
+static int tty_generic_brk(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+{
+       if (cmd == TCSBRK && arg) 
+       {
+               /* tcdrain case */
+               int retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+               tty_wait_until_sent(tty, 0);
+               if (signal_pending(current))
+                       return -EINTR;
+       }
+       return 0;
+}
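+
+/*
+ * Illustration (editorial sketch, not part of this changeset): TCSBRKP
+ * measures the break in tenths of a second (arg * HZ/10 jiffies in
+ * send_break() above), so an arg of 5 gives half a second; arg 0 falls
+ * back to the ~0.25 s default (HZ/4).
+ */
+#if 0
+#include <sys/ioctl.h>
+
+int send_half_second_break(int fd)
+{
+       return ioctl(fd, TCSBRKP, 5);
+}
+#endif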
+
+/*
+ * Split this up, as gcc can choke on it otherwise..
+ */
+int tty_ioctl(struct inode * inode, struct file * file,
+             unsigned int cmd, unsigned long arg)
+{
+       struct tty_struct *tty, *real_tty;
+       int retval;
+       struct tty_ldisc *ld;
+       
+       tty = (struct tty_struct *)file->private_data;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl"))
+               return -EINVAL;
+
+       real_tty = tty;
+       if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver.subtype == PTY_TYPE_MASTER)
+               real_tty = tty->link;
+
+       /*
+        * Break handling by driver
+        */
+       if (!tty->driver.break_ctl) {
+               switch(cmd) {
+               case TIOCSBRK:
+               case TIOCCBRK:
+                       if (tty->driver.ioctl)
+                               return tty->driver.ioctl(tty, file, cmd, arg);
+                       return -EINVAL;
+                       
+               /* These two ioctls always return success, even if */
+               /* the driver doesn't support them. */
+               case TCSBRK:
+               case TCSBRKP:
+                       retval = -ENOIOCTLCMD;
+                       if (tty->driver.ioctl)
+                               retval = tty->driver.ioctl(tty, file, cmd, arg);
+                       /* Not driver handled */
+                       if (retval == -ENOIOCTLCMD)
+                               retval = tty_generic_brk(tty, file, cmd, arg);
+                       return retval;
+               }
+       }
+
+       /*
+        * Factor out some common prep work
+        */
+       switch (cmd) {
+       case TIOCSETD:
+       case TIOCSBRK:
+       case TIOCCBRK:
+       case TCSBRK:
+       case TCSBRKP:                   
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+               if (cmd != TIOCCBRK) {
+                       tty_wait_until_sent(tty, 0);
+                       if (signal_pending(current))
+                               return -EINTR;
+               }
+               break;
+       }
+
+       switch (cmd) {
+               case TIOCSTI:
+                       return tiocsti(tty, (char *)arg);
+               case TIOCGWINSZ:
+                       return tiocgwinsz(tty, (struct winsize *) arg);
+               case TIOCSWINSZ:
+                       return tiocswinsz(tty, real_tty, (struct winsize *) arg);
+               case TIOCCONS:
+                       return real_tty!=tty ? -EINVAL : tioccons(inode, file);
+               case FIONBIO:
+                       return fionbio(file, (int *) arg);
+               case TIOCEXCL:
+                       set_bit(TTY_EXCLUSIVE, &tty->flags);
+                       return 0;
+               case TIOCNXCL:
+                       clear_bit(TTY_EXCLUSIVE, &tty->flags);
+                       return 0;
+               case TIOCNOTTY:
+                       if (current->tty != tty)
+                               return -ENOTTY;
+                       if (current->leader)
+                               disassociate_ctty(0);
+                       task_lock(current);
+                       current->tty = NULL;
+                       task_unlock(current);
+                       return 0;
+               case TIOCSCTTY:
+                       return tiocsctty(tty, arg);
+               case TIOCGPGRP:
+                       return tiocgpgrp(tty, real_tty, (pid_t *) arg);
+               case TIOCSPGRP:
+                       return tiocspgrp(tty, real_tty, (pid_t *) arg);
+               case TIOCGSID:
+                       return tiocgsid(tty, real_tty, (pid_t *) arg);
+               case TIOCGETD:
+                       /* FIXME: check this is ok */
+                       return put_user(tty->ldisc.num, (int *) arg);
+               case TIOCSETD:
+                       return tiocsetd(tty, (int *) arg);
+#ifdef CONFIG_VT
+               case TIOCLINUX:
+                       return tioclinux(tty, arg);
+#endif
+               case TIOCTTYGSTRUCT:
+                       return tiocttygstruct(tty, (struct tty_struct *) arg);
+
+               /*
+                * Break handling
+                */
+               case TIOCSBRK:  /* Turn break on, unconditionally */
+                       tty->driver.break_ctl(tty, -1);
+                       return 0;
+                       
+               case TIOCCBRK:  /* Turn break off, unconditionally */
+                       tty->driver.break_ctl(tty, 0);
+                       return 0;
+               case TCSBRK:   /* SVID version: non-zero arg --> no break */
+                       /*
+                        * XXX is the above comment correct, or the
+                        * code below correct?  Is this ioctl used at
+                        * all by anyone?
+                        */
+                       if (!arg)
+                               return send_break(tty, HZ/4);
+                       return 0;
+               case TCSBRKP:   /* support for POSIX tcsendbreak() */   
+                       return send_break(tty, arg ? arg*(HZ/10) : HZ/4);
+       }
+       if (tty->driver.ioctl) {
+               retval = (tty->driver.ioctl)(tty, file, cmd, arg);
+               if (retval != -ENOIOCTLCMD)
+                       return retval;
+       }
+       ld = tty_ldisc_ref_wait(tty);
+       retval = -EINVAL;
+       if (ld->ioctl) {
+               retval = ld->ioctl(tty, file, cmd, arg);
+               if (retval == -ENOIOCTLCMD)
+                       retval = -EINVAL;
+       }
+       tty_ldisc_deref(ld);
+       return retval;
+}
+
+
+/*
+ * This implements the "Secure Attention Key" ---  the idea is to
+ * prevent trojan horses by killing all processes associated with this
+ * tty when the user hits the "Secure Attention Key".  Required for
+ * super-paranoid applications --- see the Orange Book for more details.
+ * 
+ * This code could be nicer; ideally it should send a HUP, wait a few
+ * seconds, then send a INT, and then a KILL signal.  But you then
+ * have to coordinate with the init process, since all processes associated
+ * with the current tty must be dead before the new getty is allowed
+ * to spawn.
+ *
+ * If only it were correct ;-/ The current code has a nasty hole: it
+ * doesn't catch files in flight.  We may send a descriptor to ourselves
+ * via an AF_UNIX socket, close it, and later fetch it from the socket.
+ * FIXME.
+ *
+ * Nasty bug: do_SAK is being called in interrupt context.  This can
+ * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
+ */
+static void __do_SAK(void *arg)
+{
+#ifdef TTY_SOFT_SAK
+       tty_hangup(tty);
+#else
+       struct tty_struct *tty = arg;
+       struct task_struct *p;
+       int session;
+       int             i;
+       struct file     *filp;
+       struct tty_ldisc *disc;
+       
+       if (!tty)
+               return;
+       session  = tty->session;
+       /* We don't want an ldisc switch during this */
+       disc = tty_ldisc_ref(tty);
+       if (disc && disc->flush_buffer)
+               disc->flush_buffer(tty);
+       tty_ldisc_deref(disc);
+
+       if (tty->driver.flush_buffer)
+               tty->driver.flush_buffer(tty);
+
+       read_lock(&tasklist_lock);
+       for_each_task(p) {
+               if ((p->tty == tty) ||
+                   ((session > 0) && (p->session == session))) {
+                       send_sig(SIGKILL, p, 1);
+                       continue;
+               }
+               task_lock(p);
+               if (p->files) {
+                       read_lock(&p->files->file_lock);
+                       for (i=0; i < p->files->max_fds; i++) {
+                               filp = fcheck_files(p->files, i);
+                               if (filp && (filp->f_op == &tty_fops) &&
+                                   (filp->private_data == tty)) {
+                                       send_sig(SIGKILL, p, 1);
+                                       break;
+                               }
+                       }
+                       read_unlock(&p->files->file_lock);
+               }
+               task_unlock(p);
+       }
+       read_unlock(&tasklist_lock);
+#endif
+}
+
+/*
+ * The tq handling here is a little racy - tty->SAK_tq may already be queued.
+ * But there's no mechanism to fix that without futzing with tqueue_lock.
+ * Fortunately we don't need to worry, because if ->SAK_tq is already queued,
+ * the values which we write to it will be identical to the values which it
+ * already has. --akpm
+ */
+void do_SAK(struct tty_struct *tty)
+{
+       if (!tty)
+               return;
+       PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty);
+       schedule_task(&tty->SAK_tq);
+}
+
+/*
+ * This routine is called out of the software interrupt to flush data
+ * from the flip buffer to the line discipline.
+ */
+static void flush_to_ldisc(void *private_)
+{
+       struct tty_struct *tty = (struct tty_struct *) private_;
+       unsigned char   *cp;
+       char            *fp;
+       int             count;
+       unsigned long   flags;
+       struct tty_ldisc *disc;
+
+       disc = tty_ldisc_ref(tty);
+       if (disc == NULL)       /*  !TTY_LDISC */
+               return;
+
+       if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
+               queue_task(&tty->flip.tqueue, &tq_timer);
+               goto out;
+       }
+       if (tty->flip.buf_num) {
+               cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+               fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+               tty->flip.buf_num = 0;
+
+               save_flags(flags); cli();
+               tty->flip.char_buf_ptr = tty->flip.char_buf;
+               tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+       } else {
+               cp = tty->flip.char_buf;
+               fp = tty->flip.flag_buf;
+               tty->flip.buf_num = 1;
+
+               save_flags(flags); cli();
+               tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+               tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+       }
+       count = tty->flip.count;
+       tty->flip.count = 0;
+       restore_flags(flags);
+       
+       disc->receive_buf(tty, cp, fp, count);
+out:
+       tty_ldisc_deref(disc);
+}
+
+/*
+ *     Call the ldisc flush directly from a driver.  This function may
+ *     return an error, in which case the caller should retry.
+ */
+
+int tty_push_data(struct tty_struct *tty, unsigned char *cp, unsigned char *fp, int count)
+{
+       int ret = 0;
+       struct tty_ldisc *disc;
+       
+       disc = tty_ldisc_ref(tty);
+       if(test_bit(TTY_DONT_FLIP, &tty->flags))
+               ret = -EAGAIN;
+       else if(disc == NULL)
+               ret = -EIO;
+       else
+               disc->receive_buf(tty, cp, fp, count);
+       tty_ldisc_deref(disc);
+       return ret;
+
+}
+
+/*
+ * Routine which returns the baud rate of the tty
+ *
+ * Note that the baud_table needs to be kept in sync with the
+ * include/asm/termbits.h file.
+ */
+static int baud_table[] = {
+       0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
+       9600, 19200, 38400, 57600, 115200, 230400, 460800,
+#ifdef __sparc__
+       76800, 153600, 307200, 614400, 921600
+#else
+       500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000,
+       2500000, 3000000, 3500000, 4000000
+#endif
+};
+
+static int n_baud_table = sizeof(baud_table)/sizeof(int);
+
+int tty_get_baud_rate(struct tty_struct *tty)
+{
+       unsigned int cflag, i;
+
+       cflag = tty->termios->c_cflag;
+
+       i = cflag & CBAUD;
+       if (i & CBAUDEX) {
+               i &= ~CBAUDEX;
+               if (i < 1 || i+15 >= n_baud_table) 
+                       tty->termios->c_cflag &= ~CBAUDEX;
+               else
+                       i += 15;
+       }
+       if (i==15 && tty->alt_speed) {
+               if (!tty->warned) {
+                       printk(KERN_WARNING "Use of setserial/setrocket to "
+                                           "set SPD_* flags is deprecated\n");
+                       tty->warned = 1;
+               }
+               return(tty->alt_speed);
+       }
+       
+       return baud_table[i];
+}
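+
+/*
+ * Illustration (editorial worked example, not part of this changeset),
+ * using the octal B* values from include/asm/termbits.h:
+ *   B38400 == 0017: i = 15, so baud_table[15] = 38400.
+ *   B57600 == 0010001: CBAUDEX (0010000) is set, so i &= ~CBAUDEX
+ *     gives 1, i += 15 gives 16, and baud_table[16] = 57600.
+ * The (i == 15 && tty->alt_speed) case is the deprecated setserial
+ * SPD_* override: B38400 plus a driver-set alt_speed returns that rate.
+ */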
+
+void tty_flip_buffer_push(struct tty_struct *tty)
+{
+       if (tty->low_latency)
+               flush_to_ldisc((void *) tty);
+       else
+               queue_task(&tty->flip.tqueue, &tq_timer);
+}
+
+/*
+ * This subroutine initializes a tty structure.
+ */
+static void initialize_tty_struct(struct tty_struct *tty)
+{
+       memset(tty, 0, sizeof(struct tty_struct));
+       tty->magic = TTY_MAGIC;
+       tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+       tty->pgrp = -1;
+       tty->flip.char_buf_ptr = tty->flip.char_buf;
+       tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+       tty->flip.tqueue.routine = flush_to_ldisc;
+       tty->flip.tqueue.data = tty;
+       init_MUTEX(&tty->flip.pty_sem);
+       init_MUTEX(&tty->termios_sem);
+       init_waitqueue_head(&tty->write_wait);
+       init_waitqueue_head(&tty->read_wait);
+       tty->tq_hangup.routine = do_tty_hangup;
+       tty->tq_hangup.data = tty;
+       sema_init(&tty->atomic_read, 1);
+       sema_init(&tty->atomic_write, 1);
+       spin_lock_init(&tty->read_lock);
+       INIT_LIST_HEAD(&tty->tty_files);
+       INIT_TQUEUE(&tty->SAK_tq, 0, 0);
+}
+
+/*
+ * The default put_char routine if the driver did not define one.
+ */
+void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
+{
+       tty->driver.write(tty, 0, &ch, 1);
+}
+
+/*
+ * Register a tty device described by <driver>, with minor number <minor>.
+ */
+void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+       umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR;
+       kdev_t device = MKDEV (driver->major, minor);
+       int idx = minor - driver->minor_start;
+       char buf[32];
+
+       switch (device) {
+               case TTY_DEV:
+               case PTMX_DEV:
+                       mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+                       break;
+               default:
+                       if (driver->major == PTY_MASTER_MAJOR)
+                               mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+                       break;
+       }
+       if ( (minor <  driver->minor_start) || 
+            (minor >= driver->minor_start + driver->num) ) {
+               printk(KERN_ERR "Attempt to register invalid minor number "
+                      "with devfs (%d:%d).\n", (int)driver->major,(int)minor);
+               return;
+       }
+#  ifdef CONFIG_UNIX98_PTYS
+       if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) &&
+            (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) )
+               flags |= DEVFS_FL_CURRENT_OWNER;
+#  endif
+       sprintf(buf, driver->name, idx + driver->name_base);
+       devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT,
+                       driver->major, minor, mode, &tty_fops, NULL);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+void tty_unregister_devfs (struct tty_driver *driver, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+       void * handle;
+       int idx = minor - driver->minor_start;
+       char buf[32];
+
+       sprintf(buf, driver->name, idx + driver->name_base);
+       handle = devfs_find_handle (NULL, buf, driver->major, minor,
+                                   DEVFS_SPECIAL_CHR, 0);
+       devfs_unregister (handle);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+EXPORT_SYMBOL(tty_register_devfs);
+EXPORT_SYMBOL(tty_unregister_devfs);
+
+/*
+ * Called by a tty driver to register itself.
+ */
+int tty_register_driver(struct tty_driver *driver)
+{
+       int error;
+        int i;
+
+       if (driver->flags & TTY_DRIVER_INSTALLED)
+               return 0;
+
+       error = devfs_register_chrdev(driver->major, driver->name, &tty_fops);
+       if (error < 0)
+               return error;
+       else if(driver->major == 0)
+               driver->major = error;
+
+       if (!driver->put_char)
+               driver->put_char = tty_default_put_char;
+       
+       driver->prev = 0;
+       driver->next = tty_drivers;
+       if (tty_drivers) tty_drivers->prev = driver;
+       tty_drivers = driver;
+       
+       if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+               for(i = 0; i < driver->num; i++)
+                   tty_register_devfs(driver, 0, driver->minor_start + i);
+       }
+       proc_tty_register_driver(driver);
+       return error;
+}
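+
+/*
+ * Illustration (editorial sketch, not part of this changeset): a
+ * minimal driver registering itself, mirroring the dev_tty_driver
+ * setup in tty_init() below.  All "example_*" names are hypothetical.
+ */
+#if 0
+static int example_refcount;
+static struct tty_struct *example_table[1];
+static struct termios *example_termios[1];
+static struct termios *example_termios_locked[1];
+static struct tty_driver example_driver;
+
+static int __init example_init(void)
+{
+       memset(&example_driver, 0, sizeof(struct tty_driver));
+       example_driver.magic = TTY_DRIVER_MAGIC;
+       example_driver.driver_name = "example";
+       example_driver.name = "ttyX";
+       example_driver.major = 0;       /* 0: devfs_register_chrdev picks one */
+       example_driver.minor_start = 0;
+       example_driver.num = 1;
+       example_driver.type = TTY_DRIVER_TYPE_SERIAL;
+       example_driver.subtype = SERIAL_TYPE_NORMAL;
+       example_driver.init_termios = tty_std_termios;
+       example_driver.refcount = &example_refcount;
+       example_driver.table = example_table;
+       example_driver.termios = example_termios;
+       example_driver.termios_locked = example_termios_locked;
+       /* real drivers would also set .open/.close/.write etc. */
+       return tty_register_driver(&example_driver);
+}
+#endif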
+
+/*
+ * Called by a tty driver to unregister itself.
+ */
+int tty_unregister_driver(struct tty_driver *driver)
+{
+       int     retval;
+       struct tty_driver *p;
+       int     i, found = 0;
+       struct termios *tp;
+       const char *othername = NULL;
+       
+       if (*driver->refcount)
+               return -EBUSY;
+
+       for (p = tty_drivers; p; p = p->next) {
+               if (p == driver)
+                       found++;
+               else if (p->major == driver->major)
+                       othername = p->name;
+       }
+       
+       if (!found)
+               return -ENOENT;
+
+       if (othername == NULL) {
+               retval = devfs_unregister_chrdev(driver->major, driver->name);
+               if (retval)
+                       return retval;
+       } else
+               devfs_register_chrdev(driver->major, othername, &tty_fops);
+
+       if (driver->prev)
+               driver->prev->next = driver->next;
+       else
+               tty_drivers = driver->next;
+       
+       if (driver->next)
+               driver->next->prev = driver->prev;
+
+       /*
+        * Free the termios and termios_locked structures because
+        * we don't want to get memory leaks when modular tty
+        * drivers are removed from the kernel.
+        */
+       for (i = 0; i < driver->num; i++) {
+               tp = driver->termios[i];
+               if (tp) {
+                       driver->termios[i] = NULL;
+                       kfree(tp);
+               }
+               tp = driver->termios_locked[i];
+               if (tp) {
+                       driver->termios_locked[i] = NULL;
+                       kfree(tp);
+               }
+               tty_unregister_devfs(driver, driver->minor_start + i);
+       }
+       proc_tty_unregister_driver(driver);
+       return 0;
+}
+
+
+/*
+ * Initialize the console device. This is called *early*, so
+ * we can't necessarily depend on lots of kernel help here.
+ * Just do some early initializations, and do the complex setup
+ * later.
+ */
+void __init console_init(void)
+{
+       /* Setup the default TTY line discipline. */
+       memset(tty_ldiscs, 0, NR_LDISCS*sizeof(struct tty_ldisc));
+       (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY);
+
+       /*
+        * Set up the standard termios.  Individual tty drivers may 
+        * deviate from this; this is used as a template.
+        */
+       memset(&tty_std_termios, 0, sizeof(struct termios));
+       memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS);
+       tty_std_termios.c_iflag = ICRNL | IXON;
+       tty_std_termios.c_oflag = OPOST | ONLCR;
+       tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL;
+       tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+               ECHOCTL | ECHOKE | IEXTEN;
+
+       /*
+        * Set up the console device so that later boot sequences can
+        * report problems, etc.
+        */
+#ifdef CONFIG_EARLY_PRINTK
+       disable_early_printk(); 
+#endif
+
+#ifdef CONFIG_XEN_CONSOLE
+        xen_console_init();
+#endif
+
+#ifdef CONFIG_VT
+       con_init();
+#endif
+#ifdef CONFIG_AU1X00_SERIAL_CONSOLE
+       au1x00_serial_console_init();
+#endif
+#ifdef CONFIG_SERIAL_CONSOLE
+#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2))
+       console_8xx_init();
+#elif defined(CONFIG_MAC_SERIAL) && defined(CONFIG_SERIAL)
+       if (_machine == _MACH_Pmac)
+               mac_scc_console_init();
+       else
+               serial_console_init();
+#elif defined(CONFIG_MAC_SERIAL)
+       mac_scc_console_init();
+#elif defined(CONFIG_PARISC)
+       pdc_console_init();
+#elif defined(CONFIG_SERIAL)
+       serial_console_init();
+#endif /* CONFIG_8xx */
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+       vme_scc_console_init();
+#endif
+#if defined(CONFIG_SERIAL167)
+       serial167_console_init();
+#endif
+#if defined(CONFIG_SH_SCI)
+       sci_console_init();
+#endif
+#endif
+#ifdef CONFIG_SERIAL_DEC_CONSOLE
+       dec_serial_console_init();
+#endif
+#ifdef CONFIG_TN3270_CONSOLE
+       tub3270_con_init();
+#endif
+#ifdef CONFIG_TN3215
+       con3215_init();
+#endif
+#ifdef CONFIG_HWC
+        hwc_console_init();
+#endif
+#ifdef CONFIG_STDIO_CONSOLE
+       stdio_console_init();
+#endif
+#ifdef CONFIG_SERIAL_21285_CONSOLE
+       rs285_console_init();
+#endif
+#ifdef CONFIG_SERIAL_SA1100_CONSOLE
+       sa1100_rs_console_init();
+#endif
+#ifdef CONFIG_ARC_CONSOLE
+       arc_console_init();
+#endif
+#ifdef CONFIG_SERIAL_AMBA_CONSOLE
+       ambauart_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912_CONSOLE
+       tx3912_console_init();
+#endif
+#ifdef CONFIG_TXX927_SERIAL_CONSOLE
+       txx927_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TXX9_CONSOLE
+       txx9_serial_console_init();
+#endif
+#ifdef CONFIG_SIBYTE_SB1250_DUART_CONSOLE
+       sb1250_serial_console_init();
+#endif
+#ifdef CONFIG_IP22_SERIAL
+       sgi_serial_console_init();
+#endif
+#ifdef CONFIG_HVC_CONSOLE
+       hvc_console_init();
+#endif
+}
+
+static struct tty_driver dev_tty_driver, dev_syscons_driver;
+#ifdef CONFIG_UNIX98_PTYS
+static struct tty_driver dev_ptmx_driver;
+#endif
+#ifdef CONFIG_VT
+static struct tty_driver dev_console_driver;
+#endif
+
+/*
+ * Ok, now we can initialize the rest of the tty devices and can count
+ * on memory allocations, interrupts etc..
+ */
+void __init tty_init(void)
+{
+       /*
+        * dev_tty_driver and dev_console_driver are actually magic
+        * devices which get redirected at open time.  Nevertheless,
+        * we register them so that register_chrdev is called
+        * appropriately.
+        */
+       memset(&dev_tty_driver, 0, sizeof(struct tty_driver));
+       dev_tty_driver.magic = TTY_DRIVER_MAGIC;
+       dev_tty_driver.driver_name = "/dev/tty";
+       dev_tty_driver.name = dev_tty_driver.driver_name + 5;
+       dev_tty_driver.name_base = 0;
+       dev_tty_driver.major = TTYAUX_MAJOR;
+       dev_tty_driver.minor_start = 0;
+       dev_tty_driver.num = 1;
+       dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_tty_driver.subtype = SYSTEM_TYPE_TTY;
+       
+       if (tty_register_driver(&dev_tty_driver))
+               panic("Couldn't register /dev/tty driver\n");
+
+       dev_syscons_driver = dev_tty_driver;
+       dev_syscons_driver.driver_name = "/dev/console";
+       dev_syscons_driver.name = dev_syscons_driver.driver_name + 5;
+       dev_syscons_driver.major = TTYAUX_MAJOR;
+       dev_syscons_driver.minor_start = 1;
+       dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS;
+
+       if (tty_register_driver(&dev_syscons_driver))
+               panic("Couldn't register /dev/console driver\n");
+
+       /* console calls tty_register_driver() before kmalloc() works.
+        * Thus, we can't devfs_register() then.  Do so now, instead. 
+        */
+#ifdef CONFIG_VT
+       con_init_devfs();
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+       dev_ptmx_driver = dev_tty_driver;
+       dev_ptmx_driver.driver_name = "/dev/ptmx";
+       dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5;
+       dev_ptmx_driver.major= MAJOR(PTMX_DEV);
+       dev_ptmx_driver.minor_start = MINOR(PTMX_DEV);
+       dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX;
+
+       if (tty_register_driver(&dev_ptmx_driver))
+               panic("Couldn't register /dev/ptmx driver\n");
+#endif
+       
+#ifdef CONFIG_VT
+       dev_console_driver = dev_tty_driver;
+       dev_console_driver.driver_name = "/dev/vc/0";
+       dev_console_driver.name = dev_console_driver.driver_name + 5;
+       dev_console_driver.major = TTY_MAJOR;
+       dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE;
+
+       if (tty_register_driver(&dev_console_driver))
+               panic("Couldn't register /dev/tty0 driver\n");
+
+       kbd_init();
+#endif
+
+#ifdef CONFIG_SGI_L1_SERIAL_CONSOLE
+       if (ia64_platform_is("sn2")) {
+               sn_sal_serial_console_init();
+               return; /* only one console right now for SN2 */
+       }
+#endif
+#ifdef CONFIG_ESPSERIAL  /* init ESP before rs, so rs doesn't see the port */
+       espserial_init();
+#endif
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+       vme_scc_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912
+       tx3912_rs_init();
+#endif
+#ifdef CONFIG_ROCKETPORT
+       rp_init();
+#endif
+#ifdef CONFIG_SERIAL167
+       serial167_init();
+#endif
+#ifdef CONFIG_CYCLADES
+       cy_init();
+#endif
+#ifdef CONFIG_STALLION
+       stl_init();
+#endif
+#ifdef CONFIG_ISTALLION
+       stli_init();
+#endif
+#ifdef CONFIG_DIGI
+       pcxe_init();
+#endif
+#ifdef CONFIG_DIGIEPCA
+       pc_init();
+#endif
+#ifdef CONFIG_SPECIALIX
+       specialix_init();
+#endif
+#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2))
+       rs_8xx_init();
+#endif /* CONFIG_8xx */
+       pty_init();
+#ifdef CONFIG_MOXA_SMARTIO
+       mxser_init();
+#endif 
+#ifdef CONFIG_MOXA_INTELLIO
+       moxa_init();
+#endif 
+#ifdef CONFIG_VT
+       vcs_init();
+#endif
+#ifdef CONFIG_TN3270
+       tub3270_init();
+#endif
+#ifdef CONFIG_TN3215
+       tty3215_init();
+#endif
+#ifdef CONFIG_HWC
+       hwc_tty_init();
+#endif
+#ifdef CONFIG_A2232
+       a2232board_init();
+#endif
+}
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,494 @@
+#ifndef _I386_PGTABLE_H
+#define _I386_PGTABLE_H
+
+#include <linux/config.h>
+#include <asm-xen/hypervisor.h>
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * the i386, we use that, but "fold" the mid level into the top-level page
+ * table, so that we physically have the same two-level page table as the
+ * i386 mmu expects.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the i386 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <linux/threads.h>
+
+#ifndef _I386_BITOPS_H
+#include <asm/bitops.h>
+#endif
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern unsigned long empty_zero_page[1024];
+extern pgd_t swapper_pg_dir[1024];
+extern kmem_cache_t *pgd_cache;
+extern kmem_cache_t *pmd_cache;
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+
+void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pgtable_cache_init(void);
+void paging_init(void);
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifdef CONFIG_X86_PAE
+# include <asm/pgtable-3level-defs.h>
+# define PMD_SIZE      (1UL << PMD_SHIFT)
+# define PMD_MASK      (~(PMD_SIZE-1))
+#else
+# include <asm/pgtable-2level-defs.h>
+#endif
+
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR      0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT   22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
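
With the conventional 3G/1G split (PAGE_OFFSET = 0xC0000000, an assumed value for illustration) and 4MB boot-time directory entries, the arithmetic behind the two BOOT_* constants works out as:

    BOOT_USER_PGD_PTRS   = 0xC0000000 >> 22 = 768    /* pgd slots mapping user space  */
    BOOT_KERNEL_PGD_PTRS = 1024 - 768       = 256    /* pgd slots reserved for kernel */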
+
+/* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8*1024*1024)
+#define VMALLOC_START  (((unsigned long) high_memory + vmalloc_earlyreserve + \
+                       2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END   (PKMAP_BASE-2*PAGE_SIZE)
+#else
+# define VMALLOC_END   (FIXADDR_START-2*PAGE_SIZE)
+#endif
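
As a worked example (both input values are assumptions): if high_memory ends at 0xC8000000 and vmalloc_earlyreserve is 0, then

    VMALLOC_START = (0xC8000000 + 0 + 2*0x800000 - 1) & ~(0x800000 - 1)
                  = 0xC8800000

which leaves exactly the intended 8MB guard hole between the end of lowmem at 0xC8000000 and the first vmalloc address.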
+
+/*
+ * The 4MB page bit is guesswork: it is detailed in the infamous "Chapter H"
+ * of the Pentium documentation, but assuming Intel did the straightforward
+ * thing, this bit set in the page directory entry just means that
+ * the page directory entry points directly to a 4MB-aligned block of
+ * memory. 
+ */
+#define _PAGE_BIT_PRESENT      0
+#define _PAGE_BIT_RW           1
+#define _PAGE_BIT_USER         2
+#define _PAGE_BIT_PWT          3
+#define _PAGE_BIT_PCD          4
+#define _PAGE_BIT_ACCESSED     5
+#define _PAGE_BIT_DIRTY                6
+#define _PAGE_BIT_PSE          7       /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_BIT_GLOBAL       8       /* Global TLB entry PPro+ */
+#define _PAGE_BIT_UNUSED1      9       /* available for programmer */
+#define _PAGE_BIT_UNUSED2      10
+#define _PAGE_BIT_UNUSED3      11
+#define _PAGE_BIT_NX           63
+
+#define _PAGE_PRESENT  0x001
+#define _PAGE_RW       0x002
+#define _PAGE_USER     0x004
+#define _PAGE_PWT      0x008
+#define _PAGE_PCD      0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY    0x040
+#define _PAGE_PSE      0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_GLOBAL   0x100   /* Global TLB entry PPro+ */
+#define _PAGE_UNUSED1  0x200   /* available for programmer */
+#define _PAGE_UNUSED2  0x400
+#define _PAGE_UNUSED3  0x800
+
+#define _PAGE_FILE     0x040   /* set:pagecache unset:swap */
+#define _PAGE_PROTNONE 0x080   /* If not present */
+#ifdef CONFIG_X86_PAE
+#define _PAGE_NX       (1ULL<<_PAGE_BIT_NX)
+#else
+#define _PAGE_NX       0
+#endif
+
+#define _PAGE_TABLE    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE \
+       __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED \
+       __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+
+#define PAGE_SHARED_EXEC \
+       __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC \
+       __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC \
+       __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY \
+       PAGE_COPY_NOEXEC
+#define PAGE_READONLY \
+       __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC \
+       __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define _PAGE_KERNEL \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
+#define _PAGE_KERNEL_EXEC \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+#define __PAGE_KERNEL_RO               (__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_NOCACHE          (__PAGE_KERNEL | _PAGE_PCD)
+#define __PAGE_KERNEL_LARGE            (__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC       (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#define PAGE_KERNEL            __pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO         __pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC       __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_NOCACHE    __pgprot(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_LARGE      __pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
+
+/*
+ * The i386 can't do page protection for execute: it treats execute
+ * permission as read permission. Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+/*
+ * Define this if things work differently on an i386 and an i486:
+ * it will (on an i486) warn about kernel memory accesses that are
+ * done without a 'verify_area(VERIFY_WRITE,..)'
+ */
+#undef TEST_VERIFY_AREA
+
+/* The boot page tables (all created as a single array) */
+extern unsigned long pg0[];
+
+#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp)  do { set_pte(xp, __pte(0)); } while (0)
+
+#define pmd_none(x)    (!pmd_val(x))
+/* pmd_present doesn't just test the _PAGE_PRESENT bit, since Xen's writable
+   page tables (wr.p.t.) can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x)     ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+
+
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_user(pte_t pte)          { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_read(pte_t pte)          { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_dirty(pte_t pte)         { return (pte).pte_low & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)         { return (pte).pte_low & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)         { return (pte).pte_low & _PAGE_RW; }
+
+/*
+ * The following only works if pte_present() is not true.
+ */
+static inline int pte_file(pte_t pte)          { return (pte).pte_low & _PAGE_FILE; }
+
+static inline pte_t pte_rdprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_exprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_mkclean(pte_t pte)     { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)       { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkread(pte_t pte)      { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte)      { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)     { (pte).pte_low |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)     { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)     { (pte).pte_low |= _PAGE_RW; return pte; }
+
+#ifdef CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+#else
+# include <asm/pgtable-2level.h>
+#endif
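
These accessors operate on a pte_t value, not through the pointer, so updates are composed first and installed once. A minimal sketch of upgrading a present PTE, assuming ptep is valid and the caller already holds the page-table lock:

    pte_t pte = *ptep;
    if (pte_present(pte)) {
            /* mark the entry young, dirty and writable, then reinstall it */
            pte = pte_mkwrite(pte_mkdirty(pte_mkyoung(pte)));
            set_pte(ptep, pte);     /* set_pte() comes from the 2/3-level header */
    }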
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+       if (!pte_dirty(*ptep))
+               return 0;
+       return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+       if (!pte_young(*ptep))
+               return 0;
+       return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
+}
+
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+       if (pte_write(*ptep))
+               clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+}
+
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+       if (!pte_dirty(*ptep))
+               set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ * On processors which do not support it, this is a no-op.
+ */
+#define pgprot_noncached(prot)                                         \
+       ((boot_cpu_data.x86 > 3)                                        \
+        ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))         \
+        : (prot))
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+       pte.pte_low &= _PAGE_CHG_MASK;
+       pte.pte_low |= pgprot_val(newprot);
+#ifdef CONFIG_X86_PAE
+       /*
+        * Chop off the NX bit (if present), and add the NX portion of
+        * the newprot (if present):
+        */
+       pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+       pte.pte_high |= (pgprot_val(newprot) >> 32) & \
+                                       (__supported_pte_mask >> 32);
+#endif
+       return pte;
+}
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_large(pmd) \
+((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
+
+/*
+ * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * this macro returns the index of the entry in the pgd page which would
+ * control the given virtual address
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pgd_index_k(addr) pgd_index(addr)
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
+ */
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/*
+ * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * this macro returns the index of the entry in the pmd page which would
+ * control the given virtual address
+ */
+#define pmd_index(address) \
+               (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/*
+ * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * this macro returns the index of the entry in the pte page which would
+ * control the given virtual address
+ */
+#define pte_index(address) \
+               (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, address) \
+       ((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
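
A worked lookup, assuming the 2-level layout (PGDIR_SHIFT = 22, PAGE_SHIFT = 12, 1024 entries per level) and the kernel address 0xC0101234:

    pgd_index(0xC0101234) = 0xC0101234 >> 22          = 0x300 (768)
    pte_index(0xC0101234) = (0xC0101234 >> 12) & 1023 = 0x101 (257)

so the address is controlled by pgd slot 768, and by pte slot 257 of the page table that slot points to.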
+
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address);
+
+/*
+ * Make a given kernel text page executable/non-executable.
+ * Returns the previous executability setting of that page (which
+ * is used to restore the previous state). Used by the SMP bootup code.
+ * NOTE: this is an __init function for security reasons.
+ */
+#ifdef CONFIG_X86_PAE
+ extern int set_kernel_exec(unsigned long vaddr, int enable);
+#else
+ static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0; }
+#endif
+
+extern void noexec_setup(const char *str);
+
+#if defined(CONFIG_HIGHPTE)
+#define pte_offset_map(dir, address) \
+       ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
+        pte_index(address))
+#define pte_offset_map_nested(dir, address) \
+       ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \
+        pte_index(address))
+#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+#else
+#define pte_offset_map(dir, address) \
+       ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
+#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+#endif
+
+/*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ *
+ * Also, we only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPUs that might be updating the dirty
+ * bit at the same time.
+ */
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+       do {                                                              \
+               if (__dirty) {                                            \
+                       if ( likely((__vma)->vm_mm == current->mm) ) {    \
+                               HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
+                       } else {                                          \
+                               xen_l1_entry_update((__ptep), (__entry).pte_low); \
+                               flush_tlb_page((__vma), (__address));     \
+                       }                                                 \
+               }                                                         \
+       } while (0)
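
Stripped of the macro plumbing, the two update paths above reduce to the following sketch, with va, pte, ptep and vma assumed to be supplied by the caller:

    if (vma->vm_mm == current->mm) {
            /* our own address space: a single hypercall both writes the
             * PTE and flushes the stale TLB entry */
            HYPERVISOR_update_va_mapping(va, pte, UVMF_INVLPG);
    } else {
            /* foreign address space: queue the L1 update with Xen, then
             * flush explicitly */
            xen_l1_entry_update(ptep, pte.pte_low);
            flush_tlb_page(vma, va);
    }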
+
+#define __HAVE_ARCH_PTEP_ESTABLISH
+#define ptep_establish(__vma, __address, __ptep, __entry)              \
+do {                                                                   \
+       ptep_set_access_flags(__vma, __address, __ptep, __entry, 1);    \
+} while (0)
+
+#define __HAVE_ARCH_PTEP_ESTABLISH_NEW
+#define ptep_establish_new(__vma, __address, __ptep, __entry)          \
+do {                                                                   \
+       if (likely((__vma)->vm_mm == current->mm)) {                    \
+               HYPERVISOR_update_va_mapping((__address),               \
+                                            __entry, 0);               \
+       } else {                                                        \
+               xen_l1_entry_update((__ptep), (__entry).pte_low);       \
+       }                                                               \
+} while (0)
+
+#ifndef CONFIG_XEN_SHADOW_MODE
+void make_lowmem_page_readonly(void *va);
+void make_lowmem_page_writable(void *va);
+void make_page_readonly(void *va);
+void make_page_writable(void *va);
+void make_pages_readonly(void *va, unsigned int nr);
+void make_pages_writable(void *va, unsigned int nr);
+#else
+#define make_lowmem_page_readonly(_va) ((void)0)
+#define make_lowmem_page_writable(_va) ((void)0)
+#define make_page_readonly(_va)        ((void)0)
+#define make_page_writable(_va)        ((void)0)
+#define make_pages_readonly(_va, _nr)  ((void)0)
+#define make_pages_writable(_va, _nr)  ((void)0)
+#endif
+
+#define virt_to_ptep(__va)                                             \
+({                                                                     \
+       pgd_t *__pgd = pgd_offset_k((unsigned long)(__va));             \
+       pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va));        \
+       pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va));        \
+       pte_offset_kernel(__pmd, (unsigned long)(__va));                \
+})
+
+#define arbitrary_virt_to_machine(__va)                                \
+({                                                                     \
+       pte_t *__pte = virt_to_ptep(__va);                              \
+       unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK;     \
+       __pa | ((unsigned long)(__va) & (PAGE_SIZE-1));                 \
+})
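
A typical use of arbitrary_virt_to_machine() is converting the kernel virtual address of an in-memory object into the machine address that hypercall arguments expect; a hedged sketch (buf is a hypothetical buffer):

    static char buf[64];
    unsigned long maddr = arbitrary_virt_to_machine(buf);
    /* maddr is the machine address backing buf, page offset included */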
+
+#endif /* !__ASSEMBLY__ */
+
+#ifndef CONFIG_DISCONTIGMEM
+#define kern_addr_valid(addr)  (1)
+#endif /* !CONFIG_DISCONTIGMEM */
+
+int direct_remap_area_pages(struct mm_struct *mm,
+                            unsigned long address, 
+                            unsigned long machine_addr,
+                            unsigned long size, 
+                            pgprot_t prot,
+                            domid_t  domid);
+int __direct_remap_area_pages(struct mm_struct *mm,
+                             unsigned long address, 
+                             unsigned long size, 
+                             mmu_update_t *v);
+
+#define io_remap_page_range(vma,from,phys,size,prot) \
+direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
+
+#define io_remap_pfn_range(vma,from,pfn,size,prot) \
+direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTEP_MKDIRTY
+#define __HAVE_ARCH_PTE_SAME
+#include <asm-generic/pgtable.h>
+
+#endif /* _I386_PGTABLE_H */
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,906 @@
+/*
+ *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/pgtable.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_XEN
+#include <asm/fixmap.h>
+#endif
+
+void (*pm_power_off)(void) = NULL;
+
+#ifdef CONFIG_X86_64
+
+static inline void  acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
+extern void __init clustered_apic_check(void);
+static inline int ioapic_setup_disabled(void) { return 0; }
+#include <asm/proto.h>
+
+#else  /* X86 */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* X86 */
+
+#define BAD_MADT_ENTRY(entry, end) (                                       \
+               (!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
+               ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX                 "ACPI: "
+
+#ifdef CONFIG_ACPI_PCI
+int acpi_noirq __initdata;     /* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
+#else
+int acpi_noirq __initdata = 1;
+int acpi_pci_disabled __initdata = 1;
+#endif
+int acpi_ht __initdata = 1;    /* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES       256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+                       { [0 ... MAX_MADT_ENTRIES-1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+                              Boot-time Configuration
+   -------------------------------------------------------------------------- */
+
+/*
+ * The default interrupt routing model is PIC (8259).  This gets
+ * overridden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id         acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+
+/* rely on all ACPI tables being in the direct mapping */
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+       if (!phys_addr || !size)
+               return NULL;
+
+       if (phys_addr < (end_pfn_map << PAGE_SHIFT))
+               return __va(phys_addr);
+
+       return NULL;
+}
+
+#else
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __acpi_map_table() call.
+ *
+ * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+       unsigned long base, offset, mapped_size;
+       int idx;
+
+#ifndef CONFIG_XEN
+       if (phys + size < 8*1024*1024) 
+               return __va(phys); 
+#endif
+
+       offset = phys & (PAGE_SIZE - 1);
+       mapped_size = PAGE_SIZE - offset;
+       set_fixmap(FIX_ACPI_END, phys);
+       base = fix_to_virt(FIX_ACPI_END);
+
+       /*
+        * Most cases can be covered by the below.
+        */
+       idx = FIX_ACPI_END;
+       while (mapped_size < size) {
+               if (--idx < FIX_ACPI_BEGIN)
+                       return NULL;    /* cannot handle this */
+               phys += PAGE_SIZE;
+               set_fixmap(idx, phys);
+               mapped_size += PAGE_SIZE;
+       }
+
+       return ((unsigned char *) base + offset);
+}
+#endif
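
A hedged usage sketch, assuming a table lives at physical address phys and the 2.6-era struct acpi_table_header layout: map just enough of it to inspect the generic header, and remember that the fixmap slots are recycled on the next call.

    struct acpi_table_header *hdr;

    hdr = (struct acpi_table_header *) __acpi_map_table(phys, sizeof(*hdr));
    if (hdr && !strncmp(hdr->signature, "APIC", 4))
            printk(KERN_DEBUG PREFIX "found MADT, length %u\n", hdr->length);
    /* the mapping is only valid until __acpi_map_table() is called again */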
+
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_mcfg *mcfg;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+       if (!mcfg) {
+               printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+               return -ENODEV;
+       }
+
+       if (mcfg->base_reserved) {
+               printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+               return -ENODEV;
+       }
+
+       pci_mmcfg_base_addr = mcfg->base_address;
+
+       return 0;
+}
+#else
+#define        acpi_parse_mcfg NULL
+#endif /* !CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init
+acpi_parse_madt (
+       unsigned long           phys_addr,
+       unsigned long           size)
+{
+       struct acpi_table_madt  *madt = NULL;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
+       if (!madt) {
+               printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+               return -ENODEV;
+       }
+
+       if (madt->lapic_address) {
+               acpi_lapic_addr = (u64) madt->lapic_address;
+
+               printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+                       madt->lapic_address);
+       }
+
+       acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+       
+       return 0;
+}
+
+
+static int __init
+acpi_parse_lapic (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_lapic *processor = NULL;
+
+       processor = (struct acpi_table_lapic*) header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* no utility in registering a disabled processor */
+       if (processor->flags.enabled == 0)
+               return 0;
+
+       x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+       mp_register_lapic (
+               processor->id,                                     /* APIC ID */
+               processor->flags.enabled);                        /* Enabled? */
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+       lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
+
+       if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+               return -EINVAL;
+
+       acpi_lapic_addr = lapic_addr_ovr->address;
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+       lapic_nmi = (struct acpi_table_lapic_nmi*) header;
+
+       if (BAD_MADT_ENTRY(lapic_nmi, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (lapic_nmi->lint != 1)
+               printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+       return 0;
+}
+
+
+#endif /*CONFIG_X86_LOCAL_APIC*/
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+static int __init
+acpi_parse_ioapic (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_ioapic *ioapic = NULL;
+
+       ioapic = (struct acpi_table_ioapic*) header;
+
+       if (BAD_MADT_ENTRY(ioapic, end))
+               return -EINVAL;
+ 
+       acpi_table_print_madt_entry(header);
+
+       mp_register_ioapic (
+               ioapic->id,
+               ioapic->address,
+               ioapic->global_irq_base);
+ 
+       return 0;
+}
+
+/*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+static void
+acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+{
+       if (trigger == 0)       /* compatible SCI trigger is level */
+               trigger = 3;
+
+       if (polarity == 0)      /* compatible SCI polarity is low */
+               polarity = 3;
+
+       /* Command-line over-ride via acpi_sci= */
+       if (acpi_sci_flags.trigger)
+               trigger = acpi_sci_flags.trigger;
+
+       if (acpi_sci_flags.polarity)
+               polarity = acpi_sci_flags.polarity;
+
+       /*
+        * mp_config_acpi_legacy_irqs() already setup IRQs < 16
+        * If GSI is < 16, this will update its flags,
+        * else it will create a new mp_irqs[] entry.
+        */
+       mp_override_legacy_irq(gsi, polarity, trigger, gsi);
+
+       /*
+        * stash over-ride to indicate we've been here
+        * and for later update of acpi_fadt
+        */
+       acpi_sci_override_gsi = gsi;
+       return;
+}
+
+static int __init
+acpi_parse_int_src_ovr (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_int_src_ovr *intsrc = NULL;
+
+       intsrc = (struct acpi_table_int_src_ovr*) header;
+
+       if (BAD_MADT_ENTRY(intsrc, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (intsrc->bus_irq == acpi_fadt.sci_int) {
+               acpi_sci_ioapic_setup(intsrc->global_irq,
+                       intsrc->flags.polarity, intsrc->flags.trigger);
+               return 0;
+       }
+
+       if (acpi_skip_timer_override &&
+               intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+                       printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+                       return 0;
+       }
+
+       mp_override_legacy_irq (
+               intsrc->bus_irq,
+               intsrc->flags.polarity,
+               intsrc->flags.trigger,
+               intsrc->global_irq);
+
+       return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_nmi_src *nmi_src = NULL;
+
+       nmi_src = (struct acpi_table_nmi_src*) header;
+
+       if (BAD_MADT_ENTRY(nmi_src, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* TBD: Support nmi_src entries? */
+
+       return 0;
+}
+
+#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
+
+#ifdef CONFIG_ACPI_BUS
+
+/*
+ * acpi_pic_sci_set_trigger()
+ * 
+ * use ELCR to set PIC-mode trigger type for SCI
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
+ * it may require Edge Trigger -- use "acpi_sci=edge"
+ *
+ * Ports 0x4d0-0x4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
+ * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ELCR1 covers IRQs 0-7  (IRQ 0, 1, 2 must be 0)
+ * ELCR2 covers IRQs 8-15 (IRQ 8, 13 must be 0)
+ */
+
+void __init
+acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
+{
+       unsigned int mask = 1 << irq;
+       unsigned int old, new;
+
+       /* Real old ELCR mask */
+       old = inb(0x4d0) | (inb(0x4d1) << 8);
+
+       /*
+        * If we use ACPI to set PCI irq's, then we should clear ELCR
+        * since we will set it correctly as we enable the PCI irq
+        * routing.
+        */
+       new = acpi_noirq ? old : 0;
+
+       /*
+        * Update SCI information in the ELCR, it isn't in the PCI
+        * routing tables..
+        */
+       switch (trigger) {
+       case 1: /* Edge - clear */
+               new &= ~mask;
+               break;
+       case 3: /* Level - set */
+               new |= mask;
+               break;
+       }
+
+       if (old == new)
+               return;
+
+       printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+       outb(new, 0x4d0);
+       outb(new >> 8, 0x4d1);
+}
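
A worked example with assumed values: level-triggering IRQ 9 (the usual ACPI SCI) means mask = 1 << 9 = 0x0200, so only bit 1 of ELCR2 changes and the second outb() carries it to port 0x4d1:

    old = inb(0x4d0) | (inb(0x4d1) << 8);   /* say 0x0C00: IRQs 10, 11 level */
    new = old | (1 << 9);                   /* 0x0E00: IRQ 9 now level too   */
    outb(new, 0x4d0);                       /* low byte unchanged            */
    outb(new >> 8, 0x4d1);                  /* writes 0x0E                   */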
+
+
+#endif /* CONFIG_ACPI_BUS */
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (use_pci_vector() && !platform_legacy_irq(gsi))
+               *irq = IO_APIC_VECTOR(gsi);
+       else
+#endif
+               *irq = gsi;
+       return 0;
+}
+
+unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
+{
+       unsigned int irq;
+       unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_X86_IO_APIC
+       if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+               plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
+       }
+#endif
+       acpi_gsi_to_irq(plat_gsi, &irq);
+       return irq;
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+
+/*
+ *  ACPI based hotplug support for CPU
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int
+acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+       /* TBD */
+       return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+
+int
+acpi_unmap_lsapic(int cpu)
+{
+       /* TBD */
+       return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+static unsigned long __init
+acpi_scan_rsdp (
+       unsigned long           start,
+       unsigned long           length)
+{
+       unsigned long           offset = 0;
+       unsigned long           sig_len = sizeof("RSD PTR ") - 1;
+       unsigned long           vstart = (unsigned long)isa_bus_to_virt(start);
+
+       /*
+        * Scan all 16-byte boundaries of the physical memory region for the
+        * RSDP signature.
+        */
+       for (offset = 0; offset < length; offset += 16) {
+               if (strncmp((char *) (vstart + offset), "RSD PTR ", sig_len))
+                       continue;
+               return (start + offset);
+       }
+
+       return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_sbf *sb;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
+       if (!sb) {
+               printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+               return -ENODEV;
+       }
+
+       sbf_port = sb->sbf_cmos; /* Save CMOS port */
+
+       return 0;
+}
+
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+       struct acpi_table_hpet *hpet_tbl;
+
+       if (!phys || !size)
+               return -EINVAL;
+
+       hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
+       if (!hpet_tbl) {
+               printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+               return -ENODEV;
+       }
+
+       if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+               printk(KERN_WARNING PREFIX "HPET timers must be located in "
+                      "memory.\n");
+               return -1;
+       }
+
+#ifdef CONFIG_X86_64
+        vxtime.hpet_address = hpet_tbl->addr.addrl |
+                ((long) hpet_tbl->addr.addrh << 32);
+
+        printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+               hpet_tbl->id, vxtime.hpet_address);
+#else  /* X86 */
+       {
+               extern unsigned long hpet_address;
+
+               hpet_address = hpet_tbl->addr.addrl;
+               printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+                       hpet_tbl->id, hpet_address);
+       }
+#endif /* X86 */
+
+       return 0;
+}
+#else
+#define        acpi_parse_hpet NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+       struct fadt_descriptor_rev2 *fadt = NULL;
+
+       fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
+       if(!fadt) {
+               printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+               return 0;
+       }
+
+#ifdef CONFIG_ACPI_INTERPRETER
+       /* initialize sci_int early for INT_SRC_OVR MADT parsing */
+       acpi_fadt.sci_int = fadt->sci_int;
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+       /* detect the location of the ACPI PM Timer */
+       if (fadt->revision >= FADT2_REVISION_ID) {
+               /* FADT rev. 2 */
+               if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
+                       return 0;
+
+               pmtmr_ioport = fadt->xpm_tmr_blk.address;
+       } else {
+               /* FADT rev. 1 */
+               pmtmr_ioport = fadt->V1_pm_tmr_blk;
+       }
+       if (pmtmr_ioport)
+               printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", 
pmtmr_ioport);
+#endif
+       return 0;
+}
+
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+       unsigned long           rsdp_phys = 0;
+
+       if (efi_enabled) {
+               if (efi.acpi20)
+                       return __pa(efi.acpi20);
+               else if (efi.acpi)
+                       return __pa(efi.acpi);
+       }
+       /*
+        * Scan memory looking for the RSDP signature. First search EBDA (low
+        * memory) paragraphs and then search upper memory (E0000-FFFFF).
+        */
+       rsdp_phys = acpi_scan_rsdp (0, 0x400);
+       if (!rsdp_phys)
+               rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
+
+       set_fixmap(FIX_ACPI_RSDP_PAGE, rsdp_phys);
+
+       return rsdp_phys;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_lapic_entries(void)
+{
+       int count;
+
+       /* 
+        * Note that the LAPIC address is obtained from the MADT (32-bit value)
+        * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
+        */
+
+       count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+               return count;
+       }
+
+       mp_register_lapic_address(acpi_lapic_addr);
+
+       count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+                                      MAX_APICS);
+       if (!count) { 
+               printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return -ENODEV;
+       }
+       else if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+       return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_ioapic_entries(void)
+{
+       int count;
+
+       /*
+        * ACPI interpreter is required to complete interrupt setup,
+        * so if it is off, don't enumerate the io-apics with ACPI.
+        * If MPS is present, it will handle them,
+        * otherwise the system will stay in PIC mode
+        */
+       if (acpi_disabled || acpi_noirq) {
+               return -ENODEV;
+        }
+
+       /*
+        * if "noapic" boot option, don't look for IO-APICs
+        */
+       if (skip_ioapic_setup) {
+               printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+                       "due to 'noapic' option.\n");
+               return -ENODEV;
+       }
+
+       count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
+       if (!count) {
+               printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+               return -ENODEV;
+       }
+       else if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+               return count;
+       }
+
+       count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing interrupt source override entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       /*
+        * If BIOS did not supply an INT_SRC_OVR for the SCI
+        * pretend we got one so we can set the SCI flags.
+        */
+       if (!acpi_sci_override_gsi)
+               acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+       /* Fill in identity legacy mappings where there is no override */
+       mp_config_acpi_legacy_irqs();
+
+       count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+       return -1;
+}
+#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
+
+
+static void __init
+acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+       int count, error;
+
+       count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+       if (count >= 1) {
+
+               /*
+                * Parse MADT LAPIC entries
+                */
+               error = acpi_parse_madt_lapic_entries();
+               if (!error) {
+                       acpi_lapic = 1;
+
+                       /*
+                        * Parse MADT IO-APIC entries
+                        */
+                       error = acpi_parse_madt_ioapic_entries();
+                       if (!error) {
+                               acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+                               acpi_irq_balance_set(NULL);
+                               acpi_ioapic = 1;
+
+                               smp_found_config = 1;
+                               clustered_apic_check();
+                       }
+               }
+               if (error == -EINVAL) {
+                       /*
+                        * Dell Precision Workstation 410, 610 come here.
+                        */
+                       printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling 
ACPI\n");
+                       disable_acpi();
+               }
+       }
+#endif
+       return;
+}
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ *  called from setup_arch(), always.
+ *     1. checksums all tables
+ *     2. enumerates lapics
+ *     3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ *     acpi_lapic = 1 if LAPIC found
+ *     acpi_ioapic = 1 if IOAPIC found
+ *     if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ *     if acpi_blacklisted() acpi_disabled = 1;
+ *     acpi_irq_model=...
+ *     ...
+ *
+ * return value: (currently ignored)
+ *     0: success
+ *     !0: failure
+ */
+
+int __init
+acpi_boot_table_init(void)
+{
+       int error;
+
+       /*
+        * If acpi_disabled, bail out
+        * One exception: acpi=ht continues far enough to enumerate LAPICs
+        */
+       if (acpi_disabled && !acpi_ht)
+                return 1;
+
+       /* 
+        * Initialize the ACPI boot-time table parser.
+        */
+       error = acpi_table_init();
+       if (error) {
+               disable_acpi();
+               return error;
+       }
+
+#ifdef __i386__
+       check_acpi_pci();
+#endif
+
+       acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+       /*
+        * blacklist may disable ACPI entirely
+        */
+       error = acpi_blacklisted();
+       if (error) {
+               extern int acpi_force;
+
+               if (acpi_force) {
+                       printk(KERN_WARNING PREFIX "acpi=force override\n");
+               } else {
+                       printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+                       disable_acpi();
+                       return error;
+               }
+       }
+
+       return 0;
+}
+
+
+int __init acpi_boot_init(void)
+{
+       /*
+        * If acpi_disabled, bail out
+        * One exception: acpi=ht continues far enough to enumerate LAPICs
+        */
+       if (acpi_disabled && !acpi_ht)
+                return 1;
+
+       acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+       /*
+        * set sci_int and PM timer address
+        */
+       acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+       /*
+        * Process the Multiple APIC Description Table (MADT), if present
+        */
+       acpi_process_madt();
+
+       acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+       acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+
+       return 0;
+}
+
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,451 @@
+/******************************************************************************
+ * blktap_datapath.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ * -- see full header in blktap.c
+ */
+ 
+#include "blktap.h"
+#include <asm-xen/evtchn.h>
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+
+active_req_t     active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX  active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
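
A short worked trace of this free-index ring, using only the definitions above: at init (see active_reqs_init() below), active_cons = 0 and active_prod = MAX_ACTIVE_REQS = 64, so NR_ACTIVE_REQS = 64 - 64 + 0 = 0 requests are in flight. Three get_active_req() calls advance active_cons to 3, giving NR_ACTIVE_REQS = 64 - 64 + 3 = 3; each later free_active_req() pushes its index back at active_prod, restoring the balance.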
+
+inline active_req_t *get_active_req(void) 
+{
+    ACTIVE_RING_IDX idx;
+    active_req_t *ar;
+    unsigned long flags;
+        
+    ASSERT(active_cons != active_prod);   
+    
+    spin_lock_irqsave(&active_req_lock, flags);
+    idx =  active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+    ar = &active_reqs[idx];
+    spin_unlock_irqrestore(&active_req_lock, flags);
+    
+    return ar;
+}
+
+inline void free_active_req(active_req_t *ar) 
+{
+    unsigned long flags;
+        
+    spin_lock_irqsave(&active_req_lock, flags);
+    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+    spin_unlock_irqrestore(&active_req_lock, flags);
+}
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+    return &active_reqs[idx];   
+}
+
+void active_reqs_init(void)
+{
+    ACTIVE_RING_IDX i;
+    
+    active_cons = 0;
+    active_prod = MAX_ACTIVE_REQS;
+    memset(active_reqs, 0, sizeof(active_reqs));
+    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+        active_req_ring[i] = i;
+}
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message.  In it we put the AR index _AND_ the fe domid.
+ * The domid is used by the backend to map the pages properly.
+ */
+
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+    return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
+}
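
Given this packing, the inverse accessors are just a mask and a shift. ID_TO_IDX is used below and presumably lives in blktap.h; ID_TO_DOM is shown as the assumed counterpart:

    #define ID_TO_IDX(_id) ((_id) & 0x0000ffff)   /* low bits: active request index */
    #define ID_TO_DOM(_id) ((_id) >> 16)          /* high bits: frontend domid      */

For example, MAKE_ID(5, 12) yields (5 << 16) | 12 = 0x5000C; ID_TO_IDX() recovers 12 and ID_TO_DOM() recovers 5.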
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+static void maybe_trigger_blktap_schedule(void);
+
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+    blkif_response_t *resp_d;
+    active_req_t *ar;
+    
+    ar = &active_reqs[ID_TO_IDX(rsp->id)];
+    rsp->id = ar->id;
+            
+    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+            blkif->blk_ring.rsp_prod_pvt);
+    memcpy(resp_d, rsp, sizeof(blkif_response_t));
+    wmb();
+    blkif->blk_ring.rsp_prod_pvt++;
+            
+    blkif_put(ar->blkif);
+    free_active_req(ar);
+    
+    return 0;
+}
+
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+    blkif_request_t *req_d;
+
+    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+        WPRINTK("Tap trying to access an unconnected backend!\n");
+        return 0;
+    }
+    
+    req_d = RING_GET_REQUEST(&blktap_be_ring,
+            blktap_be_ring.req_prod_pvt);
+    memcpy(req_d, req, sizeof(blkif_request_t));
+    wmb();
+    blktap_be_ring.req_prod_pvt++;
+            
+    return 0;
+}
+
+void kick_fe_domain(blkif_t *blkif) 
+{
+    RING_PUSH_RESPONSES(&blkif->blk_ring);
+    notify_via_evtchn(blkif->evtchn);
+    DPRINTK("notified FE(dom %u)\n", blkif->domid);
+
+    /* We just freed up a batch of request slots... */
+    maybe_trigger_blktap_schedule();
+    
+}
+
+void kick_be_domain(void)
+{
+    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) 
+        return;
+    
+    wmb(); /* Ensure that the frontend can see the requests. */
+    RING_PUSH_REQUESTS(&blktap_be_ring);
+    notify_via_evtchn(blktap_be_evtchn);
+    DPRINTK("notified BE\n");
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*-----[ Scheduler list maint -from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+    {
+        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
+static int blkio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    blkif_t          *blkif;
+    struct list_head *ent;
+
+    daemonize(
+        "xentapd"
+        );
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&blkio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || 
+             list_empty(&blkio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&blkio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+                !list_empty(&blkio_schedule_list) )
+        {
+            ent = blkio_schedule_list.next;
+            blkif = list_entry(ent, blkif_t, blkdev_list);
+            blkif_get(blkif);
+            remove_from_blkdev_list(blkif);
+            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(blkif);
+            blkif_put(blkif);
+        }
+    }
+}
+
+static void maybe_trigger_blktap_schedule(void)
+{
+    /*
+     * Needed so that two processes, which together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS/2)) &&
+         !list_empty(&blkio_schedule_list) ) 
+        wake_up(&blkio_schedule_wait);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+    remove_from_blkdev_list(blkif);
+}
+
+void __init blkdev_schedule_init(void)
+{
+    spin_lock_init(&blkio_schedule_list_lock);
+    INIT_LIST_HEAD(&blkio_schedule_list);
+
+    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+}
+    
+/*-----[ Interrupt entry from a frontend ]------ */
+
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_blktap_schedule();
+    return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    /* we have pending messages from the real frontend. */
+
+    blkif_request_t *req_s;
+    RING_IDX i, rp;
+    unsigned long flags;
+    active_req_t *ar;
+    int more_to_do = 0;
+    int notify_be = 0, notify_user = 0;
+    
+    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+    
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    rp = blkif->blk_ring.sring->req_prod;
+    rmb();
+    
+    for ( i = blkif->blk_ring.req_cons; 
+         (i != rp) && 
+            !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+          i++ )
+    {
+        
+        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) 
+        {
+            more_to_do = 1;
+            break;
+        }
+        
+        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+        /* This is a new request:  
+         * Assign an active request record, and remap the id. 
+         */
+        ar = get_active_req();
+        ar->id = req_s->id;
+        ar->nr_pages = req_s->nr_segments; 
+        blkif_get(blkif);
+        ar->blkif = blkif;
+        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
+
+        /* FE -> BE interposition point is here. */
+        
+        /* ------------------------------------------------------------- */
+        /* BLKIF_OP_PROBE_HACK:                                          */
+        /* Signal to the backend that we are a tap domain.               */
+
+        if (req_s->operation == BLKIF_OP_PROBE) {
+            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+        }
+
+        /* ------------------------------------------------------------- */
+
+        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+            
+            /* Copy the request message to UFERing */
+            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+            DPRINTK("req->UFERing\n"); 
+            blktap_write_fe_ring(req_s);
+            notify_user = 1;
+        }
+
+        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+            
+            /* BE included to prevent noise from the FE when it's off */
+            /* copy the request message to the BERing */
+
+            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
+                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+                    (unsigned)blktap_be_ring.req_prod_pvt & 
+                    (RING_SIZE(&blktap_be_ring)-1));
+            
+            write_req_to_be_ring(req_s);
+            notify_be = 1;
+        }
+    }
+
+    blkif->blk_ring.req_cons = i;
+    
+    /* unlock rings */
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+    
+    if (notify_user)
+        blktap_kick_user();
+    if (notify_be)
+        kick_be_domain();
+    
+    return more_to_do;
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
+                                  struct pt_regs *ptregs)
+{
+    blkif_response_t  *resp_s;
+    blkif_t *blkif;
+    RING_IDX rp, i;
+    unsigned long flags;
+
+    DPRINTK("PT got BE interrupt.\n");
+
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+    
+    rp = blktap_be_ring.sring->rsp_prod;
+    rmb();
+      
+    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+    {
+        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+        
+        /* BE -> FE interposition point is here. */
+    
+        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+        
+        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+            /* Copy the response message to UBERing */
+            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+            DPRINTK("rsp->UBERing\n"); 
+            blktap_write_be_ring(resp_s);
+            blktap_kick_user();
+
+        }
+       
+        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+            
+            /* (fe included to prevent random interference from the BE) */
+            /* Copy the response message to FERing */
+         
+            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
+                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+                    (unsigned)blkif->blk_ring.rsp_prod_pvt & 
+                    (RING_SIZE(&blkif->blk_ring)-1));
+
+            write_resp_to_fe_ring(blkif, resp_s);
+            kick_fe_domain(blkif);
+
+        }
+    }
+    
+    blktap_be_ring.rsp_cons = i;
+    
+
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+    
+    return IRQ_HANDLED;
+}
+
+/* Debug : print the current ring indices. */
+
+void print_be_ring_idxs(void)
+{
+    if (blktap_be_ring.sring != NULL) {
+        WPRINTK("BE Ring: \n--------\n");
+        WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
+            "| req_prod: %2d, rsp_prod: %2d\n",
+            blktap_be_ring.rsp_cons,
+            blktap_be_ring.req_prod_pvt,
+            blktap_be_ring.sring->req_prod,
+            blktap_be_ring.sring->rsp_prod);
+    }
+}        
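
The FE/BE routing above reduces to two predicates over blktap_mode: a request is mirrored to the user rings when the tap intercepts or copies that side, and is forwarded to the real backend only when neither side is being intercepted. A minimal user-space sketch of the decision (the flag values are illustrative; the real BLKTAP_MODE_* definitions live in blktap.h, outside this hunk):

    #include <stdio.h>

    /* Illustrative flag values only; see blktap.h for the real ones. */
    #define BLKTAP_MODE_INTERCEPT_FE 0x1
    #define BLKTAP_MODE_INTERCEPT_BE 0x2
    #define BLKTAP_MODE_COPY_FE      0x4
    #define BLKTAP_MODE_COPY_BE      0x8

    /* Mirror a FE request to the user ring? (see do_block_io_op) */
    static int req_to_user(unsigned mode)
    {
        return (mode & (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_COPY_FE)) != 0;
    }

    /* Forward the request to the real BE? Both intercept bits are tested,
     * matching the "prevent noise" comment in do_block_io_op. */
    static int req_to_backend(unsigned mode)
    {
        return (mode & (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)) == 0;
    }

    int main(void)
    {
        unsigned m = BLKTAP_MODE_COPY_FE;
        printf("copy-fe: to_user=%d, to_backend=%d\n",
               req_to_user(m), req_to_backend(m));
        return 0;
    }

In COPY_FE mode both predicates hold, so the request is duplicated to user space and still serviced by the backend.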
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/Makefile  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,592 @@
+VERSION = 2
+PATCHLEVEL = 4
+SUBLEVEL = 30
+EXTRAVERSION =
+
+KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
+
+# SUBARCH always tells us the underlying machine architecture.
+# Unless overridden, by default ARCH is equivalent to SUBARCH.
+# This will be overridden for Xen and UML builds.
+SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
+ARCH ?= $(SUBARCH)
+
+## XXX The following hack can be discarded after users have adjusted to the
+## architectural name change 'xeno' -> 'xen'.
+ifeq ($(ARCH),xeno)
+  ARCH := xen
+endif
+
+KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g")
+
+CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+         else if [ -x /bin/bash ]; then echo /bin/bash; \
+         else echo sh; fi ; fi)
+TOPDIR := $(shell /bin/pwd)
+
+HPATH          = $(TOPDIR)/include
+FINDHPATH      = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net $(HPATH)/math-emu
+
+HOSTCC         = gcc
+HOSTCFLAGS     = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+
+CROSS_COMPILE  =
+
+#
+# Include the make variables (CC, etc...)
+#
+
+AS             = $(CROSS_COMPILE)as
+LD             = $(CROSS_COMPILE)ld
+CC             = $(CROSS_COMPILE)gcc
+CPP            = $(CC) -E
+AR             = $(CROSS_COMPILE)ar
+NM             = $(CROSS_COMPILE)nm
+STRIP          = $(CROSS_COMPILE)strip
+OBJCOPY                = $(CROSS_COMPILE)objcopy
+OBJDUMP                = $(CROSS_COMPILE)objdump
+MAKEFILES      = $(TOPDIR)/.config
+GENKSYMS       = /sbin/genksyms
+DEPMOD         = /sbin/depmod
+MODFLAGS       = -DMODULE
+CFLAGS_KERNEL  =
+PERL           = perl
+AWK            = awk
+RPM            := $(shell if [ -x "/usr/bin/rpmbuild" ]; then echo rpmbuild; \
+                       else echo rpm; fi)
+
+export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \
+       CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
+       CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK
+
+all:   do-it-all
+
+#
+# Make "config" the default target if there is no configuration file or
+# "depend" the target if there is no top-level dependency information.
+#
+
+ifeq (.config,$(wildcard .config))
+include .config
+ifeq (.depend,$(wildcard .depend))
+include .depend
+do-it-all:     Version vmlinux
+else
+CONFIGURATION = depend
+do-it-all:     depend
+endif
+else
+CONFIGURATION = config
+do-it-all:     config
+endif
+
+#
+# INSTALL_PATH specifies where to place the updated kernel and system map
+# images.  Uncomment if you want to place them anywhere other than root.
+#
+
+#export        INSTALL_PATH=/boot
+
+#
+# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+# relocations required by build roots.  This is not defined in the
+# makefile but the argument can be passed to make if needed.
+#
+
+MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+export MODLIB
+
+#
+# standard CFLAGS
+#
+
+CPPFLAGS := -D__KERNEL__ -I$(HPATH)
+
+CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \
+         -fno-strict-aliasing -fno-common
+ifndef CONFIG_FRAME_POINTER
+CFLAGS += -fomit-frame-pointer
+endif
+AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS)
+
+#
+# ROOT_DEV specifies the default root-device when making the image.
+# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
+# the default of FLOPPY is used by 'build'.
+# This is i386 specific.
+#
+
+export ROOT_DEV = CURRENT
+
+#
+# If you want to preset the SVGA mode, uncomment the next line and
+# set SVGA_MODE to whatever number you want.
+# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
+# The number is the same as you would ordinarily press at bootup.
+# This is i386 specific.
+#
+
+export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA
+
+#
+# If you want the RAM disk device, define this to be the size in blocks.
+# This is i386 specific.
+#
+
+#export RAMDISK = -DRAMDISK=512
+
+CORE_FILES     =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o
+NETWORKS       =net/network.o
+
+LIBS           =$(TOPDIR)/lib/lib.a
+SUBDIRS                =kernel drivers mm fs net ipc lib crypto
+
+DRIVERS-n :=
+DRIVERS-y :=
+DRIVERS-m :=
+DRIVERS-  :=
+
+DRIVERS-$(CONFIG_ACPI_BOOT) += drivers/acpi/acpi.o
+DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o
+DRIVERS-y += drivers/char/char.o \
+       drivers/block/block.o \
+       drivers/misc/misc.o \
+       drivers/net/net.o
+DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o
+DRIVERS-$(CONFIG_DRM_NEW) += drivers/char/drm/drm.o
+DRIVERS-$(CONFIG_DRM_OLD) += drivers/char/drm-4.0/drm.o
+DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a
+DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o
+DRIVERS-$(CONFIG_DEV_APPLETALK) += drivers/net/appletalk/appletalk.o
+DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o
+DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o
+DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o
+DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o
+DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o
+DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
+DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o
+DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o
+DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o
+
+ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),)
+DRIVERS-y += drivers/cdrom/driver.o
+endif
+
+DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o
+DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o
+DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o
+DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o
+DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o
+DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o
+DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o
+DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a
+DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o
+DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o
+DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
+DRIVERS-$(CONFIG_PPC32) += drivers/macintosh/macintosh.o
+DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o
+DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o
+DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o
+DRIVERS-$(CONFIG_VT) += drivers/video/video.o
+DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a
+DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o
+DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a
+DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o
+DRIVERS-$(CONFIG_USB_GADGET) += drivers/usb/gadget/built-in.o
+DRIVERS-y +=drivers/media/media.o
+DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o
+DRIVERS-$(CONFIG_HIL) += drivers/hil/hil.o
+DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o
+DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o
+DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o
+DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o
+DRIVERS-$(CONFIG_GSC) += drivers/gsc/gscbus.o
+DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o
+DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o
+DRIVERS-$(CONFIG_ISDN_BOOL) += drivers/isdn/vmlinux-obj.o
+DRIVERS-$(CONFIG_CRYPTO) += crypto/crypto.o
+
+DRIVERS := $(DRIVERS-y)
+
+
+# files removed with 'make clean'
+CLEAN_FILES = \
+       kernel/ksyms.lst include/linux/compile.h \
+       vmlinux System.map \
+       .tmp* \
+       drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \
+       drivers/char/conmakehash \
+       drivers/char/drm/*-mod.c \
+       drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \
+       drivers/zorro/devlist.h drivers/zorro/gen-devlist \
+       drivers/sound/bin2hex drivers/sound/hex2hex \
+       drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \
+       drivers/scsi/aic7xxx/aicasm/aicasm \
+       drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \
+       drivers/scsi/aic7xxx/aicasm/aicasm_gram.h \
+       drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.c \
+       drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.h \
+       drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.c \
+       drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \
+       drivers/scsi/aic7xxx/aicasm/aicdb.h \
+       drivers/scsi/aic7xxx/aicasm/y.tab.h \
+       drivers/scsi/53c700_d.h \
+       drivers/tc/lk201-map.c \
+       net/khttpd/make_times_h \
+       net/khttpd/times.h \
+       submenu* \
+       drivers/ieee1394/oui.c
+# directories removed with 'make clean'
+CLEAN_DIRS = \
+       modules
+
+# files removed with 'make mrproper'
+MRPROPER_FILES = \
+       include/linux/autoconf.h include/linux/version.h \
+       lib/crc32table.h lib/gen_crc32table \
+       drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \
+       drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \
+       drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \
+       drivers/net/hamradio/soundmodem/gentbl \
+       drivers/sound/*_boot.h drivers/sound/.*.boot \
+       drivers/sound/msndinit.c \
+       drivers/sound/msndperm.c \
+       drivers/sound/pndsperm.c \
+       drivers/sound/pndspini.c \
+       drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \
+       .version .config* config.in config.old \
+       scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \
+       scripts/lxdialog/*.o scripts/lxdialog/lxdialog \
+       .menuconfig.log \
+       include/asm \
+       .hdepend scripts/mkdep scripts/split-include scripts/docproc \
+       $(TOPDIR)/include/linux/modversions.h \
+       kernel.spec
+
+# directories removed with 'make mrproper'
+MRPROPER_DIRS = \
+       include/config \
+       $(TOPDIR)/include/linux/modules
+
+
+include arch/$(ARCH)/Makefile
+
+# Extra cflags for kbuild 2.4.  The default is to forbid includes by kernel code
+# from user space headers.  Some UML code requires user space headers, in the
+# UML Makefiles add 'kbuild_2_4_nostdinc :=' before include Rules.make.  No
+# other kernel code should include user space headers, if you need
+# 'kbuild_2_4_nostdinc :=' or -I/usr/include for kernel code and you are not UML
+# then your code is broken!  KAO.
+
+kbuild_2_4_nostdinc    := -nostdinc -iwithprefix include
+export kbuild_2_4_nostdinc
+
+export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL
+
+export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS
+
+.S.s:
+       $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $<
+.S.o:
+       $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $<
+
+Version: dummy
+       @rm -f include/linux/compile.h
+
+boot: vmlinux
+       @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot
+
+vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs
+       $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \
+               --start-group \
+               $(CORE_FILES) \
+               $(DRIVERS) \
+               $(NETWORKS) \
+               $(LIBS) \
+               --end-group \
+               -o vmlinux
+       $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map
+
+symlinks:
+       rm -f include/asm
+       ( cd include ; ln -sf asm-$(ARCH) asm)
+       @if [ ! -d include/linux/modules ]; then \
+               mkdir include/linux/modules; \
+       fi
+
+oldconfig: symlinks
+       $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in
+
+xconfig: symlinks
+       $(MAKE) -C scripts kconfig.tk
+       wish -f scripts/kconfig.tk
+
+menuconfig: include/linux/version.h symlinks
+       $(MAKE) -C scripts/lxdialog all
+       $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in
+
+config: symlinks
+       $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in
+
+include/config/MARKER: scripts/split-include include/linux/autoconf.h
+       scripts/split-include include/linux/autoconf.h include/config
+       @ touch include/config/MARKER
+
+linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS))
+
+$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER
+       $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@)
+
+$(TOPDIR)/include/linux/version.h: include/linux/version.h
+$(TOPDIR)/include/linux/compile.h: include/linux/compile.h
+
+newversion:
+       . scripts/mkversion > .tmpversion
+       @mv -f .tmpversion .version
+
+uts_len                := 64
+uts_truncate   := sed -e 's/\(.\{1,$(uts_len)\}\).*/\1/'
+
+include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion
+       @echo -n \#`cat .version` > .ver1
+       @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver1; fi
+       @if [ -f .name ]; then  echo -n \-`cat .name` >> .ver1; fi
+       @LANG=C echo ' '`date` >> .ver1
+       @echo \#define UTS_VERSION \"`cat .ver1 | $(uts_truncate)`\" > .ver
+       @LANG=C echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver
+       @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver
+       @echo \#define LINUX_COMPILE_HOST \"`hostname | $(uts_truncate)`\" >> .ver
+       @([ -x /bin/dnsdomainname ] && /bin/dnsdomainname > .ver1) || \
+        ([ -x /bin/domainname ] && /bin/domainname > .ver1) || \
+        echo > .ver1
+       @echo \#define LINUX_COMPILE_DOMAIN \"`cat .ver1 | $(uts_truncate)`\" >> .ver
+       @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -n 1`\" >> .ver
+       @mv -f .ver $@
+       @rm -f .ver1
+
+include/linux/version.h: ./Makefile
+       @expr length "$(KERNELRELEASE)" \<= $(uts_len) > /dev/null || \
+         (echo KERNELRELEASE \"$(KERNELRELEASE)\" exceeds $(uts_len) characters >&2; false)
+       @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver
+       @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver
+       @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver
+       @mv -f .ver $@
+
+comma  := ,
+
+init/version.o: init/version.c include/linux/compile.h include/config/MARKER
+       $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(SUBARCH)"' -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o init/version.o init/version.c
+
+init/main.o: init/main.c include/config/MARKER
+       $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $<
+
+init/do_mounts.o: init/do_mounts.c include/config/MARKER
+       $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $<
+
+fs lib mm ipc kernel drivers net: dummy
+       $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@)
+
+TAGS: dummy
+       { find include/asm-${ARCH} -name '*.h' -print ; \
+       find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print ; \
+       find $(SUBDIRS) init arch/${ARCH} -name '*.[chS]' ; } | grep -v SCCS | grep -v '\.svn' | etags -
+
+# Exuberant ctags works better with -I
+tags: dummy
+       CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \
+       ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \
+       find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \
+       find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a
+
+ifdef CONFIG_MODULES
+ifdef CONFIG_MODVERSIONS
+MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h
+endif
+
+.PHONY: modules
+modules: $(patsubst %, _mod_%, $(SUBDIRS))
+
+.PHONY: $(patsubst %, _mod_%, $(SUBDIRS))
+$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER
+       $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules
+
+.PHONY: modules_install
+modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post
+
+.PHONY: _modinst_
+_modinst_:
+       @rm -rf $(MODLIB)/kernel
+       @rm -f $(MODLIB)/build
+       @mkdir -p $(MODLIB)/kernel
+       @ln -s $(TOPDIR) $(MODLIB)/build
+
+# If System.map exists, run depmod.  This deliberately does not have a
+# dependency on System.map since that would run the dependency tree on
+# vmlinux.  This depmod is only for convenience to give the initial
+# boot a modules.dep even before / is mounted read-write.  However the
+# boot script depmod is the master version.
+ifeq "$(strip $(INSTALL_MOD_PATH))" ""
+depmod_opts    :=
+else
+depmod_opts    := -b $(INSTALL_MOD_PATH) -r
+endif
+.PHONY: _modinst_post
+_modinst_post: _modinst_post_pcmcia
+       if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+
+# Backwards compatibility symlinks for people still using old versions
+# of pcmcia-cs with hard coded pathnames on insmod.  Remove
+# _modinst_post_pcmcia for kernel 2.4.1.
+.PHONY: _modinst_post_pcmcia
+_modinst_post_pcmcia:
+       cd $(MODLIB); \
+       mkdir -p pcmcia; \
+       find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia
+
+.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS))
+$(patsubst %, _modinst_%, $(SUBDIRS)) :
+       $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install
+
+# modules disabled....
+
+else
+modules modules_install: dummy
+       @echo
+       @echo "The present kernel configuration has modules disabled."
+       @echo "Type 'make config' and enable loadable module support."
+       @echo "Then build a kernel with module support enabled."
+       @echo
+       @exit 1
+endif
+
+clean: archclean
+       find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \
+               | grep -v lxdialog/ | xargs rm -f
+       rm -f $(CLEAN_FILES)
+       rm -rf $(CLEAN_DIRS)
+       $(MAKE) -C Documentation/DocBook clean
+
+mrproper: clean archmrproper
+       find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f
+       rm -f $(MRPROPER_FILES)
+       rm -rf $(MRPROPER_DIRS)
+       $(MAKE) -C Documentation/DocBook mrproper
+
+distclean: mrproper
+       rm -f core `find . \( -not -type d \) -and \
+               \( -name '*.orig' -o -name '*.rej' -o -name '*~' \
+               -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
+               -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags
+
+backup: mrproper
+       cd .. && tar cf - linux/ | gzip -9 > backup.gz
+       sync
+
+sgmldocs: 
+       chmod 755 $(TOPDIR)/scripts/docgen
+       chmod 755 $(TOPDIR)/scripts/gen-all-syms
+       chmod 755 $(TOPDIR)/scripts/kernel-doc
+       $(MAKE) -C $(TOPDIR)/Documentation/DocBook books
+
+psdocs: sgmldocs
+       $(MAKE) -C Documentation/DocBook ps
+
+pdfdocs: sgmldocs
+       $(MAKE) -C Documentation/DocBook pdf
+
+htmldocs: sgmldocs
+       $(MAKE) -C Documentation/DocBook html
+
+mandocs:
+       chmod 755 $(TOPDIR)/scripts/kernel-doc
+       chmod 755 $(TOPDIR)/scripts/split-man
+       $(MAKE) -C Documentation/DocBook man
+
+sums:
+       find . -type f -print | sort | xargs sum > .SUMS
+
+dep-files: scripts/mkdep archdep include/linux/version.h
+       rm -f .depend .hdepend
+       $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)"
+ifdef CONFIG_MODVERSIONS
+       $(MAKE) update-modverfile
+endif
+       scripts/mkdep -- `find $(FINDHPATH) \( -name SCCS -o -name .svn \) -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend
+       scripts/mkdep -- init/*.c > .depend
+
+ifdef CONFIG_MODVERSIONS
+MODVERFILE := $(TOPDIR)/include/linux/modversions.h
+else
+MODVERFILE :=
+endif
+export MODVERFILE
+
+depend dep: dep-files
+
+checkconfig:
+       find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl
+
+checkhelp:
+       find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl
+
+checkincludes:
+       find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl
+
+ifdef CONFIGURATION
+..$(CONFIGURATION):
+       @echo
+       @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 
'make" $(CONFIGURATION)"'"
+       @echo
+       $(MAKE) $(CONFIGURATION)
+       @echo
+       @echo "Successful. Try re-making (ignore the error that follows)"
+       @echo
+       exit 1
+
+#dummy: ..$(CONFIGURATION)
+dummy:
+
+else
+
+dummy:
+
+endif
+
+include Rules.make
+
+#
+# This generates dependencies for the .h files.
+#
+
+scripts/mkdep: scripts/mkdep.c
+       $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c
+
+scripts/split-include: scripts/split-include.c
+       $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c
+
+#
+# RPM target
+#
+#      If you do a make spec before packing the tarball you can rpm -ta it
+#
+spec:
+       . scripts/mkspec >kernel.spec
+
+#
+#      Build a tar ball, generate an rpm from it and pack the result
+#      There are two bits of magic here
+#      1) The use of /. to avoid tar packing just the symlink
+#      2) Removing the .dep files as they have source paths in them that
+#         will become invalid
+#
+rpm:   clean spec
+       find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f
+       set -e; \
+       cd $(TOPDIR)/.. ; \
+       ln -sf $(TOPDIR) $(KERNELPATH) ; \
+       tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \
+       rm $(KERNELPATH) ; \
+       cd $(TOPDIR) ; \
+       . scripts/mkversion > .version ; \
+       $(RPM) -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \
+       rm $(TOPDIR)/../$(KERNELPATH).tar.gz
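
The include/linux/version.h rule above packs the release into LINUX_VERSION_CODE as VERSION*65536 + PATCHLEVEL*256 + SUBLEVEL, and the KERNEL_VERSION() macro it emits does the same packing with shifts. A quick stand-alone check for this tree (2.4.30):

    #include <stdio.h>

    /* Same packing as the Makefile's `expr` line and the emitted macro. */
    #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))

    int main(void)
    {
        /* 2*65536 + 4*256 + 30 = 132126 */
        printf("LINUX_VERSION_CODE for 2.4.30 = %d\n",
               KERNEL_VERSION(2, 4, 30));
        return 0;
    }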
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,133 @@
+/******************************************************************************
+ * block.h
+ * 
+ * Shared definitions between all levels of XenLinux Virtual block devices.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004-2005, Christian Limpach
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+#include <linux/devfs_fs_kernel.h>
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#if 1 
+#define IPRINTK(fmt, args...) \
+    printk(KERN_INFO "xen_blk: " fmt, ##args)
+#else
+#define IPRINTK(fmt, args...) ((void)0)
+#endif
+
+#if 1 
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_blk: " fmt, ##args)
+#else
+#define WPRINTK(fmt, args...) ((void)0)
+#endif
+ 
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+struct xlbd_type_info {
+    int partn_shift;
+    int disks_per_major;
+    char *devname;
+    char *diskname;
+};
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.  They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct xlbd_major_info {
+    int major;
+    int index;
+    int usage;
+    struct xlbd_type_info *type;
+};
+
+struct xlbd_disk_info {
+    int xd_device;
+    struct xlbd_major_info *mi;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+    struct xlbd_disk_info  *next_waiting;
+    request_queue_t        *rq;
+#endif
+};
+
+typedef struct xen_block {
+    int usage;
+} xen_block_t;
+
+extern spinlock_t blkif_io_lock;
+
+extern int blkif_open(struct inode *inode, struct file *filep);
+extern int blkif_release(struct inode *inode, struct file *filep);
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
+                       unsigned command, unsigned long argument);
+extern int blkif_check(dev_t dev);
+extern int blkif_revalidate(dev_t dev);
+extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+extern void blkif_control_probe_send(
+    blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
+#endif
+extern void do_blkif_request (request_queue_t *rq); 
+
+extern void xlvbd_update_vbds(void);
+
+/* Virtual block-device subsystem. */
+extern int  xlvbd_init(void);
+extern void xlvbd_cleanup(void); 
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
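
The partn_shift field in xlbd_type_info above determines device-number layout: the low partn_shift bits of the minor select the partition, the rest select the disk within the major. A sketch with hypothetical IDE-style values (the real tables live in xlvbd.c, which is not part of this hunk):

    #include <stdio.h>

    struct xlbd_type_info {
        int partn_shift;
        int disks_per_major;
        const char *devname;
        const char *diskname;
    };

    /* Minor = disk index shifted past the partition bits, OR partition. */
    static int xlbd_minor(const struct xlbd_type_info *t, int disk, int part)
    {
        return (disk << t->partn_shift) | part;
    }

    int main(void)
    {
        /* Hypothetical IDE-style entry: 6 partition bits, 2 disks/major. */
        struct xlbd_type_info ide = { 6, 2, "ide", "hd" };
        printf("hda1 -> minor %d\n", xlbd_minor(&ide, 0, 1)); /* 1  */
        printf("hdb  -> minor %d\n", xlbd_minor(&ide, 1, 0)); /* 64 */
        return 0;
    }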
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,328 @@
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen. 
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyway.
+ */
+
+typedef struct { unsigned long a,b; } __attribute__((aligned(16))) xmm_store_t;
+
+/* Doesn't use gcc to save the XMM registers, because there is no easy way to 
+   tell it to do a clts before the register saving. */
+#define XMMS_SAVE do {                         \
+       preempt_disable();                      \
+       if (!(current_thread_info()->status & TS_USEDFPU))      \
+               clts();                         \
+       __asm__ __volatile__ (                  \
+               "movups %%xmm0,(%1)     ;\n\t"  \
+               "movups %%xmm1,0x10(%1) ;\n\t"  \
+               "movups %%xmm2,0x20(%1) ;\n\t"  \
+               "movups %%xmm3,0x30(%1) ;\n\t"  \
+               : "=&r" (cr0)                   \
+               : "r" (xmm_save)                \
+               : "memory");                    \
+} while(0)
+
+#define XMMS_RESTORE do {                      \
+       asm volatile (                          \
+               "sfence                 ;\n\t"  \
+               "movups (%1),%%xmm0     ;\n\t"  \
+               "movups 0x10(%1),%%xmm1 ;\n\t"  \
+               "movups 0x20(%1),%%xmm2 ;\n\t"  \
+               "movups 0x30(%1),%%xmm3 ;\n\t"  \
+               :                               \
+               : "r" (cr0), "r" (xmm_save)     \
+               : "memory");                    \
+       if (!(current_thread_info()->status & TS_USEDFPU))      \
+               stts();                         \
+       preempt_enable();                       \
+} while(0)
+
+#define OFFS(x)                "16*("#x")"
+#define PF_OFFS(x)     "256+16*("#x")"
+#define        PF0(x)          "       prefetchnta "PF_OFFS(x)"(%[p1])         ;\n"
+#define LD(x,y)                "       movaps   "OFFS(x)"(%[p1]), %%xmm"#y"    ;\n"
+#define ST(x,y)                "       movaps %%xmm"#y",   "OFFS(x)"(%[p1])    ;\n"
+#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%[p2])         ;\n"
+#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%[p3])         ;\n"
+#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%[p4])         ;\n"
+#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%[p5])         ;\n"
+#define PF5(x)         "       prefetchnta "PF_OFFS(x)"(%[p6])         ;\n"
+#define XO1(x,y)       "       xorps   "OFFS(x)"(%[p2]), %%xmm"#y"     ;\n"
+#define XO2(x,y)       "       xorps   "OFFS(x)"(%[p3]), %%xmm"#y"     ;\n"
+#define XO3(x,y)       "       xorps   "OFFS(x)"(%[p4]), %%xmm"#y"     ;\n"
+#define XO4(x,y)       "       xorps   "OFFS(x)"(%[p5]), %%xmm"#y"     ;\n"
+#define XO5(x,y)       "       xorps   "OFFS(x)"(%[p6]), %%xmm"#y"     ;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+        unsigned int lines = bytes >> 8;
+       unsigned long cr0;
+       xmm_store_t xmm_save[4];
+
+       XMMS_SAVE;
+
+        asm volatile (
+#undef BLOCK
+#define BLOCK(i) \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq %[inc], %[p1]           ;\n"
+        "       addq %[inc], %[p2]           ;\n"
+               "               decl %[cnt] ; jnz 1b"
+       : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
+       : [inc] "r" (256UL) 
+        : "memory");
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3)
+{
+       unsigned int lines = bytes >> 8;
+       xmm_store_t xmm_save[4];
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq %[inc], %[p1]           ;\n"
+        "       addq %[inc], %[p2]          ;\n"
+        "       addq %[inc], %[p3]           ;\n"
+               "               decl %[cnt] ; jnz 1b"
+       : [cnt] "+r" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+       : [inc] "r" (256UL)
+       : "memory"); 
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4)
+{
+       unsigned int lines = bytes >> 8;
+       xmm_store_t xmm_save[4]; 
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               PF3(i)                                  \
+                               PF3(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               XO3(i,0)                                \
+                       XO3(i+1,1)                      \
+                               XO3(i+2,2)              \
+                                       XO3(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq %[inc], %[p1]           ;\n"
+        "       addq %[inc], %[p2]           ;\n"
+        "       addq %[inc], %[p3]           ;\n"
+        "       addq %[inc], %[p4]           ;\n"
+       "       decl %[cnt] ; jnz 1b"
+       : [cnt] "+c" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+       : [inc] "r" (256UL)
+        : "memory" );
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+        unsigned int lines = bytes >> 8;
+       xmm_store_t xmm_save[4];
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               PF3(i)                                  \
+                               PF3(i+2)                \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               PF4(i)                                  \
+                               PF4(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO3(i,0)                                \
+                       XO3(i+1,1)                      \
+                               XO3(i+2,2)              \
+                                       XO3(i+3,3)      \
+               XO4(i,0)                                \
+                       XO4(i+1,1)                      \
+                               XO4(i+2,2)              \
+                                       XO4(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq %[inc], %[p1]           ;\n"
+        "       addq %[inc], %[p2]           ;\n"
+        "       addq %[inc], %[p3]           ;\n"
+        "       addq %[inc], %[p4]           ;\n"
+        "       addq %[inc], %[p5]           ;\n"
+       "       decl %[cnt] ; jnz 1b"
+       : [cnt] "+c" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4), 
+         [p5] "+r" (p5)
+       : [inc] "r" (256UL)
+       : "memory");
+
+       XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_sse = {
+        .name = "generic_sse",
+        .do_2 = xor_sse_2,
+        .do_3 = xor_sse_3,
+        .do_4 = xor_sse_4,
+        .do_5 = xor_sse_5,
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+       do {                                            \
+               xor_speed(&xor_block_sse);      \
+       } while (0)
+
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched.  */
+#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
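
For reference, the routines above compute a plain in-place XOR; xor_sse_2 is semantically equivalent to the C loop below. The assembly works in 256-byte blocks with non-temporal prefetch, so `bytes` is assumed to be a multiple of 256:

    #include <stddef.h>

    /* Reference semantics of xor_sse_2: p1[i] ^= p2[i] over `bytes` bytes. */
    static void xor_ref_2(unsigned long bytes, unsigned long *p1,
                          const unsigned long *p2)
    {
        size_t i, n = bytes / sizeof(unsigned long);

        for (i = 0; i < n; i++)
            p1[i] ^= p2[i];
    }

xor_sse_3 through xor_sse_5 extend the same loop to XOR three, four or five source buffers into p1.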
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/xen_proc.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/xen_proc.h        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,13 @@
+
+#ifndef __ASM_XEN_PROC_H__
+#define __ASM_XEN_PROC_H__
+
+#include <linux/config.h>
+#include <linux/proc_fs.h>
+
+extern struct proc_dir_entry *create_xen_proc_entry(
+    const char *name, mode_t mode);
+extern void remove_xen_proc_entry(
+    const char *name);
+
+#endif /* __ASM_XEN_PROC_H__ */
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,26 @@
+#ifndef __i386_MMU_H
+#define __i386_MMU_H
+
+#include <asm/semaphore.h>
+/*
+ * The i386 doesn't have an mmu context, but
+ * we put the segment information here.
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
+ */
+typedef struct { 
+       int size;
+       struct semaphore sem;
+       void *ldt;
+       unsigned pinned:1;
+       struct list_head unpinned;
+} mm_context_t;
+
+extern struct list_head mm_unpinned;
+extern spinlock_t mm_unpinned_lock;
+
+/* mm/memory.c:exit_mmap hook */
+extern void _arch_exit_mmap(struct mm_struct *mm);
+#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+
+#endif
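
The pinned bit and unpinned list above let the arch code track which mm contexts have had their page tables pinned (handed to the hypervisor as read-only page tables). A rough user-space model of that bookkeeping; the real mm_pin()/mm_unpin() logic lives in the arch/xen mm code, not in this header:

    #include <stdio.h>

    /* Stand-ins for mm_context_t's pinned bit and the mm_unpinned list. */
    struct ctx {
        unsigned pinned;
        struct ctx *next;   /* models membership of the unpinned list */
    };

    static struct ctx *mm_unpinned;  /* head of the unpinned list */

    static void ctx_init(struct ctx *c)
    {
        c->pinned = 0;
        c->next = mm_unpinned;       /* new contexts start unpinned */
        mm_unpinned = c;
    }

    static void mm_pin(struct ctx *c)
    {
        struct ctx **pp;

        for (pp = &mm_unpinned; *pp != NULL; pp = &(*pp)->next) {
            if (*pp == c) {
                *pp = c->next;       /* drop off the unpinned list */
                break;
            }
        }
        c->pinned = 1;
    }

    int main(void)
    {
        struct ctx a;

        ctx_init(&a);
        mm_pin(&a);
        printf("pinned=%u, unpinned list empty=%d\n",
               a.pinned, mm_unpinned == NULL);
        return 0;
    }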
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/include/linux/irq.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/include/linux/irq.h       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,80 @@
+#ifndef __irq_h
+#define __irq_h
+
+/*
+ * Please do not include this file in generic code.  There is currently
+ * no requirement for any architecture to implement anything held
+ * within this file.
+ *
+ * Thanks. --rmk
+ */
+
+#include <linux/config.h>
+
+#if !defined(CONFIG_ARCH_S390)
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+
+#include <asm/irq.h>
+#include <asm/ptrace.h>
+
+/*
+ * IRQ line status.
+ */
+#define IRQ_INPROGRESS 1       /* IRQ handler active - do not enter! */
+#define IRQ_DISABLED   2       /* IRQ disabled - do not enter! */
+#define IRQ_PENDING    4       /* IRQ pending - replay on enable */
+#define IRQ_REPLAY     8       /* IRQ has been replayed but not acked yet */
+#define IRQ_AUTODETECT 16      /* IRQ is being autodetected */
+#define IRQ_WAITING    32      /* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL      64      /* IRQ level triggered */
+#define IRQ_MASKED     128     /* IRQ masked - shouldn't be seen again */
+#define IRQ_PER_CPU    256     /* IRQ is per CPU */
+
+/*
+ * Interrupt controller descriptor. This is all we need
+ * to describe about the low-level hardware. 
+ */
+struct hw_interrupt_type {
+       const char * typename;
+       unsigned int (*startup)(unsigned int irq);
+       void (*shutdown)(unsigned int irq);
+       void (*enable)(unsigned int irq);
+       void (*disable)(unsigned int irq);
+       void (*ack)(unsigned int irq);
+       void (*end)(unsigned int irq);
+       void (*set_affinity)(unsigned int irq, unsigned long mask);
+};
+
+typedef struct hw_interrupt_type  hw_irq_controller;
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ */
+typedef struct {
+       unsigned int status;            /* IRQ status */
+       hw_irq_controller *handler;
+       struct irqaction *action;       /* IRQ action list */
+       unsigned int depth;             /* nested irq disables */
+       spinlock_t lock;
+} ____cacheline_aligned irq_desc_t;
+
+extern irq_desc_t irq_desc [NR_IRQS];
+
+#include <asm/hw_irq.h> /* the arch dependent stuff */
+
+extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_irq(unsigned int , struct irqaction * );
+extern int teardown_irq(unsigned int , struct irqaction * );
+
+extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
+
+#endif
+
+#endif /* __irq_h */
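
Each interrupt controller plugs into the 2.4 IRQ layer by filling in one hw_interrupt_type and pointing irq_desc[irq].handler at it. A minimal do-nothing controller, in the spirit of the kernel's no_irq_type:

    /* Sketch only: a controller that accepts every operation silently. */
    static unsigned int dummy_startup(unsigned int irq) { return 0; }
    static void dummy_noop(unsigned int irq) { }
    static void dummy_set_affinity(unsigned int irq, unsigned long mask) { }

    static struct hw_interrupt_type dummy_irq_type = {
        .typename     = "dummy",
        .startup      = dummy_startup,
        .shutdown     = dummy_noop,
        .enable       = dummy_noop,
        .disable      = dummy_noop,
        .ack          = dummy_noop,
        .end          = dummy_noop,
        .set_affinity = dummy_set_affinity,
    };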
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,38 @@
+#
+# Makefile for X86_64 specific PCI routines
+#
+# Reuse the i386 PCI subsystem
+#
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+CFLAGS += -Iarch/$(XENARCH)/pci
+
+CFLAGS += -Iarch/i386/pci
+
+c-i386-obj-y           := i386.o
+c-i386-obj-y           += fixup.o
+c-i386-obj-$(CONFIG_ACPI_PCI)  += acpi.o
+c-i386-obj-y                   += legacy.o common.o
+c-i386-obj-$(CONFIG_PCI_DIRECT)+= direct.o
+c-xen-obj-y            += irq.o
+# mmconfig has a 64bit special
+c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+
+c-obj-$(CONFIG_NUMA)   += k8-bus.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-xen-obj-y)):
+       @ln -fsn $(srctree)/arch/xen/i386/pci/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/x86_64/pci/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(c-i386-obj-y)):
+       @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
+
+obj-y  += $(c-i386-obj-y) $(c-obj-y)
+obj-y  += $(c-xen-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.c,$(c-i386-obj-y) $(c-i386-obj-))
+clean-files += $(patsubst %.o,%.c,$(c-xen-obj-y) $(c-xen-obj-))
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h   Fri Jul 15 13:39:50 2005
@@ -0,0 +1,140 @@
+#ifndef __XEN_SYNCH_BITOPS_H__
+#define __XEN_SYNCH_BITOPS_H__
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Heavily modified to provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+#include <linux/config.h>
+
+#define ADDR (*(volatile long *) addr)
+
+static __inline__ void synch_set_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ ( 
+        "lock btsl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_clear_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ (
+        "lock btrl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_change_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ (
+        "lock btcl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btsl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+
+    __asm__ __volatile__ (
+        "lock btcl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+struct __synch_xchg_dummy { unsigned long a[100]; };
+#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x))
+
+#define synch_cmpxchg(ptr, old, new) \
+((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
+                                     (unsigned long)(old), \
+                                     (unsigned long)(new), \
+                                     sizeof(*(ptr))))
+
+static inline unsigned long __synch_cmpxchg(volatile void *ptr,
+                                           unsigned long old,
+                                           unsigned long new, int size)
+{
+       unsigned long prev;
+       switch (size) {
+       case 1:
+               __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__synch_xg(ptr)),
+                                      "0"(old)
+                                    : "memory");
+               return prev;
+       case 2:
+               __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__synch_xg(ptr)),
+                                      "0"(old)
+                                    : "memory");
+               return prev;
+#ifdef CONFIG_X86_64
+       case 4:
+               __asm__ __volatile__("lock; cmpxchgl %k1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__synch_xg(ptr)),
+                                      "0"(old)
+                                    : "memory");
+               return prev;
+       case 8:
+               __asm__ __volatile__("lock; cmpxchgq %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__synch_xg(ptr)),
+                                      "0"(old)
+                                    : "memory");
+               return prev;
+#else
+       case 4:
+               __asm__ __volatile__("lock; cmpxchgl %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__synch_xg(ptr)),
+                                      "0"(old)
+                                    : "memory");
+               return prev;
+#endif
+       }
+       return old;
+}
+
+static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
+{
+    return ((1UL << (nr & 31)) & 
+            (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "btl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
+    return oldbit;
+}
+
+#define synch_test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ synch_const_test_bit((nr),(addr)) : \
+ synch_var_test_bit((nr),(addr)))
+
+#endif /* __XEN_SYNCH_BITOPS_H__ */
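
A hedged usage sketch for the primitives above: posting and claiming a flag in a word shared with another domain. The locked bit operations guarantee both sides observe a consistent value; the flag name and the notification hook are illustrative, not a real Xen interface:

    #include <asm-xen/asm-i386/synch_bitops.h>

    #define WORK_PENDING 0              /* illustrative bit index */

    static unsigned long shared_flags;  /* would live in a shared page */

    /* Producer: set the bit; notify only on the 0 -> 1 transition. */
    static void post_work(void)
    {
        if (!synch_test_and_set_bit(WORK_PENDING, &shared_flags))
            ; /* notify_remote_domain(); -- hypothetical hook */
    }

    /* Consumer: atomically claim the pending work, if any. */
    static int take_work(void)
    {
        return synch_test_and_clear_bit(WORK_PENDING, &shared_flags);
    }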
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,18 @@
+
+#include <linux/config.h>
+#include <linux/proc_fs.h>
+
+static struct proc_dir_entry *xen_base;
+
+struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
+{
+    if ( xen_base == NULL )
+        if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
+            panic("Couldn't create /proc/xen");
+    return create_proc_entry(name, mode, xen_base);
+}
+
+void remove_xen_proc_entry(const char *name)
+{
+    remove_proc_entry(name, xen_base);
+}
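
For illustration only, a hypothetical module built on the two helpers above might publish a read-only file under /proc/xen as in the sketch below. The entry name, handler, and message are invented for the example; read_proc is the usual 2.6-era procfs callback:

    #include <linux/kernel.h>
    #include <linux/errno.h>
    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/proc_fs.h>

    extern struct proc_dir_entry *create_xen_proc_entry(const char *, mode_t);
    extern void remove_xen_proc_entry(const char *);

    /* Hypothetical handler backing /proc/xen/example. */
    static int example_read(char *page, char **start, off_t off,
                            int count, int *eof, void *data)
    {
        *eof = 1;
        return sprintf(page, "hello from /proc/xen\n");
    }

    static int __init example_init(void)
    {
        struct proc_dir_entry *e = create_xen_proc_entry("example", 0444);
        if (e == NULL)
            return -ENOMEM;
        e->read_proc = example_read;
        return 0;
    }

    static void __exit example_exit(void)
    {
        remove_xen_proc_entry("example");
    }

    module_init(example_init);
    module_exit(example_exit);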
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Fri Jul 15 13:39:50 2005
@@ -0,0 +1,194 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/elfcore.h>
+#include <linux/mca.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/nmi.h>
+#include <asm/ist.h>
+#include <asm/kdebug.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern spinlock_t rtc_lock;
+
+/* This is definitely a GPL-only symbol */
+EXPORT_SYMBOL_GPL(cpu_gdt_table);
+
+#if defined(CONFIG_APM_MODULE)
+extern void machine_real_restart(unsigned char *, int);
+EXPORT_SYMBOL(machine_real_restart);
+extern void default_idle(void);
+EXPORT_SYMBOL(default_idle);
+#endif
+
+#ifdef CONFIG_SMP
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+extern unsigned long cpu_khz;
+extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+#ifdef CONFIG_DISCONTIGMEM
+EXPORT_SYMBOL(node_data);
+EXPORT_SYMBOL(physnode_map);
+#endif
+#ifdef CONFIG_X86_NUMAQ
+EXPORT_SYMBOL(xquad_portio);
+#endif
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+#ifdef CONFIG_ACPI_BOOT
+EXPORT_SYMBOL(pm_power_off);
+#endif
+EXPORT_SYMBOL(get_cmos_time);
+EXPORT_SYMBOL(cpu_khz);
+EXPORT_SYMBOL(apm_info);
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_generic);
+/* Delay loops */
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__copy_from_user_ll);
+EXPORT_SYMBOL(__copy_to_user_ll);
+EXPORT_SYMBOL(strnlen_user);
+
+EXPORT_SYMBOL(dma_alloc_coherent);
+EXPORT_SYMBOL(dma_free_coherent);
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pcibios_penalize_isa_irq);
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+#ifdef CONFIG_PCI_BIOS
+EXPORT_SYMBOL(pcibios_set_irq_routing);
+EXPORT_SYMBOL(pcibios_get_irq_routing_table);
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+EXPORT_SYMBOL(_mmx_memcpy);
+EXPORT_SYMBOL(mmx_clear_page);
+EXPORT_SYMBOL(mmx_copy_page);
+#endif
+
+#ifdef CONFIG_X86_HT
+EXPORT_SYMBOL(smp_num_siblings);
+EXPORT_SYMBOL(cpu_sibling_map);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(cpu_callout_map);
+EXPORT_SYMBOL(__write_lock_failed);
+EXPORT_SYMBOL(__read_lock_failed);
+
+/* Global SMP stuff */
+EXPORT_SYMBOL(smp_call_function);
+
+/* TLB flushing */
+EXPORT_SYMBOL(flush_tlb_page);
+EXPORT_SYMBOL_GPL(flush_tlb_all);
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+#endif
+
+#ifdef CONFIG_MCA
+EXPORT_SYMBOL(machine_id);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+EXPORT_SYMBOL_GPL(set_nmi_callback);
+EXPORT_SYMBOL_GPL(unset_nmi_callback);
+
+#undef memcmp
+extern int memcmp(const void *,const void *,__kernel_size_t);
+EXPORT_SYMBOL(memcmp);
+
+EXPORT_SYMBOL(register_die_notifier);
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif
+
+EXPORT_SYMBOL(__PAGE_KERNEL);
+
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap);
+EXPORT_SYMBOL(kunmap);
+EXPORT_SYMBOL(kmap_atomic);
+EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_to_page);
+#endif
+
+#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
+EXPORT_SYMBOL(ist_info);
+#endif
+
+EXPORT_SYMBOL(csum_partial);
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,1375 @@
+/*
+ *  linux/arch/x86-64/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Nov 2001 Dave Jones <davej@xxxxxxx>
+ *  Forked from i386 setup code.
+ *
+ *  $Id$
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <linux/console.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/kallsyms.h>
+#include <linux/edd.h>
+#include <linux/percpu.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/desc.h>
+#include <video/edid.h>
+#include <asm/e820.h>
+#include <asm/dma.h>
+#include <asm/mpspec.h>
+#include <asm/mmu_context.h>
+#include <asm/bootsetup.h>
+#include <asm/proto.h>
+#include <asm/setup.h>
+#include <asm/mach_apic.h>
+#include <asm/numa.h>
+#include <asm-xen/xen-public/physdev.h>
+#include "setup_arch_pre.h"
+#include <asm-xen/hypervisor.h>
+
+#define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
+
+#include <asm/mach-xen/setup_arch_post.h>
+
+extern unsigned long start_pfn;
+
+#if 0
+struct edid_info {
+        unsigned char dummy[128];
+};
+#endif
+
+extern struct edid_info edid_info;
+
+/* Allows setting of maximum possible memory size  */
+unsigned long xen_override_max_pfn;
+/*
+ * Machine setup..
+ */
+
+struct cpuinfo_x86 boot_cpu_data;
+
+unsigned long mmu_cr4_features;
+EXPORT_SYMBOL_GPL(mmu_cr4_features);
+
+int acpi_disabled;
+EXPORT_SYMBOL(acpi_disabled);
+#ifdef CONFIG_ACPI_BOOT
+extern int __initdata acpi_ht;
+extern acpi_interrupt_flags    acpi_sci_flags;
+int __initdata acpi_force = 0;
+#endif
+
+int acpi_numa __initdata;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/* Boot loader ID as an integer, for the benefit of proc_dointvec */
+int bootloader_type;
+
+unsigned long saved_video_mode;
+
+#ifdef CONFIG_SWIOTLB
+int swiotlb;
+EXPORT_SYMBOL(swiotlb);
+#endif
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct sys_desc_table_struct {
+       unsigned short length;
+       unsigned char table[0];
+};
+
+struct edid_info edid_info;
+struct e820map e820;
+
+unsigned char aux_device_present;
+
+extern int root_mountflags;
+extern char _text, _etext, _edata, _end;
+
+char command_line[COMMAND_LINE_SIZE];
+
+struct resource standard_io_resources[] = {
+       { .name = "dma1", .start = 0x00, .end = 0x1f,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "pic1", .start = 0x20, .end = 0x21,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "timer0", .start = 0x40, .end = 0x43,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "timer1", .start = 0x50, .end = 0x53,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "keyboard", .start = 0x60, .end = 0x6f,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "dma page reg", .start = 0x80, .end = 0x8f,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "pic2", .start = 0xa0, .end = 0xa1,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "dma2", .start = 0xc0, .end = 0xdf,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "fpu", .start = 0xf0, .end = 0xff,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO }
+};
+
+#define STANDARD_IO_RESOURCES \
+       (sizeof standard_io_resources / sizeof standard_io_resources[0])
+
+#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
+
+struct resource data_resource = {
+       .name = "Kernel data",
+       .start = 0,
+       .end = 0,
+       .flags = IORESOURCE_RAM,
+};
+struct resource code_resource = {
+       .name = "Kernel code",
+       .start = 0,
+       .end = 0,
+       .flags = IORESOURCE_RAM,
+};
+
+#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static struct resource system_rom_resource = {
+       .name = "System ROM",
+       .start = 0xf0000,
+       .end = 0xfffff,
+       .flags = IORESOURCE_ROM,
+};
+
+static struct resource extension_rom_resource = {
+       .name = "Extension ROM",
+       .start = 0xe0000,
+       .end = 0xeffff,
+       .flags = IORESOURCE_ROM,
+};
+
+static struct resource adapter_rom_resources[] = {
+       { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
+               .flags = IORESOURCE_ROM },
+       { .name = "Adapter ROM", .start = 0, .end = 0,
+               .flags = IORESOURCE_ROM },
+       { .name = "Adapter ROM", .start = 0, .end = 0,
+               .flags = IORESOURCE_ROM },
+       { .name = "Adapter ROM", .start = 0, .end = 0,
+               .flags = IORESOURCE_ROM },
+       { .name = "Adapter ROM", .start = 0, .end = 0,
+               .flags = IORESOURCE_ROM },
+       { .name = "Adapter ROM", .start = 0, .end = 0,
+               .flags = IORESOURCE_ROM }
+};
+#endif
+
+#define ADAPTER_ROM_RESOURCES \
+       (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+
+static struct resource video_rom_resource = {
+       .name = "Video ROM",
+       .start = 0xc0000,
+       .end = 0xc7fff,
+       .flags = IORESOURCE_ROM,
+};
+
+static struct resource video_ram_resource = {
+       .name = "Video RAM area",
+       .start = 0xa0000,
+       .end = 0xbffff,
+       .flags = IORESOURCE_RAM,
+};
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+static int __init romchecksum(unsigned char *rom, unsigned long length)
+{
+       unsigned char *p, sum = 0;
+
+       for (p = rom; p < rom + length; p++)
+               sum += *p;
+       return sum == 0;
+}
+
+static void __init probe_roms(void)
+{
+       unsigned long start, length, upper;
+       unsigned char *rom;
+       int           i;
+
+       /* video rom */
+       upper = adapter_rom_resources[0].start;
+       for (start = video_rom_resource.start; start < upper; start += 2048) {
+               rom = isa_bus_to_virt(start);
+               if (!romsignature(rom))
+                       continue;
+
+               video_rom_resource.start = start;
+
+               /* 0 < length <= 0x7f * 512, historically */
+               length = rom[2] * 512;
+
+               /* if checksum okay, trust length byte */
+               if (length && romchecksum(rom, length))
+                       video_rom_resource.end = start + length - 1;
+
+               request_resource(&iomem_resource, &video_rom_resource);
+               break;
+                       }
+
+       start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+       if (start < upper)
+               start = upper;
+
+       /* system rom */
+       request_resource(&iomem_resource, &system_rom_resource);
+       upper = system_rom_resource.start;
+
+       /* check for extension rom (ignore length byte!) */
+       rom = isa_bus_to_virt(extension_rom_resource.start);
+       if (romsignature(rom)) {
+               length = extension_rom_resource.end - extension_rom_resource.start + 1;
+               if (romchecksum(rom, length)) {
+                       request_resource(&iomem_resource, &extension_rom_resource);
+                       upper = extension_rom_resource.start;
+               }
+       }
+
+       /* check for adapter roms on 2k boundaries */
+       for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
+               rom = isa_bus_to_virt(start);
+               if (!romsignature(rom))
+                       continue;
+
+               /* 0 < length <= 0x7f * 512, historically */
+               length = rom[2] * 512;
+
+               /* but accept any length that fits if checksum okay */
+               if (!length || start + length > upper || !romchecksum(rom, length))
+                       continue;
+
+               adapter_rom_resources[i].start = start;
+               adapter_rom_resources[i].end = start + length - 1;
+               request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+               start = adapter_rom_resources[i++].end & ~2047UL;
+       }
+}
+#endif
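
As a worked example of the length decoding used twice above: a ROM whose third byte is 0x40 advertises 0x40 * 512 = 32768 bytes, and that length is only trusted when romchecksum() finds those 32768 bytes summing to zero modulo 256.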
+
+/*
+ * Point at the empty zero page to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
+DEFINE_PER_CPU(int, nr_multicall_ents);
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
+
+static __init void parse_cmdline_early (char ** cmdline_p)
+{
+       char c = ' ', *to = command_line, *from = COMMAND_LINE;
+       int len = 0, max_cmdline;
+
+       if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+               max_cmdline = COMMAND_LINE_SIZE;
+       memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
+       /* Save unparsed command line copy for /proc/cmdline */
+       saved_command_line[max_cmdline-1] = '\0';
+
+       for (;;) {
+               if (c != ' ') 
+                       goto next_char; 
+
+#ifdef  CONFIG_SMP
+               /*
+                * If the BIOS enumerates physical processors before logical,
+                * maxcpus=N at enumeration-time can be used to disable HT.
+                */
+               else if (!memcmp(from, "maxcpus=", 8)) {
+                       extern unsigned int maxcpus;
+
+                       maxcpus = simple_strtoul(from + 8, NULL, 0);
+               }
+#endif
+#ifdef CONFIG_ACPI_BOOT
+               /* "acpi=off" disables both ACPI table parsing and interpreter init */
+               if (!memcmp(from, "acpi=off", 8))
+                       disable_acpi();
+
+               if (!memcmp(from, "acpi=force", 10)) { 
+                       /* add later when we do DMI horrors: */
+                       acpi_force = 1;
+                       acpi_disabled = 0;
+               }
+
+               /* acpi=ht just means: do ACPI MADT parsing 
+                  at bootup, but don't enable the full ACPI interpreter */
+               if (!memcmp(from, "acpi=ht", 7)) { 
+                       if (!acpi_force)
+                               disable_acpi();
+                       acpi_ht = 1; 
+               }
+                else if (!memcmp(from, "pci=noacpi", 10)) 
+                       acpi_disable_pci();
+               else if (!memcmp(from, "acpi=noirq", 10))
+                       acpi_noirq_set();
+
+               else if (!memcmp(from, "acpi_sci=edge", 13))
+                       acpi_sci_flags.trigger =  1;
+               else if (!memcmp(from, "acpi_sci=level", 14))
+                       acpi_sci_flags.trigger = 3;
+               else if (!memcmp(from, "acpi_sci=high", 13))
+                       acpi_sci_flags.polarity = 1;
+               else if (!memcmp(from, "acpi_sci=low", 12))
+                       acpi_sci_flags.polarity = 3;
+
+               /* acpi=strict disables out-of-spec workarounds */
+               else if (!memcmp(from, "acpi=strict", 11)) {
+                       acpi_strict = 1;
+               }
+#endif
+
+#if 0
+               if (!memcmp(from, "nolapic", 7) ||
+                   !memcmp(from, "disableapic", 11))
+                       disable_apic = 1;
+
+               if (!memcmp(from, "noapic", 6)) 
+                       skip_ioapic_setup = 1;
+
+               if (!memcmp(from, "apic", 4)) { 
+                       skip_ioapic_setup = 0;
+                       ioapic_force = 1;
+               }
+#endif
+                       
+               if (!memcmp(from, "mem=", 4))
+                       parse_memopt(from+4, &from); 
+
+#ifdef CONFIG_DISCONTIGMEM
+               if (!memcmp(from, "numa=", 5))
+                       numa_setup(from+5); 
+#endif
+
+#ifdef CONFIG_GART_IOMMU 
+               if (!memcmp(from,"iommu=",6)) { 
+                       iommu_setup(from+6); 
+               }
+#endif
+
+               if (!memcmp(from,"oops=panic", 10))
+                       panic_on_oops = 1;
+
+               if (!memcmp(from, "noexec=", 7))
+                       nonx_setup(from + 7);
+
+       next_char:
+               c = *(from++);
+               if (!c)
+                       break;
+               if (COMMAND_LINE_SIZE <= ++len)
+                       break;
+               *(to++) = c;
+       }
+       *to = '\0';
+       *cmdline_p = command_line;
+}
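
To make the parser concrete, an invented guest command line such as

    root=/dev/sda1 ro mem=512M acpi=off maxcpus=2

would disable ACPI, clamp usable memory via parse_memopt(), and cap CPU enumeration at two, while the full unparsed string is still preserved for /proc/cmdline.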
+
+#ifndef CONFIG_DISCONTIGMEM
+static void __init contig_initmem_init(void)
+{
+        unsigned long bootmap_size, bootmap; 
+
+        /*
+        * partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+
+        bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
+        bootmap = start_pfn;
+        bootmap_size = init_bootmem(bootmap, end_pfn);
+        reserve_bootmem(bootmap, bootmap_size);
+        
+        free_bootmem(start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) << PAGE_SHIFT);
+        printk("Registering memory for bootmem: from  %lx, size = %lx\n",
+                     start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) << PAGE_SHIFT);
+        /* 
+         * This should cover kernel_end
+         */
+#if 0
+        reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+                                      bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+#endif
+        reserve_bootmem(0, (PFN_PHYS(start_pfn) +
+                            bootmap_size + PAGE_SIZE-1));
+
+} 
+#endif
+
+/* Use inline assembly to define this because the nops are defined 
+   as inline assembly strings in the include files and we cannot 
+   get them easily into strings. */
+asm("\t.data\nk8nops: " 
+    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+    K8_NOP7 K8_NOP8); 
+    
+extern unsigned char k8nops[];
+static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
+     NULL,
+     k8nops,
+     k8nops + 1,
+     k8nops + 1 + 2,
+     k8nops + 1 + 2 + 3,
+     k8nops + 1 + 2 + 3 + 4,
+     k8nops + 1 + 2 + 3 + 4 + 5,
+     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+}; 
+
+/* Replace instructions with better alternatives for this CPU type.
+
+   This runs before SMP is initialized to avoid SMP problems with
+   self-modifying code. This implies that asymmetric systems where
+   APs have fewer capabilities than the boot processor are not handled. 
+   In this case boot with "noreplacement". */ 
+void apply_alternatives(void *start, void *end) 
+{ 
+       struct alt_instr *a; 
+       int diff, i, k;
+       for (a = start; (void *)a < end; a++) { 
+               if (!boot_cpu_has(a->cpuid))
+                       continue;
+
+               BUG_ON(a->replacementlen > a->instrlen); 
+               __inline_memcpy(a->instr, a->replacement, a->replacementlen); 
+               diff = a->instrlen - a->replacementlen; 
+
+               /* Pad the rest with nops */
+               for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
+                       k = diff;
+                       if (k > ASM_NOP_MAX)
+                               k = ASM_NOP_MAX;
+                       __inline_memcpy(a->instr + i, k8_nops[k], k); 
+               } 
+       }
+} 
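
To make the padding loop concrete: if a replacement shrinks a 12-byte sequence to 3 bytes, diff starts at 9, so the loop copies the 8-byte K8 nop (k8_nops[8]) and then the 1-byte one (k8_nops[1]), filling the hole exactly. This worked example assumes ASM_NOP_MAX is 8, as the k8nops table above suggests.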
+
+static int no_replacement __initdata = 0; 
+ 
+void __init alternative_instructions(void)
+{
+       extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+       if (no_replacement) 
+               return;
+       apply_alternatives(__alt_instructions, __alt_instructions_end);
+}
+
+static int __init noreplacement_setup(char *s)
+{ 
+     no_replacement = 1; 
+     return 0; 
+} 
+
+__setup("noreplacement", noreplacement_setup); 
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+struct edd edd;
+#ifdef CONFIG_EDD_MODULE
+EXPORT_SYMBOL(edd);
+#endif
+/**
+ * copy_edd() - Copy the BIOS EDD information
+ *              from boot_params into a safe place.
+ *
+ */
+static inline void copy_edd(void)
+{
+     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
+     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
+     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
+     edd.edd_info_nr = EDD_NR;
+}
+#else
+static inline void copy_edd(void)
+{
+}
+#endif
+
+#if 0
+#define EBDA_ADDR_POINTER 0x40E
+static void __init reserve_ebda_region(void)
+{
+       unsigned int addr;
+       /** 
+        * there is a real-mode segmented pointer pointing to the 
+        * 4K EBDA area at 0x40E
+        */
+       addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
+       addr <<= 4;
+       if (addr)
+               reserve_bootmem_generic(addr, PAGE_SIZE);
+}
+#endif
+
+/*
+ * Guest physical address space starts at 0.
+ */
+
+unsigned long __init xen_end_of_ram(void)
+{
+        unsigned long max_end_pfn = xen_start_info.nr_pages;
+
+       if ( xen_override_max_pfn <  max_end_pfn)
+               xen_override_max_pfn = max_end_pfn;
+       
+        return xen_override_max_pfn;
+}
+
+static void __init print_memory_map(char *who)
+{
+        int i;
+
+        for (i = 0; i < e820.nr_map; i++) {
+                early_printk(" %s: %016Lx - %016Lx ", who,
+                        e820.map[i].addr,
+                        e820.map[i].addr + e820.map[i].size);
+                switch (e820.map[i].type) {
+                case E820_RAM:  early_printk("(usable)\n");
+                                break;
+                case E820_RESERVED:
+                                early_printk("(reserved)\n");
+                                break;
+                case E820_ACPI:
+                                early_printk("(ACPI data)\n");
+                                break;
+                case E820_NVS:
+                                early_printk("(ACPI NVS)\n");
+                                break;
+                default:        early_printk("type %u\n", e820.map[i].type);
+                                break;
+                }
+        }
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+       unsigned long low_mem_size;
+       int i, j;
+       physdev_op_t op;
+
+#if 0
+       ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+#else
+       ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); 
+#endif
+       drive_info = DRIVE_INFO;
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+       screen_info = SCREEN_INFO;
+#endif
+       edid_info = EDID_INFO;
+       aux_device_present = AUX_DEVICE_INFO;
+       saved_video_mode = SAVED_VIDEO_MODE;
+       bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+/*        register_console(&xen_console); */
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+       /* This is drawn from a dump from vgacon:startup in standard Linux. */
+       screen_info.orig_video_mode = 3; 
+       screen_info.orig_video_isVGA = 1;
+       screen_info.orig_video_lines = 25;
+       screen_info.orig_video_cols = 80;
+       screen_info.orig_video_ega_bx = 3;
+       screen_info.orig_video_points = 16;
+#endif       
+        ARCH_SETUP
+        print_memory_map(machine_specific_memory_setup());
+
+        /*     copy_edd();  */
+
+       if (!MOUNT_ROOT_RDONLY)
+               root_mountflags &= ~MS_RDONLY;
+       init_mm.start_code = (unsigned long) &_text;
+       init_mm.end_code = (unsigned long) &_etext;
+       init_mm.end_data = (unsigned long) &_edata;
+/*     init_mm.brk = (unsigned long) &_end; */
+        init_mm.brk = start_pfn << PAGE_SHIFT;
+
+
+#if 0  /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
+       code_resource.start = virt_to_phys(&_text);
+       code_resource.end = virt_to_phys(&_etext)-1;
+       data_resource.start = virt_to_phys(&_etext);
+       data_resource.end = virt_to_phys(&_edata)-1;
+#endif
+       parse_cmdline_early(cmdline_p);
+
+       early_identify_cpu(&boot_cpu_data);
+
+       /*
+        * partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+#if 0
+       end_pfn = e820_end_of_ram();
+#else
+        end_pfn = xen_end_of_ram();
+#endif
+
+       check_efer();
+
+       init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
+
+#ifdef CONFIG_ACPI_NUMA
+       /*
+        * Parse SRAT to discover nodes.
+        */
+       acpi_numa_init();
+#endif
+
+#ifdef CONFIG_DISCONTIGMEM
+       numa_initmem_init(0, end_pfn); 
+#else
+       contig_initmem_init(); 
+#endif
+
+       /* Reserve direct mapping and shared info etc. */
+//     reserve_bootmem_generic(table_start << PAGE_SHIFT, (table_end + 1 - table_start) << PAGE_SHIFT);
+
+//     reserve_bootmem_generic(0, (table_end + 1) << PAGE_SHIFT);
+
+       /* reserve kernel */
+//     kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
+
+#if 0
+       /*
+        * reserve physical page 0 - it's a special BIOS page on many boxes,
+        * enabling clean reboots, SMP operation, laptop functions.
+        */
+       reserve_bootmem_generic(0, PAGE_SIZE);
+#endif
+
+       /* reserve ebda region */
+/*     reserve_ebda_region(); */
+
+#ifdef CONFIG_SMP
+       /*
+        * But first pinch a few for the stack/trampoline stuff
+        * FIXME: Don't need the extra page at 4K, but need to fix
+        * trampoline before removing it. (see the GDT stuff)
+        */
+       reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
+
+       /* Reserve SMP trampoline */
+       reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
+#endif
+
+#ifdef CONFIG_ACPI_SLEEP
+       /*
+        * Reserve low memory region for sleep support.
+        */
+       acpi_reserve_bootmem();
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+       if (xen_start_info.mod_start) {
+               if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
+                       /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/
+                       initrd_start = INITRD_START + PAGE_OFFSET;
+                       initrd_end = initrd_start+INITRD_SIZE;
+                       initrd_below_start_ok = 1;
+               } else {
+                       printk(KERN_ERR "initrd extends beyond end of memory "
+                               "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+                               (unsigned long)(INITRD_START + INITRD_SIZE),
+                               (unsigned long)(end_pfn << PAGE_SHIFT));
+                       initrd_start = 0;
+               }
+       }
+#endif
+       paging_init();
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * Find and reserve possible boot-time SMP configuration:
+        */
+       find_smp_config();
+#endif
+       /* Make sure we have a large enough P->M table. */
+       if (end_pfn > xen_start_info.nr_pages) {
+               phys_to_machine_mapping = alloc_bootmem(
+                       max_pfn * sizeof(unsigned long));
+               memset(phys_to_machine_mapping, ~0,
+                       max_pfn * sizeof(unsigned long));
+               memcpy(phys_to_machine_mapping,
+                       (unsigned long *)xen_start_info.mfn_list,
+                       xen_start_info.nr_pages * sizeof(unsigned long));
+               free_bootmem(
+                       __pa(xen_start_info.mfn_list), 
+                       PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                       sizeof(unsigned long))));
+       }
+
+       pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
+
+       for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+       {       
+            pfn_to_mfn_frame_list[j] = 
+                 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+       }
+
+#if 0
+       check_ioapic();
+#endif
+
+#ifdef CONFIG_ACPI_BOOT
+       /*
+        * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
+        * Call this early for SRAT node setup.
+        */
+       acpi_boot_table_init();
+
+       /*
+        * Read APIC and some other early information from ACPI tables.
+        */
+       acpi_boot_init();
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * get boot-time SMP configuration:
+        */
+       if (smp_found_config)
+               get_smp_config();
+#ifndef CONFIG_XEN
+       init_apic_mappings();
+#endif
+#endif
+
+        /* XXX Disable irqdebug until we have a way to avoid interrupt
+        * conflicts. */
+/*     noirqdebug_setup(""); */
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       /*
+        * Request address space for all standard RAM and ROM resources
+        * and also for regions reported as reserved by the e820.
+        */
+       probe_roms();
+#endif
+/*     e820_reserve_resources();  */
+
+       request_resource(&iomem_resource, &video_ram_resource);
+
+       {
+       unsigned i;
+       /* request I/O space for devices used on all i[345]86 PCs */
+       for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+               request_resource(&ioport_resource, &standard_io_resources[i]);
+       }
+
+       /* Will likely break when you have unassigned resources with more
+          than 4GB memory and bridges that don't support more than 4GB. 
+          Doing it properly would require using pci_alloc_consistent
+          in this case. */
+       low_mem_size = ((end_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+       if (low_mem_size > pci_mem_start)
+               pci_mem_start = low_mem_size;
+
+#ifdef CONFIG_GART_IOMMU
+       iommu_hole_init();
+#endif
+
+       op.cmd             = PHYSDEVOP_SET_IOPL;
+       op.u.set_iopl.iopl = current->thread.io_pl = 1;
+       HYPERVISOR_physdev_op(&op);
+
+       if (xen_start_info.flags & SIF_INITDOMAIN) {
+               if (!(xen_start_info.flags & SIF_PRIVILEGED))
+                       panic("Xen granted us console access "
+                             "but not privileged status");
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+       conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+       conswitchp = &dummy_con;
+#endif
+#endif
+       } else {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+               extern const struct consw xennull_con;
+               extern int console_use_vt;
+#if defined(CONFIG_VGA_CONSOLE)
+               /* disable VGA driver */
+               ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
+#endif
+               conswitchp = &xennull_con;
+               console_use_vt = 0;
+#endif
+       }
+}
+
+static int __init get_model_name(struct cpuinfo_x86 *c)
+{
+       unsigned int *v;
+
+       if (c->x86_cpuid_level < 0x80000004)
+               return 0;
+
+       v = (unsigned int *) c->x86_model_id;
+       cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+       cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+       cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+       c->x86_model_id[48] = 0;
+       return 1;
+}
+
+
+static void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+       unsigned int n, dummy, eax, ebx, ecx, edx;
+
+       n = c->x86_cpuid_level;
+
+       if (n >= 0x80000005) {
+               cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
+               printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+                       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+               c->x86_cache_size=(ecx>>24)+(edx>>24);
+               /* On K8 L1 TLB is inclusive, so don't count it */
+               c->x86_tlbsize = 0;
+       }
+
+       if (n >= 0x80000006) {
+               cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
+               ecx = cpuid_ecx(0x80000006);
+               c->x86_cache_size = ecx >> 16;
+               c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
+
+               printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+               c->x86_cache_size, ecx & 0xFF);
+       }
+
+       if (n >= 0x80000007)
+               cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power); 
+       if (n >= 0x80000008) {
+               cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); 
+               c->x86_virt_bits = (eax >> 8) & 0xff;
+               c->x86_phys_bits = eax & 0xff;
+       }
+}
+
+
+static int __init init_amd(struct cpuinfo_x86 *c)
+{
+       int r;
+       int level;
+#ifdef CONFIG_NUMA
+       int cpu;
+#endif
+
+       /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+          3DNow is identified by bit 31 in extended CPUID (1*32+31) anyway */
+       clear_bit(0*32+31, &c->x86_capability);
+       
+       /* C-stepping K8? */
+       level = cpuid_eax(1);
+       if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
+               set_bit(X86_FEATURE_K8_C, &c->x86_capability);
+
+       r = get_model_name(c);
+       if (!r) { 
+               switch (c->x86) { 
+               case 15:
+                       /* Should distinguish models here, but this is only
+                          a fallback anyway. */
+                       strcpy(c->x86_model_id, "Hammer");
+                       break; 
+               } 
+       } 
+       display_cacheinfo(c);
+
+       if (c->x86_cpuid_level >= 0x80000008) {
+               c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+               if (c->x86_num_cores & (c->x86_num_cores - 1))
+                       c->x86_num_cores = 1;
+
+#ifdef CONFIG_NUMA
+               /* On a dual core setup the lower bits of apic id
+                  distinguish the cores. Fix up the CPU<->node mappings
+                  here based on that.
+                  Assumes number of cores is a power of two.
+                  When using SRAT use mapping from SRAT. */
+               cpu = c->x86_apicid;
+               if (acpi_numa <= 0 && c->x86_num_cores > 1) {
+                       cpu_to_node[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
+                       if (!node_online(cpu_to_node[cpu]))
+                               cpu_to_node[cpu] = first_node(node_online_map);
+               }
+               printk(KERN_INFO "CPU %d(%d) -> Node %d\n",
+                               cpu, c->x86_num_cores, cpu_to_node[cpu]);
+#endif
+       }
+
+       return r;
+}
+
+static void __init detect_ht(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+       u32     eax, ebx, ecx, edx;
+       int     index_lsb, index_msb, tmp;
+       int     cpu = smp_processor_id();
+       
+       if (!cpu_has(c, X86_FEATURE_HT))
+               return;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       smp_num_siblings = (ebx & 0xff0000) >> 16;
+       
+       if (smp_num_siblings == 1) {
+               printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+       } else if (smp_num_siblings > 1) {
+               index_lsb = 0;
+               index_msb = 31;
+               /*
+                * At this point we only support two siblings per
+                * processor package.
+                */
+               if (smp_num_siblings > NR_CPUS) {
+                       printk(KERN_WARNING "CPU: Unsupported number of siblings %d", smp_num_siblings);
+                       smp_num_siblings = 1;
+                       return;
+               }
+               tmp = smp_num_siblings;
+               while ((tmp & 1) == 0) {
+                       tmp >>=1 ;
+                       index_lsb++;
+               }
+               tmp = smp_num_siblings;
+               while ((tmp & 0x80000000 ) == 0) {
+                       tmp <<=1 ;
+                       index_msb--;
+               }
+               if (index_lsb != index_msb )
+                       index_msb++;
+               phys_proc_id[cpu] = phys_pkg_id(index_msb);
+               
+               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+                      phys_proc_id[cpu]);
+       }
+#endif
+}
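
A worked example of the bit scan above: with smp_num_siblings == 2, index_lsb and index_msb both come out as 1 (2 has a single set bit), no rounding is applied, and phys_pkg_id(1) recovers the package ID by discarding the one APIC-ID bit that distinguishes the two siblings.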
+
+static void __init sched_cmp_hack(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+       /* AMD dual core looks like HT but isn't really. Hide it from the
+          scheduler. This works around problems with the domain scheduler.
+          Also probably gives slightly better scheduling and disables
+          SMT nice which is harmful on dual core.
+          TBD tune the domain scheduler for dual core. */
+       if (c->x86_vendor == X86_VENDOR_AMD && cpu_has(c, X86_FEATURE_CMP_LEGACY))
+               smp_num_siblings = 1;
+#endif
+}
+       
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+       /* Cache sizes */
+       unsigned n;
+
+       init_intel_cacheinfo(c);
+       n = c->x86_cpuid_level;
+       if (n >= 0x80000008) {
+               unsigned eax = cpuid_eax(0x80000008);
+               c->x86_virt_bits = (eax >> 8) & 0xff;
+               c->x86_phys_bits = eax & 0xff;
+       }
+
+       if (c->x86 == 15)
+               c->x86_cache_alignment = c->x86_clflush_size * 2;
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+       char *v = c->x86_vendor_id;
+
+       if (!strcmp(v, "AuthenticAMD"))
+               c->x86_vendor = X86_VENDOR_AMD;
+       else if (!strcmp(v, "GenuineIntel"))
+               c->x86_vendor = X86_VENDOR_INTEL;
+       else
+               c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+struct cpu_model_info {
+       int vendor;
+       int family;
+       char *model_names[16];
+};
+
+/* Do some early cpuid on the boot CPU to get some parameters that are
+   needed before check_bugs. Everything advanced is in identify_cpu
+   below. */
+void __init early_identify_cpu(struct cpuinfo_x86 *c)
+{
+       u32 tfms;
+
+       c->loops_per_jiffy = loops_per_jiffy;
+       c->x86_cache_size = -1;
+       c->x86_vendor = X86_VENDOR_UNKNOWN;
+       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_vendor_id[0] = '\0'; /* Unset */
+       c->x86_model_id[0] = '\0';  /* Unset */
+       c->x86_clflush_size = 64;
+       c->x86_cache_alignment = c->x86_clflush_size;
+       c->x86_num_cores = 1;
+       c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
+       c->x86_cpuid_level = 0;
+       memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+       /* Get vendor name */
+       cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
+             (unsigned int *)&c->x86_vendor_id[0],
+             (unsigned int *)&c->x86_vendor_id[8],
+             (unsigned int *)&c->x86_vendor_id[4]);
+               
+       get_cpu_vendor(c);
+
+       /* Initialize the standard set of capabilities */
+       /* Note that the vendor-specific code below might override */
+
+       /* Intel-defined flags: level 0x00000001 */
+       if (c->cpuid_level >= 0x00000001) {
+               __u32 misc;
+               cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
+                     &c->x86_capability[0]);
+               c->x86 = (tfms >> 8) & 0xf;
+               c->x86_model = (tfms >> 4) & 0xf;
+               c->x86_mask = tfms & 0xf;
+               if (c->x86 == 0xf) {
+                       c->x86 += (tfms >> 20) & 0xff;
+                       c->x86_model += ((tfms >> 16) & 0xF) << 4;
+               } 
+               if (c->x86_capability[0] & (1<<19)) 
+                       c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+               c->x86_apicid = misc >> 24;
+       } else {
+               /* Have CPUID level 0 only - unheard of */
+               c->x86 = 4;
+       }
+}
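
As a standalone sketch of the extended family/model arithmetic above (the sample EAX value 0x00000f48 is invented, chosen so it also satisfies the C-stepping K8 test in init_amd()):

    #include <stdio.h>

    int main(void)
    {
        unsigned int tfms = 0x00000f48;     /* example CPUID leaf 1 EAX */
        unsigned int family = (tfms >> 8) & 0xf;
        unsigned int model  = (tfms >> 4) & 0xf;
        unsigned int mask   = tfms & 0xf;

        if (family == 0xf) {                /* extended fields apply */
            family += (tfms >> 20) & 0xff;
            model  += ((tfms >> 16) & 0xf) << 4;
        }
        /* Prints: family 15 model 4 stepping 8 */
        printf("family %u model %u stepping %u\n", family, model, mask);
        return 0;
    }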
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+       int i;
+       u32 xlvl;
+
+       early_identify_cpu(c);
+
+       /* AMD-defined flags: level 0x80000001 */
+       xlvl = cpuid_eax(0x80000000);
+       c->x86_cpuid_level = xlvl;
+       if ((xlvl & 0xffff0000) == 0x80000000) {
+               if (xlvl >= 0x80000001) {
+                       c->x86_capability[1] = cpuid_edx(0x80000001);
+                       c->x86_capability[5] = cpuid_ecx(0x80000001);
+               }
+               if (xlvl >= 0x80000004)
+                       get_model_name(c); /* Default name */
+       }
+
+       /* Transmeta-defined flags: level 0x80860001 */
+       xlvl = cpuid_eax(0x80860000);
+       if ((xlvl & 0xffff0000) == 0x80860000) {
+               /* Don't set x86_cpuid_level here for now to not confuse. */
+               if (xlvl >= 0x80860001)
+                       c->x86_capability[2] = cpuid_edx(0x80860001);
+       }
+
+       /*
+        * Vendor-specific initialization.  In this section we
+        * canonicalize the feature flags, meaning if there are
+        * features a certain CPU supports which CPUID doesn't
+        * tell us, CPUID claiming incorrect flags, or other bugs,
+        * we handle them here.
+        *
+        * At the end of this section, c->x86_capability better
+        * indicate the features this CPU genuinely supports!
+        */
+       switch (c->x86_vendor) {
+       case X86_VENDOR_AMD:
+               init_amd(c);
+               break;
+
+       case X86_VENDOR_INTEL:
+               init_intel(c);
+               break;
+
+       case X86_VENDOR_UNKNOWN:
+       default:
+               display_cacheinfo(c);
+               break;
+       }
+
+       select_idle_routine(c);
+       detect_ht(c); 
+       sched_cmp_hack(c);
+
+       /*
+        * On SMP, boot_cpu_data holds the common feature set between
+        * all CPUs; so make sure that we indicate which features are
+        * common between the CPUs.  The first time this routine gets
+        * executed, c == &boot_cpu_data.
+        */
+       if (c != &boot_cpu_data) {
+               /* AND the already accumulated flags with these */
+               for (i = 0 ; i < NCAPINTS ; i++)
+                       boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+       }
+
+#ifdef CONFIG_X86_MCE
+       mcheck_init(c);
+#endif
+#ifdef CONFIG_NUMA
+       if (c != &boot_cpu_data)
+               numa_add_cpu(c - cpu_data);
+#endif
+}
+ 
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+       if (c->x86_model_id[0])
+               printk("%s", c->x86_model_id);
+
+       if (c->x86_mask || c->cpuid_level >= 0) 
+               printk(" stepping %02x\n", c->x86_mask);
+       else
+               printk("\n");
+}
+
+/*
+ *     Get CPU information for use by the procfs.
+ */
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+       struct cpuinfo_x86 *c = v;
+
+       /* 
+        * These flag bits must match the definitions in <asm/cpufeature.h>.
+        * NULL means this bit is undefined or reserved; either way it doesn't
+        * have meaning as far as Linux is concerned.  Note that it's important
+        * to realize there is a difference between this table and CPUID -- if
+        * applications want to get the raw CPUID data, they should access
+        * /dev/cpu/<cpu_nr>/cpuid instead.
+        */
+       static char *x86_cap_flags[] = {
+               /* Intel-defined */
+               "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+               "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+               "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+               "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
+
+               /* AMD-defined */
+               "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
+               NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+
+               /* Transmeta-defined */
+               "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+               /* Other (Linux-defined) */
+               "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+               /* Intel-defined (#2) */
+               "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
+               "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+               /* AMD-defined (#2) */
+               "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+       };
+       static char *x86_power_flags[] = { 
+               "ts",   /* temperature sensor */
+               "fid",  /* frequency id control */
+               "vid",  /* voltage id control */
+               "ttp",  /* thermal trip */
+       };
+
+
+#ifdef CONFIG_SMP
+       if (!cpu_online(c-cpu_data))
+               return 0;
+#endif
+
+       seq_printf(m,"processor\t: %u\n"
+                    "vendor_id\t: %s\n"
+                    "cpu family\t: %d\n"
+                    "model\t\t: %d\n"
+                    "model name\t: %s\n",
+                    (unsigned)(c-cpu_data),
+                    c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+                    c->x86,
+                    (int)c->x86_model,
+                    c->x86_model_id[0] ? c->x86_model_id : "unknown");
+       
+       if (c->x86_mask || c->cpuid_level >= 0)
+               seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+       else
+               seq_printf(m, "stepping\t: unknown\n");
+       
+       if (cpu_has(c,X86_FEATURE_TSC)) {
+               seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
+                            cpu_khz / 1000, (cpu_khz % 1000));
+       }
+
+       /* Cache size */
+       if (c->x86_cache_size >= 0) 
+               seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+       
+#ifdef CONFIG_SMP
+       seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]);
+       seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings);
+#endif 
+
+       seq_printf(m,
+               "fpu\t\t: yes\n"
+               "fpu_exception\t: yes\n"
+               "cpuid level\t: %d\n"
+               "wp\t\t: yes\n"
+               "flags\t\t:",
+                  c->cpuid_level);
+
+       { 
+               int i; 
+               for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+                       if ( test_bit(i, &c->x86_capability) &&
+                            x86_cap_flags[i] != NULL )
+                               seq_printf(m, " %s", x86_cap_flags[i]);
+       }
+               
+       seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+                  c->loops_per_jiffy/(500000/HZ),
+                  (c->loops_per_jiffy/(5000/HZ)) % 100);
+
+       if (c->x86_tlbsize > 0) 
+               seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+       seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
+       seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
+
+       seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", 
+                  c->x86_phys_bits, c->x86_virt_bits);
+
+       seq_printf(m, "power management:");
+       {
+               unsigned i;
+               for (i = 0; i < 32; i++) 
+                       if (c->x86_power & (1 << i)) {
+                               if (i < ARRAY_SIZE(x86_power_flags))
+                                       seq_printf(m, " %s", x86_power_flags[i]);
+                               else
+                                       seq_printf(m, " [%d]", i);
+                       }
+       }
+       seq_printf(m, "\n");
+
+       if (c->x86_num_cores > 1)
+               seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+
+       seq_printf(m, "\n\n"); 
+
+       return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+       return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       ++*pos;
+       return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+       .start =c_start,
+       .next = c_next,
+       .stop = c_stop,
+       .show = show_cpuinfo,
+};
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/drivers/scsi/aic7xxx/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/drivers/scsi/aic7xxx/Makefile     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,97 @@
+#
+# drivers/scsi/aic7xxx/Makefile
+#
+# Makefile for the Linux aic7xxx SCSI driver.
+#
+
+O_TARGET := aic7xxx_drv.o
+
+list-multi     := aic7xxx.o aic79xx.o
+
+obj-$(CONFIG_SCSI_AIC7XXX)     += aic7xxx.o
+ifeq ($(CONFIG_PCI),y)
+obj-$(CONFIG_SCSI_AIC79XX)     += aic79xx.o
+endif
+
+EXTRA_CFLAGS += -I$(TOPDIR)/drivers/scsi -Werror
+#EXTRA_CFLAGS += -g
+
+# Platform Specific Files
+obj-aic7xxx = aic7xxx_osm.o aic7xxx_proc.o
+
+# Core Files
+obj-aic7xxx += aic7xxx_core.o aic7xxx_93cx6.o
+ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y)
+obj-aic7xxx += aic7xxx_reg_print.o
+endif
+
+#EISA Specific Files
+AIC7XXX_EISA_ARCH = $(filter i386 alpha xen,$(ARCH))
+ifneq ($(AIC7XXX_EISA_ARCH),)
+obj-aic7xxx += aic7770.o
+# Platform Specific EISA Files
+obj-aic7xxx += aic7770_osm.o
+endif
+
+#PCI Specific Files
+ifeq ($(CONFIG_PCI),y)
+obj-aic7xxx += aic7xxx_pci.o
+# Platform Specific PCI Files
+obj-aic7xxx += aic7xxx_osm_pci.o
+endif
+
+# Platform Specific U320 Files
+obj-aic79xx = aic79xx_osm.o aic79xx_proc.o aic79xx_osm_pci.o
+# Core Files
+obj-aic79xx += aic79xx_core.o aic79xx_pci.o
+ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y)
+obj-aic79xx += aic79xx_reg_print.o
+endif
+
+# Override our module destination
+MOD_DESTDIR = $(shell cd .. && $(CONFIG_SHELL) $(TOPDIR)/scripts/pathdown.sh)
+
+include $(TOPDIR)/Rules.make
+
+aic7xxx_core.o: aic7xxx_seq.h
+$(obj-aic7xxx): aic7xxx_reg.h
+aic7xxx.o: aic7xxx_seq.h aic7xxx_reg.h $(obj-aic7xxx)
+       $(LD) $(LD_RFLAG) -r -o $@ $(obj-aic7xxx)
+
+aic79xx_core.o: aic79xx_seq.h
+$(obj-aic79xx): aic79xx_reg.h
+aic79xx.o: aic79xx_seq.h aic79xx_reg.h $(obj-aic79xx)
+       $(LD) $(LD_RFLAG) -r -o $@ $(obj-aic79xx)
+
+ifeq ($(CONFIG_AIC7XXX_BUILD_FIRMWARE),y)
+aic7xxx_gen = aic7xxx_seq.h aic7xxx_reg.h
+ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y)
+aic7xxx_gen += aic7xxx_reg_print.c
+aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h           \
+                -p aic7xxx_reg_print.c -i aic7xxx_osm.h        \
+                -o aic7xxx_seq.h aic7xxx.seq
+else
+aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h           \
+                -o aic7xxx_seq.h aic7xxx.seq
+endif
+$(aic7xxx_gen): aic7xxx.seq aic7xxx.reg aicasm/aicasm
+       $(aic7xxx_asm_cmd)
+endif
+
+ifeq ($(CONFIG_AIC79XX_BUILD_FIRMWARE),y)
+aic79xx_gen = aic79xx_seq.h aic79xx_reg.h
+ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y)
+aic79xx_gen += aic79xx_reg_print.c
+aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h           \
+                -p aic79xx_reg_print.c -i aic79xx_osm.h        \
+                -o aic79xx_seq.h aic79xx.seq
+else
+aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h \
+                -o aic79xx_seq.h aic79xx.seq
+endif
+$(aic79xx_gen): aic79xx.seq aic79xx.reg aicasm/aicasm
+       $(aic79xx_asm_cmd)
+endif
+
+aicasm/aicasm: aicasm/*.[chyl]
+       $(MAKE) -C aicasm
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,540 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/frontend/vbd.c
+ * 
+ * Xenolinux virtual block-device driver.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ */
+
+#include "common.h"
+#include <linux/blk.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.
+ * potentially combinations of the two) in the naming scheme and in a few 
+ * other places (like default readahead, etc).
+ */
+#define XLIDE_MAJOR_NAME  "hd"
+#define XLSCSI_MAJOR_NAME "sd"
+#define XLVBD_MAJOR_NAME "xvd"
+
+#define XLIDE_DEVS_PER_MAJOR   2
+#define XLSCSI_DEVS_PER_MAJOR 16
+#define XLVBD_DEVS_PER_MAJOR  16
+
+#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
+#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
+
+#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
+#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
+
+#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
+#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
+
+/* The below are for the generic drivers/block/ll_rw_block.c code. */
+static int xlide_blksize_size[256];
+static int xlide_hardsect_size[256];
+static int xlide_max_sectors[256];
+static int xlscsi_blksize_size[256];
+static int xlscsi_hardsect_size[256];
+static int xlscsi_max_sectors[256];
+static int xlvbd_blksize_size[256];
+static int xlvbd_hardsect_size[256];
+static int xlvbd_max_sectors[256];
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static vdisk_t *vbd_info;
+
+static struct block_device_operations xlvbd_block_fops = 
+{
+    open:               blkif_open,
+    release:            blkif_release,
+    ioctl:              blkif_ioctl,
+    check_media_change: blkif_check,
+    revalidate:         blkif_revalidate,
+};
+
+static int xlvbd_get_vbd_info(vdisk_t *disk_info)
+{
+    vdisk_t         *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
+    blkif_request_t  req;
+    blkif_response_t rsp;
+    int              nr;
+
+    memset(&req, 0, sizeof(req));
+    req.operation   = BLKIF_OP_PROBE;
+    req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    blkif_control_probe_send(&req, &rsp,
+                             (unsigned long)(virt_to_machine(buf)));
+#else
+    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
+
+    blkif_control_send(&req, &rsp);
+#endif
+
+    if ( rsp.status <= 0 )
+    {
+        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
+        free_page((unsigned long)buf);  /* don't leak the probe page */
+        return -1;
+    }
+
+    if ( (nr = rsp.status) > MAX_VBDS )
+        nr = MAX_VBDS;
+    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
+    free_page((unsigned long)buf);
+
+    return nr;
+}
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @disk:              a vdisk_t describing the VBD
+ *
+ * Takes a vdisk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur.  Also, devices that are in use should not have
+ * their details updated.  This is the caller's responsibility.
+ */
+static int xlvbd_init_device(vdisk_t *xd)
+{
+    int device = xd->device;
+    int major  = MAJOR(device); 
+    int minor  = MINOR(device);
+    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
+    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
+    char *major_name;
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk;
+    int i, rc = 0, max_part, partno;
+    unsigned long capacity;
+
+    unsigned char buf[64];
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
+    {
+        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+
+    if ( is_ide ) {
+
+       major_name = XLIDE_MAJOR_NAME; 
+       max_part   = XLIDE_MAX_PART;
+
+    } else if ( is_scsi ) {
+
+       major_name = XLSCSI_MAJOR_NAME;
+       max_part   = XLSCSI_MAX_PART;
+
+    } else { 
+
+        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
+       printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
+              major, minor);
+       is_scsi    = 1; 
+       major_name = "cciss"; 
+       max_part   = XLSCSI_MAX_PART;
+
+    }
+    
+    partno = minor & (max_part - 1); 
+    
+    if ( (gd = get_gendisk(device)) == NULL )
+    {
+        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
+        if ( rc < 0 )
+        {
+            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
+            goto out;
+        }
+
+        if ( is_ide )
+        { 
+            blksize_size[major]  = xlide_blksize_size;
+            hardsect_size[major] = xlide_hardsect_size;
+            max_sectors[major]   = xlide_max_sectors;
+            read_ahead[major]    = 8;
+        } 
+        else if ( is_scsi )
+        { 
+            blksize_size[major]  = xlscsi_blksize_size;
+            hardsect_size[major] = xlscsi_hardsect_size;
+            max_sectors[major]   = xlscsi_max_sectors;
+            read_ahead[major]    = 8;
+        }
+        else
+        { 
+            blksize_size[major]  = xlvbd_blksize_size;
+            hardsect_size[major] = xlvbd_hardsect_size;
+            max_sectors[major]   = xlvbd_max_sectors;
+            read_ahead[major]    = 8;
+        }
+
+        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request);
+
+        /*
+         * Turn off barking 'headactive' mode. We dequeue buffer heads as
+         * soon as we pass them to the back-end driver.
+         */
+        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
+
+        /* Construct an appropriate gendisk structure. */
+        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
+        gd->major      = major;
+        gd->major_name = major_name; 
+    
+        gd->max_p      = max_part; 
+        if ( is_ide )
+        { 
+            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
+            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
+        } 
+        else if ( is_scsi )
+        { 
+            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
+            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
+        }
+        else
+        { 
+            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
+            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
+        }
+
+        /* 
+        ** The sizes[] and part[] arrays hold the sizes and other 
+        ** information about every partition with this 'major' (i.e. 
+        ** every disk sharing the 8 bit prefix * max partns per disk) 
+        */
+        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
+        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
+                            GFP_KERNEL);
+        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
+        memset(gd->part,  0, max_part * gd->nr_real 
+               * sizeof(struct hd_struct));
+
+
+        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
+                                   GFP_KERNEL);
+        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
+
+        gd->next   = NULL;            
+        gd->fops   = &xlvbd_block_fops;
+
+        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
+                             GFP_KERNEL);
+        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
+    
+        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
+        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
+
+        add_gendisk(gd);
+
+        blk_size[major] = gd->sizes;
+    }
+
+    if ( xd->info & VDISK_READONLY )
+        set_device_ro(device, 1);
+
+    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
+
+    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
+    capacity = (unsigned long)xd->capacity;
+
+    if ( partno != 0 )
+    {
+        /*
+         * If this was previously set up as a real disc we will have set 
+         * up partition-table information. Virtual partitions override 
+         * 'real' partitions, and the two cannot coexist on a device.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(max_part-1)] != 0) )
+        {
+            /*
+             * Any non-zero sub-partition entries must be cleaned out before
+             * installing 'virtual' partition entries. The two types cannot
+             * coexist, and virtual partitions are favoured.
+             */
+            kdev_t dev = device & ~(max_part-1);
+            for ( i = max_part - 1; i > 0; i-- )
+            {
+                invalidate_device(dev+i, 1);
+                gd->part[MINOR(dev+i)].start_sect = 0;
+                gd->part[MINOR(dev+i)].nr_sects   = 0;
+                gd->sizes[MINOR(dev+i)]           = 0;
+            }
+            printk(KERN_ALERT
+                   "Virtual partitions found for /dev/%s - ignoring any "
+                   "real partition information we may have found.\n",
+                   disk_name(gd, MINOR(device), buf));
+        }
+
+        /* Need to skankily setup 'partition' information */
+        gd->part[minor].start_sect = 0; 
+        gd->part[minor].nr_sects   = capacity; 
+        gd->sizes[minor]           = capacity >>(BLOCK_SIZE_BITS-9); 
+
+        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+    }
+    else
+    {
+        gd->part[minor].nr_sects = capacity;
+        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
+        
+        /* Some final fix-ups depending on the device type */
+        if ( xd->info & VDISK_REMOVABLE )
+        { 
+            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
+            printk(KERN_ALERT 
+                   "Skipping partition check on %s /dev/%s\n", 
+                   (xd->info & VDISK_CDROM) ? "cdrom" : "removable",
+                   disk_name(gd, MINOR(device), buf)); 
+        }
+        else
+        {
+            /* Only check partitions on real discs (not virtual!). */
+            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            {
+                printk(KERN_ALERT
+                       "Skipping partition check on virtual /dev/%s\n",
+                       disk_name(gd, MINOR(device), buf));
+                goto out;  /* 'break' would not compile here: no enclosing loop */
+            }
+            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
+        }
+    }
+
+ out:
+    bdput(bd);    
+    return rc;
+}
+
+
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device:       numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+    int i, rc = 0, minor = MINOR(device);
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk = NULL;
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    if ( ((gd = get_gendisk(device)) == NULL) ||
+         ((disk = xldev_to_xldisk(device)) == NULL) )
+        BUG();
+
+    if ( disk->usage != 0 )
+    {
+        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+ 
+    if ( (minor & (gd->max_p-1)) != 0 )
+    {
+        /* 1: The VBD is mapped to a partition rather than a whole unit. */
+        invalidate_device(device, 1);
+       gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = 0;
+        gd->sizes[minor]           = 0;
+
+        /* Clear the consists-of-virtual-partitions flag if possible. */
+        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+        for ( i = 1; i < gd->max_p; i++ )
+            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+        /*
+         * If all virtual partitions are now gone, and a 'whole unit' VBD is
+         * present, then we can try to grok the unit's real partition table.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+        {
+            register_disk(gd,
+                          device&~(gd->max_p-1), 
+                          gd->max_p, 
+                          &xlvbd_block_fops,
+                          gd->part[minor&~(gd->max_p-1)].nr_sects);
+        }
+    }
+    else
+    {
+        /*
+         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+         * NB. The partition entries are only cleared if there are no VBDs
+         * mapped to individual partitions on this unit.
+         */
+        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+        while ( i >= 0 )
+        {
+            invalidate_device(device+i, 1);
+            gd->part[minor+i].start_sect = 0;
+            gd->part[minor+i].nr_sects   = 0;
+            gd->sizes[minor+i]           = 0;
+            i--;
+        }
+    }
+
+ out:
+    bdput(bd);
+    return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and updates driver state
+ * accordingly. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+void xlvbd_update_vbds(void)
+{
+    int i, j, k, old_nr, new_nr;
+    vdisk_t *old_info, *new_info, *merged_info;
+
+    old_info = vbd_info;
+    old_nr   = nr_vbds;
+
+    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+    if (!new_info)
+        return;
+
+    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
+        goto out;
+
+    /*
+     * Final list maximum size is old list + new list. This occurs only when
+     * old list and new list do not overlap at all, and we cannot yet destroy
+     * VBDs in the old list because the usage counts are busy.
+     */
+    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
+    if (!merged_info)
+        goto out;
+
+    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+    i = j = k = 0;
+
+    while ( (i < old_nr) && (j < new_nr) )
+    {
+        if ( old_info[i].device < new_info[j].device )
+        {
+            if ( xlvbd_remove_device(old_info[i].device) != 0 )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+            i++;
+        }
+        else if ( old_info[i].device > new_info[j].device )
+        {
+            if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+            j++;
+        }
+        else
+        {
+            if ( ((old_info[i].capacity == new_info[j].capacity) &&
+                  (old_info[i].info == new_info[j].info)) ||
+                 (xlvbd_remove_device(old_info[i].device) != 0) )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+            else if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+            i++; j++;
+        }
+    }
+
+    for ( ; i < old_nr; i++ )
+    {
+        if ( xlvbd_remove_device(old_info[i].device) != 0 )
+            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+    }
+
+    for ( ; j < new_nr; j++ )
+    {
+        if ( xlvbd_init_device(&new_info[j]) == 0 )
+            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+    }
+
+    vbd_info = merged_info;
+    nr_vbds  = k;
+
+    kfree(old_info);
+out:
+    kfree(new_info);
+}
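
The loop above is a three-way merge over two lists sorted by ->device.
Restated as a sketch (the helper below is illustrative, not part of the
patch), the per-device policy is:

    /*
     *   device only in old list -> drop it, unless xlvbd_remove_device()
     *                              fails because the disk is still in use;
     *   device only in new list -> keep it, if xlvbd_init_device() succeeds;
     *   device in both lists    -> keep the old entry if it is unchanged or
     *                              busy, otherwise re-initialise from new.
     */
    static int vdisk_unchanged(const vdisk_t *o, const vdisk_t *n)
    {
        /* "unchanged" means capacity and info flags both agree */
        return (o->capacity == n->capacity) && (o->info == n->info);
    }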
+
+
+/*
+ * Set up all the linux device goop for the virtual block devices (vbd's) that
+ * we know about. Note that although from the backend driver's p.o.v. VBDs are
+ * addressed simply as an opaque 16-bit device number, the domain creation tools 
+ * conventionally allocate these numbers to correspond to those used by 'real' 
+ * linux -- this is just for convenience as it means e.g. that the same 
+ * /etc/fstab can be used when booting with or without Xen.
+ */
+int xlvbd_init(void)
+{
+    int i;
+    
+    /*
+     * If compiled as a module, we don't support unloading yet. We therefore 
+     * permanently increment the reference count to disallow it.
+     */
+    SET_MODULE_OWNER(&xlvbd_block_fops);
+    MOD_INC_USE_COUNT;
+
+    /* Initialize the global arrays. */
+    for ( i = 0; i < 256; i++ ) 
+    {
+        xlide_blksize_size[i]  = 1024;
+        xlide_hardsect_size[i] = 512;
+        xlide_max_sectors[i]   = 512;
+
+        xlscsi_blksize_size[i]  = 1024;
+        xlscsi_hardsect_size[i] = 512;
+        xlscsi_max_sectors[i]   = 512;
+
+        xlvbd_blksize_size[i]  = 512;
+        xlvbd_hardsect_size[i] = 512;
+        xlvbd_max_sectors[i]   = 512;
+    }
+
+    vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+    if (!vbd_info)
+        return -ENOMEM;
+
+    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
+
+    if ( nr_vbds < 0 )
+    {
+        kfree(vbd_info);
+        vbd_info = NULL;
+        nr_vbds  = 0;
+    }
+    else
+    {
+        for ( i = 0; i < nr_vbds; i++ )
+            xlvbd_init_device(&vbd_info[i]);
+    }
+
+    return 0;
+}
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,97 @@
+#ifndef _X8664_TLBFLUSH_H
+#define _X8664_TLBFLUSH_H
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+
+#define __flush_tlb()  xen_tlb_flush()
+
+/*
+ * Global pages have to be flushed a bit differently. Not a real
+ * performance problem because this does not happen often.
+ */
+#define __flush_tlb_global()   xen_tlb_flush()
+
+
+extern unsigned long pgkern_mask;
+
+#define __flush_tlb_all() __flush_tlb_global()
+
+#define __flush_tlb_one(addr)  xen_invlpg(addr)
+
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the x86_64 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+       if (mm == current->active_mm)
+               __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+       unsigned long addr)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+       unsigned long start, unsigned long end)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb();
+}
+
+#else
+
+#include <asm/smp.h>
+
+#define local_flush_tlb() \
+       __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb()    flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+{
+       flush_tlb_mm(vma->vm_mm);
+}
+
+#define TLBSTATE_OK    1
+#define TLBSTATE_LAZY  2
+
+#endif
+
+#define flush_tlb_kernel_range(start, end) flush_tlb_all()
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+       /* x86_64 does not keep any page table caches in TLB */
+}
+
+#endif /* _X8664_TLBFLUSH_H */
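
A sketch of the intended calling pattern for the interface above
(illustrative only: the function below is not part of this patch, and
set_pte() is assumed to be the usual 2.6.11 page-table helper). After
rewriting a single user PTE, only that one translation needs invalidating;
wholesale changes to an mm would use flush_tlb_mm() instead.

    static void example_update_pte(struct vm_area_struct *vma,
                                   unsigned long addr, pte_t *ptep, pte_t newpte)
    {
        set_pte(ptep, newpte);      /* install the new translation          */
        flush_tlb_page(vma, addr);  /* one-page flush: xen_invlpg() on UP,
                                       an IPI-backed flush under CONFIG_SMP */
    }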
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,57 @@
+/*
+ * Code for the vsyscall page.  This version uses the old int $0x80 method.
+ *
+ * NOTE:
+ * 1) __kernel_vsyscall _must_ be first in this page.
+ * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
+ *    for details.
+ */
+#include <asm/ia32_unistd.h>
+#include <asm/offset.h>
+
+       .text
+       .section .text.vsyscall,"ax"
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       int $0x80
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+       .long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIE:
+
+       .long .LENDFDE1-.LSTARTFDE1     /* Length FDE */
+.LSTARTFDE1:
+       .long .LSTARTFDE1-.LSTARTFRAME  /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall
+       .uleb128 0                      /* Augmentation length */
+       .align 4
+.LENDFDE1:
+               
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#define SYSCALL_ENTER_KERNEL    int $0x80
+#include "vsyscall-sigreturn.S"
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,49 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union 
+       __attribute__((__section__(".data.init_task"))) =
+               { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */ 
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
+
+#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,1115 @@
+/*
+ *     Intel Multiprocessor Specification 1.1 and 1.4
+ *     compliant MP-table parsing routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *             Alan Cox        :       Added EBDA scanning
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Maciej W. Rozycki:      Bits for default MP configurations
+ *             Paul Diefenbaugh:       Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bitops.h>
+
+#include <asm/smp.h>
+#include <asm/acpi.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int __initdata num_processors;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+       int sum = 0;
+
+       while (len--)
+               sum += *mp++;
+
+       return sum & 0xFF;
+}
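
The convention mpf_checksum() enforces: a well-formed MP structure's bytes
sum to 0 mod 256, so the function returns 0 exactly when the block is
intact. A self-contained userspace illustration (the 16-byte buffer is
fabricated for the example):

    #include <stdio.h>

    static int mpf_checksum(unsigned char *mp, int len)
    {
        int sum = 0;

        while (len--)
            sum += *mp++;

        return sum & 0xFF;
    }

    int main(void)
    {
        unsigned char buf[16] = { '_', 'M', 'P', '_' };   /* rest zero      */
        buf[15] = (unsigned char)-mpf_checksum(buf, 15);  /* balancing byte */
        printf("checksum = %d\n", mpf_checksum(buf, 16)); /* prints 0       */
        return 0;
    }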
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record; 
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+#ifdef CONFIG_X86_NUMAQ
+static int MP_valid_apicid(int apicid, int version)
+{
+       return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
+}
+#elif !defined(CONFIG_XEN)
+static int MP_valid_apicid(int apicid, int version)
+{
+       if (version >= 0x14)
+               return apicid < 0xff;
+       else
+               return apicid < 0xf;
+}
+#endif
+
+#ifndef CONFIG_XEN
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+       int ver, apicid;
+       physid_mask_t tmp;
+       
+       if (!(m->mpc_cpuflag & CPU_ENABLED))
+               return;
+
+       apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+       if (m->mpc_featureflag&(1<<0))
+               Dprintk("    Floating point unit present.\n");
+       if (m->mpc_featureflag&(1<<7))
+               Dprintk("    Machine Exception supported.\n");
+       if (m->mpc_featureflag&(1<<8))
+               Dprintk("    64 bit compare & exchange supported.\n");
+       if (m->mpc_featureflag&(1<<9))
+               Dprintk("    Internal APIC present.\n");
+       if (m->mpc_featureflag&(1<<11))
+               Dprintk("    SEP present.\n");
+       if (m->mpc_featureflag&(1<<12))
+               Dprintk("    MTRR  present.\n");
+       if (m->mpc_featureflag&(1<<13))
+               Dprintk("    PGE  present.\n");
+       if (m->mpc_featureflag&(1<<14))
+               Dprintk("    MCA  present.\n");
+       if (m->mpc_featureflag&(1<<15))
+               Dprintk("    CMOV  present.\n");
+       if (m->mpc_featureflag&(1<<16))
+               Dprintk("    PAT  present.\n");
+       if (m->mpc_featureflag&(1<<17))
+               Dprintk("    PSE  present.\n");
+       if (m->mpc_featureflag&(1<<18))
+               Dprintk("    PSN  present.\n");
+       if (m->mpc_featureflag&(1<<19))
+               Dprintk("    Cache Line Flush Instruction present.\n");
+       /* 20 Reserved */
+       if (m->mpc_featureflag&(1<<21))
+               Dprintk("    Debug Trace and EMON Store present.\n");
+       if (m->mpc_featureflag&(1<<22))
+               Dprintk("    ACPI Thermal Throttle Registers  present.\n");
+       if (m->mpc_featureflag&(1<<23))
+               Dprintk("    MMX  present.\n");
+       if (m->mpc_featureflag&(1<<24))
+               Dprintk("    FXSR  present.\n");
+       if (m->mpc_featureflag&(1<<25))
+               Dprintk("    XMM  present.\n");
+       if (m->mpc_featureflag&(1<<26))
+               Dprintk("    Willamette New Instructions  present.\n");
+       if (m->mpc_featureflag&(1<<27))
+               Dprintk("    Self Snoop  present.\n");
+       if (m->mpc_featureflag&(1<<28))
+               Dprintk("    HT  present.\n");
+       if (m->mpc_featureflag&(1<<29))
+               Dprintk("    Thermal Monitor present.\n");
+       /* 30, 31 Reserved */
+
+
+       if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+               Dprintk("    Bootup CPU\n");
+               boot_cpu_physical_apicid = m->mpc_apicid;
+               boot_cpu_logical_apicid = apicid;
+       }
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       "  Processor ignored.\n", NR_CPUS); 
+               return;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                       " Processor ignored.\n", maxcpus); 
+               return;
+       }
+       num_processors++;
+       ver = m->mpc_apicver;
+
+       if (!MP_valid_apicid(apicid, ver)) {
+               printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
+                       m->mpc_apicid, MAX_APICS);
+               --num_processors;
+               return;
+       }
+
+       tmp = apicid_to_cpu_present(apicid);
+       physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
+       
+       /*
+        * Validate version
+        */
+       if (ver == 0x0) {
+               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+               ver = 0x10;
+       }
+       apic_version[m->mpc_apicid] = ver;
+       bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+#else
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+       num_processors++;
+}
+#endif /* CONFIG_XEN */
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+       char str[7];
+
+       memcpy(str, m->mpc_bustype, 6);
+       str[6] = 0;
+
+       mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
+       if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+       } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+       } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+               mpc_oem_pci_bus(m, translation_table[mpc_record]);
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+               mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+               mp_current_pci_id++;
+       } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+       } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
+       } else {
+               printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+       }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+       if (!(m->mpc_flags & MPC_APIC_USABLE))
+               return;
+
+       printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+               m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
+                       MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+       }
+       if (!m->mpc_apicaddr) {
+               printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+                       " found in MP table, skipping!\n");
+               return;
+       }
+       mp_ioapics[nr_ioapics] = *m;
+       nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+       mp_irqs [mp_irq_entries] = *m;
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+                       m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+       Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+                       m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+       /*
+        * Well it seems all SMP boards in existence
+        * use ExtINT/LVT1 == LINT0 and
+        * NMI/LVT2 == LINT1 - the following check
+        * will show us if this assumption is false.
+        * Until then we do not have to add baggage.
+        */
+       if ((m->mpc_irqtype == mp_ExtINT) &&
+               (m->mpc_destapiclint != 0))
+                       BUG();
+       if ((m->mpc_irqtype == mp_NMI) &&
+               (m->mpc_destapiclint != 1))
+                       BUG();
+}
+
+#ifdef CONFIG_X86_NUMAQ
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+       printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+       if (mpc_record >= MAX_MPC_ENTRY) 
+               printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+       else
+               translation_table[mpc_record] = m; /* stash this for later */
+       if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+               node_set_online(m->trans_quad);
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+       unsigned short oemsize)
+{
+       int count = sizeof (*oemtable); /* the header size */
+       unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+       
+       mpc_record = 0;
+       printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+       if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+       {
+               printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+                       oemtable->oem_signature[0],
+                       oemtable->oem_signature[1],
+                       oemtable->oem_signature[2],
+                       oemtable->oem_signature[3]);
+               return;
+       }
+       if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+       {
+               printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+               return;
+       }
+       while (count < oemtable->oem_length) {
+               switch (*oemptr) {
+                       case MP_TRANSLATION:
+                       {
+                               struct mpc_config_translation *m=
+                                       (struct mpc_config_translation *)oemptr;
+                               MP_translation_info(m);
+                               oemptr += sizeof(*m);
+                               count += sizeof(*m);
+                               ++mpc_record;
+                               break;
+                       }
+                       default:
+                       {
+                               printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+                               return;
+                       }
+               }
+       }
+}
+
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+               char *productid)
+{
+       if (strncmp(oem, "IBM NUMA", 8))
+               printk("Warning!  May not be a NUMA-Q system!\n");
+       if (mpc->mpc_oemptr)
+               smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+                               mpc->mpc_oemsize);
+}
+#endif /* CONFIG_X86_NUMAQ */
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+       char str[16];
+       char oem[10];
+       int count=sizeof(*mpc);
+       unsigned char *mpt=((unsigned char *)mpc)+count;
+
+       if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+               printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+                       *(u32 *)mpc->mpc_signature);
+               return 0;
+       }
+       if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+               printk(KERN_ERR "SMP mptable: checksum error!\n");
+               return 0;
+       }
+       if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+               printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+                       mpc->mpc_spec);
+               return 0;
+       }
+       if (!mpc->mpc_lapic) {
+               printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+               return 0;
+       }
+       memcpy(oem,mpc->mpc_oem,8);
+       oem[8]=0;
+       printk(KERN_INFO "OEM ID: %s ",oem);
+
+       memcpy(str,mpc->mpc_productid,12);
+       str[12]=0;
+       printk("Product ID: %s ",str);
+
+       mps_oem_check(mpc, oem, str);
+
+       printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+       /* 
+        * Save the local APIC address (it might be non-default) -- but only
+        * if we're not using ACPI.
+        */
+       if (!acpi_lapic)
+               mp_lapic_addr = mpc->mpc_lapic;
+
+       /*
+        *      Now process the configuration blocks.
+        */
+       mpc_record = 0;
+       while (count < mpc->mpc_length) {
+               switch(*mpt) {
+                       case MP_PROCESSOR:
+                       {
+                               struct mpc_config_processor *m=
+                                       (struct mpc_config_processor *)mpt;
+                               /* ACPI may have already provided this data */
+                               if (!acpi_lapic)
+                                       MP_processor_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_BUS:
+                       {
+                               struct mpc_config_bus *m=
+                                       (struct mpc_config_bus *)mpt;
+                               MP_bus_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_IOAPIC:
+                       {
+                               struct mpc_config_ioapic *m=
+                                       (struct mpc_config_ioapic *)mpt;
+                               MP_ioapic_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_INTSRC:
+                       {
+                               struct mpc_config_intsrc *m=
+                                       (struct mpc_config_intsrc *)mpt;
+
+                               MP_intsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_LINTSRC:
+                       {
+                               struct mpc_config_lintsrc *m=
+                                       (struct mpc_config_lintsrc *)mpt;
+                               MP_lintsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       default:
+                       {
+                               count = mpc->mpc_length;
+                               break;
+                       }
+               }
+               ++mpc_record;
+       }
+       clustered_apic_check();
+       if (!num_processors)
+               printk(KERN_ERR "SMP mptable: no processors registered!\n");
+       return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+       unsigned int port;
+
+       port = 0x4d0 + (irq >> 3);
+       return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+       struct mpc_config_intsrc intsrc;
+       int i;
+       int ELCR_fallback = 0;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                 /* conforming */
+       intsrc.mpc_srcbus = 0;
+       intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+       intsrc.mpc_irqtype = mp_INT;
+
+       /*
+        *  If true, we have an ISA/PCI system with no IRQ entries
+        *  in the MP table. To prevent the PCI interrupts from being set up
+        *  incorrectly, we try to use the ELCR. The sanity check to see if
+        *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+        *  never be level sensitive, so we simply see if the ELCR agrees.
+        *  If it does, we assume it's valid.
+        */
+       if (mpc_default_type == 5) {
+               printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+               if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+                       printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+               else {
+                       printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+                       ELCR_fallback = 1;
+               }
+       }
+
+       for (i = 0; i < 16; i++) {
+               switch (mpc_default_type) {
+               case 2:
+                       if (i == 0 || i == 13)
+                               continue;       /* IRQ0 & IRQ13 not connected */
+                       /* fall through */
+               default:
+                       if (i == 2)
+                               continue;       /* IRQ2 is never connected */
+               }
+
+               if (ELCR_fallback) {
+                       /*
+                        *  If the ELCR indicates a level-sensitive interrupt, we
+                        *  copy that information over to the MP table in the
+                        *  irqflag field (level sensitive, active high polarity).
+                        */
+                       if (ELCR_trigger(i))
+                               intsrc.mpc_irqflag = 13;
+                       else
+                               intsrc.mpc_irqflag = 0;
+               }
+
+               intsrc.mpc_srcbusirq = i;
+               intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
+               MP_intsrc_info(&intsrc);
+       }
+
+       intsrc.mpc_irqtype = mp_ExtINT;
+       intsrc.mpc_srcbusirq = 0;
+       intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
+       MP_intsrc_info(&intsrc);
+}
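
A worked example of the ELCR lookup that ELCR_trigger() performs above: the
two ELCR registers sit at I/O ports 0x4d0 (IRQs 0-7) and 0x4d1 (IRQs 8-15),
one bit per IRQ, set when the line is level-triggered. A small helper
restating the arithmetic (illustrative, not part of the patch):

    static unsigned int elcr_bit(unsigned int irq, unsigned int *port)
    {
        *port = 0x4d0 + (irq >> 3);  /* IRQ 10 -> port 0x4d1 */
        return irq & 7;              /* IRQ 10 -> bit 2      */
    }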
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+       struct mpc_config_processor processor;
+       struct mpc_config_bus bus;
+       struct mpc_config_ioapic ioapic;
+       struct mpc_config_lintsrc lintsrc;
+       int linttypes[2] = { mp_ExtINT, mp_NMI };
+       int i;
+
+       /*
+        * local APIC has default address
+        */
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /*
+        * 2 CPUs, numbered 0 & 1.
+        */
+       processor.mpc_type = MP_PROCESSOR;
+       /* Either an integrated APIC or a discrete 82489DX. */
+       processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       processor.mpc_cpuflag = CPU_ENABLED;
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+                                  (boot_cpu_data.x86_model << 4) |
+                                  boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+       for (i = 0; i < 2; i++) {
+               processor.mpc_apicid = i;
+               MP_processor_info(&processor);
+       }
+
+       bus.mpc_type = MP_BUS;
+       bus.mpc_busid = 0;
+       switch (mpc_default_type) {
+               default:
+                       printk("???\n");
+                       printk(KERN_ERR "Unknown standard configuration %d\n",
+                               mpc_default_type);
+                       /* fall through */
+               case 1:
+               case 5:
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       break;
+               case 2:
+               case 6:
+               case 3:
+                       memcpy(bus.mpc_bustype, "EISA  ", 6);
+                       break;
+               case 4:
+               case 7:
+                       memcpy(bus.mpc_bustype, "MCA   ", 6);
+       }
+       MP_bus_info(&bus);
+       if (mpc_default_type > 4) {
+               bus.mpc_busid = 1;
+               memcpy(bus.mpc_bustype, "PCI   ", 6);
+               MP_bus_info(&bus);
+       }
+
+       ioapic.mpc_type = MP_IOAPIC;
+       ioapic.mpc_apicid = 2;
+       ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       ioapic.mpc_flags = MPC_APIC_USABLE;
+       ioapic.mpc_apicaddr = 0xFEC00000;
+       MP_ioapic_info(&ioapic);
+
+       /*
+        * We set up most of the low 16 IO-APIC pins according to MPS rules.
+        */
+       construct_default_ioirq_mptable(mpc_default_type);
+
+       lintsrc.mpc_type = MP_LINTSRC;
+       lintsrc.mpc_irqflag = 0;                /* conforming */
+       lintsrc.mpc_srcbusid = 0;
+       lintsrc.mpc_srcbusirq = 0;
+       lintsrc.mpc_destapic = MP_APIC_ALL;
+       for (i = 0; i < 2; i++) {
+               lintsrc.mpc_irqtype = linttypes[i];
+               lintsrc.mpc_destapiclint = i;
+               MP_lintsrc_info(&lintsrc);
+       }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+       struct intel_mp_floating *mpf = mpf_found;
+
+       /*
+        * ACPI may be used to obtain the entire SMP configuration or just to 
+        * enumerate/configure processors (CONFIG_ACPI_BOOT).  Note that 
+        * ACPI supports both logical (e.g. Hyper-Threading) and physical 
+        * processors, where MPS only supports physical.
+        */
+       if (acpi_lapic && acpi_ioapic) {
+               printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+               return;
+       }
+       else if (acpi_lapic)
+               printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+       printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+       if (mpf->mpf_feature2 & (1<<7)) {
+               printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
+               pic_mode = 1;
+       } else {
+               printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
+               pic_mode = 0;
+       }
+
+       /*
+        * Now see if we need to read further.
+        */
+       if (mpf->mpf_feature1 != 0) {
+
+               printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+               construct_default_ISA_mptable(mpf->mpf_feature1);
+
+       } else if (mpf->mpf_physptr) {
+
+               /*
+                * Read the physical hardware table.  Anything here will
+                * override the defaults.
+                */
+               if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+                       smp_found_config = 0;
+                       printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+                       printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+                       return;
+               }
+               /*
+                * If there are no explicit MP IRQ entries, then we are
+                * broken.  We set up most of the low 16 IO-APIC pins to
+                * ISA defaults and hope it will work.
+                */
+               if (!mp_irq_entries) {
+                       struct mpc_config_bus bus;
+
+                       printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+                       bus.mpc_type = MP_BUS;
+                       bus.mpc_busid = 0;
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       MP_bus_info(&bus);
+
+                       construct_default_ioirq_mptable(0);
+               }
+
+       } else
+               BUG();
+
+       printk(KERN_INFO "Processors: %d\n", num_processors);
+       /*
+        * Only use the first configuration found.
+        */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+       unsigned long *bp = isa_bus_to_virt(base);
+       struct intel_mp_floating *mpf;
+
+       Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+       if (sizeof(*mpf) != 16)
+               printk("Error: MPF size\n");
+
+       while (length > 0) {
+               mpf = (struct intel_mp_floating *)bp;
+               if ((*bp == SMP_MAGIC_IDENT) &&
+                       (mpf->mpf_length == 1) &&
+                       !mpf_checksum((unsigned char *)bp, 16) &&
+                       ((mpf->mpf_specification == 1)
+                               || (mpf->mpf_specification == 4)) ) {
+
+                       smp_found_config = 1;
+                       printk(KERN_INFO "found SMP MP-table at %08lx\n",
+                                               virt_to_phys(mpf));
+                       if (mpf->mpf_physptr) {
+                               /*
+                                * We cannot access the MPC table to compute
+                                * its size yet, as only a few megabytes from
+                                * the bottom are mapped at this point.
+                                * The PC-9800 places its MPC table at the
+                                * very end of physical memory, so blindly
+                                * reserving PAGE_SIZE from mpf->mpf_physptr
+                                * would trigger BUG() in reserve_bootmem.
+                                */
+                               unsigned long size = PAGE_SIZE;
+                               unsigned long end = max_low_pfn * PAGE_SIZE;
+                               if (mpf->mpf_physptr + size > end)
+                                       size = end - mpf->mpf_physptr;
+                               reserve_bootmem(mpf->mpf_physptr, size);
+                       }
+
+                       mpf_found = mpf;
+                       return 1;
+               }
+               bp += 4;
+               length -= 16;
+       }
+       return 0;
+}
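
Note the stride in the scan loop above: bp is an unsigned long *, so on
i386 "bp += 4" advances exactly 16 bytes, matching the MP floating pointer
structure's mandated 16-byte size and alignment. A width-independent form
of the same walk would be (sketch only):

    unsigned char *p = isa_bus_to_virt(base);

    while (length > 0) {
        struct intel_mp_floating *mpf = (struct intel_mp_floating *)p;
        /* ...same signature, length, checksum and spec-revision tests... */
        p      += 16;   /* structures are 16-byte sized and aligned */
        length -= 16;
    }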
+
+void __init find_smp_config (void)
+{
+       unsigned int address;
+
+       /*
+        * FIXME: Linux assumes you have 640K of base ram..
+        * this continues the error...
+        *
+        * 1) Scan the bottom 1K for a signature
+        * 2) Scan the top 1K of base RAM
+        * 3) Scan the 64K of bios
+        */
+       if (smp_scan_config(0x0,0x400) ||
+               smp_scan_config(639*0x400,0x400) ||
+                       smp_scan_config(0xF0000,0x10000))
+               return;
+       /*
+        * If it is an SMP machine we should know now, unless the
+        * configuration is in an EISA/MCA bus machine with an
+        * extended bios data area.
+        *
+        * there is a real-mode segmented pointer pointing to the
+        * 4K EBDA area at 0x40E, calculate and scan it here.
+        *
+        * NOTE! There are Linux loaders that will corrupt the EBDA
+        * area, and as such this kind of SMP config may be less
+        * trustworthy, simply because the SMP table may have been
+        * stomped on during early boot. These loaders are buggy and
+        * should be fixed.
+        *
+        * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+        */
+
+       address = get_bios_ebda();
+       if (address)
+               smp_scan_config(address, 0x400);
+}
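
get_bios_ebda() (from bios_ebda.h) resolves the real-mode segmented pointer
the comment describes: the 16-bit segment value stored at physical 0x40E,
shifted left by 4 bits to form a linear address. Its effect is equivalent
to this sketch (assuming the standard 2.6.11 helper definitions):

    unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
    address <<= 4;   /* e.g. segment 0x9FC0 -> EBDA at 0x9FC00 */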
+
+/* --------------------------------------------------------------------------
+                            ACPI-based MP Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+void __init mp_register_lapic_address (
+       u64                     address)
+{
+#ifndef CONFIG_XEN
+       mp_lapic_addr = (unsigned long) address;
+
+       if (boot_cpu_physical_apicid == -1U)
+               boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+       Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
+}
+
+
+void __init mp_register_lapic (
+       u8                      id, 
+       u8                      enabled)
+{
+       struct mpc_config_processor processor;
+       int                     boot_cpu = 0;
+       
+       if (MAX_APICS - id <= 0) {
+               printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+                       id, MAX_APICS);
+               return;
+       }
+
+       if (id == boot_cpu_physical_apicid)
+               boot_cpu = 1;
+
+#ifndef CONFIG_XEN
+       processor.mpc_type = MP_PROCESSOR;
+       processor.mpc_apicid = id;
+       processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+       processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+       processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
+               (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+#endif
+
+       MP_processor_info(&processor);
+}
+
+#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
+
+#define MP_ISA_BUS             0
+#define MP_MAX_IOAPIC_PIN      127
+
+struct mp_ioapic_routing {
+       int                     apic_id;
+       int                     gsi_base;
+       int                     gsi_end;
+       u32                     pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int mp_find_ioapic (
+       int                     gsi)
+{
+       int                     i = 0;
+
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+               if ((gsi >= mp_ioapic_routing[i].gsi_base)
+                       && (gsi <= mp_ioapic_routing[i].gsi_end))
+                       return i;
+       }
+
+       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+       return -1;
+}
+       
+
+void __init mp_register_ioapic (
+       u8                      id, 
+       u32                     address,
+       u32                     gsi_base)
+{
+       int                     idx = 0;
+
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+                       "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+       }
+       if (!address) {
+               printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+                       " found in MADT table, skipping!\n");
+               return;
+       }
+
+       idx = nr_ioapics++;
+
+       mp_ioapics[idx].mpc_type = MP_IOAPIC;
+       mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+       mp_ioapics[idx].mpc_apicaddr = address;
+
+       mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+       mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+       
+       /* 
+        * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+        * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+        */
+       mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+       mp_ioapic_routing[idx].gsi_base = gsi_base;
+       mp_ioapic_routing[idx].gsi_end = gsi_base + 
+               io_apic_get_redir_entries(idx);
+
+       printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+               "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
+               mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+               mp_ioapic_routing[idx].gsi_base,
+               mp_ioapic_routing[idx].gsi_end);
+
+       return;
+}
+
+
+void __init mp_override_legacy_irq (
+       u8                      bus_irq,
+       u8                      polarity, 
+       u8                      trigger, 
+       u32                     gsi)
+{
+       struct mpc_config_intsrc intsrc;
+       int                     ioapic = -1;
+       int                     pin = -1;
+
+       /* 
+        * Convert 'gsi' to 'ioapic.pin'.
+        */
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0)
+               return;
+       pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       /*
+        * TBD: This check is for faulty timer entries, where the override
+        *      erroneously sets the trigger to level, resulting in a HUGE 
+        *      increase of timer interrupts!
+        */
+       if ((bus_irq == 0) && (trigger == 3))
+               trigger = 1;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqtype = mp_INT;
+       intsrc.mpc_irqflag = (trigger << 2) | polarity;
+       intsrc.mpc_srcbus = MP_ISA_BUS;
+       intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
+       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
+       intsrc.mpc_dstirq = pin;                                    /* INTIN# */
+
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+               intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+               (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+               intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+       mp_irqs[mp_irq_entries] = intsrc;
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!\n");
+
+       return;
+}
+
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+       struct mpc_config_intsrc intsrc;
+       int                     i = 0;
+       int                     ioapic = -1;
+
+       /* 
+        * Fabricate the legacy ISA bus (bus #31).
+        */
+       mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+       Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+       /*
+        * ES7000 has no legacy identity mappings
+        */
+       if (es7000_plat)
+               return;
+
+       /* 
+        * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+        */
+       ioapic = mp_find_ioapic(0);
+       if (ioapic < 0)
+               return;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                                 /* Conforming */
+       intsrc.mpc_srcbus = MP_ISA_BUS;
+       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+       /* 
+        * Use the default configuration for IRQs 0-15, unless
+        * overridden by (MADT) interrupt source override entries.
+        */
+       for (i = 0; i < 16; i++) {
+               int idx;
+
+               for (idx = 0; idx < mp_irq_entries; idx++) {
+                       struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+                       /* Do we already have a mapping for this ISA IRQ? */
+                       if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+                               break;
+
+                       /* Do we already have a mapping for this IOAPIC pin */
+                       if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+                               (irq->mpc_dstirq == i))
+                               break;
+               }
+
+               if (idx != mp_irq_entries) {
+                       printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+                       continue;                       /* IRQ already used */
+               }
+
+               intsrc.mpc_irqtype = mp_INT;
+               intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
+               intsrc.mpc_dstirq = i;
+
+               Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+                       "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+                       (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+                       intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+                       intsrc.mpc_dstirq);
+
+               mp_irqs[mp_irq_entries] = intsrc;
+               if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                       panic("Max # of irq sources exceeded!\n");
+       }
+}
+
+int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
+{
+       int                     ioapic = -1;
+       int                     ioapic_pin = 0;
+       int                     idx, bit = 0;
+
+#ifdef CONFIG_ACPI_BUS
+       /* Don't set up the ACPI SCI because it's already set up */
+       if (acpi_fadt.sci_int == gsi)
+               return gsi;
+#endif
+
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0) {
+               printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+               return gsi;
+       }
+
+       ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       if (ioapic_renumber_irq)
+               gsi = ioapic_renumber_irq(ioapic, gsi);
+
+       /* 
+        * Avoid pin reprogramming.  PRTs typically include entries  
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       bit = ioapic_pin % 32;
+       idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+       if (idx > 3) {
+               printk(KERN_ERR "Invalid reference to IOAPIC pin "
+                       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+                       ioapic_pin);
+               return gsi;
+       }
+       if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+               return gsi;
+       }
+
+       mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+                   edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
+                   active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
+       return gsi;
+}
+
+#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/
+#endif /*CONFIG_ACPI_BOOT*/
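
For readers tracing the GSI bookkeeping in mp_register_gsi() above: it converts a
GSI to an (ioapic, pin) pair and then uses a 4 x 32-bit bitmap per IOAPIC so each
pin is programmed only once. A standalone sketch of that arithmetic, with made-up
example numbers (gsi=40, gsi_base=24), not values taken from real firmware:

    #include <stdio.h>

    int main(void)
    {
            unsigned int gsi = 40, gsi_base = 24;   /* assumed example values */
            unsigned int pin = gsi - gsi_base;      /* pin 16 on this IOAPIC */
            unsigned int idx = pin / 32, bit = pin % 32;
            unsigned int pin_programmed[4] = { 0 };

            if (!(pin_programmed[idx] & (1u << bit))) {
                    pin_programmed[idx] |= (1u << bit);  /* remember: pin done */
                    printf("programmed pin %u (word %u, bit %u)\n", pin, idx, bit);
            }
            return 0;
    }
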
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/config.in
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/config.in        Fri Jul 15 13:39:50 2005
@@ -0,0 +1,337 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/config-language.txt.
+#
+mainmenu_name "Linux Kernel Configuration"
+
+define_bool CONFIG_XEN y
+
+define_bool CONFIG_X86 y
+define_bool CONFIG_ISA y
+define_bool CONFIG_SBUS n
+
+define_bool CONFIG_UID16 y
+
+mainmenu_option next_comment
+comment 'Xen'
+bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
+bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+    bool 'USB-device backend driver' CONFIG_XEN_USB_BACKEND
+fi
+bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
+bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
+bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
+bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
+bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
+endmenu
+# The IBM S/390 patch needs this.
+define_bool CONFIG_NO_IDLE_HZ y
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   define_bool CONFIG_FOREIGN_PAGES y
+else
+   define_bool CONFIG_FOREIGN_PAGES n
+   define_bool CONFIG_NETDEVICES y
+   define_bool CONFIG_VT n
+fi
+
+mainmenu_option next_comment
+comment 'Code maturity level options'
+bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL
+endmenu
+
+mainmenu_option next_comment
+comment 'Loadable module support'
+bool 'Enable loadable module support' CONFIG_MODULES
+if [ "$CONFIG_MODULES" = "y" ]; then
+   bool '  Set version information on all module symbols' CONFIG_MODVERSIONS
+   bool '  Kernel module loader' CONFIG_KMOD
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'Processor type and features'
+choice 'Processor family' \
+       "Pentium-Pro/Celeron/Pentium-II         CONFIG_M686 \
+        Pentium-III/Celeron(Coppermine)        CONFIG_MPENTIUMIII \
+        Pentium-4                              CONFIG_MPENTIUM4 \
+        Athlon/Duron/K7                        CONFIG_MK7 \
+        Opteron/Athlon64/Hammer/K8             CONFIG_MK8 \
+        VIA-C3-2                               CONFIG_MVIAC3_2" Pentium-Pro
+
+   define_bool CONFIG_X86_WP_WORKS_OK y
+   define_bool CONFIG_X86_INVLPG y
+   define_bool CONFIG_X86_CMPXCHG y
+   define_bool CONFIG_X86_XADD y
+   define_bool CONFIG_X86_BSWAP y
+   define_bool CONFIG_X86_POPAD_OK y
+   define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+   define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+
+   define_bool CONFIG_X86_GOOD_APIC y
+   define_bool CONFIG_X86_PGE y
+   define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+   define_bool CONFIG_X86_TSC y
+
+if [ "$CONFIG_M686" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+if [ "$CONFIG_MPENTIUMIII" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+if [ "$CONFIG_MPENTIUM4" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 7
+fi
+if [ "$CONFIG_MK8" = "y" ]; then
+   define_bool CONFIG_MK7 y
+fi
+if [ "$CONFIG_MK7" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 6
+   define_bool CONFIG_X86_USE_3DNOW y
+fi
+if [ "$CONFIG_MVIAC3_2" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+
+#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+#   tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD
+#fi
+
+choice 'High Memory Support' \
+       "off    CONFIG_NOHIGHMEM \
+        4GB    CONFIG_HIGHMEM4G" off
+#       64GB   CONFIG_HIGHMEM64G" off
+if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+fi
+if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+   define_bool CONFIG_X86_PAE y
+fi
+
+if [ "$CONFIG_HIGHMEM" = "y" ]; then
+   bool 'HIGHMEM I/O support' CONFIG_HIGHIO
+fi
+
+define_int CONFIG_FORCE_MAX_ZONEORDER 11
+
+#bool 'Symmetric multi-processing support' CONFIG_SMP
+#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+#   define_bool CONFIG_HAVE_DEC_LOCK y
+#fi
+endmenu
+
+mainmenu_option next_comment
+comment 'General setup'
+
+bool 'Networking support' CONFIG_NET
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   bool 'PCI support' CONFIG_PCI
+   source drivers/pci/Config.in
+
+   bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
+
+   if [ "$CONFIG_HOTPLUG" = "y" ] ; then
+      source drivers/pcmcia/Config.in
+      source drivers/hotplug/Config.in
+   else
+      define_bool CONFIG_PCMCIA n
+      define_bool CONFIG_HOTPLUG_PCI n
+   fi
+fi
+
+bool 'System V IPC' CONFIG_SYSVIPC
+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
+bool 'Sysctl support' CONFIG_SYSCTL
+if [ "$CONFIG_PROC_FS" = "y" ]; then
+   choice 'Kernel core (/proc/kcore) format' \
+       "ELF            CONFIG_KCORE_ELF        \
+        A.OUT          CONFIG_KCORE_AOUT" ELF
+fi
+tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
+bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER
+
+endmenu
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   source drivers/mtd/Config.in
+
+   source drivers/parport/Config.in
+
+   source drivers/pnp/Config.in
+
+   source drivers/block/Config.in
+
+   source drivers/md/Config.in
+fi
+
+if [ "$CONFIG_NET" = "y" ]; then
+   source net/Config.in
+fi
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'ATA/IDE/MFM/RLL support'
+
+   tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE
+
+   if [ "$CONFIG_IDE" != "n" ]; then
+      source drivers/ide/Config.in
+   else
+      define_bool CONFIG_BLK_DEV_HD n
+   fi
+   endmenu
+fi
+
+mainmenu_option next_comment
+comment 'SCSI support'
+
+tristate 'SCSI support' CONFIG_SCSI
+
+if [ "$CONFIG_SCSI" != "n" ]; then
+   source drivers/scsi/Config.in
+fi
+endmenu
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   source drivers/message/fusion/Config.in
+
+   source drivers/ieee1394/Config.in
+
+   source drivers/message/i2o/Config.in
+
+   if [ "$CONFIG_NET" = "y" ]; then
+      mainmenu_option next_comment
+      comment 'Network device support'
+
+      bool 'Network device support' CONFIG_NETDEVICES
+      if [ "$CONFIG_NETDEVICES" = "y" ]; then
+         source drivers/net/Config.in
+         if [ "$CONFIG_ATM" = "y" -o "$CONFIG_ATM" = "m" ]; then
+            source drivers/atm/Config.in
+         fi
+      fi
+      endmenu
+   fi
+
+   source net/ax25/Config.in
+
+   source net/irda/Config.in
+
+   mainmenu_option next_comment
+   comment 'ISDN subsystem'
+   if [ "$CONFIG_NET" != "n" ]; then
+      tristate 'ISDN support' CONFIG_ISDN
+      if [ "$CONFIG_ISDN" != "n" ]; then
+         source drivers/isdn/Config.in
+      fi
+   fi
+   endmenu
+
+   if [ "$CONFIG_ISA" = "y" ]; then
+       mainmenu_option next_comment
+       comment 'Old CD-ROM drivers (not SCSI, not IDE)'
+   
+       bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI
+       if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then
+          source drivers/cdrom/Config.in
+       fi
+       endmenu
+   fi
+
+   #
+   # input before char - char/joystick depends on it. As does USB.
+   #
+   source drivers/input/Config.in
+else
+   #
+   # Block device driver configuration
+   #
+   mainmenu_option next_comment
+   comment 'Block devices'
+   tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
+   dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
+   tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
+   if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
+      int '  Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
+   fi
+   dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
+   bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
+   define_bool CONFIG_BLK_DEV_HD n
+   endmenu
+fi
+
+source drivers/char/Config.in
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; 
then
+   source drivers/media/Config.in
+fi
+
+source fs/Config.in
+
+mainmenu_option next_comment
+comment 'Console drivers'
+
+define_bool CONFIG_XEN_CONSOLE y
+
+if [ "$CONFIG_VT" = "y" ]; then
+   bool 'VGA text console' CONFIG_VGA_CONSOLE
+   bool 'Dummy console' CONFIG_DUMMY_CONSOLE 
+   if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+      bool 'Video mode selection support' CONFIG_VIDEO_SELECT
+      if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+         tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE
+         source drivers/video/Config.in
+      fi
+   fi
+fi
+endmenu
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'Sound'
+
+   tristate 'Sound card support' CONFIG_SOUND
+   if [ "$CONFIG_SOUND" != "n" ]; then
+      source drivers/sound/Config.in
+   fi
+   endmenu
+fi
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; 
then
+   if [ "$CONFIG_XEN_USB_FRONTEND" = "y" -o "$CONFIG_XEN_USB_BACKEND" = "y" ]; 
then
+       define_bool CONFIG_USB y
+   fi
+   source drivers/usb/Config.in
+fi
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   source net/bluetooth/Config.in
+fi
+
+mainmenu_option next_comment
+comment 'Kernel hacking'
+
+bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
+if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
+   bool '  Check for stack overflows' CONFIG_DEBUG_STACKOVERFLOW
+   bool '  Debug high memory support' CONFIG_DEBUG_HIGHMEM
+   bool '  Debug memory allocations' CONFIG_DEBUG_SLAB
+   bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
+   bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
+   bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+   bool '  Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+   bool '  Load all symbols for debugging' CONFIG_KALLSYMS
+   bool '  Compile the kernel with frame pointers' CONFIG_FRAME_POINTER
+fi
+
+int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0
+
+endmenu
+
+source crypto/Config.in
+source lib/Config.in
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/kernel/devmem.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/devmem.c  Fri Jul 15 13:39:50 2005
@@ -0,0 +1,158 @@
+/*
+ *  Originally from linux/drivers/char/mem.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Added devfs support. 
+ *    Jan-11-1998, C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>
+ *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@xxxxxxx>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/raw.h>
+#include <linux/tty.h>
+#include <linux/capability.h>
+#include <linux/smp_lock.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/ptrace.h>
+#include <linux/device.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+static inline int uncached_access(struct file *file, unsigned long addr)
+{
+        if (file->f_flags & O_SYNC)
+                return 1;
+        /* Xen sets correct MTRR type on non-RAM for us. */
+        return 0;
+}
+
+/*
+ * This function reads the *physical* memory. The f_pos points directly to the
+ * memory location. 
+ */
+static ssize_t read_mem(struct file * file, char __user * buf,
+                       size_t count, loff_t *ppos)
+{
+       unsigned long i, p = *ppos;
+       ssize_t read = -EFAULT;
+       void *v;
+
+       if ((v = ioremap(p, count)) == NULL) {
+               /*
+                * Some programs (e.g., dmidecode) groove off into weird RAM
+                * areas where no tables can possibly exist (because Xen will
+                * have stomped on them!). These programs get rather upset if
+                * we let them know that Xen failed their access, so we fake
+                * out a read of all zeroes. :-)
+                */
+               for (i = 0; i < count; i++)
+                       if (put_user(0, buf+i))
+                               return -EFAULT;
+               return count;
+       }
+       if (copy_to_user(buf, v, count))
+               goto out;
+
+       read = count;
+       *ppos += read;
+out:
+       iounmap(v);
+       return read;
+}
+
+static ssize_t write_mem(struct file * file, const char __user * buf, 
+                        size_t count, loff_t *ppos)
+{
+       unsigned long p = *ppos;
+       ssize_t written = -EFAULT;
+       void *v;
+
+       if ((v = ioremap(p, count)) == NULL)
+               return -EFAULT;
+       if (copy_from_user(v, buf, count))
+               goto out;
+
+       written = count;
+       *ppos += written;
+out:
+       iounmap(v);
+       return written;
+}
+
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+       int uncached;
+
+       uncached = uncached_access(file, offset);
+       if (uncached)
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       /* Don't try to swap out physical pages.. */
+       vma->vm_flags |= VM_RESERVED;
+
+       /*
+        * Don't dump addresses that are not real memory to a core file.
+        */
+       if (uncached)
+               vma->vm_flags |= VM_IO;
+
+       if (io_remap_page_range(vma, vma->vm_start, offset, 
+                               vma->vm_end-vma->vm_start, vma->vm_page_prot))
+               return -EAGAIN;
+
+       return 0;
+}
+
+/*
+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
+ * check against negative addresses: they are ok. The return value is weird,
+ * though, in that case (0).
+ *
+ * Also note that seeking relative to the "end of file" isn't supported:
+ * it has no meaning, so it returns -EINVAL.
+ */
+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+{
+       loff_t ret;
+
+       down(&file->f_dentry->d_inode->i_sem);
+       switch (orig) {
+               case 0:
+                       file->f_pos = offset;
+                       ret = file->f_pos;
+                       force_successful_syscall_return();
+                       break;
+               case 1:
+                       file->f_pos += offset;
+                       ret = file->f_pos;
+                       force_successful_syscall_return();
+                       break;
+               default:
+                       ret = -EINVAL;
+       }
+       up(&file->f_dentry->d_inode->i_sem);
+       return ret;
+}
+
+static int open_mem(struct inode * inode, struct file * filp)
+{
+       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+struct file_operations mem_fops = {
+       .llseek         = memory_lseek,
+       .read           = read_mem,
+       .write          = write_mem,
+       .mmap           = mmap_mem,
+       .open           = open_mem,
+};
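
A minimal user-space sketch of exercising the read path above (hypothetical usage,
not part of the patch; it requires the CAP_SYS_RAWIO check in open_mem() to pass,
and 0xA0000 is just an example physical address in the legacy VGA window):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[4096];
            int fd = open("/dev/mem", O_RDONLY);

            if (fd < 0) { perror("open"); return 1; }
            /* Seek to an example physical address, then read one page. */
            if (lseek(fd, 0xA0000, SEEK_SET) < 0 ||
                read(fd, buf, sizeof buf) != (ssize_t)sizeof buf)
                    perror("read/lseek");
            close(fd);
            return 0;
    }
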
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,177 @@
+#ifndef _ASM_I386_DMA_MAPPING_H
+#define _ASM_I386_DMA_MAPPING_H
+
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/scatterlist.h>
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                          dma_addr_t *dma_handle, int flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+                        void *vaddr, dma_addr_t dma_handle);
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+              enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       flush_write_buffers();
+       return virt_to_bus(ptr);
+}
+
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+                enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+}
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+          enum dma_data_direction direction)
+{
+       int i;
+
+       BUG_ON(direction == DMA_NONE);
+
+       for (i = 0; i < nents; i++ ) {
+               BUG_ON(!sg[i].page);
+
+               sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+       }
+
+       flush_write_buffers();
+       return nents;
+}
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+            size_t size, enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       return page_to_phys(page) + offset;
+}
+
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+              enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+}
+
+
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+            enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+}
+
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+                       enum dma_data_direction direction)
+{
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+                       enum dma_data_direction direction)
+{
+       flush_write_buffers();
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+                             unsigned long offset, size_t size,
+                             enum dma_data_direction direction)
+{
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+                                unsigned long offset, size_t size,
+                                enum dma_data_direction direction)
+{
+       flush_write_buffers();
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+                   enum dma_data_direction direction)
+{
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+                   enum dma_data_direction direction)
+{
+       flush_write_buffers();
+}
+
+static inline int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+       return 0;
+}
+
+static inline int
+dma_supported(struct device *dev, u64 mask)
+{
+        /*
+         * we fall back to GFP_DMA when the mask isn't all 1s,
+         * so we can't guarantee allocations that must be
+         * within a tighter range than GFP_DMA..
+         */
+        if(mask < 0x00ffffff)
+                return 0;
+
+       return 1;
+}
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+       if(!dev->dma_mask || !dma_supported(dev, mask))
+               return -EIO;
+
+       *dev->dma_mask = mask;
+
+       return 0;
+}
+
+static inline int
+dma_get_cache_alignment(void)
+{
+       /* no easy way to get cache size on all x86, so return the
+        * maximum possible, to be safe */
+       return (1 << L1_CACHE_SHIFT_MAX);
+}
+
+#define dma_is_consistent(d)   (1)
+
+static inline void
+dma_cache_sync(void *vaddr, size_t size,
+              enum dma_data_direction direction)
+{
+       flush_write_buffers();
+}
+
+#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
+extern int
+dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+                           dma_addr_t device_addr, size_t size, int flags);
+
+extern void
+dma_release_declared_memory(struct device *dev);
+
+extern void *
+dma_mark_declared_memory_occupied(struct device *dev,
+                                 dma_addr_t device_addr, size_t size);
+
+#endif
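
How a driver consumes this interface, as a minimal in-kernel sketch (hypothetical:
dev, buf and len are assumed to exist in the caller). On this i386 port
dma_map_single() degenerates to virt_to_bus() plus a write-buffer flush, and
dma_mapping_error() always reports success:

    dma_addr_t handle;

    handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
    if (dma_mapping_error(handle))
            return -EIO;                /* never taken here: always returns 0 */
    /* ... program the device with 'handle' and start the transfer ... */
    dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
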
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/vga.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/vga.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,20 @@
+/*
+ *     Access to VGA videoram
+ *
+ *     (c) 1998 Martin Mares <mj@xxxxxx>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+/*
+ *     On the PC, we can just recalculate addresses and then
+ *     access the videoram directly without any black magic.
+ */
+
+#define VGA_MAP_MEM(x) (unsigned long)isa_bus_to_virt(x)
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x,y) (*(y) = (x))
+
+#endif
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/include/linux/blk.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/include/linux/blk.h       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,409 @@
+#ifndef _BLK_H
+#define _BLK_H
+
+#include <linux/blkdev.h>
+#include <linux/locks.h>
+#include <linux/config.h>
+#include <linux/spinlock.h>
+
+/*
+ * Spinlock for protecting the request queue which
+ * is mucked around with in interrupts on potentially
+ * multiple CPUs.
+ */
+extern spinlock_t io_request_lock;
+
+/*
+ * Initialization functions.
+ */
+extern int isp16_init(void);
+extern int cdu31a_init(void);
+extern int acsi_init(void);
+extern int mcd_init(void);
+extern int mcdx_init(void);
+extern int sbpcd_init(void);
+extern int aztcd_init(void);
+extern int sony535_init(void);
+extern int gscd_init(void);
+extern int cm206_init(void);
+extern int optcd_init(void);
+extern int sjcd_init(void);
+extern int cdi_init(void);
+extern int hd_init(void);
+extern int ide_init(void);
+extern int xd_init(void);
+extern int mfm_init(void);
+extern int loop_init(void);
+extern int md_init(void);
+extern int ap_init(void);
+extern int ddv_init(void);
+extern int z2_init(void);
+extern int swim3_init(void);
+extern int swimiop_init(void);
+extern int amiga_floppy_init(void);
+extern int atari_floppy_init(void);
+extern int ez_init(void);
+extern int bpcd_init(void);
+extern int ps2esdi_init(void);
+extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
+
+#if defined(CONFIG_ARCH_S390)
+extern int dasd_init(void);
+extern int xpram_init(void);
+extern int tapeblock_init(void);
+#endif /* CONFIG_ARCH_S390 */
+
+#if defined(CONFIG_XEN)
+extern int xlblk_init(void);
+#endif /* CONFIG_XEN */
+
+extern void set_device_ro(kdev_t dev,int flag);
+void add_blkdev_randomness(int major);
+
+extern int floppy_init(void);
+extern int rd_doload;          /* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt;          /* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start;     /* starting block # of image */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
+
+extern unsigned long initrd_start,initrd_end;
+extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
+void initrd_init(void);
+
+#endif
+
+                
+/*
+ * end_request() and friends. Must be called with the request queue spinlock
+ * acquired. All functions called within end_request() _must_be_ atomic.
+ *
+ * Several drivers define their own end_request and call
+ * end_that_request_first() and end_that_request_last()
+ * for parts of the original function. This prevents
+ * code duplication in drivers.
+ */
+
+static inline void blkdev_dequeue_request(struct request * req)
+{
+       list_del(&req->queue);
+}
+
+int end_that_request_first(struct request *req, int uptodate, char *name);
+void end_that_request_last(struct request *req);
+
+#if defined(MAJOR_NR) || defined(IDE_DRIVER)
+
+#undef DEVICE_ON
+#undef DEVICE_OFF
+
+/*
+ * Add entries as needed.
+ */
+
+#ifdef IDE_DRIVER
+
+#define DEVICE_NR(device)      (MINOR(device) >> PARTN_BITS)
+#define DEVICE_NAME "ide"
+
+#elif (MAJOR_NR == RAMDISK_MAJOR)
+
+/* ram disk */
+#define DEVICE_NAME "ramdisk"
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_NO_RANDOM
+
+#elif (MAJOR_NR == Z2RAM_MAJOR)
+
+/* Zorro II Ram */
+#define DEVICE_NAME "Z2RAM"
+#define DEVICE_REQUEST do_z2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == FLOPPY_MAJOR)
+
+static void floppy_off(unsigned int nr);
+
+#define DEVICE_NAME "floppy"
+#define DEVICE_INTR do_floppy
+#define DEVICE_REQUEST do_fd_request
+#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 ))
+#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device))
+
+#elif (MAJOR_NR == HD_MAJOR)
+
+/* Hard disk:  timeout is 6 seconds. */
+#define DEVICE_NAME "hard disk"
+#define DEVICE_INTR do_hd
+#define TIMEOUT_VALUE (6*HZ)
+#define DEVICE_REQUEST do_hd_request
+#define DEVICE_NR(device) (MINOR(device)>>6)
+
+#elif (SCSI_DISK_MAJOR(MAJOR_NR))
+
+#define DEVICE_NAME "scsidisk"
+#define TIMEOUT_VALUE (2*HZ)
+#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4))
+
+/* Kludge to use the same number for both char and block major numbers */
+#elif  (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER)
+
+#define DEVICE_NAME "Multiple devices driver"
+#define DEVICE_REQUEST do_md_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SCSI_TAPE_MAJOR)
+
+#define DEVICE_NAME "scsitape"
+#define DEVICE_INTR do_st  
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+
+#elif (MAJOR_NR == OSST_MAJOR)
+
+#define DEVICE_NAME "onstream" 
+#define DEVICE_INTR do_osst
+#define DEVICE_NR(device) (MINOR(device) & 0x7f) 
+#define DEVICE_ON(device) 
+#define DEVICE_OFF(device) 
+
+#elif (MAJOR_NR == SCSI_CDROM_MAJOR)
+
+#define DEVICE_NAME "CD-ROM"
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == XT_DISK_MAJOR)
+
+#define DEVICE_NAME "xt disk"
+#define DEVICE_REQUEST do_xd_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == PS2ESDI_MAJOR)
+
+#define DEVICE_NAME "PS/2 ESDI"
+#define DEVICE_REQUEST do_ps2esdi_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == CDU31A_CDROM_MAJOR)
+
+#define DEVICE_NAME "CDU31A"
+#define DEVICE_REQUEST do_cdu31a_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE))
+
+#define DEVICE_NAME "ACSI"
+#define DEVICE_INTR do_acsi
+#define DEVICE_REQUEST do_acsi_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcd */
+#define DEVICE_REQUEST do_mcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcdx */
+#define DEVICE_REQUEST do_mcdx_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #1"
+#define DEVICE_REQUEST do_sbpcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #2"
+#define DEVICE_REQUEST do_sbpcd2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #3"
+#define DEVICE_REQUEST do_sbpcd3_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #4"
+#define DEVICE_REQUEST do_sbpcd4_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == AZTECH_CDROM_MAJOR)
+
+#define DEVICE_NAME "Aztech CD-ROM"
+#define DEVICE_REQUEST do_aztcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CDU535_CDROM_MAJOR)
+
+#define DEVICE_NAME "SONY-CDU535"
+#define DEVICE_INTR do_cdu535
+#define DEVICE_REQUEST do_cdu535_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR)
+
+#define DEVICE_NAME "Goldstar R420"
+#define DEVICE_REQUEST do_gscd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CM206_CDROM_MAJOR)
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206"
+#define DEVICE_REQUEST do_cm206_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == OPTICS_CDROM_MAJOR)
+
+#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM"
+#define DEVICE_REQUEST do_optcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SANYO_CDROM_MAJOR)
+
+#define DEVICE_NAME "Sanyo H94A CD-ROM"
+#define DEVICE_REQUEST do_sjcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == APBLOCK_MAJOR)
+
+#define DEVICE_NAME "apblock"
+#define DEVICE_REQUEST ap_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DDV_MAJOR)
+
+#define DEVICE_NAME "ddv"
+#define DEVICE_REQUEST ddv_request
+#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS)
+
+#elif (MAJOR_NR == MFM_ACORN_MAJOR)
+
+#define DEVICE_NAME "mfm disk"
+#define DEVICE_INTR do_mfm
+#define DEVICE_REQUEST do_mfm_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == NBD_MAJOR)
+
+#define DEVICE_NAME "nbd"
+#define DEVICE_REQUEST do_nbd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MDISK_MAJOR)
+
+#define DEVICE_NAME "mdisk"
+#define DEVICE_REQUEST mdisk_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DASD_MAJOR)
+
+#define DEVICE_NAME "dasd"
+#define DEVICE_REQUEST do_dasd_request
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+
+#elif (MAJOR_NR == I2O_MAJOR)
+
+#define DEVICE_NAME "I2O block"
+#define DEVICE_REQUEST i2ob_request
+#define DEVICE_NR(device) (MINOR(device)>>4)
+
+#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
+
+#define DEVICE_NAME "ida"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_ida_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#endif /* MAJOR_NR == whatever */
+
+/* provide DEVICE_xxx defaults, if not explicitly defined
+ * above in the MAJOR_NR==xxx if-elif tree */
+#ifndef DEVICE_ON
+#define DEVICE_ON(device) do {} while (0)
+#endif
+#ifndef DEVICE_OFF
+#define DEVICE_OFF(device) do {} while (0)
+#endif
+
+#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
+#if !defined(IDE_DRIVER)
+
+#ifndef CURRENT
+#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+#ifndef QUEUE_EMPTY
+#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+
+#ifndef DEVICE_NAME
+#define DEVICE_NAME "unknown"
+#endif
+
+#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev)
+
+#ifdef DEVICE_INTR
+static void (*DEVICE_INTR)(void) = NULL;
+#endif
+
+#define SET_INTR(x) (DEVICE_INTR = (x))
+
+#ifdef DEVICE_REQUEST
+static void (DEVICE_REQUEST)(request_queue_t *);
+#endif 
+  
+#ifdef DEVICE_INTR
+#define CLEAR_INTR SET_INTR(NULL)
+#else
+#define CLEAR_INTR
+#endif
+
+#define INIT_REQUEST \
+       if (QUEUE_EMPTY) {\
+               CLEAR_INTR; \
+               return; \
+       } \
+       if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \
+               panic(DEVICE_NAME ": request list destroyed"); \
+       if (CURRENT->bh) { \
+               if (!buffer_locked(CURRENT->bh)) \
+                       panic(DEVICE_NAME ": block not locked"); \
+       }
+
+#endif /* !defined(IDE_DRIVER) */
+
+
+#ifndef LOCAL_END_REQUEST      /* If we have our own end_request, we do not want to include this mess */
+
+#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR)
+
+static inline void end_request(int uptodate) {
+       struct request *req = CURRENT;
+
+       if (end_that_request_first(req, uptodate, DEVICE_NAME))
+               return;
+
+#ifndef DEVICE_NO_RANDOM
+       add_blkdev_randomness(MAJOR(req->rq_dev));
+#endif
+       DEVICE_OFF(req->rq_dev);
+       blkdev_dequeue_request(req);
+       end_that_request_last(req);
+}
+
+#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */
+#endif /* LOCAL_END_REQUEST */
+
+#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */
+#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */
+
+#endif /* _BLK_H */
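
This header is a template: a 2.4-era block driver defines MAJOR_NR and the
DEVICE_* macros before including it, and gets CURRENT, INIT_REQUEST and
end_request() specialized for its own queue. A hedged sketch of that pattern
(MYDEV_MAJOR and do_mydev_request are invented illustrative names, not part of
the patch):

    #define MAJOR_NR MYDEV_MAJOR                /* hypothetical major number */
    #define DEVICE_NAME "mydev"
    #define DEVICE_REQUEST do_mydev_request
    #define DEVICE_NR(device) (MINOR(device))
    #include <linux/blk.h>

    static void do_mydev_request(request_queue_t *q)
    {
            for (;;) {
                    INIT_REQUEST;               /* returns when the queue is empty */
                    /* ... transfer CURRENT->buffer for CURRENT->sector ... */
                    end_request(1);             /* 1 = success: dequeue and complete */
            }
    }
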
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/include/asm-xen/irq.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/irq.h     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,65 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+/*
+ *     linux/include/asm/irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/config.h>
+#include <asm/hypervisor.h>
+#include <asm/ptrace.h>
+
+/*
+ * The flat IRQ space is divided into two regions:
+ *  1. A one-to-one mapping of real physical IRQs. This space is only used
+ *     if we have physical device-access privilege. This region is at the 
+ *     start of the IRQ space so that existing device drivers do not need
+ *     to be modified to translate physical IRQ numbers into our IRQ space.
+ *  2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ *     are bound using the provided bind/unbind functions.
+ */
+
+#define PIRQ_BASE   0
+#define NR_PIRQS  128
+
+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
+#define NR_DYNIRQS  128
+
+#define NR_IRQS   (NR_PIRQS + NR_DYNIRQS)
+
+#define pirq_to_irq(_x)   ((_x) + PIRQ_BASE)
+#define irq_to_pirq(_x)   ((_x) - PIRQ_BASE)
+
+#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
+#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
+
+/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
+extern int  bind_virq_to_irq(int virq);
+extern void unbind_virq_from_irq(int virq);
+extern int  bind_evtchn_to_irq(int evtchn);
+extern void unbind_evtchn_from_irq(int evtchn);
+
+static __inline__ int irq_cannonicalize(int irq)
+{
+    return (irq == 2) ? 9 : irq;
+}
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
+
+
+#define CPU_MASK_NONE 0
+
+/* XXX SMH: no-op for compat w/ 2.6 shared files */ 
+#define irq_ctx_init(cpu) do { ; } while (0)
+
+#endif /* _ASM_IRQ_H */
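
The layout above is plain offset arithmetic. A standalone illustration using the
constants from this header (the IRQ numbers are examples, not bindings made by a
real kernel):

    #include <stdio.h>

    #define PIRQ_BASE     0
    #define NR_PIRQS    128
    #define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)

    #define pirq_to_irq(_x)   ((_x) + PIRQ_BASE)
    #define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)

    int main(void)
    {
            printf("physical IRQ 9 -> Linux irq %d\n", pirq_to_irq(9));    /* 9 */
            printf("dynamic IRQ 0  -> Linux irq %d\n", dynirq_to_irq(0));  /* 128 */
            return 0;
    }
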
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c       Fri Jul 15 13:39:50 2005
@@ -0,0 +1,83 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ *     Maciej W. Rozycki       :       Various updates and fixes.
+ *     Mikael Pettersson       :       Power Management for UP-APIC.
+ *     Pavel Machek and
+ *     Mikael Pettersson       :       PM converted to driver model.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+
+int get_physical_broadcast(void)
+{
+        return 0xff;
+}
+
+/*
+ * 'What should we do if we get a hw irq event on an illegal vector?'
+ * Each architecture has to answer this itself.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        */
+       ack_APIC_irq();
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (smp_found_config)
+               if (!skip_ioapic_setup && nr_ioapics)
+                       setup_IO_APIC();
+#endif
+
+       return 0;
+}
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pci.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pci.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,117 @@
+#ifndef __i386_PCI_H
+#define __i386_PCI_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+#include <linux/mm.h>          /* for struct page */
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+   already-configured bus numbers - to be used for buggy BIOSes
+   or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses()    0
+#endif
+#define pcibios_scan_all_fns(a, b)     0
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO         0x1000
+#define PCIBIOS_MIN_MEM                (pci_mem_start)
+
+#define PCIBIOS_MIN_CARDBUS_IO 0x4000
+
+void pcibios_config_init(void);
+struct pci_bus * pcibios_scan_root(int bus);
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/string.h>
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS    (1)
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME)         (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)        do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME)           (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)  do { } while (0)
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask)   (1)
+
+static inline dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
+{
+       return ((dma64_addr_t) page_to_phys(page) +
+               (dma64_addr_t) offset);
+}
+
+static inline struct page *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       return pfn_to_page(dma_addr >> PAGE_SHIFT);
+}
+
+static inline unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       return (dma_addr & ~PAGE_MASK);
+}
+
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+       flush_write_buffers();
+}
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+                              enum pci_mmap_state mmap_state, int write_combine);
+
+
+static inline void pcibios_add_platform_entries(struct pci_dev *dev)
+{
+}
+
+#endif /* __KERNEL__ */
+
+/* implement the pci_ DMA API in terms of the generic device dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
+/* On Xen we have to scan all functions since Xen hides bridges from
+ * us.  If a bridge is at fn=0 and that slot has a multifunction
+ * device, we won't find the additional devices without scanning all
+ * functions. */
+#undef pcibios_scan_all_fns
+#define pcibios_scan_all_fns(a, b)     1
+
+#endif /* __i386_PCI_H */
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h   Fri Jul 15 13:39:50 2005
@@ -0,0 +1,203 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT     12
+#define PAGE_SIZE      (1UL << PAGE_SHIFT)
+#define PAGE_MASK      (~(PAGE_SIZE-1))
+
+#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/foreign_page.h>
+
+#define arch_free_page(_page,_order)                   \
+({     int foreign = PageForeign(_page);               \
+       if (foreign)                                    \
+               (PageForeignDestructor(_page))(_page);  \
+       foreign;                                        \
+})
+#define HAVE_ARCH_FREE_PAGE
+
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p,_n) ((void)0)
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+#define clear_page(page)       mmx_clear_page((void *)(page))
+#define copy_page(to,from)     mmx_copy_page(to,from)
+
+#else
+
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+/*
+ *     On older X86 processors it doesn't seem to be a win to use MMX here.
+ *     Maybe the K6-III?
+ */
+ 
+#define clear_page(page)       memset((void *)(page), 0, PAGE_SIZE)
+#define copy_page(to,from)     memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+
+#endif
+
+#define clear_user_page(page, vaddr, pg)       clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
+
+/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+extern unsigned int *phys_to_machine_mapping;
+#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
+#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
+static inline unsigned long phys_to_machine(unsigned long phys)
+{
+       unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+       machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+       return machine;
+}
+static inline unsigned long machine_to_phys(unsigned long machine)
+{
+       unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+       phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+       return phys;
+}
+
+/*
+ * These are used to make use of C type-checking..
+ */
+extern int nx_enabled;
+#ifdef CONFIG_X86_PAE
+extern unsigned long long __supported_pte_mask;
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct { unsigned long long pgd; } pgd_t;
+typedef struct { unsigned long long pgprot; } pgprot_t;
+#define pmd_val(x)     ((x).pmd)
+#define pte_val(x)     ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
+#define __pmd(x) ((pmd_t) { (x) } )
+#define HPAGE_SHIFT    21
+#else
+typedef struct { unsigned long pte_low; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define boot_pte_t pte_t /* or would you rather have a typedef */
+#define pte_val(x)     (((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \
+                        (x).pte_low)
+#define pte_val_ma(x)  ((x).pte_low)
+#define HPAGE_SHIFT    22
+#endif
+#define PTE_MASK       PAGE_MASK
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HPAGE_SIZE     ((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK     (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+
+static inline unsigned long pgd_val(pgd_t x)
+{
+       unsigned long ret = x.pgd;
+       if (ret) ret = machine_to_phys(ret);
+       return ret;
+}
+#define pgprot_val(x)  ((x).pgprot)
+
+#define __pte(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pte_ma(x)    ((pte_t) { (x) } )
+#define __pgd(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+#define __pgprot(x)    ((pgprot_t) { (x) } )
+
+#endif /* !__ASSEMBLY__ */
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+/*
+ * This handles the memory map.. We could make this a config
+ * option, but too many people screw it up, and too few need
+ * it.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB. 
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This much address space is reserved for vmalloc() and iomap()
+ * as well as fixmap mappings.
+ */
+extern unsigned int __VMALLOC_RESERVE;
+
+/* Pure 2^n version of get_order */
+static __inline__ int get_order(unsigned long size)
+{
+       int order;
+
+       size = (size-1) >> (PAGE_SHIFT-1);
+       order = -1;
+       do {
+               size >>= 1;
+               order++;
+       } while (size);
+       return order;
+}
+
+extern int sysctl_legacy_va_layout;
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef __ASSEMBLY__
+#define __PAGE_OFFSET          (0xC0000000)
+#else
+#define __PAGE_OFFSET          (0xC0000000UL)
+#endif
+
+
+#define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
+#define VMALLOC_RESERVE                ((unsigned long)__VMALLOC_RESERVE)
+#define MAXMEM                 (HYPERVISOR_VIRT_START-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
+#ifndef CONFIG_DISCONTIGMEM
+#define pfn_to_page(pfn)       (mem_map + (pfn))
+#define page_to_pfn(page)      ((unsigned long)((page) - mem_map))
+#define pfn_valid(pfn)         ((pfn) < max_mapnr)
+#endif /* !CONFIG_DISCONTIGMEM */
+#define virt_to_page(kaddr)    pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS \
+       (VM_READ | VM_WRITE | \
+       ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+/* VIRT <-> MACHINE conversion */
+#define virt_to_machine(_a)    (phys_to_machine(__pa(_a)))
+#define machine_to_virt(_m)    (__va(machine_to_phys(_m)))
+
+#endif /* __KERNEL__ */
+
+#endif /* _I386_PAGE_H */
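
The machine<->physical conversion above splits an address into a frame number and
a page offset. A standalone illustration of that math (the mfn value is invented
for the example; a real lookup goes through phys_to_machine_mapping[]):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

    int main(void)
    {
            unsigned long phys = 0x1234567;     /* example pseudo-physical address */
            unsigned long mfn  = 0xabcd;        /* assumed machine frame for pfn 0x1234 */
            unsigned long mach = (mfn << PAGE_SHIFT) | (phys & ~PAGE_MASK);

            printf("phys %#lx -> machine %#lx\n", phys, mach);  /* 0xabcd567 */
            return 0;
    }
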
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,108 @@
+#ifndef __I386_SCHED_H
+#define __I386_SCHED_H
+
+#include <linux/config.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Used for LDT copy/destruction.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+#if 0 /* XEN: no lazy tlb */
+       unsigned cpu = smp_processor_id();
+       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+               per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
+#endif
+}
+
+#define prepare_arch_switch(rq,next)   __prepare_arch_switch()
+#define finish_arch_switch(rq, next)   spin_unlock_irq(&(rq)->lock)
+#define task_running(rq, p)            ((rq)->curr == (p))
+
+static inline void __prepare_arch_switch(void)
+{
+       /*
+        * Save away %fs and %gs. No need to save %es and %ds, as those
+        * are always kernel segments while inside the kernel. Must
+        * happen before reload of cr3/ldt (i.e., not in __switch_to).
+        */
+       __asm__ __volatile__ ( "movl %%fs,%0 ; movl %%gs,%1"
+               : "=m" (*(int *)&current->thread.fs),
+                 "=m" (*(int *)&current->thread.gs));
+       __asm__ __volatile__ ( "movl %0,%%fs ; movl %0,%%gs"
+               : : "r" (0) );
+}
+
+extern void mm_pin(struct mm_struct *mm);
+extern void mm_unpin(struct mm_struct *mm);
+void mm_pin_all(void);
+
+static inline void switch_mm(struct mm_struct *prev,
+                            struct mm_struct *next,
+                            struct task_struct *tsk)
+{
+       int cpu = smp_processor_id();
+       struct mmuext_op _op[2], *op = _op;
+
+       if (likely(prev != next)) {
+               if (!next->context.pinned)
+                       mm_pin(next);
+
+               /* stop flush ipis for the previous mm */
+               cpu_clear(cpu, prev->cpu_vm_mask);
+#if 0 /* XEN: no lazy tlb */
+               per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
+               per_cpu(cpu_tlbstate, cpu).active_mm = next;
+#endif
+               cpu_set(cpu, next->cpu_vm_mask);
+
+               /* Re-load page tables: load_cr3(next->pgd) */
+               per_cpu(cur_pgd, cpu) = next->pgd;
+               op->cmd = MMUEXT_NEW_BASEPTR;
+               op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
+               op++;
+
+               /*
+                * load the LDT, if the LDT is different:
+                */
+               if (unlikely(prev->context.ldt != next->context.ldt)) {
+                       /* load_LDT_nolock(&next->context, cpu) */
+                       op->cmd = MMUEXT_SET_LDT;
+                       op->linear_addr = (unsigned long)next->context.ldt;
+                       op->nr_ents     = next->context.size;
+                       op++;
+               }
+
+               BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
+       }
+#if 0 /* XEN: no lazy tlb */
+       else {
+               per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
+               BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
+
+               if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+                       /* We were in lazy tlb mode and leave_mm disabled 
+                        * tlb flush IPI delivery. We must reload %cr3.
+                        */
+                       load_cr3(next->pgd);
+                       load_LDT_nolock(&next->context, cpu);
+               }
+       }
+#endif
+}
+
+#define deactivate_mm(tsk, mm) \
+       asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+
+#define activate_mm(prev, next) \
+       switch_mm((prev),(next),NULL)
+
+#endif
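
The switch_mm() above batches up to two extended MMU operations (the
page-table base switch, plus an LDT reload when the LDT differs) and
flushes them to Xen with a single HYPERVISOR_mmuext_op() hypercall,
rather than paying one hypercall per operation. The following standalone
C sketch mirrors only the batching idiom; the struct, constants and
issue_ops() are illustrative stand-ins, not the real Xen interface:

    #include <stdio.h>

    struct mock_op { int cmd; unsigned long arg; }; /* stand-in for mmuext_op */

    #define OP_NEW_BASEPTR 1    /* illustrative values, not the MMUEXT_* ones */
    #define OP_SET_LDT     2

    /* Stand-in for HYPERVISOR_mmuext_op(): consumes 'count' ops in one batch. */
    static int issue_ops(const struct mock_op *ops, int count)
    {
        int i;
        for (i = 0; i < count; i++)
            printf("op %d: cmd=%d arg=%#lx\n", i, ops[i].cmd, ops[i].arg);
        return 0;
    }

    int main(void)
    {
        struct mock_op _op[2], *op = _op;
        int ldt_differs = 1;

        op->cmd = OP_NEW_BASEPTR; op->arg = 0x1000; op++; /* always queued */
        if (ldt_differs) {                                /* optional 2nd op */
            op->cmd = OP_SET_LDT; op->arg = 0x2000; op++;
        }
        return issue_ops(_op, (int)(op - _op));  /* one call, whole batch */
    }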
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/xen/blkback/Makefile
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/Makefile      Fri Jul 15 13:39:50 2005
@@ -0,0 +1,2 @@
+
+obj-y  := blkback.o control.o interface.o vbd.o
diff -r 35b74976f598 -r a83ac0806d6b linux-2.4.30-xen-sparse/arch/xen/kernel/time.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/time.c    Fri Jul 15 13:39:50 2005
@@ -0,0 +1,721 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 - Keir Fraser - University of Cambridge
+ ****************************************************************************
+ *
+ *        File: arch/xen/kernel/time.c
+ *      Author: Rolf Neugebauer and Keir Fraser
+ * 
+ * Description: Interface with Xen to get correct notion of time
+ */
+
+/*
+ *  linux/arch/i386/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * This file contains the PC-specific time handling details:
+ * reading the RTC at bootup, etc..
+ * 1994-07-02    Alan Modra
+ * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ * 1995-03-26    Markus Kuhn
+ *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
+ *      precision CMOS clock update
+ * 1996-05-03    Ingo Molnar
+ *      fixed time warps in do_[slow|fast]_gettimeoffset()
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ *  "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-09-05    (Various)
+ * More robust do_fast_gettimeoffset() algorithm implemented
+ * (works with APM, Cyrix 6x86MX and Centaur C6),
+ * monotonic gettimeofday() with fast_get_timeoffset(),
+ * drift-proof precision TSC calibration on boot
+ * (C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, Andrew D.
+ * Balsa <andrebalsa@xxxxxxxxxx>, Philip Gladstone <philip@xxxxxxxxxx>;
+ * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@xxxxxxxxxxxxx>).
+ * 1998-12-16    Andrea Arcangeli
+ * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
+ * because was not accounting lost_ticks.
+ * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
+ * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
+ */
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/msr.h>
+#include <asm/delay.h>
+#include <asm/mpspec.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+#include <asm/div64.h>
+#include <asm/hypervisor.h>
+#include <asm-xen/xen-public/dom0_ops.h>
+
+#include <linux/mc146818rtc.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/sysctl.h>
+#include <linux/sysrq.h>
+
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+extern rwlock_t xtime_lock;
+extern unsigned long wall_jiffies;
+
+unsigned long cpu_khz; /* get this from Xen, used elsewhere */
+
+static unsigned int rdtsc_bitshift;
+static u32 st_scale_f; /* convert ticks -> usecs */
+static u32 st_scale_i; /* convert ticks -> usecs */
+
+/* These are periodically updated in shared_info, and then copied here. */
+static u32 shadow_tsc_stamp;
+static u64 shadow_system_time;
+static u32 shadow_time_version;
+static struct timeval shadow_tv;
+
+/*
+ * We use this to ensure that gettimeofday() is monotonically increasing. We
+ * only break this guarantee if the wall clock jumps backwards "a long way".
+ */
+static struct timeval last_seen_tv = {0,0};
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/* Periodically propagate synchronised time base to the RTC and to Xen. */
+static long last_update_to_rtc, last_update_to_xen;
+#endif
+
+/* Periodically take synchronised time base from Xen, if we need it. */
+static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
+
+/* Keep track of last time we did processing/updating of jiffies and xtime. */
+static u64 processed_system_time;   /* System time (ns) at last processing. */
+
+#define NS_PER_TICK (1000000000ULL/HZ)
+
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC (1000000000L)
+#endif
+
+#define HANDLE_USEC_UNDERFLOW(_tv)         \
+    do {                                   \
+        while ( (_tv).tv_usec < 0 )        \
+        {                                  \
+            (_tv).tv_usec += 1000000;      \
+            (_tv).tv_sec--;                \
+        }                                  \
+    } while ( 0 )
+#define HANDLE_USEC_OVERFLOW(_tv)          \
+    do {                                   \
+        while ( (_tv).tv_usec >= 1000000 ) \
+        {                                  \
+            (_tv).tv_usec -= 1000000;      \
+            (_tv).tv_sec++;                \
+        }                                  \
+    } while ( 0 )
+static inline void __normalize_time(time_t *sec, s64 *nsec)
+{
+       while (*nsec >= NSEC_PER_SEC) {
+               (*nsec) -= NSEC_PER_SEC;
+               (*sec)++;
+       }
+       while (*nsec < 0) {
+               (*nsec) += NSEC_PER_SEC;
+               (*sec)--;
+       }
+}
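
The HANDLE_USEC_* macros and __normalize_time() above all perform the same
carry propagation between the seconds field and the sub-second field. A
minimal standalone check of the nanosecond variant (types simplified for
userspace):

    #include <stdio.h>

    typedef long long s64;
    #define NSEC_PER_SEC 1000000000LL

    static void normalize_time(long *sec, s64 *nsec)
    {
        while (*nsec >= NSEC_PER_SEC) { *nsec -= NSEC_PER_SEC; (*sec)++; }
        while (*nsec < 0)             { *nsec += NSEC_PER_SEC; (*sec)--; }
    }

    int main(void)
    {
        long sec = 10;
        s64 nsec = 2500000000LL;                /* 2.5 s of overflow */
        normalize_time(&sec, &nsec);
        printf("%ld s + %lld ns\n", sec, nsec); /* 12 s + 500000000 ns */
        return 0;
    }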
+
+/* Dynamically-mapped IRQs. */
+static int time_irq, debug_irq;
+
+/* Does this guest OS track Xen time, or set its wall clock independently? */
+static int independent_wallclock = 0;
+static int __init __independent_wallclock(char *str)
+{
+    independent_wallclock = 1;
+    return 1;
+}
+__setup("independent_wallclock", __independent_wallclock);
+#define INDEPENDENT_WALLCLOCK() \
+    (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be
+ * called 500 ms after the second nowtime has started, because when
+ * nowtime is written into the registers of the CMOS clock, it will
+ * jump to the next second precisely 500 ms later. Check the Motorola
+ * MC146818A or Dallas DS12887 data sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ *      sets the minutes. Usually you'll only notice that after reboot!
+ */
+static int set_rtc_mmss(unsigned long nowtime)
+{
+    int retval = 0;
+    int real_seconds, real_minutes, cmos_minutes;
+    unsigned char save_control, save_freq_select;
+
+    /* gets recalled with irq locally disabled */
+    spin_lock(&rtc_lock);
+    save_control = CMOS_READ(RTC_CONTROL);
+    CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+    save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+    CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+    cmos_minutes = CMOS_READ(RTC_MINUTES);
+    if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+        BCD_TO_BIN(cmos_minutes);
+
+    /*
+     * since we're only adjusting minutes and seconds, don't interfere with
+     * hour overflow. This avoids messing with unknown time zones but requires
+     * your RTC not to be off by more than 15 minutes
+     */
+    real_seconds = nowtime % 60;
+    real_minutes = nowtime / 60;
+    if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 )
+        real_minutes += 30;  /* correct for half hour time zone */
+    real_minutes %= 60;
+
+    if ( abs(real_minutes - cmos_minutes) < 30 )
+    {
+        if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+        {
+            BIN_TO_BCD(real_seconds);
+            BIN_TO_BCD(real_minutes);
+        }
+        CMOS_WRITE(real_seconds,RTC_SECONDS);
+        CMOS_WRITE(real_minutes,RTC_MINUTES);
+    }
+    else 
+    {
+        printk(KERN_WARNING
+               "set_rtc_mmss: can't update from %d to %d\n",
+               cmos_minutes, real_minutes);
+        retval = -1;
+    }
+
+    /* The following flags have to be released exactly in this order,
+     * otherwise the DS12887 (popular MC146818A clone with integrated
+     * battery and quartz) will not reset the oscillator and will not
+     * update precisely 500 ms later. You won't find this mentioned in
+     * the Dallas Semiconductor data sheets, but who believes data
+     * sheets anyway ...                           -- Markus Kuhn
+     */
+    CMOS_WRITE(save_control, RTC_CONTROL);
+    CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+    spin_unlock(&rtc_lock);
+
+    return retval;
+}
+#endif
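
set_rtc_mmss() converts to and from BCD because the CMOS clock stores its
minutes and seconds in BCD unless RTC_DM_BINARY is set. The conversions
are the usual nibble arithmetic (equivalent to the kernel's BCD_TO_BIN and
BIN_TO_BCD macros), shown here as a standalone sketch:

    #include <stdio.h>

    static unsigned bcd_to_bin(unsigned v) { return (v & 0x0f) + (v >> 4) * 10; }
    static unsigned bin_to_bcd(unsigned v) { return ((v / 10) << 4) | (v % 10); }

    int main(void)
    {
        printf("BCD 0x59 -> %u\n", bcd_to_bin(0x59));   /* prints 59 */
        printf("bin 42   -> 0x%02x\n", bin_to_bcd(42)); /* prints 0x42 */
        return 0;
    }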
+
+
+/*
+ * Reads a consistent set of time-base values from Xen, into a shadow data
+ * area. Must be called with the xtime_lock held for writing.
+ */
+static void __get_time_values_from_xen(void)
+{
+    do {
+        shadow_time_version = HYPERVISOR_shared_info->time_version2;
+        rmb();
+        shadow_tv.tv_sec    = HYPERVISOR_shared_info->wc_sec;
+        shadow_tv.tv_usec   = HYPERVISOR_shared_info->wc_usec;
+        shadow_tsc_stamp    = 
+            (u32)(HYPERVISOR_shared_info->tsc_timestamp >> rdtsc_bitshift);
+        shadow_system_time  = HYPERVISOR_shared_info->system_time;
+        rmb();
+    }
+    while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
+}
+
+#define TIME_VALUES_UP_TO_DATE \
+ ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
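
__get_time_values_from_xen() and TIME_VALUES_UP_TO_DATE together form the
reader side of a version-counter protocol: the writer bumps one counter
before rewriting the payload and the other counter afterwards, so a reader
that sees the two counters equal knows its copy is consistent. A simplified
sketch of the reader's shape (field names are illustrative, and the rmb()
barriers are elided since this is a single-threaded demo):

    #include <stdio.h>

    struct shared { volatile unsigned v1, v2; volatile unsigned long payload; };

    static unsigned long read_snapshot(const struct shared *s)
    {
        unsigned ver;
        unsigned long p;
        do {
            ver = s->v2;        /* the counter the writer bumps last */
            p   = s->payload;   /* copy the data out */
        } while (ver != s->v1); /* mismatch: writer raced us, retry */
        return p;
    }

    int main(void)
    {
        struct shared s = { 7, 7, 42 }; /* quiescent: both counters equal */
        printf("%lu\n", read_snapshot(&s));
        return 0;
    }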
+
+
+/*
+ * Returns the time elapsed, in usecs, since the current shadow_tsc_stamp
+ * was calculated. Must be called with the xtime_lock held for reading.
+ */
+static inline unsigned long __get_time_delta_usecs(void)
+{
+    s32      delta_tsc;
+    u32      low;
+    u64      delta, tsc;
+
+    rdtscll(tsc);
+    low = (u32)(tsc >> rdtsc_bitshift);
+    delta_tsc = (s32)(low - shadow_tsc_stamp);
+    if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
+    delta = ((u64)delta_tsc * st_scale_f);
+    delta >>= 32;
+    delta += ((u64)delta_tsc * st_scale_i);
+
+    return (unsigned long)delta;
+}
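
The st_scale_i/st_scale_f pair acts as a single 32.32 fixed-point
multiplier, so the conversion above computes
usecs = ticks * (st_scale_i + st_scale_f / 2^32) using two 32x32->64
multiplies. A standalone check with made-up scale values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t st_scale_i = 0;           /* integer usecs per tick */
        uint32_t st_scale_f = 0x80000000u; /* fraction: 0.5 usec per tick */
        uint32_t delta_tsc  = 1000;

        uint64_t delta = (uint64_t)delta_tsc * st_scale_f;
        delta >>= 32;                          /* fractional contribution */
        delta += (uint64_t)delta_tsc * st_scale_i;

        printf("%llu usecs\n", (unsigned long long)delta); /* prints 500 */
        return 0;
    }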
+
+
+/*
+ * Returns the current time-of-day in UTC timeval format.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+    unsigned long flags, lost;
+    struct timeval _tv;
+    s64 nsec;
+
+ again:
+    read_lock_irqsave(&xtime_lock, flags);
+
+    _tv.tv_usec = __get_time_delta_usecs();
+    if ( (lost = (jiffies - wall_jiffies)) != 0 )
+        _tv.tv_usec += lost * (1000000 / HZ);
+    _tv.tv_sec   = xtime.tv_sec;
+    _tv.tv_usec += xtime.tv_usec;
+
+    nsec = shadow_system_time - processed_system_time;
+    __normalize_time(&_tv.tv_sec, &nsec);
+    _tv.tv_usec += (long)nsec / 1000L;
+
+    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
+    {
+        /*
+         * We may have blocked for a long time, rendering our calculations
+         * invalid (e.g. the time delta may have overflowed). Detect that
+         * and recalculate with fresh values.
+         */
+        read_unlock_irqrestore(&xtime_lock, flags);
+        write_lock_irqsave(&xtime_lock, flags);
+        __get_time_values_from_xen();
+        write_unlock_irqrestore(&xtime_lock, flags);
+        goto again;
+    }
+
+    HANDLE_USEC_OVERFLOW(_tv);
+
+    /* Ensure that time-of-day is monotonically increasing. */
+    if ( (_tv.tv_sec < last_seen_tv.tv_sec) ||
+         ((_tv.tv_sec == last_seen_tv.tv_sec) &&
+          (_tv.tv_usec < last_seen_tv.tv_usec)) )
+        _tv = last_seen_tv;
+    last_seen_tv = _tv;
+
+    read_unlock_irqrestore(&xtime_lock, flags);
+
+    *tv = _tv;
+}
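
The last_seen_tv logic above is a plain monotonicity clamp: a candidate
time earlier than the last value handed out is replaced by that value, so
callers never see time-of-day step backwards. The comparison isolated as a
standalone sketch:

    #include <stdio.h>

    struct tv { long sec, usec; };

    static struct tv clamp_monotonic(struct tv cand, struct tv *last)
    {
        if (cand.sec < last->sec ||
            (cand.sec == last->sec && cand.usec < last->usec))
            cand = *last;  /* refuse to go backwards */
        *last = cand;      /* remember what we returned */
        return cand;
    }

    int main(void)
    {
        struct tv last = { 100, 500 }, now = { 100, 400 }; /* 100 us warp */
        now = clamp_monotonic(now, &last);
        printf("%ld.%06ld\n", now.sec, now.usec); /* prints 100.000500 */
        return 0;
    }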
+
+
+/*
+ * Sets the current time-of-day based on passed-in UTC timeval parameter.
+ */
+void do_settimeofday(struct timeval *tv)
+{
+    struct timeval newtv;
+    s64            nsec;
+    suseconds_t    usec;
+    
+    if ( !INDEPENDENT_WALLCLOCK() )
+        return;
+    
+    write_lock_irq(&xtime_lock);
+    
+    /*
+     * If we block for so long that our time delta overflows, the shadow
+     * time values we read become stale. The check below detects that case
+     * and retries the calculation with fresh values.
+     */
+ again:
+    usec = tv->tv_usec - __get_time_delta_usecs();
+
+    nsec = shadow_system_time - processed_system_time;
+    __normalize_time(&tv->tv_sec, &nsec);
+    usec -= (long)nsec / 1000L;
+
+    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
+    {
+        __get_time_values_from_xen();
+        goto again;
+    }
+    tv->tv_usec = usec;
+    
+    HANDLE_USEC_UNDERFLOW(*tv);
+    
+    newtv = *tv;
+    
+    tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
+    HANDLE_USEC_UNDERFLOW(*tv);
+
+    xtime = *tv;
+    time_adjust = 0;  /* stop active adjtime() */
+    time_status |= STA_UNSYNC;
+    time_maxerror = NTP_PHASE_LIMIT;
+    time_esterror = NTP_PHASE_LIMIT;
+
+    /* Reset all our running time counts. They make no sense now. */
+    last_seen_tv.tv_sec = 0;
+    last_update_from_xen = 0;
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+    if ( xen_start_info.flags & SIF_INITDOMAIN )
+    {
+        dom0_op_t op;
+        last_update_to_rtc = last_update_to_xen = 0;
+        op.cmd = DOM0_SETTIME;
+        op.u.settime.secs        = newtv.tv_sec;
+        op.u.settime.usecs       = newtv.tv_usec;
+        op.u.settime.system_time = shadow_system_time;
+        write_unlock_irq(&xtime_lock);
+        HYPERVISOR_dom0_op(&op);
+    }
+    else
+#endif
+    {
+        write_unlock_irq(&xtime_lock);
+    }
+}
+
+
+asmlinkage long sys_stime(int *tptr)
+{
+    int value;
+    struct timeval tv;
+
+    if ( !capable(CAP_SYS_TIME) )
+        return -EPERM;
+
+    if ( get_user(value, tptr) )
+        return -EFAULT;
+
+    tv.tv_sec  = value;
+    tv.tv_usec = 0;
+
+    do_settimeofday(&tv);
+
+    return 0;
+}
+
+
+/* Convert jiffies to system time. Call with xtime_lock held for reading. */
+static inline u64 __jiffies_to_st(unsigned long j) 
+{
+    return processed_system_time + ((j - jiffies) * NS_PER_TICK);
+}
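
Because processed_system_time records system time as of the last jiffy the
guest accounted for, extrapolating to a future jiffy count is one multiply
by NS_PER_TICK. A quick standalone check (HZ and the starting values below
are assumptions for the demo):

    #include <stdio.h>
    #include <stdint.h>

    #define HZ 100                          /* assumed 2.4-era default */
    #define NS_PER_TICK (1000000000ULL / HZ)

    int main(void)
    {
        uint64_t processed_system_time = 5000000000ULL; /* 5 s, made up */
        unsigned long jiffies = 500, expires = 510;
        uint64_t alarm = processed_system_time
                         + (expires - jiffies) * NS_PER_TICK;
        printf("alarm at %llu ns\n", (unsigned long long)alarm);
        return 0;                           /* prints 5100000000 */
    }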
+
+
+static inline void do_timer_interrupt(int irq, void *dev_id,
+                                      struct pt_regs *regs)
+{
+    s64 delta;
+    unsigned long ticks = 0;
+    long sec_diff;
+
+    do {
+        __get_time_values_from_xen();
+        
+        delta = (s64)(shadow_system_time + 
+                      ((s64)__get_time_delta_usecs() * 1000LL) -
+                      processed_system_time);
+    }
+    while ( !TIME_VALUES_UP_TO_DATE );
+
+    if ( unlikely(delta < 0) )
+    {
+        printk("Timer ISR: Time went backwards: %lld\n", delta);
+        return;
+    }
+
+    /* Process elapsed jiffies since last call. */
+    while ( delta >= NS_PER_TICK )
+    {
+        ticks++;
+        delta -= NS_PER_TICK;
+        processed_system_time += NS_PER_TICK;
+    }
+
+    if ( ticks != 0 )
+    {
+        do_timer_ticks(ticks);
+
+        if ( user_mode(regs) )
+            update_process_times_us(ticks, 0);
+        else
+            update_process_times_us(0, ticks);
+    }
+
+    /*
+     * Take synchronised time from Xen once a minute if we're not
+     * synchronised ourselves, and we haven't chosen to keep an independent
+     * time base.
+     */
+    if ( !INDEPENDENT_WALLCLOCK() &&
+         ((time_status & STA_UNSYNC) != 0) &&
+         (xtime.tv_sec > (last_update_from_xen + 60)) )
+    {
+        /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */
+        shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ);
+        HANDLE_USEC_UNDERFLOW(shadow_tv);
+
+        /*
+         * Reset our running time counts if they are invalidated by a warp
+         * backwards of more than 500ms.
+         */
+        sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
+        if ( unlikely(abs(sec_diff) > 1) ||
+             unlikely(((sec_diff * 1000000) + 
+                       xtime.tv_usec - shadow_tv.tv_usec) > 500000) )
+        {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+            last_update_to_rtc = last_update_to_xen = 0;
+#endif
+            last_seen_tv.tv_sec = 0;
+        }
+
+        /* Update our unsynchronised xtime appropriately. */
+        xtime = shadow_tv;
+
+        last_update_from_xen = xtime.tv_sec;
+    }
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+    if ( (xen_start_info.flags & SIF_INITDOMAIN) &&
+         ((time_status & STA_UNSYNC) == 0) )
+    {
+        /* Send synchronised time to Xen approximately every minute. */
+        if ( xtime.tv_sec > (last_update_to_xen + 60) )
+        {
+            dom0_op_t op;
+            struct timeval tv = xtime;
+
+            tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ);
+            HANDLE_USEC_OVERFLOW(tv);
+
+            op.cmd = DOM0_SETTIME;
+            op.u.settime.secs        = tv.tv_sec;
+            op.u.settime.usecs       = tv.tv_usec;
+            op.u.settime.system_time = shadow_system_time;
+            HYPERVISOR_dom0_op(&op);
+
+            last_update_to_xen = xtime.tv_sec;
+        }
+
+        /*
+         * If we have an externally synchronized Linux clock, then update CMOS
+         * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called
+         * as close as possible to 500 ms before the new second starts.
+         */
+        if ( (xtime.tv_sec > (last_update_to_rtc + 660)) &&
+             (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) &&
+             (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) )
+        {
+            if ( set_rtc_mmss(xtime.tv_sec) == 0 )
+                last_update_to_rtc = xtime.tv_sec;
+            else
+                last_update_to_rtc = xtime.tv_sec - 600;
+        }
+    }
+#endif
+}
+
+
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+    write_lock(&xtime_lock);
+    do_timer_interrupt(irq, NULL, regs);
+    write_unlock(&xtime_lock);
+}
+
+static struct irqaction irq_timer = {
+    timer_interrupt, 
+    SA_INTERRUPT, 
+    0, 
+    "timer", 
+    NULL, 
+    NULL
+};
+
+
+/*
+ * This function works out when the next timer function has to be
+ * executed (by looking at the timer list) and sets the Xen one-shot
+ * domain timer to the appropriate value. This is typically called in
+ * cpu_idle() before the domain blocks.
+ * 
+ * The function returns a non-0 value on error conditions.
+ * 
+ * It must be called with interrupts disabled.
+ */
+extern spinlock_t timerlist_lock;
+int set_timeout_timer(void)
+{
+    struct timer_list *timer;
+    u64 alarm = 0;
+    int ret = 0;
+
+    spin_lock(&timerlist_lock);
+
+    /*
+     * This is safe against long blocking (since calculations are not based on 
+     * TSC deltas). It is also safe against warped system time since
+     * suspend-resume is cooperative and we would first get locked out. It is 
+     * safe against normal updates of jiffies since interrupts are off.
+     */
+    if ( (timer = next_timer_event()) != NULL )
+        alarm = __jiffies_to_st(timer->expires);
+
+    /* Tasks on the timer task queue expect to be executed on the next tick. */
+    if ( TQ_ACTIVE(tq_timer) )
+        alarm = __jiffies_to_st(jiffies + 1);
+
+    /* Failure is pretty bad, but we'd best soldier on. */
+    if ( HYPERVISOR_set_timer_op(alarm) != 0 )
+        ret = -1;
+    
+    spin_unlock(&timerlist_lock);
+
+    return ret;
+}
+
+
+/* Time debugging. */
+static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    unsigned long flags, j;
+    u64 s_now, j_st;
+    struct timeval s_tv, tv;
+
+    struct timer_list *timer;
+    u64 t_st;
+
+    read_lock_irqsave(&xtime_lock, flags);
+    s_tv.tv_sec  = shadow_tv.tv_sec;
+    s_tv.tv_usec = shadow_tv.tv_usec;
+    s_now        = shadow_system_time;
+    read_unlock_irqrestore(&xtime_lock, flags);
+
+    do_gettimeofday(&tv);
+
+    j = jiffies;
+    j_st = __jiffies_to_st(j);
+
+    timer = next_timer_event();
+    t_st = __jiffies_to_st(timer->expires);
+
+    printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n",
+           (u32)(s_now>>32), (u32)s_now);
+    printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n",
+           tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec);
+    printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n",
+           jiffies,(u32)(j_st>>32), (u32)j_st,
+           timer->expires,(u32)(t_st>>32), (u32)t_st);
+    printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n",
+           (u32)(processed_system_time>>32), (u32)processed_system_time);
+
+#ifdef CONFIG_MAGIC_SYSRQ
+    handle_sysrq('t',NULL,NULL,NULL);
+#endif
+}
+
+static struct irqaction dbg_time = {
+    dbg_time_int, 
+    SA_SHIRQ, 
+    0, 
+    "timer_dbg", 
+    &dbg_time_int,
+    NULL
+};
+
+void __init time_init(void)
+{
+    unsigned long long alarm;
+    u64 __cpu_khz, __cpu_ghz, cpu_freq, scale, scale2;
+    unsigned int cpu_ghz;
+
+    __cpu_khz = __cpu_ghz = cpu_freq = HYPERVISOR_shared_info->cpu_freq;
+    do_div(__cpu_khz, 1000UL);
+    cpu_khz = (u32)__cpu_khz;
+    do_div(__cpu_ghz, 1000000000UL);
+    cpu_ghz = (unsigned int)__cpu_ghz;
+
+    printk("Xen reported: %lu.%03lu MHz processor.\n", 
+           cpu_khz / 1000, cpu_khz % 1000);
+
+    xtime.tv_sec = HYPERVISOR_shared_info->wc_sec;
+    xtime.tv_usec = HYPERVISOR_shared_info->wc_usec;
+    processed_system_time = shadow_system_time;
+
+    for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
+        continue;
+
+    scale = 1000000LL << (32 + rdtsc_bitshift);
+    do_div(scale, (u32)cpu_freq);
+
+    if ( (cpu_freq >> 32) != 0 )
+    {
+        scale2 = 1000000LL << rdtsc_bitshift;
+        do_div(scale2, (u32)(cpu_freq>>32));
+        scale += scale2;
+    }
+
+    st_scale_f = scale & 0xffffffff;
+    st_scale_i = scale >> 32;
+
+    __get_time_values_from_xen();
+    processed_system_time = shadow_system_time;
+
+    time_irq  = bind_virq_to_irq(VIRQ_TIMER);
+    debug_irq = bind_virq_to_irq(VIRQ_DEBUG);
+
+    (void)setup_irq(time_irq, &irq_timer);
+    (void)setup_irq(debug_irq, &dbg_time);
+
+    rdtscll(alarm);
+}
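
time_init() derives the fixed-point multiplier as
scale = (10^6 << (32 + rdtsc_bitshift)) / cpu_freq, sizing rdtsc_bitshift
so that right-shifted TSC deltas fit in 32 bits. The kernel has to split
the division with do_div() (plus a second pass for the high word of
cpu_freq); in a userspace sketch, plain 64-bit division subsumes both
steps. Assuming a 2 GHz CPU:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t cpu_freq = 2000000000ULL;            /* assumed 2 GHz */
        unsigned cpu_ghz  = (unsigned)(cpu_freq / 1000000000ULL);
        unsigned shift;

        for (shift = 0; cpu_ghz != 0; shift++, cpu_ghz >>= 1)
            ;                                         /* shift becomes 2 */

        uint64_t scale = (1000000ULL << (32 + shift)) / cpu_freq;
        printf("shift=%u st_scale_i=%u st_scale_f=%u\n", shift,
               (unsigned)(scale >> 32), (unsigned)scale);
        return 0;       /* prints shift=2 st_scale_i=0 st_scale_f=8589934 */
    }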
+
+void time_suspend(void)
+{
+}
+
+void time_resume(void)
+{
+    unsigned long flags;
+    write_lock_irqsave(&xtime_lock, flags);
+    /* Get timebases for new environment. */ 
+    __get_time_values_from_xen();
+    /* Reset our own concept of passage of system time. */
+    processed_system_time = shadow_system_time;
+    /* Accept a warp in UTC (wall-clock) time. */
+    last_seen_tv.tv_sec = 0;
+    /* Make sure we resync UTC time with Xen on next timer interrupt. */
+    last_update_from_xen = 0;
+    write_unlock_irqrestore(&xtime_lock, flags);
+}
+
+/*
+ * /proc/sys/xen: This really belongs in another file. It can stay here for
+ * now however.
+ */
+static ctl_table xen_subtable[] = {
+    {1, "independent_wallclock", &independent_wallclock,
+     sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
+    {0}
+};
+static ctl_table xen_table[] = {
+    {123, "xen", NULL, 0, 0555, xen_subtable},
+    {0}
+};
+static int __init xen_sysctl_init(void)
+{
+    (void)register_sysctl_table(xen_table, 0);
+    return 0;
+}
+__initcall(xen_sysctl_init);
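
With the tables above registered, the flag appears to userspace as
/proc/sys/xen/independent_wallclock (path inferred from the table entries).
A minimal toggle, for illustration only:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/xen/independent_wallclock", "w");
        if (!f) { perror("fopen"); return 1; }
        fputs("1\n", f);  /* let this guest manage its own wall clock */
        fclose(f);
        return 0;
    }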
diff -r 35b74976f598 -r a83ac0806d6b linux-2.6.11-xen-sparse/drivers/char/tty_io.c
--- /dev/null   Fri Jul 15 12:52:02 2005
+++ b/linux-2.6.11-xen-sparse/drivers/char/tty_io.c     Fri Jul 15 13:39:50 2005
@@ -0,0 +1,2988 @@
+/*
+ *  linux/drivers/char/tty_io.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures.  Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time.  Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c).  This
+ * makes for cleaner and more compact code.  -TYT, 9/17/92 
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling.  No delays, but all
+ * other bits should be there.
+ *     -- Nick Holloway <alfie@xxxxxxxxxxxxxxxxx>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ *     -- julian@xxxxxxxxxxxxxxxxxxxxxx (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ *     -- ctm@xxxxxxxx, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ *     -- mj@xxxxxxxxxxxxxxxxx, 19-Nov-95
+ * 
+ * Restrict vt switching via ioctl()
+ *      -- grif@xxxxxxxxxx, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ *     -- Marko Kohtala <Marko.Kohtala@xxxxxx>, March 97
+ *
+ * Rewrote init_dev and release_dev to eliminate races.
+ *     -- Bill Hawes <whawes@xxxxxxxx>, June 97
+ *
+ * Added devfs support.
+ *      -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ *      -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ *      -- Russell King <rmk@xxxxxxxxxxxxxxxx>
+ *
+ * Move do_SAK() into process context.  Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@xxxxxxxxxx> 17Mar01
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <linux/devpts_fs.h>
+#include <linux/file.h>
+#include <linux/console.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/kd.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/wait.h>
+#include <linux/bitops.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/kbd_kern.h>
+#include <linux/vt_kern.h>
+#include <linux/selection.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/kmod.h>
+
+#undef TTY_DEBUG_HANGUP
+
+#define TTY_PARANOIA_CHECK 1
+#define CHECK_TTY_COUNT 1
+
+struct termios tty_std_termios = {     /* for the benefit of tty drivers  */
+       .c_iflag = ICRNL | IXON,
+       .c_oflag = OPOST | ONLCR,
+       .c_cflag = B38400 | CS8 | CREAD | HUPCL,
+       .c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+                  ECHOCTL | ECHOKE | IEXTEN,
+       .c_cc = INIT_C_CC
+};
+
+EXPORT_SYMBOL(tty_std_termios);
+
+/* This list gets poked at by procfs and various bits of boot up code. This
+   could do with some rationalisation such as pulling the tty proc function
+   into this file */
+   
+LIST_HEAD(tty_drivers);                        /* linked list of tty drivers */
+
+/* Semaphore to protect creating and releasing a tty. This is shared with
+   vt.c for deeply disgusting hack reasons */
+DECLARE_MUTEX(tty_sem);
+
+int console_use_vt = 1;
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver *ptm_driver;  /* Unix98 pty masters; for /dev/ptmx */
+extern int pty_limit;          /* Config limit on Unix98 ptys */
+static DEFINE_IDR(allocated_ptys);
+static DECLARE_MUTEX(allocated_ptys_lock);
+static int ptmx_open(struct inode *, struct file *);
+#endif
+
+extern void disable_early_printk(void);
+
+static void initialize_tty_struct(struct tty_struct *tty);
+
+static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
+static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
+ssize_t redirected_tty_write(struct file *, const char __user *, size_t, loff_t *);
+static unsigned int tty_poll(struct file *, poll_table *);
+static int tty_open(struct inode *, struct file *);
+static int tty_release(struct inode *, struct file *);
+int tty_ioctl(struct inode * inode, struct file * file,
+             unsigned int cmd, unsigned long arg);
+static int tty_fasync(int fd, struct file * filp, int on);
+extern void rs_360_init(void);
+static void release_mem(struct tty_struct *tty, int idx);
+
+
+static struct tty_struct *alloc_tty_struct(void)
+{
+       struct tty_struct *tty;
+
+       tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+       if (tty)
+               memset(tty, 0, sizeof(struct tty_struct));
+       return tty;
+}
+
+static inline void free_tty_struct(struct tty_struct *tty)
+{
+       kfree(tty->write_buf);
+       kfree(tty);
+}
+
+#define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
+
+char *tty_name(struct tty_struct *tty, char *buf)
+{
+       if (!tty) /* Hmm.  NULL pointer.  That's fun. */
+               strcpy(buf, "NULL tty");
+       else
+               strcpy(buf, tty->name);
+       return buf;
+}
+
+EXPORT_SYMBOL(tty_name);
+
+inline int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+                             const char *routine)
+{
+#ifdef TTY_PARANOIA_CHECK
+       if (!tty) {
+               printk(KERN_WARNING
+                       "null TTY for (%d:%d) in %s\n",
+                       imajor(inode), iminor(inode), routine);
+               return 1;
+       }
+       if (tty->magic != TTY_MAGIC) {
+               printk(KERN_WARNING
+                       "bad magic number for tty struct (%d:%d) in %s\n",
+                       imajor(inode), iminor(inode), routine);
+               return 1;
+       }
+#endif
+       return 0;
+}
+
+static int check_tty_count(struct tty_struct *tty, const char *routine)
+{
+#ifdef CHECK_TTY_COUNT
+       struct list_head *p;
+       int count = 0;
+       
+       file_list_lock();
+       list_for_each(p, &tty->tty_files) {
+               count++;
+       }
+       file_list_unlock();
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_SLAVE &&
+           tty->link && tty->link->count)
+               count++;
+       if (tty->count != count) {
+               printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+                                   "!= #fd's(%d) in %s\n",
+                      tty->name, tty->count, count, routine);
+               return count;
+       }       
+#endif
+       return 0;
+}
+
+/*
+ *     This is probably overkill for real world processors but
+ *     they are not on hot paths so a little discipline won't do 
+ *     any harm.
+ */
+ 
+static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
+{
+       down(&tty->termios_sem);
+       tty->termios->c_line = num;
+       up(&tty->termios_sem);
+}
+
+/*
+ *     This guards the refcounted line discipline lists. The lock
+ *     must be taken with irqs off because there are hangup path
+ *     callers who will do ldisc lookups and cannot sleep.
+ */
+ 
+static DEFINE_SPINLOCK(tty_ldisc_lock);
+static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait);
+static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */
+
+int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+{
+       unsigned long flags;
+       int ret = 0;
+       
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               return -EINVAL;
+       
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       if (new_ldisc) {
+               tty_ldiscs[disc] = *new_ldisc;
+               tty_ldiscs[disc].num = disc;
+               tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
+               tty_ldiscs[disc].refcount = 0;
+       } else {
+               if(tty_ldiscs[disc].refcount)
+                       ret = -EBUSY;
+               else
+                       tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED;
+       }
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       
+       return ret;
+}
+
+EXPORT_SYMBOL(tty_register_ldisc);
+
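
For illustration, a module would use the function above by filling in a
struct tty_ldisc and registering it under a spare discipline number; this
is only a sketch against the API shown here (the name, number and fields
are placeholders, and real line disciplines also supply open/close/
receive_buf callbacks):

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/tty.h>

    /* Placeholder line discipline; callbacks omitted for brevity. */
    static struct tty_ldisc demo_ldisc = {
        .magic = TTY_LDISC_MAGIC,
        .name  = "demo",
        .owner = THIS_MODULE,
    };

    static int __init demo_ldisc_init(void)
    {
        /* N_MOUSE is just a spare slot, chosen here for illustration. */
        return tty_register_ldisc(N_MOUSE, &demo_ldisc);
    }

    static void __exit demo_ldisc_exit(void)
    {
        /* Per the code above, passing NULL clears the table entry. */
        tty_register_ldisc(N_MOUSE, NULL);
    }

    module_init(demo_ldisc_init);
    module_exit(demo_ldisc_exit);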
+struct tty_ldisc *tty_ldisc_get(int disc)
+{
+       unsigned long flags;
+       struct tty_ldisc *ld;
+
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               return NULL;
+       
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+
+       ld = &tty_ldiscs[disc];
+       /* Check the entry is defined */
+       if(ld->flags & LDISC_FLAG_DEFINED)
+       {
+               /* If the module is being unloaded we can't use it */
+               if (!try_module_get(ld->owner))
+                       ld = NULL;
+               else /* lock it */
+                       ld->refcount++;
+       }
+       else
+               ld = NULL;
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       return ld;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_get);
+
+void tty_ldisc_put(int disc)
+{
+       struct tty_ldisc *ld;
+       unsigned long flags;
+       
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               BUG();
+               
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       ld = &tty_ldiscs[disc];
+       if(ld->refcount == 0)
+               BUG();
+       ld->refcount --;
+       module_put(ld->owner);
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+       
+EXPORT_SYMBOL_GPL(tty_ldisc_put);
+
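
Calls to tty_ldisc_get() and tty_ldisc_put() must be strictly paired, since
the refcount pins the dispatch-table entry against unload. A caller sketch
(N_TTY is the default line discipline number):

    #include <linux/tty.h>

    static void demo_use_ldisc(void)
    {
        struct tty_ldisc *ld = tty_ldisc_get(N_TTY); /* NULL on failure */
        if (ld != NULL) {
            /* ... entry is pinned while the reference is held ... */
            tty_ldisc_put(N_TTY);  /* drop the reference taken above */
        }
    }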
+static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+       tty->ldisc = *ld;
+       tty->ldisc.refcount = 0;
+}
+
+/**
+ *     tty_ldisc_try           -       internal helper
+ *     @tty: the tty
+ *
+ *     Make a single attempt to grab and bump the refcount on
+ *     the tty ldisc. Return 0 on failure or 1 on success. This is
+ *     used to implement both the waiting and non waiting versions
+ *     of tty_ldisc_ref
+ */
+
+static int tty_ldisc_try(struct tty_struct *tty)
+{
+       unsigned long flags;
+       struct tty_ldisc *ld;
+       int ret = 0;
+       
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       ld = &tty->ldisc;
+       if(test_bit(TTY_LDISC, &tty->flags))
+       {
+               ld->refcount++;
+               ret = 1;
+       }
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       return ret;
+}
+
+/**
+ *     tty_ldisc_ref_wait      -       wait for the tty ldisc
+ *     @tty: tty device
+ *
+ *     Dereference the line discipline for the terminal and take a 
+ *