xen-changelog

[Xen-changelog] Update to Linux 2.6.15.

# HG changeset patch
# User cl349@xxxxxxxxxxxxxxxxxxxx
# Node ID 5a63f675107cd84970e299a291485420d97bc139
# Parent  d609de73b9faca3da11509e04f09e092e065ffcd
Update to Linux 2.6.15.

Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
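A message in this "# HG changeset patch" format can be fed straight back to Mercurial. The sketch below is purely illustrative: it assumes the mail body has been saved as 2.6.15-update.patch (a made-up file name) inside a Mercurial clone of the corresponding Xen tree.

    hg import 2.6.15-update.patch      # apply the diff and record the changeset (user, date, description above)
    # or apply the diff to the working copy without committing:
    patch -p1 < 2.6.15-update.patch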

diff -r d609de73b9fa -r 5a63f675107c buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32  Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/linux-defconfig_xen0_x86_32  Wed Feb  1 18:00:19 2006
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen0
-# Tue Jan 31 18:56:38 2006
-#
+# Linux kernel version: 2.6.15-xen0
+# Wed Feb  1 15:54:13 2006
+#
+CONFIG_X86_32=y
+CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
-CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_MMU=y
 CONFIG_UID16=y
 CONFIG_GENERIC_ISA_DMA=y
@@ -35,6 +36,7 @@
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
 CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
@@ -62,6 +64,24 @@
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
+
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 #
 # Processor type and features
@@ -108,8 +128,10 @@
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
 CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_GOOD_APIC=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
 # CONFIG_SMP is not set
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
@@ -142,6 +164,7 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
 CONFIG_MTRR=y
 # CONFIG_REGPARM is not set
 CONFIG_SECCOMP=y
@@ -257,6 +280,10 @@
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
 # CONFIG_NETFILTER_NETLINK is not set
 
 #
@@ -346,8 +373,11 @@
 # CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
 # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
 
 #
 # Network testing
@@ -409,16 +439,7 @@
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=4096
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
 # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -464,6 +485,7 @@
 # CONFIG_BLK_DEV_CY82C693 is not set
 # CONFIG_BLK_DEV_CS5520 is not set
 # CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_CS5535 is not set
 # CONFIG_BLK_DEV_HPT34X is not set
 # CONFIG_BLK_DEV_HPT366 is not set
 # CONFIG_BLK_DEV_SC1200 is not set
@@ -519,6 +541,7 @@
 #
 # SCSI low-level drivers
 #
+# CONFIG_ISCSI_TCP is not set
 CONFIG_BLK_DEV_3W_XXXX_RAID=y
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
@@ -548,16 +571,17 @@
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
 # CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
 CONFIG_SCSI_SATA_PROMISE=y
-# CONFIG_SCSI_SATA_QSTOR is not set
 CONFIG_SCSI_SATA_SX4=y
 CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
 # CONFIG_SCSI_SATA_SIS is not set
 # CONFIG_SCSI_SATA_ULI is not set
 # CONFIG_SCSI_SATA_VIA is not set
 # CONFIG_SCSI_SATA_VITESSE is not set
 CONFIG_SCSI_SATA_INTEL_COMBINED=y
-# CONFIG_SCSI_CPQFCTS is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA_PIO is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -566,7 +590,6 @@
 # CONFIG_SCSI_INIA100 is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
 # CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 CONFIG_SCSI_QLA2XXX=y
@@ -807,7 +830,6 @@
 #
 # Serial drivers
 #
-# CONFIG_SERIAL_8250 is not set
 
 #
 # Non-8250 serial port support
@@ -870,6 +892,7 @@
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
 
 #
 # I2C support
@@ -954,12 +977,15 @@
 #
 # USB Device Class drivers
 #
-# CONFIG_USB_BLUETOOTH_TTY is not set
 # CONFIG_USB_ACM is not set
 # CONFIG_USB_PRINTER is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 # CONFIG_USB_STORAGE is not set
 
@@ -1212,6 +1238,11 @@
 # CONFIG_NLS_UTF8 is not set
 
 #
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
+#
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
@@ -1228,10 +1259,11 @@
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_EARLY_PRINTK=y
 # CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_4KSTACKS is not set
@@ -1312,4 +1344,3 @@
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_X86_BIOS_REBOOT=y
-CONFIG_PC=y
diff -r d609de73b9fa -r 5a63f675107c buildconfigs/linux-defconfig_xen0_x86_64
--- a/buildconfigs/linux-defconfig_xen0_x86_64  Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/linux-defconfig_xen0_x86_64  Wed Feb  1 18:00:19 2006
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen0
-# Tue Jan 31 16:21:00 2006
+# Linux kernel version: 2.6.15-xen0
+# Wed Feb  1 15:50:08 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -40,6 +40,7 @@
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
 CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
@@ -67,6 +68,24 @@
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
+
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 #
 # Processor type and features
@@ -88,7 +107,6 @@
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
-# CONFIG_NUMA is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
@@ -97,6 +115,7 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
 CONFIG_SWIOTLB=y
 CONFIG_DUMMY_IOMMU=y
@@ -196,6 +215,10 @@
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
 # CONFIG_NETFILTER_NETLINK is not set
 
 #
@@ -285,8 +308,11 @@
 # CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
 # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
 
 #
 # Network testing
@@ -348,16 +374,7 @@
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
 # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -458,6 +475,7 @@
 #
 # SCSI low-level drivers
 #
+# CONFIG_ISCSI_TCP is not set
 CONFIG_BLK_DEV_3W_XXXX_RAID=y
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
@@ -488,10 +506,12 @@
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
 # CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
 CONFIG_SCSI_SATA_PROMISE=y
-# CONFIG_SCSI_SATA_QSTOR is not set
 CONFIG_SCSI_SATA_SX4=y
 CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
 # CONFIG_SCSI_SATA_SIS is not set
 # CONFIG_SCSI_SATA_ULI is not set
 # CONFIG_SCSI_SATA_VIA is not set
@@ -499,7 +519,6 @@
 CONFIG_SCSI_SATA_INTEL_COMBINED=y
 CONFIG_SCSI_BUSLOGIC=y
 # CONFIG_SCSI_OMIT_FLASHPOINT is not set
-# CONFIG_SCSI_CPQFCTS is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
 # CONFIG_SCSI_EATA_PIO is not set
@@ -510,7 +529,6 @@
 # CONFIG_SCSI_INIA100 is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
 # CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 CONFIG_SCSI_QLA2XXX=y
@@ -750,7 +768,6 @@
 #
 # Serial drivers
 #
-# CONFIG_SERIAL_8250 is not set
 
 #
 # Non-8250 serial port support
@@ -800,6 +817,7 @@
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
 
 #
 # I2C support
@@ -884,12 +902,15 @@
 #
 # USB Device Class drivers
 #
-# CONFIG_USB_BLUETOOTH_TTY is not set
 # CONFIG_USB_ACM is not set
 # CONFIG_USB_PRINTER is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 # CONFIG_USB_STORAGE is not set
 
@@ -989,6 +1010,7 @@
 CONFIG_INFINIBAND_IPOIB=y
 CONFIG_INFINIBAND_IPOIB_DEBUG=y
 CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
+CONFIG_INFINIBAND_SRP=y
 
 #
 # SN Devices
@@ -1155,9 +1177,10 @@
 # CONFIG_NLS_UTF8 is not set
 
 #
-# Profiling support
+# Instrumentation Support
 #
 # CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
 
 #
 # Kernel hacking
@@ -1173,10 +1196,10 @@
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 CONFIG_FRAME_POINTER=y
-# CONFIG_CHECKING is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
 
 #
 # Security options
diff -r d609de73b9fa -r 5a63f675107c buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32  Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/linux-defconfig_xenU_x86_32  Wed Feb  1 18:00:19 2006
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xenU
-# Tue Jan 31 18:57:16 2006
-#
+# Linux kernel version: 2.6.15-xenU
+# Wed Feb  1 17:28:35 2006
+#
+CONFIG_X86_32=y
+CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
-CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_MMU=y
 CONFIG_UID16=y
 CONFIG_GENERIC_ISA_DMA=y
@@ -35,6 +36,7 @@
 # CONFIG_IKCONFIG is not set
 # CONFIG_CPUSETS is not set
 CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
@@ -63,6 +65,24 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
+
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 #
 # Processor type and features
@@ -109,8 +129,10 @@
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
 CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_GOOD_APIC=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
 CONFIG_SMP=y
 CONFIG_SMP_ALTERNATIVES=y
 CONFIG_NR_CPUS=8
@@ -141,6 +163,7 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
 # CONFIG_REGPARM is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
@@ -212,8 +235,11 @@
 # CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
 # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
 
 #
 # Network testing
@@ -248,16 +274,7 @@
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=4096
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
 # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -295,6 +312,7 @@
 #
 # SCSI low-level drivers
 #
+# CONFIG_ISCSI_TCP is not set
 # CONFIG_SCSI_SATA is not set
 # CONFIG_SCSI_DEBUG is not set
 
@@ -503,6 +521,11 @@
 # CONFIG_NLS_UTF8 is not set
 
 #
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
+#
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
@@ -519,10 +542,11 @@
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_EARLY_PRINTK=y
 # CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_4KSTACKS is not set
@@ -598,4 +622,3 @@
 CONFIG_X86_SMP=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
-CONFIG_PC=y
diff -r d609de73b9fa -r 5a63f675107c buildconfigs/linux-defconfig_xenU_x86_64
--- a/buildconfigs/linux-defconfig_xenU_x86_64  Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/linux-defconfig_xenU_x86_64  Wed Feb  1 18:00:19 2006
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xenU
-# Tue Jan 31 19:51:18 2006
+# Linux kernel version: 2.6.15-xenU
+# Wed Feb  1 15:49:27 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -42,6 +42,7 @@
 # CONFIG_IKCONFIG is not set
 # CONFIG_CPUSETS is not set
 CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
@@ -72,6 +73,24 @@
 CONFIG_STOP_MACHINE=y
 
 #
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
+#
 # Processor type and features
 #
 # CONFIG_MK8 is not set
@@ -90,7 +109,6 @@
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_BKL=y
-# CONFIG_NUMA is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
@@ -99,6 +117,7 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
 CONFIG_NR_CPUS=8
 # CONFIG_HOTPLUG_CPU is not set
@@ -219,6 +238,10 @@
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
 # CONFIG_NETFILTER_NETLINK is not set
 
 #
@@ -384,10 +407,18 @@
 CONFIG_NET_DIVERT=y
 # CONFIG_ECONET is not set
 CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_CLK_JIFFIES=y
 # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
 # CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
 CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
@@ -401,8 +432,10 @@
 CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
 CONFIG_NET_CLS=y
 # CONFIG_NET_CLS_BASIC is not set
 CONFIG_NET_CLS_TCINDEX=m
@@ -411,13 +444,14 @@
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_IND=y
 # CONFIG_CLS_U32_MARK is not set
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 # CONFIG_NET_EMATCH is not set
 # CONFIG_NET_CLS_ACT is not set
 CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_ESTIMATOR=y
 
 #
 # Network testing
@@ -496,7 +530,6 @@
 CONFIG_BT_HCIUART=m
 CONFIG_BT_HCIUART_H4=y
 CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_BCSP_TXCRC=y
 CONFIG_BT_HCIVHCI=m
 # CONFIG_IEEE80211 is not set
 
@@ -524,16 +557,7 @@
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
 # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -572,6 +596,7 @@
 #
 # SCSI low-level drivers
 #
+# CONFIG_ISCSI_TCP is not set
 CONFIG_SCSI_SATA=m
 # CONFIG_SCSI_DEBUG is not set
 
@@ -647,6 +672,7 @@
 #
 # ATM drivers
 #
+# CONFIG_ATM_DUMMY is not set
 CONFIG_ATM_TCP=m
 CONFIG_PPP=m
 CONFIG_PPP_MULTILINK=y
@@ -655,6 +681,7 @@
 CONFIG_PPP_SYNC_TTY=m
 CONFIG_PPP_DEFLATE=m
 # CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
 CONFIG_PPPOE=m
 CONFIG_PPPOATM=m
 # CONFIG_SLIP is not set
@@ -705,7 +732,7 @@
 CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
 CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
 # CONFIG_XFS_RT is not set
@@ -877,9 +904,10 @@
 CONFIG_NLS_UTF8=m
 
 #
-# Profiling support
+# Instrumentation Support
 #
 # CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
 
 #
 # Kernel hacking
@@ -895,9 +923,10 @@
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
 
 #
 # Security options
diff -r d609de73b9fa -r 5a63f675107c buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Wed Feb  1 18:00:19 2006
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen
-# Tue Jan 31 19:01:58 2006
-#
+# Linux kernel version: 2.6.15-xen
+# Wed Feb  1 17:28:24 2006
+#
+CONFIG_X86_32=y
+CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
-CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_MMU=y
 CONFIG_UID16=y
 CONFIG_GENERIC_ISA_DMA=y
@@ -38,6 +39,7 @@
 # CONFIG_IKCONFIG is not set
 # CONFIG_CPUSETS is not set
 CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EMBEDDED=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
@@ -47,7 +49,6 @@
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
@@ -67,6 +68,24 @@
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
+
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 #
 # Processor type and features
@@ -113,8 +132,10 @@
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
 CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_GOOD_APIC=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
 CONFIG_SMP=y
 CONFIG_SMP_ALTERNATIVES=y
 CONFIG_NR_CPUS=8
@@ -148,6 +169,7 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
 CONFIG_MTRR=y
 # CONFIG_REGPARM is not set
 CONFIG_SECCOMP=y
@@ -156,8 +178,8 @@
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=250
 CONFIG_PHYSICAL_START=0x100000
+# CONFIG_CRASH_DUMP is not set
 CONFIG_HOTPLUG_CPU=y
-# CONFIG_CRASH_DUMP is not set
 
 #
 # Power management options (ACPI, APM)
@@ -175,6 +197,7 @@
 CONFIG_ACPI_HOTKEY=m
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_THERMAL=m
 CONFIG_ACPI_ASUS=m
 CONFIG_ACPI_IBM=m
@@ -185,7 +208,7 @@
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_SYSTEM=y
 # CONFIG_X86_PM_TIMER is not set
-# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_CONTAINER=m
 
 #
 # CPU Frequency scaling
@@ -206,7 +229,6 @@
 # CONFIG_PCI_LEGACY_PROC is not set
 # CONFIG_PCI_DEBUG is not set
 CONFIG_SCx200=m
-# CONFIG_HOTPLUG_CPU is not set
 
 #
 # PCCARD (PCMCIA/CardBus) support
@@ -341,6 +363,10 @@
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
 CONFIG_NETFILTER_NETLINK=m
 CONFIG_NETFILTER_NETLINK_QUEUE=m
 CONFIG_NETFILTER_NETLINK_LOG=m
@@ -534,10 +560,18 @@
 CONFIG_ECONET_AUNUDP=y
 CONFIG_ECONET_NATIVE=y
 CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_CLK_JIFFIES=y
 # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
 # CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
 CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
@@ -551,8 +585,10 @@
 CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
 CONFIG_NET_CLS=y
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_TCINDEX=m
@@ -561,7 +597,6 @@
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 # CONFIG_CLS_U32_PERF is not set
-# CONFIG_NET_CLS_IND is not set
 # CONFIG_CLS_U32_MARK is not set
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
@@ -574,6 +609,8 @@
 CONFIG_NET_EMATCH_TEXT=m
 # CONFIG_NET_CLS_ACT is not set
 CONFIG_NET_CLS_POLICE=y
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_ESTIMATOR=y
 
 #
 # Network testing
@@ -676,7 +713,6 @@
 CONFIG_BT_HCIUART=m
 CONFIG_BT_HCIUART_H4=y
 CONFIG_BT_HCIUART_BCSP=y
-# CONFIG_BT_HCIUART_BCSP_TXCRC is not set
 CONFIG_BT_HCIBCM203X=m
 # CONFIG_BT_HCIBPA10X is not set
 CONFIG_BT_HCIBFUSB=m
@@ -731,6 +767,7 @@
 CONFIG_NFTL=m
 CONFIG_NFTL_RW=y
 CONFIG_INFTL=m
+CONFIG_RFD_FTL=m
 
 #
 # RAM/ROM/Flash chip drivers
@@ -821,6 +858,12 @@
 CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0
 # CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set
 # CONFIG_MTD_NAND_NANDSIM is not set
+
+#
+# OneNAND Flash Device Drivers
+#
+CONFIG_MTD_ONENAND=m
+# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
 
 #
 # Parallel port support
@@ -896,18 +939,9 @@
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_CDROM_PKTCDVD_BUFFERS=8
 # CONFIG_CDROM_PKTCDVD_WCACHE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
 CONFIG_ATA_OVER_ETH=m
 
 #
@@ -957,6 +991,7 @@
 CONFIG_BLK_DEV_CY82C693=y
 CONFIG_BLK_DEV_CS5520=y
 CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_CS5535=m
 CONFIG_BLK_DEV_HPT34X=y
 # CONFIG_HPT34X_AUTODMA is not set
 CONFIG_BLK_DEV_HPT366=y
@@ -1010,12 +1045,13 @@
 #
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=m
 CONFIG_SCSI_SAS_ATTRS=m
 
 #
 # SCSI low-level drivers
 #
+CONFIG_ISCSI_TCP=m
 CONFIG_BLK_DEV_3W_XXXX_RAID=m
 CONFIG_SCSI_3W_9XXX=m
 CONFIG_SCSI_ACARD=m
@@ -1046,16 +1082,17 @@
 CONFIG_SCSI_ATA_PIIX=m
 CONFIG_SCSI_SATA_MV=m
 CONFIG_SCSI_SATA_NV=m
+CONFIG_SCSI_PDC_ADMA=m
+# CONFIG_SCSI_SATA_QSTOR is not set
 CONFIG_SCSI_SATA_PROMISE=m
-# CONFIG_SCSI_SATA_QSTOR is not set
 CONFIG_SCSI_SATA_SX4=m
 CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIL24=m
 CONFIG_SCSI_SATA_SIS=m
 CONFIG_SCSI_SATA_ULI=m
 CONFIG_SCSI_SATA_VIA=m
 CONFIG_SCSI_SATA_VITESSE=m
 CONFIG_SCSI_SATA_INTEL_COMBINED=y
-# CONFIG_SCSI_CPQFCTS is not set
 CONFIG_SCSI_DMX3191D=m
 CONFIG_SCSI_EATA_PIO=m
 CONFIG_SCSI_FUTURE_DOMAIN=m
@@ -1074,11 +1111,9 @@
 CONFIG_SCSI_IPR=m
 # CONFIG_SCSI_IPR_TRACE is not set
 # CONFIG_SCSI_IPR_DUMP is not set
-CONFIG_SCSI_QLOGIC_ISP=m
 CONFIG_SCSI_QLOGIC_FC=m
 CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
 CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLOGIC_1280_1040=y
 CONFIG_SCSI_QLA2XXX=m
 CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
@@ -1206,7 +1241,6 @@
 # PHY device support
 #
 CONFIG_PHYLIB=m
-CONFIG_PHYCONTROL=y
 
 #
 # MII PHY device drivers
@@ -1302,7 +1336,6 @@
 # CONFIG_IXGB_NAPI is not set
 CONFIG_S2IO=m
 # CONFIG_S2IO_NAPI is not set
-# CONFIG_2BUFF_MODE is not set
 
 #
 # Token Ring devices
@@ -1418,6 +1451,7 @@
 #
 # ATM drivers
 #
+CONFIG_ATM_DUMMY=m
 CONFIG_ATM_TCP=m
 CONFIG_ATM_LANAI=m
 CONFIG_ATM_ENI=m
@@ -1462,6 +1496,7 @@
 CONFIG_PPP_SYNC_TTY=m
 CONFIG_PPP_DEFLATE=m
 CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
 CONFIG_PPPOE=m
 CONFIG_PPPOATM=m
 CONFIG_SLIP=m
@@ -1674,6 +1709,7 @@
 CONFIG_TOUCHSCREEN_MK712=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_PCSPKR=m
+CONFIG_INPUT_WISTRON_BTNS=m
 CONFIG_INPUT_UINPUT=m
 
 #
@@ -1816,17 +1852,20 @@
 # PCMCIA character devices
 #
 CONFIG_SYNCLINK_CS=m
+CONFIG_CARDMAN_4000=m
+CONFIG_CARDMAN_4040=m
 CONFIG_MWAVE=m
 CONFIG_SCx200_GPIO=m
 CONFIG_RAW_DRIVER=m
+CONFIG_MAX_RAW_DEVS=256
 # CONFIG_HPET is not set
-CONFIG_MAX_RAW_DEVS=256
 CONFIG_HANGCHECK_TIMER=m
 
 #
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+CONFIG_TELCLOCK=m
 
 #
 # I2C support
@@ -1883,6 +1922,7 @@
 CONFIG_SENSORS_PCF8591=m
 CONFIG_SENSORS_RTC8564=m
 CONFIG_SENSORS_MAX6875=m
+CONFIG_RTC_X1205_I2C=m
 # CONFIG_I2C_DEBUG_CORE is not set
 # CONFIG_I2C_DEBUG_ALGO is not set
 # CONFIG_I2C_DEBUG_BUS is not set
@@ -1964,6 +2004,7 @@
 # Video Adapters
 #
 CONFIG_VIDEO_BT848=m
+# CONFIG_VIDEO_BT848_DVB is not set
 CONFIG_VIDEO_SAA6588=m
 CONFIG_VIDEO_BWQCAM=m
 CONFIG_VIDEO_CQCAM=m
@@ -1990,7 +2031,10 @@
 CONFIG_VIDEO_HEXIUM_GEMINI=m
 CONFIG_VIDEO_CX88=m
 # CONFIG_VIDEO_CX88_DVB is not set
+CONFIG_VIDEO_EM28XX=m
 CONFIG_VIDEO_OVCAMCHIP=m
+CONFIG_VIDEO_AUDIO_DECODER=m
+CONFIG_VIDEO_DECODER=m
 
 #
 # Radio Adapters
@@ -2098,6 +2142,7 @@
 # ATSC (North American/Korean Terresterial DTV) frontends
 #
 CONFIG_DVB_NXT2002=m
+CONFIG_DVB_NXT200X=m
 CONFIG_DVB_OR51211=m
 CONFIG_DVB_OR51132=m
 CONFIG_DVB_BCM3510=m
@@ -2118,7 +2163,6 @@
 CONFIG_FB_CFB_FILLRECT=m
 CONFIG_FB_CFB_COPYAREA=m
 CONFIG_FB_CFB_IMAGEBLIT=m
-CONFIG_FB_SOFT_CURSOR=m
 # CONFIG_FB_MACMODES is not set
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
@@ -2134,6 +2178,7 @@
 CONFIG_VIDEO_SELECT=y
 CONFIG_FB_HGA=m
 # CONFIG_FB_HGA_ACCEL is not set
+CONFIG_FB_S1D13XXX=m
 CONFIG_FB_NVIDIA=m
 CONFIG_FB_NVIDIA_I2C=y
 CONFIG_FB_RIVA=m
@@ -2176,7 +2221,6 @@
 # CONFIG_FB_PM3 is not set
 CONFIG_FB_GEODE=y
 CONFIG_FB_GEODE_GX1=m
-CONFIG_FB_S1D13XXX=m
 CONFIG_FB_VIRTUAL=m
 
 #
@@ -2185,6 +2229,7 @@
 CONFIG_VGA_CONSOLE=y
 CONFIG_DUMMY_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE=m
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
 # CONFIG_FONTS is not set
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
@@ -2204,6 +2249,8 @@
 # Advanced Linux Sound Architecture
 #
 CONFIG_SND=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_AC97_BUS=m
 CONFIG_SND_TIMER=m
 CONFIG_SND_PCM=m
 CONFIG_SND_HWDEP=m
@@ -2231,8 +2278,6 @@
 CONFIG_SND_MTPAV=m
 CONFIG_SND_SERIAL_U16550=m
 CONFIG_SND_MPU401=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
 
 #
 # PCI devices
@@ -2295,30 +2340,13 @@
 # Open Sound System
 #
 CONFIG_SOUND_PRIME=m
-CONFIG_SOUND_BT878=m
-CONFIG_SOUND_CMPCI=m
-# CONFIG_SOUND_CMPCI_FM is not set
-# CONFIG_SOUND_CMPCI_MIDI is not set
-CONFIG_SOUND_CMPCI_JOYSTICK=y
-CONFIG_SOUND_EMU10K1=m
+# CONFIG_OBSOLETE_OSS_DRIVER is not set
 CONFIG_SOUND_FUSION=m
-CONFIG_SOUND_CS4281=m
-CONFIG_SOUND_ES1370=m
-CONFIG_SOUND_ES1371=m
-CONFIG_SOUND_ESSSOLO1=m
-CONFIG_SOUND_MAESTRO=m
-CONFIG_SOUND_MAESTRO3=m
 CONFIG_SOUND_ICH=m
-CONFIG_SOUND_SONICVIBES=m
 CONFIG_SOUND_TRIDENT=m
 # CONFIG_SOUND_MSNDCLAS is not set
 # CONFIG_SOUND_MSNDPIN is not set
-CONFIG_SOUND_VIA82CXXX=m
 CONFIG_SOUND_TVMIXER=m
-CONFIG_SOUND_ALI5455=m
-CONFIG_SOUND_FORTE=m
-CONFIG_SOUND_RME96XX=m
-CONFIG_SOUND_AD1980=m
 
 #
 # USB support
@@ -2355,15 +2383,15 @@
 # USB Device Class drivers
 #
 # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-
-#
-# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
-#
 CONFIG_USB_ACM=m
 CONFIG_USB_PRINTER=m
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -2375,7 +2403,6 @@
 CONFIG_USB_STORAGE_SDDR09=y
 CONFIG_USB_STORAGE_SDDR55=y
 CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ONETOUCH=y
 
 #
 # USB Input Devices
@@ -2460,6 +2487,7 @@
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_GENERIC=y
 CONFIG_USB_SERIAL_AIRPRIME=m
+CONFIG_USB_SERIAL_ANYDATA=m
 CONFIG_USB_SERIAL_BELKIN=m
 CONFIG_USB_SERIAL_WHITEHEAT=m
 CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
@@ -2593,7 +2621,7 @@
 CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
 CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
@@ -2660,6 +2688,7 @@
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
 # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
 CONFIG_JFFS2_ZLIB=y
 CONFIG_JFFS2_RTIME=y
@@ -2787,6 +2816,11 @@
 CONFIG_NLS_UTF8=m
 
 #
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
+#
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
@@ -2803,10 +2837,11 @@
 # CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_EARLY_PRINTK is not set
 # CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_4KSTACKS is not set
diff -r d609de73b9fa -r 5a63f675107c buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64   Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/linux-defconfig_xen_x86_64   Wed Feb  1 18:00:19 2006
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen
-# Tue Jan 31 18:19:07 2006
+# Linux kernel version: 2.6.15-xen
+# Wed Feb  1 15:51:35 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -42,6 +42,7 @@
 # CONFIG_IKCONFIG is not set
 # CONFIG_CPUSETS is not set
 CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
@@ -70,6 +71,24 @@
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
+
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 #
 # Processor type and features
@@ -92,7 +111,6 @@
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_BKL=y
-# CONFIG_NUMA is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
@@ -101,6 +119,7 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
 CONFIG_NR_CPUS=8
 # CONFIG_HOTPLUG_CPU is not set
@@ -259,6 +278,10 @@
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
 CONFIG_NETFILTER_NETLINK=m
 CONFIG_NETFILTER_NETLINK_QUEUE=m
 CONFIG_NETFILTER_NETLINK_LOG=m
@@ -444,10 +467,18 @@
 CONFIG_NET_DIVERT=y
 # CONFIG_ECONET is not set
 CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_CLK_JIFFIES=y
 # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
 # CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
 CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
@@ -461,8 +492,10 @@
 CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
 CONFIG_NET_CLS=y
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_TCINDEX=m
@@ -471,7 +504,6 @@
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_IND=y
 CONFIG_CLS_U32_MARK=y
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
@@ -484,6 +516,8 @@
 CONFIG_NET_EMATCH_TEXT=m
 # CONFIG_NET_CLS_ACT is not set
 CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_ESTIMATOR=y
 
 #
 # Network testing
@@ -570,7 +604,6 @@
 CONFIG_BT_HCIUART=m
 CONFIG_BT_HCIUART_H4=y
 CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_BCSP_TXCRC=y
 CONFIG_BT_HCIBCM203X=m
 CONFIG_BT_HCIBPA10X=m
 CONFIG_BT_HCIBFUSB=m
@@ -621,6 +654,7 @@
 CONFIG_NFTL=m
 CONFIG_NFTL_RW=y
 CONFIG_INFTL=m
+CONFIG_RFD_FTL=m
 
 #
 # RAM/ROM/Flash chip drivers
@@ -701,6 +735,12 @@
 CONFIG_MTD_NAND_IDS=m
 # CONFIG_MTD_NAND_DISKONCHIP is not set
 # CONFIG_MTD_NAND_NANDSIM is not set
+
+#
+# OneNAND Flash Device Drivers
+#
+CONFIG_MTD_ONENAND=m
+# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
 
 #
 # Parallel port support
@@ -767,18 +807,9 @@
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_CDROM_PKTCDVD_BUFFERS=8
 # CONFIG_CDROM_PKTCDVD_WCACHE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
 CONFIG_ATA_OVER_ETH=m
 
 #
@@ -885,6 +916,7 @@
 #
 # SCSI low-level drivers
 #
+CONFIG_ISCSI_TCP=m
 CONFIG_BLK_DEV_3W_XXXX_RAID=m
 CONFIG_SCSI_3W_9XXX=m
 CONFIG_SCSI_ACARD=m
@@ -914,10 +946,12 @@
 CONFIG_SCSI_ATA_PIIX=y
 CONFIG_SCSI_SATA_MV=m
 CONFIG_SCSI_SATA_NV=m
+CONFIG_SCSI_PDC_ADMA=m
+CONFIG_SCSI_SATA_QSTOR=m
 CONFIG_SCSI_SATA_PROMISE=m
-CONFIG_SCSI_SATA_QSTOR=m
 CONFIG_SCSI_SATA_SX4=m
 CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIL24=m
 CONFIG_SCSI_SATA_SIS=m
 CONFIG_SCSI_SATA_ULI=m
 CONFIG_SCSI_SATA_VIA=m
@@ -925,7 +959,6 @@
 CONFIG_SCSI_SATA_INTEL_COMBINED=y
 CONFIG_SCSI_BUSLOGIC=m
 # CONFIG_SCSI_OMIT_FLASHPOINT is not set
-# CONFIG_SCSI_CPQFCTS is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
 # CONFIG_SCSI_EATA_PIO is not set
@@ -944,10 +977,8 @@
 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
 # CONFIG_SCSI_QLOGIC_FC is not set
 CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLOGIC_1280_1040=y
 CONFIG_SCSI_QLA2XXX=y
 CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
@@ -1055,7 +1086,6 @@
 # PHY device support
 #
 CONFIG_PHYLIB=m
-CONFIG_PHYCONTROL=y
 
 #
 # MII PHY device drivers
@@ -1152,7 +1182,6 @@
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
-# CONFIG_2BUFF_MODE is not set
 
 #
 # Token Ring devices
@@ -1208,6 +1237,7 @@
 #
 # ATM drivers
 #
+CONFIG_ATM_DUMMY=m
 CONFIG_ATM_TCP=m
 CONFIG_ATM_LANAI=m
 CONFIG_ATM_ENI=m
@@ -1239,6 +1269,7 @@
 CONFIG_PPP_SYNC_TTY=m
 CONFIG_PPP_DEFLATE=m
 # CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPP_MPPE=m
 CONFIG_PPPOE=m
 CONFIG_PPPOATM=m
 CONFIG_SLIP=m
@@ -1466,7 +1497,6 @@
 #
 # Serial drivers
 #
-# CONFIG_SERIAL_8250 is not set
 
 #
 # Non-8250 serial port support
@@ -1558,6 +1588,7 @@
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+CONFIG_TELCLOCK=m
 
 #
 # I2C support
@@ -1611,6 +1642,7 @@
 CONFIG_SENSORS_PCF8591=m
 CONFIG_SENSORS_RTC8564=m
 CONFIG_SENSORS_MAX6875=m
+CONFIG_RTC_X1205_I2C=m
 # CONFIG_I2C_DEBUG_CORE is not set
 # CONFIG_I2C_DEBUG_ALGO is not set
 # CONFIG_I2C_DEBUG_BUS is not set
@@ -1692,6 +1724,7 @@
 # Video Adapters
 #
 CONFIG_VIDEO_BT848=m
+# CONFIG_VIDEO_BT848_DVB is not set
 CONFIG_VIDEO_SAA6588=m
 CONFIG_VIDEO_BWQCAM=m
 CONFIG_VIDEO_CQCAM=m
@@ -1711,14 +1744,20 @@
 CONFIG_VIDEO_ZORAN_LML33R10=m
 # CONFIG_VIDEO_ZR36120 is not set
 CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_SAA7134_ALSA=m
 CONFIG_VIDEO_SAA7134_DVB=m
+CONFIG_VIDEO_SAA7134_DVB_ALL_FRONTENDS=y
 CONFIG_VIDEO_MXB=m
 CONFIG_VIDEO_DPC=m
 CONFIG_VIDEO_HEXIUM_ORION=m
 CONFIG_VIDEO_HEXIUM_GEMINI=m
 CONFIG_VIDEO_CX88=m
 CONFIG_VIDEO_CX88_DVB=m
+CONFIG_VIDEO_CX88_DVB_ALL_FRONTENDS=y
+CONFIG_VIDEO_EM28XX=m
 CONFIG_VIDEO_OVCAMCHIP=m
+CONFIG_VIDEO_AUDIO_DECODER=m
+CONFIG_VIDEO_DECODER=m
 
 #
 # Radio Adapters
@@ -1831,6 +1870,7 @@
 # ATSC (North American/Korean Terresterial DTV) frontends
 #
 CONFIG_DVB_NXT2002=m
+CONFIG_DVB_NXT200X=m
 CONFIG_DVB_OR51211=m
 CONFIG_DVB_OR51132=m
 CONFIG_DVB_BCM3510=m
@@ -1852,7 +1892,6 @@
 CONFIG_FB_CFB_FILLRECT=y
 CONFIG_FB_CFB_COPYAREA=y
 CONFIG_FB_CFB_IMAGEBLIT=y
-CONFIG_FB_SOFT_CURSOR=y
 # CONFIG_FB_MACMODES is not set
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
@@ -1866,6 +1905,7 @@
 CONFIG_FB_VESA=y
 CONFIG_VIDEO_SELECT=y
 # CONFIG_FB_HGA is not set
+# CONFIG_FB_S1D13XXX is not set
 # CONFIG_FB_NVIDIA is not set
 CONFIG_FB_RIVA=m
 # CONFIG_FB_RIVA_I2C is not set
@@ -1901,7 +1941,6 @@
 CONFIG_FB_TRIDENT_ACCEL=y
 # CONFIG_FB_PM3 is not set
 # CONFIG_FB_GEODE is not set
-# CONFIG_FB_S1D13XXX is not set
 # CONFIG_FB_VIRTUAL is not set
 
 #
@@ -1910,6 +1949,7 @@
 CONFIG_VGA_CONSOLE=y
 CONFIG_DUMMY_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
 # CONFIG_FONTS is not set
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
@@ -1936,6 +1976,8 @@
 # Advanced Linux Sound Architecture
 #
 CONFIG_SND=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_AC97_BUS=m
 CONFIG_SND_TIMER=m
 CONFIG_SND_PCM=m
 CONFIG_SND_HWDEP=m
@@ -1963,8 +2005,6 @@
 CONFIG_SND_MTPAV=m
 # CONFIG_SND_SERIAL_U16550 is not set
 CONFIG_SND_MPU401=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
 
 #
 # PCI devices
@@ -2059,15 +2099,15 @@
 # USB Device Class drivers
 #
 # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-
-#
-# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
-#
 CONFIG_USB_ACM=m
 CONFIG_USB_PRINTER=m
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -2079,7 +2119,6 @@
 CONFIG_USB_STORAGE_SDDR09=y
 CONFIG_USB_STORAGE_SDDR55=y
 CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ONETOUCH=y
 
 #
 # USB Input Devices
@@ -2161,6 +2200,7 @@
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_GENERIC=y
 CONFIG_USB_SERIAL_AIRPRIME=m
+CONFIG_USB_SERIAL_ANYDATA=m
 CONFIG_USB_SERIAL_BELKIN=m
 CONFIG_USB_SERIAL_WHITEHEAT=m
 CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
@@ -2252,6 +2292,7 @@
 # CONFIG_INFINIBAND_MTHCA_DEBUG is not set
 CONFIG_INFINIBAND_IPOIB=m
 # CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
 
 #
 # SN Devices
@@ -2293,7 +2334,7 @@
 CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
 CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
 # CONFIG_XFS_RT is not set
@@ -2355,6 +2396,7 @@
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
 # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
 CONFIG_JFFS2_ZLIB=y
 CONFIG_JFFS2_RTIME=y
@@ -2474,9 +2516,10 @@
 CONFIG_NLS_UTF8=m
 
 #
-# Profiling support
+# Instrumentation Support
 #
 # CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
 
 #
 # Kernel hacking
@@ -2492,9 +2535,10 @@
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
 
 #
 # Security options
diff -r d609de73b9fa -r 5a63f675107c buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen     Wed Feb  1 17:06:16 2006
+++ b/buildconfigs/mk.linux-2.6-xen     Wed Feb  1 18:00:19 2006
@@ -2,7 +2,7 @@
 OS           = linux
 
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.14
+LINUX_VER    = 2.6.15
 
 EXTRAVERSION ?= xen
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig    Wed Feb  1 18:00:19 2006
@@ -5,7 +5,7 @@
 
 mainmenu "Linux Kernel Configuration"
 
-config X86
+config X86_32
        bool
        default y
        help
@@ -15,6 +15,10 @@
          AMD, Cyrix, and others.
 
 config SEMAPHORE_SLEEPERS
+       bool
+       default y
+
+config X86
        bool
        default y
 
@@ -160,304 +164,7 @@
        default y
        depends on SMP && X86_ES7000 && MPENTIUMIII
 
-if !X86_ELAN
-
-choice
-       prompt "Processor family"
-       default M686
-
-config M386
-       bool "386"
-       ---help---
-         This is the processor type of your CPU. This information is used for
-         optimizing purposes. In order to compile a kernel that can run on
-         all x86 CPU types (albeit not optimally fast), you can specify
-         "386" here.
-
-         The kernel will not necessarily run on earlier architectures than
-         the one you have chosen, e.g. a Pentium optimized kernel will run on
-         a PPro, but not necessarily on a i486.
-
-         Here are the settings recommended for greatest speed:
-         - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
-         486DLC/DLC2, UMC 486SX-S and NexGen Nx586.  Only "386" kernels
-         will run on a 386 class machine.
-         - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
-         SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
-         - "586" for generic Pentium CPUs lacking the TSC
-         (time stamp counter) register.
-         - "Pentium-Classic" for the Intel Pentium.
-         - "Pentium-MMX" for the Intel Pentium MMX.
-         - "Pentium-Pro" for the Intel Pentium Pro.
-         - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
-         - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
-         - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
-         - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
-         - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
-         - "Crusoe" for the Transmeta Crusoe series.
-         - "Efficeon" for the Transmeta Efficeon series.
-         - "Winchip-C6" for original IDT Winchip.
-         - "Winchip-2" for IDT Winchip 2.
-         - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
-         - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
-         - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
-         - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
-
-         If you don't know what to do, choose "386".
-
-config M486
-       bool "486"
-       help
-         Select this for a 486 series processor, either Intel or one of the
-         compatible processors from AMD, Cyrix, IBM, or Intel.  Includes DX,
-         DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
-         U5S.
-
-config M586
-       bool "586/K5/5x86/6x86/6x86MX"
-       help
-         Select this for an 586 or 686 series processor such as the AMD K5,
-         the Cyrix 5x86, 6x86 and 6x86MX.  This choice does not
-         assume the RDTSC (Read Time Stamp Counter) instruction.
-
-config M586TSC
-       bool "Pentium-Classic"
-       help
-         Select this for a Pentium Classic processor with the RDTSC (Read
-         Time Stamp Counter) instruction for benchmarking.
-
-config M586MMX
-       bool "Pentium-MMX"
-       help
-         Select this for a Pentium with the MMX graphics/multimedia
-         extended instructions.
-
-config M686
-       bool "Pentium-Pro"
-       help
-         Select this for Intel Pentium Pro chips.  This enables the use of
-         Pentium Pro extended instructions, and disables the init-time guard
-         against the f00f bug found in earlier Pentiums.
-
-config MPENTIUMII
-       bool "Pentium-II/Celeron(pre-Coppermine)"
-       help
-         Select this for Intel chips based on the Pentium-II and
-         pre-Coppermine Celeron core.  This option enables an unaligned
-         copy optimization, compiles the kernel with optimization flags
-         tailored for the chip, and applies any applicable Pentium Pro
-         optimizations.
-
-config MPENTIUMIII
-       bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
-       help
-         Select this for Intel chips based on the Pentium-III and
-         Celeron-Coppermine core.  This option enables use of some
-         extended prefetch instructions in addition to the Pentium II
-         extensions.
-
-config MPENTIUMM
-       bool "Pentium M"
-       help
-         Select this for Intel Pentium M (not Pentium-4 M)
-         notebook chips.
-
-config MPENTIUM4
-       bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
-       help
-         Select this for Intel Pentium 4 chips.  This includes the
-         Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
-         (not Pentium M) chips.  This option enables compile flags
-         optimized for the chip, uses the correct cache shift, and
-         applies any applicable Pentium III optimizations.
-
-config MK6
-       bool "K6/K6-II/K6-III"
-       help
-         Select this for an AMD K6-family processor.  Enables use of
-         some extended instructions, and passes appropriate optimization
-         flags to GCC.
-
-config MK7
-       bool "Athlon/Duron/K7"
-       help
-         Select this for an AMD Athlon K7-family processor.  Enables use of
-         some extended instructions, and passes appropriate optimization
-         flags to GCC.
-
-config MK8
-       bool "Opteron/Athlon64/Hammer/K8"
-       help
-         Select this for an AMD Opteron or Athlon64 Hammer-family processor.  Enables
-         use of some extended instructions, and passes appropriate optimization
-         flags to GCC.
-
-config MCRUSOE
-       bool "Crusoe"
-       help
-         Select this for a Transmeta Crusoe processor.  Treats the processor
-         like a 586 with TSC, and sets some GCC optimization flags (like a
-         Pentium Pro with no alignment requirements).
-
-config MEFFICEON
-       bool "Efficeon"
-       help
-         Select this for a Transmeta Efficeon processor.
-
-config MWINCHIPC6
-       bool "Winchip-C6"
-       help
-         Select this for an IDT Winchip C6 chip.  Linux and GCC
-         treat this chip as a 586TSC with some extended instructions
-         and alignment requirements.
-
-config MWINCHIP2
-       bool "Winchip-2"
-       help
-         Select this for an IDT Winchip-2.  Linux and GCC
-         treat this chip as a 586TSC with some extended instructions
-         and alignment requirements.
-
-config MWINCHIP3D
-       bool "Winchip-2A/Winchip-3"
-       help
-         Select this for an IDT Winchip-2A or 3.  Linux and GCC
-         treat this chip as a 586TSC with some extended instructions
-         and alignment reqirements.  Also enable out of order memory
-         stores for this CPU, which can increase performance of some
-         operations.
-
-config MGEODEGX1
-       bool "GeodeGX1"
-       help
-         Select this for a Geode GX1 (Cyrix MediaGX) chip.
-
-config MCYRIXIII
-       bool "CyrixIII/VIA-C3"
-       help
-         Select this for a Cyrix III or C3 chip.  Presently Linux and GCC
-         treat this chip as a generic 586. Whilst the CPU is 686 class,
-         it lacks the cmov extension which gcc assumes is present when
-         generating 686 code.
-         Note that Nehemiah (Model 9) and above will not boot with this
-         kernel due to them lacking the 3DNow! instructions used in earlier
-         incarnations of the CPU.
-
-config MVIAC3_2
-       bool "VIA C3-2 (Nehemiah)"
-       help
-         Select this for a VIA C3 "Nehemiah". Selecting this enables usage
-         of SSE and tells gcc to treat the CPU as a 686.
-         Note, this kernel will not boot on older (pre model 9) C3s.
-
-endchoice
-
-config X86_GENERIC
-       bool "Generic x86 support"
-       help
-         Instead of just including optimizations for the selected
-         x86 variant (e.g. PII, Crusoe or Athlon), include some more
-         generic optimizations as well. This will make the kernel
-         perform better on x86 CPUs other than that selected.
-
-         This is really intended for distributors who need more
-         generic optimizations.
-
-endif
-
-#
-# Define implied options from the CPU selection here
-#
-config X86_CMPXCHG
-       bool
-       depends on !M386
-       default y
-
-config X86_XADD
-       bool
-       depends on !M386
-       default y
-
-config X86_L1_CACHE_SHIFT
-       int
-       default "7" if MPENTIUM4 || X86_GENERIC
-       default "4" if X86_ELAN || M486 || M386
-       default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
-       default "6" if MK7 || MK8 || MPENTIUMM
-
-config RWSEM_GENERIC_SPINLOCK
-       bool
-       depends on M386
-       default y
-
-config RWSEM_XCHGADD_ALGORITHM
-       bool
-       depends on !M386
-       default y
-
-config GENERIC_CALIBRATE_DELAY
-       bool
-       default y
-
-config X86_PPRO_FENCE
-       bool
-       depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
-       default y
-
-config X86_F00F_BUG
-       bool
-       depends on M586MMX || M586TSC || M586 || M486 || M386
-       default y
-
-config X86_WP_WORKS_OK
-       bool
-       depends on !M386
-       default y
-
-config X86_INVLPG
-       bool
-       depends on !M386
-       default y
-
-config X86_BSWAP
-       bool
-       depends on !M386
-       default y
-
-config X86_POPAD_OK
-       bool
-       depends on !M386
-       default y
-
-config X86_ALIGNMENT_16
-       bool
-       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
-       default y
-
-config X86_GOOD_APIC
-       bool
-       depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
-       default y
-
-config X86_INTEL_USERCOPY
-       bool
-       depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
-       default y
-
-config X86_USE_PPRO_CHECKSUM
-       bool
-       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
-       default y
-
-config X86_USE_3DNOW
-       bool
-       depends on MCYRIXIII || MK7
-       default y
-
-config X86_OOSTORE
-       bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
-       default y
+source "arch/i386/Kconfig.cpu"
 
 config HPET_TIMER
        bool "HPET Timer Support"
@@ -1041,7 +748,7 @@
 
 config APM
        tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM
+       depends on PM && PM_LEGACY
        ---help---
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
@@ -1333,9 +1040,22 @@
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+       depends on EXPERIMENTAL
+
 if !X86_XEN
 source "arch/i386/oprofile/Kconfig"
 endif
+
+config KPROBES
+       bool "Kprobes (EXPERIMENTAL)"
+       help
+         Kprobes allows you to trap at almost any kernel address and
+         execute a callback function.  register_kprobe() establishes
+         a probepoint and specifies the callback.  Kprobes is useful
+         for kernel debugging, non-intrusive instrumentation and testing.
+         If in doubt, say "N".
+endmenu
 
 source "arch/i386/Kconfig.debug"
 
@@ -1382,8 +1102,3 @@
        bool
        depends on X86_SMP || (X86_VOYAGER && SMP)
        default y
-
-config PC
-       bool
-       depends on X86 && !EMBEDDED
-       default y
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/Makefile
--- a/linux-2.6-xen-sparse/arch/i386/Makefile   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/Makefile   Wed Feb  1 18:00:19 2006
@@ -34,35 +34,8 @@
 # prevent gcc from keeping the stack 16 byte aligned
 CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
 
-align := $(cc-option-align)
-cflags-$(CONFIG_M386)          += -march=i386
-cflags-$(CONFIG_M486)          += -march=i486
-cflags-$(CONFIG_M586)          += -march=i586
-cflags-$(CONFIG_M586TSC)       += -march=i586
-cflags-$(CONFIG_M586MMX)       += $(call cc-option,-march=pentium-mmx,-march=i586)
-cflags-$(CONFIG_M686)          += -march=i686
-cflags-$(CONFIG_MPENTIUMII)    += -march=i686 $(call cc-option,-mtune=pentium2)
-cflags-$(CONFIG_MPENTIUMIII)   += -march=i686 $(call cc-option,-mtune=pentium3)
-cflags-$(CONFIG_MPENTIUMM)     += -march=i686 $(call cc-option,-mtune=pentium3)
-cflags-$(CONFIG_MPENTIUM4)     += -march=i686 $(call cc-option,-mtune=pentium4)
-cflags-$(CONFIG_MK6)           += -march=k6
-# Please note, that patches that add -march=athlon-xp and friends are pointless.
-# They make zero difference whatsosever to performance at this time.
-cflags-$(CONFIG_MK7)           += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
-cflags-$(CONFIG_MK8)           += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
-cflags-$(CONFIG_MCRUSOE)       += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MEFFICEON)     += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MWINCHIPC6)    += $(call cc-option,-march=winchip-c6,-march=i586)
-cflags-$(CONFIG_MWINCHIP2)     += $(call cc-option,-march=winchip2,-march=i586)
-cflags-$(CONFIG_MWINCHIP3D)    += $(call cc-option,-march=winchip2,-march=i586)
-cflags-$(CONFIG_MCYRIXIII)     += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MVIAC3_2)      += $(call cc-option,-march=c3-2,-march=i686)
-
-# AMD Elan support
-cflags-$(CONFIG_X86_ELAN)      += -march=i486
-
-# Geode GX1 support
-cflags-$(CONFIG_MGEODEGX1)             += $(call cc-option,-march=pentium-mmx,-march=i486)
+# CPU-specific tuning. Anything which can be shared with UML should go here.
+include $(srctree)/arch/i386/Makefile.cpu
 
 # -mregparm=3 works ok on gcc-3.0 and later
 #
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile    Wed Feb  1 18:00:19 2006
@@ -86,6 +86,7 @@
 include $(srctree)/scripts/Makefile.xen
 
 obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
 n-obj-xen := i8259.o doublefault.o timers/ reboot.o smpboot.o trampoline.o
 
 obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c     Wed Feb  1 18:00:19 2006
@@ -36,22 +36,16 @@
 #include <asm/apic.h>
 #include <asm/io.h>
 #include <asm/mpspec.h>
-#ifdef CONFIG_XEN
-#include <asm/fixmap.h>
-#endif
 
 #ifdef CONFIG_X86_64
 
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
 extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
-       return 0;
-}
-
+
+extern int gsi_irq_sharing(int gsi);
 #include <asm/proto.h>
+
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
 
 #else                          /* X86 */
 
@@ -59,6 +53,8 @@
 #include <mach_apic.h>
 #include <mach_mpparse.h>
 #endif                         /* CONFIG_X86_LOCAL_APIC */
+
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
 
 #endif                         /* X86 */
 
@@ -138,7 +134,7 @@
        int idx;
 
 #ifndef CONFIG_XEN
-       if (phys + size < 8 * 1024 * 1024) 
+       if (phys + size < 8 * 1024 * 1024)
                return __va(phys);
 #endif
 
@@ -254,9 +250,7 @@
 
        acpi_table_print_madt_entry(header);
 
-       /* no utility in registering a disabled processor */
-       if (processor->flags.enabled == 0)
-               return 0;
+       /* Register even disabled CPUs for cpu hotplug */
 
        x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
 
@@ -464,7 +458,7 @@
                *irq = IO_APIC_VECTOR(gsi);
        else
 #endif
-               *irq = gsi;
+               *irq = gsi_irq_sharing(gsi);
        return 0;
 }
 
@@ -538,7 +532,7 @@
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
 static unsigned long __init
-acpi_scan_rsdp (unsigned long start, unsigned long length)
+acpi_scan_rsdp(unsigned long start, unsigned long length)
 {
        unsigned long offset = 0;
        unsigned long sig_len = sizeof("RSD PTR ") - 1;
@@ -647,6 +641,13 @@
                        return 0;
 
                pmtmr_ioport = fadt->xpm_tmr_blk.address;
+               /*
+                * "X" fields are optional extensions to the original V1.0
+                * fields, so we must selectively expand V1.0 fields if the
+                * corresponding X field is zero.
+                */
+               if (!pmtmr_ioport)
+                       pmtmr_ioport = fadt->V1_pm_tmr_blk;
        } else {
                /* FADT rev. 1 */
                pmtmr_ioport = fadt->V1_pm_tmr_blk;
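[Editor's illustration] The pmtmr hunk above encodes the ACPI convention that the 64-bit "X" fields of the FADT are optional extensions of the original V1.0 fields, so a zero X field means "use the V1.0 value". A minimal standalone sketch of that fallback, with an illustrative two-field struct standing in for the kernel's real FADT layout:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative FADT subset: the V1.0 PM timer port and its optional "X" extension. */
    struct fadt_sketch {
            uint32_t v1_pm_tmr_blk;     /* original V1.0 field                  */
            uint64_t xpm_tmr_blk_addr;  /* extended field; zero when not filled */
    };

    /* Prefer the extended field, fall back to the V1.0 field when it is zero. */
    static uint64_t pick_pm_timer_port(const struct fadt_sketch *fadt)
    {
            uint64_t port = fadt->xpm_tmr_blk_addr;

            if (!port)
                    port = fadt->v1_pm_tmr_blk;
            return port;
    }

    int main(void)
    {
            struct fadt_sketch with_x  = { .v1_pm_tmr_blk = 0x408, .xpm_tmr_blk_addr = 0x1008 };
            struct fadt_sketch only_v1 = { .v1_pm_tmr_blk = 0x408, .xpm_tmr_blk_addr = 0 };

            printf("with X field:  %#llx\n", (unsigned long long)pick_pm_timer_port(&with_x));
            printf("V1.0 fallback: %#llx\n", (unsigned long long)pick_pm_timer_port(&only_v1));
            return 0;
    }
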
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c    Wed Feb  1 18:00:19 2006
@@ -33,8 +33,6 @@
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
-extern void mcheck_init(struct cpuinfo_x86 *c);
-
 extern void machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c);
 
 extern int disable_pse;
@@ -238,10 +236,10 @@
                cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                c->x86 = (tfms >> 8) & 15;
                c->x86_model = (tfms >> 4) & 15;
-               if (c->x86 == 0xf) {
+               if (c->x86 == 0xf)
                        c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               }
                c->x86_mask = tfms & 15;
                if (cap0 & (1<<19))
                        c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
@@ -340,7 +338,7 @@
        c->x86_model = c->x86_mask = 0; /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
-       c->x86_num_cores = 1;
+       c->x86_max_cores = 1;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
        if (!have_cpuid_p()) {
@@ -436,9 +434,8 @@
        }
 
        /* Init Machine Check Exception if available. */
-#ifdef CONFIG_X86_MCE
        mcheck_init(c);
-#endif
+
        if (c == &boot_cpu_data)
                sysenter_setup();
        enable_sep_cpu();
@@ -453,52 +450,44 @@
 void __devinit detect_ht(struct cpuinfo_x86 *c)
 {
        u32     eax, ebx, ecx, edx;
-       int     index_msb, tmp;
+       int     index_msb, core_bits;
        int     cpu = smp_processor_id();
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+
+       c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
 
        if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
 
-       cpuid(1, &eax, &ebx, &ecx, &edx);
        smp_num_siblings = (ebx & 0xff0000) >> 16;
 
        if (smp_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
        } else if (smp_num_siblings > 1 ) {
-               index_msb = 31;
 
                if (smp_num_siblings > NR_CPUS) {
                        printk(KERN_WARNING "CPU: Unsupported number of the 
siblings %d", smp_num_siblings);
                        smp_num_siblings = 1;
                        return;
                }
-               tmp = smp_num_siblings;
-               while ((tmp & 0x80000000 ) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+
+               index_msb = get_count_order(smp_num_siblings);
                phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
 
-               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
-
-               tmp = smp_num_siblings;
-               index_msb = 31;
-               while ((tmp & 0x80000000) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
-
-               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
-
-               if (c->x86_num_cores > 1)
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+               index_msb = get_count_order(smp_num_siblings) ;
+
+               core_bits = get_count_order(c->x86_max_cores);
+
+               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+                                              ((1 << core_bits) - 1);
+
+               if (c->x86_max_cores > 1)
                        printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
                               cpu_core_id[cpu]);
        }
@@ -615,12 +604,6 @@
                set_in_cr4(X86_CR4_TSD);
        }
 
-       /*
-        * Set up the per-thread TLS descriptor cache:
-        */
-       memcpy(thread->tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
-              GDT_ENTRY_TLS_ENTRIES * 8);
-
        cpu_gdt_init(&cpu_gdt_descr[cpu]);
 
        /*
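[Editor's illustration] For readers following the detect_ht() rewrite above: the two open-coded find-the-MSB loops are replaced by get_count_order(), and the core ID is then masked out of the initial APIC ID. A standalone sketch of that bit arithmetic with made-up CPUID values (get_count_order() is re-implemented here so the example builds outside the kernel; on the default subarchitecture phys_pkg_id() reduces to the right shift used below):

    #include <stdio.h>

    /* Kernel-style get_count_order(): smallest n with (1 << n) >= count. */
    static int get_count_order(unsigned int count)
    {
            int order = 0;

            while ((1u << order) < count)
                    order++;
            return order;
    }

    int main(void)
    {
            /* Made-up topology: 2 HT siblings per core, 2 cores per package. */
            unsigned int apicid = 0x07;        /* CPUID.1 EBX[31:24] (illustrative) */
            unsigned int logical_per_pkg = 4;  /* CPUID.1 EBX[23:16] (illustrative) */
            unsigned int max_cores = 2;        /* c->x86_max_cores                  */

            int index_msb  = get_count_order(logical_per_pkg);
            int package_id = apicid >> index_msb;

            int threads_per_core = logical_per_pkg / max_cores;
            int core_shift = get_count_order(threads_per_core);
            int core_bits  = get_count_order(max_cores);
            int core_id    = (apicid >> core_shift) & ((1 << core_bits) - 1);

            printf("APIC ID %#x -> package %d, core %d\n", apicid, package_id, core_id);
            return 0;
    }
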
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Wed Feb  1 18:00:19 2006
@@ -746,11 +746,10 @@
 nmi_debug_stack_check:
        cmpw $__KERNEL_CS,16(%esp)
        jne nmi_stack_correct
-       cmpl $debug - 1,(%esp)
-       jle nmi_stack_correct
+       cmpl $debug,(%esp)
+       jb nmi_stack_correct
        cmpl $debug_esp_fix_insn,(%esp)
-       jle nmi_debug_stack_fixup
-nmi_debug_stack_fixup:
+       ja nmi_stack_correct
        FIX_STACK(24,nmi_stack_correct, 1)
        jmp nmi_stack_correct
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c       Wed Feb  1 18:00:19 2006
@@ -88,6 +88,9 @@
 int (*ioapic_renumber_irq)(int ioapic, int irq);
 atomic_t irq_mis_count;
 
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 static DEFINE_SPINLOCK(ioapic_lock);
 
 /*
@@ -784,10 +787,11 @@
 }
 
 #ifndef CONFIG_XEN
+#endif
 /*
  * Find the pin to which IRQ[irq] (ISA) is connected
  */
-static int find_isa_irq_pin(int irq, int type)
+static int __init find_isa_irq_pin(int irq, int type)
 {
        int i;
 
@@ -806,7 +810,33 @@
        }
        return -1;
 }
-#endif
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                   ) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+                       break;
+       }
+       if (i < mp_irq_entries) {
+               int apic;
+               for(apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                               return apic;
+               }
+       }
+
+       return -1;
+}
 
 /*
  * Find a specific PCI IRQ entry.
@@ -1306,7 +1336,7 @@
  * Set up the 8259A-master output pin:
  */
 #ifndef CONFIG_XEN
-static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
        struct IO_APIC_route_entry entry;
        unsigned long flags;
@@ -1340,8 +1370,8 @@
         * Add it to the IO-APIC irq-routing table:
         */
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-       io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+       io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        enable_8259A_irq(0);
@@ -1647,7 +1677,8 @@
 static void __init enable_IO_APIC(void)
 {
        union IO_APIC_reg_01 reg_01;
-       int i;
+       int i8259_apic, i8259_pin;
+       int i, apic;
        unsigned long flags;
 
        for (i = 0; i < PIN_MAP_SIZE; i++) {
@@ -1661,11 +1692,52 @@
        /*
         * The number of IO-APIC IRQ registers (== #pins):
         */
-       for (i = 0; i < nr_ioapics; i++) {
+       for (apic = 0; apic < nr_ioapics; apic++) {
                spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(i, 1);
+               reg_01.raw = io_apic_read(apic, 1);
                spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[i] = reg_01.bits.entries+1;
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for(apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any of the pins is in ExtINT mode */
+               for(pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       spin_lock_irqsave(&ioapic_lock, flags);
+                       *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+                       *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+                       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+
+                       /* If the interrupt line is enabled and in ExtInt mode
+                        * I have found the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see what if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic
+        * the i8259 probably is not connected the ioapic but give the
+        * mptable a chance anyway.
+        */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is setup in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
        }
 
        /*
@@ -1679,9 +1751,6 @@
  */
 void disable_IO_APIC(void)
 {
-#ifndef CONFIG_XEN
-       int pin;
-#endif
        /*
         * Clear the IO-APIC before rebooting:
         */
@@ -1693,8 +1762,7 @@
         * Put that IOAPIC in virtual wire mode
         * so legacy interrupts can be delivered.
         */
-       pin = find_isa_irq_pin(0, mp_ExtINT);
-       if (pin != -1) {
+       if (ioapic_i8259.pin != -1) {
                struct IO_APIC_route_entry entry;
                unsigned long flags;
 
@@ -1705,7 +1773,7 @@
                entry.polarity        = 0; /* High */
                entry.delivery_status = 0;
                entry.dest_mode       = 0; /* Physical */
-               entry.delivery_mode   = 7; /* ExtInt */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
                entry.dest.physical.physical_dest = 0;
 
@@ -1714,11 +1782,13 @@
                 * Add it to the IO-APIC irq-routing table:
                 */
                spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-               io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+               io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+1));
+               io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+0));
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
-       disconnect_bsp_APIC(pin != -1);
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 #endif
 }
 
@@ -1994,7 +2064,7 @@
 {
        int irq = vector_to_irq(vector);
 
-       move_irq(vector);
+       move_native_irq(vector);
        ack_edge_ioapic_irq(irq);
 }
 
@@ -2009,7 +2079,7 @@
 {
        int irq = vector_to_irq(vector);
 
-       move_irq(vector);
+       move_native_irq(vector);
        end_level_ioapic_irq(irq);
 }
 
@@ -2174,20 +2244,21 @@
  */
 static inline void unlock_ExtINT_logic(void)
 {
-       int pin, i;
+       int apic, pin, i;
        struct IO_APIC_route_entry entry0, entry1;
        unsigned char save_control, save_freq_select;
        unsigned long flags;
 
-       pin = find_isa_irq_pin(8, mp_INT);
+       pin  = find_isa_irq_pin(8, mp_INT);
+       apic = find_isa_irq_apic(8, mp_INT);
        if (pin == -1)
                return;
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
-       *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+       *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
        spin_unlock_irqrestore(&ioapic_lock, flags);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
 
        memset(&entry1, 0, sizeof(entry1));
 
@@ -2200,8 +2271,8 @@
        entry1.vector = 0;
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        save_control = CMOS_READ(RTC_CONTROL);
@@ -2219,11 +2290,11 @@
 
        CMOS_WRITE(save_control, RTC_CONTROL);
        CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -2235,7 +2306,7 @@
  */
 static inline void check_timer(void)
 {
-       int pin1, pin2;
+       int apic1, pin1, apic2, pin2;
        int vector;
 
        /*
@@ -2257,10 +2328,13 @@
        timer_ack = 1;
        enable_8259A_irq(0);
 
-       pin1 = find_isa_irq_pin(0, mp_INT);
-       pin2 = find_isa_irq_pin(0, mp_ExtINT);
-
-       printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
+
+       printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
 
        if (pin1 != -1) {
                /*
@@ -2277,8 +2351,9 @@
                                clear_IO_APIC_pin(0, pin1);
                        return;
                }
-               clear_IO_APIC_pin(0, pin1);
-               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+               clear_IO_APIC_pin(apic1, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+                               "IO-APIC\n");
        }
 
        printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A 
... ");
@@ -2287,13 +2362,13 @@
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               setup_ExtINT_IRQ0_pin(pin2, vector);
+               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
                if (timer_irq_works()) {
                        printk("works.\n");
                        if (pin1 != -1)
-                               replace_pin_at_irq(0, 0, pin1, 0, pin2);
+                               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
                        else
-                               add_pin_to_irq(0, 0, pin2);
+                               add_pin_to_irq(0, apic2, pin2);
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                        }
@@ -2302,7 +2377,7 @@
                /*
                 * Cleanup, just in case ...
                 */
-               clear_IO_APIC_pin(0, pin2);
+               clear_IO_APIC_pin(apic2, pin2);
        }
        printk(" failed.\n");
 
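[Editor's illustration] To summarise the enable_IO_APIC() additions above: the code first scans the IO-APIC routing entries for an unmasked ExtINT pin, only then falls back to the MP table, and warns when the two sources disagree. A compressed sketch of just that decision order, with hypothetical scan_hardware_for_extint()/mp_table_extint_entry() helpers standing in for the real io_apic_read()/find_isa_irq_*() probes:

    #include <stdio.h>

    struct pin_ref { int apic, pin; };

    /* Hypothetical probes: a negative .pin means "not found". */
    static struct pin_ref scan_hardware_for_extint(void) { return (struct pin_ref){ 0, 2 }; }
    static struct pin_ref mp_table_extint_entry(void)    { return (struct pin_ref){ 0, 0 }; }

    static struct pin_ref locate_i8259(void)
    {
            struct pin_ref hw = scan_hardware_for_extint();
            struct pin_ref mp = mp_table_extint_entry();

            /* Trust the MP table only when the hardware scan found nothing. */
            if (hw.pin == -1 && mp.pin >= 0) {
                    printf("ExtINT not setup in hardware but reported by MP table\n");
                    return mp;
            }

            /* Complain (but keep the hardware answer) when the two disagree. */
            if (hw.pin >= 0 && mp.pin >= 0 &&
                (hw.apic != mp.apic || hw.pin != mp.pin))
                    printf("ExtINT in hardware and MP table differ\n");

            return hw;
    }

    int main(void)
    {
            struct pin_ref i8259 = locate_i8259();

            printf("i8259 routed via IO-APIC %d pin %d\n", i8259.apic, i8259.pin);
            return 0;
    }
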
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c   Wed Feb  1 18:00:19 2006
@@ -218,7 +218,7 @@
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                        seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
@@ -232,7 +232,7 @@
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                        seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
@@ -246,12 +246,12 @@
                spin_unlock_irqrestore(&irq_desc[i].lock, flags);
        } else if (i == NR_IRQS) {
                seq_printf(p, "NMI: ");
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                        seq_printf(p, "%10u ", nmi_count(j));
                seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
                seq_printf(p, "LOC: ");
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                        seq_printf(p, "%10u ",
                                per_cpu(irq_stat,j).apic_timer_irqs);
                seq_putc(p, '\n');
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c   Wed Feb  1 18:00:19 2006
@@ -18,6 +18,7 @@
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c       Wed Feb  1 18:00:19 2006
@@ -69,7 +69,7 @@
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
-static unsigned int __initdata num_processors;
+static unsigned int __devinitdata num_processors;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
@@ -120,7 +120,7 @@
 #endif
 
 #ifndef CONFIG_XEN
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __devinit MP_processor_info (struct mpc_config_processor *m)
 {
        int ver, apicid;
        physid_mask_t phys_cpu;
@@ -183,17 +183,6 @@
                boot_cpu_physical_apicid = m->mpc_apicid;
        }
 
-       if (num_processors >= NR_CPUS) {
-               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS); 
-               return;
-       }
-
-       if (num_processors >= maxcpus) {
-               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-                       " Processor ignored.\n", maxcpus); 
-               return;
-       }
        ver = m->mpc_apicver;
 
        if (!MP_valid_apicid(apicid, ver)) {
@@ -201,11 +190,6 @@
                        m->mpc_apicid, MAX_APICS);
                return;
        }
-
-       cpu_set(num_processors, cpu_possible_map);
-       num_processors++;
-       phys_cpu = apicid_to_cpu_present(apicid);
-       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
 
        /*
         * Validate version
@@ -217,9 +201,29 @@
                ver = 0x10;
        }
        apic_version[m->mpc_apicid] = ver;
+
+       phys_cpu = apicid_to_cpu_present(apicid);
+       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       "  Processor ignored.\n", NR_CPUS);
+               return;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                       " Processor ignored.\n", maxcpus);
+               return;
+       }
+
+       cpu_set(num_processors, cpu_possible_map);
+       num_processors++;
+
        if ((num_processors > 8) &&
-           APIC_XAPIC(ver) &&
-           (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+           ((APIC_XAPIC(ver) &&
+            (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) ||
+            (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)))
                def_to_bigsmp = 1;
        else
                def_to_bigsmp = 0;
@@ -850,7 +854,7 @@
 }
 
 
-void __init mp_register_lapic (
+void __devinit mp_register_lapic (
        u8                      id, 
        u8                      enabled)
 {
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c       Wed Feb  1 18:00:19 2006
@@ -108,15 +108,31 @@
        if (need_resched()) {
                local_irq_enable();
        } else {
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
                stop_hz_timer();
                /* Blocking includes an implicit local_irq_enable(). */
                HYPERVISOR_sched_op(SCHEDOP_block, 0);
                start_hz_timer();
+               set_thread_flag(TIF_POLLING_NRFLAG);
        }
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(default_idle);
 #endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void play_dead(void)
+{
+       HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+       local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 
 /*
  * The idle thread. There's no useful work to be
@@ -126,9 +142,9 @@
  */
 void cpu_idle(void)
 {
-#if defined(CONFIG_HOTPLUG_CPU)
-       int cpu = raw_smp_processor_id();
-#endif
+       int cpu = smp_processor_id();
+
+       set_thread_flag(TIF_POLLING_NRFLAG);
 
        /* endless idle loop with no priority at all */
        while (1) {
@@ -139,17 +155,15 @@
 
                        rmb();
 
-#if defined(CONFIG_HOTPLUG_CPU)
-                       if (cpu_is_offline(cpu)) {
-                               HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
-                               local_irq_enable();
-                       }
-#endif
+                       if (cpu_is_offline(cpu))
+                               play_dead();
 
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        xen_idle();
                }
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
        }
 }
 
@@ -187,6 +201,8 @@
 
 void show_regs(struct pt_regs * regs)
 {
+       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
        printk("\n");
        printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
        printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, 
smp_processor_id());
@@ -203,6 +219,13 @@
        printk(" DS: %04x ES: %04x\n",
                0xffff & regs->xds,0xffff & regs->xes);
 
+       cr0 = read_cr0();
+       cr2 = read_cr2();
+       cr3 = read_cr3();
+       if (current_cpu_data.x86 > 4) {
+               cr4 = read_cr4();
+       }
+       printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
        show_trace(NULL, &regs->esp);
 }
 
@@ -274,13 +297,6 @@
 void flush_thread(void)
 {
        struct task_struct *tsk = current;
-
-       /*
-        * Remove function-return probe instances associated with this task
-        * and put them back on the free list. Do not insert an exit probe for
-        * this function, it will be disabled by kprobe_flush_task if you do.
-        */
-       kprobe_flush_task(tsk);
 
        memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));        
@@ -445,7 +461,9 @@
        struct pt_regs ptregs;
        
        ptregs = *(struct pt_regs *)
-               ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
+               ((unsigned long)tsk->thread_info +
+               /* see comments in copy_thread() about -8 */
+               THREAD_SIZE - sizeof(ptregs) - 8);
        ptregs.xcs &= 0xffff;
        ptregs.xds &= 0xffff;
        ptregs.xes &= 0xffff;
@@ -453,7 +471,6 @@
 
        elf_core_copy_regs(regs, &ptregs);
 
-       boot_option_idle_override = 1;
        return 1;
 }
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Wed Feb  1 18:00:19 2006
@@ -142,9 +142,7 @@
 EXPORT_SYMBOL(drive_info);
 #endif
 struct screen_info screen_info;
-#ifdef CONFIG_VT
 EXPORT_SYMBOL(screen_info);
-#endif
 struct apm_info apm_info;
 EXPORT_SYMBOL(apm_info);
 struct sys_desc_table_struct {
@@ -425,14 +423,24 @@
                }
        }
        for (i = 0; i < e820.nr_map; i++) {
-               if (e820.map[i].type == E820_RAM) {
-                       current_addr = e820.map[i].addr + e820.map[i].size;
-                       if (current_addr >= size) {
-                               e820.map[i].size -= current_addr-size;
-                               e820.nr_map = i + 1;
-                               return;
-                       }
-               }
+               current_addr = e820.map[i].addr + e820.map[i].size;
+               if (current_addr < size)
+                       continue;
+
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+
+               if (e820.map[i].addr >= size) {
+                       /*
+                        * This region starts past the end of the
+                        * requested size, skip it completely.
+                        */
+                       e820.nr_map = i;
+               } else {
+                       e820.nr_map = i + 1;
+                       e820.map[i].size -= current_addr - size;
+               }
+               return;
        }
 }
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c   Wed Feb  1 18:00:19 2006
@@ -68,15 +68,15 @@
 
 /* Package ID of each logical CPU */
 int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(phys_proc_id);
 
 /* Core ID of each logical CPU */
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_core_id);
-
+
+/* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
 
+/* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
@@ -87,7 +87,11 @@
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
+#ifdef CONFIG_HOTPLUG_CPU
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
+#else
 cpumask_t cpu_possible_map;
+#endif
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
@@ -440,35 +444,60 @@
 
 static int cpucount;
 
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
 static inline void
 set_cpu_sibling_map(int cpu)
 {
        int i;
+       struct cpuinfo_x86 *c = cpu_data;
+
+       cpu_set(cpu, cpu_sibling_setup_map);
 
        if (smp_num_siblings > 1) {
-               for (i = 0; i < NR_CPUS; i++) {
-                       if (!cpu_isset(i, cpu_callout_map))
-                               continue;
-                       if (cpu_core_id[cpu] == cpu_core_id[i]) {
+               for_each_cpu_mask(i, cpu_sibling_setup_map) {
+                       if (phys_proc_id[cpu] == phys_proc_id[i] &&
+                           cpu_core_id[cpu] == cpu_core_id[i]) {
                                cpu_set(i, cpu_sibling_map[cpu]);
                                cpu_set(cpu, cpu_sibling_map[i]);
-                       }
-               }
-       } else {
-               cpu_set(cpu, cpu_sibling_map[cpu]);
-       }
-
-       if (current_cpu_data.x86_num_cores > 1) {
-               for (i = 0; i < NR_CPUS; i++) {
-                       if (!cpu_isset(i, cpu_callout_map))
-                               continue;
-                       if (phys_proc_id[cpu] == phys_proc_id[i]) {
                                cpu_set(i, cpu_core_map[cpu]);
                                cpu_set(cpu, cpu_core_map[i]);
                        }
                }
        } else {
+               cpu_set(cpu, cpu_sibling_map[cpu]);
+       }
+
+       if (current_cpu_data.x86_max_cores == 1) {
                cpu_core_map[cpu] = cpu_sibling_map[cpu];
+               c[cpu].booted_cores = 1;
+               return;
+       }
+
+       for_each_cpu_mask(i, cpu_sibling_setup_map) {
+               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                       cpu_set(i, cpu_core_map[cpu]);
+                       cpu_set(cpu, cpu_core_map[i]);
+                       /*
+                        *  Does this new cpu bringup a new core?
+                        */
+                       if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+                               /*
+                                * for each core in package, increment
+                                * the booted_cores for this new cpu
+                                */
+                               if (first_cpu(cpu_sibling_map[i]) == i)
+                                       c[cpu].booted_cores++;
+                               /*
+                                * increment the core count for all
+                                * the other cpus in this package
+                                */
+                               if (i != cpu)
+                                       c[i].booted_cores++;
+                       } else if (i != cpu && !c[cpu].booted_cores)
+                               c[cpu].booted_cores = c[i].booted_cores;
+               }
        }
 }
 
@@ -483,6 +512,7 @@
         * things done here to the most necessary things.
         */
        cpu_init();
+       preempt_disable();
        smp_callin();
        while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                rep_nop();
@@ -608,7 +638,7 @@
 
        printk("Inquiring remote APIC #%d...\n", apicid);
 
-       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+       for (i = 0; i < ARRAY_SIZE(regs); i++) {
                printk("... APIC #%d %s: ", apicid, names[i]);
 
                /*
@@ -1092,11 +1122,8 @@
 
        current_thread_info()->cpu = 0;
        smp_tune_scheduling();
-       cpus_clear(cpu_sibling_map[0]);
-       cpu_set(0, cpu_sibling_map[0]);
-
-       cpus_clear(cpu_core_map[0]);
-       cpu_set(0, cpu_core_map[0]);
+
+       set_cpu_sibling_map(0);
 
        /*
         * If we couldn't find an SMP configuration at boot time,
@@ -1280,15 +1307,24 @@
 remove_siblinginfo(int cpu)
 {
        int sibling;
-
+       struct cpuinfo_x86 *c = cpu_data;
+
+       for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+               cpu_clear(cpu, cpu_core_map[sibling]);
+               /*
+                * last thread sibling in this cpu core going down
+                */
+               if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+                       c[sibling].booted_cores--;
+       }
+                       
        for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
                cpu_clear(cpu, cpu_sibling_map[sibling]);
-       for_each_cpu_mask(sibling, cpu_core_map[cpu])
-               cpu_clear(cpu, cpu_core_map[sibling]);
        cpus_clear(cpu_sibling_map[cpu]);
        cpus_clear(cpu_core_map[cpu]);
        phys_proc_id[cpu] = BAD_APICID;
        cpu_core_id[cpu] = BAD_APICID;
+       cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
 int __cpu_disable(void)
@@ -1307,8 +1343,7 @@
        if (cpu == 0)
                return -EBUSY;
 
-       /* We enable the timer again on the exit path of the death loop */
-       disable_APIC_timer();
+       clear_local_APIC();
        /* Allow any queued timer interrupts to get serviced */
        local_irq_enable();
        mdelay(1);
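[Editor's illustration] The set_cpu_sibling_map()/remove_siblinginfo() rework above keys both maps off the (phys_proc_id, cpu_core_id) pair: HT siblings share package and core, core siblings only the package, and booted_cores counts one representative thread per core. A small userspace sketch of those relations over plain bitmasks, using a hypothetical four-CPU, two-core topology:

    #include <stdio.h>

    #define NCPUS 4

    /* Hypothetical topology: one package, two cores, two HT threads per core. */
    static const int phys_proc_id[NCPUS] = { 0, 0, 0, 0 };
    static const int cpu_core_id[NCPUS]  = { 0, 0, 1, 1 };

    static int same_core(int a, int b)
    {
            return phys_proc_id[a] == phys_proc_id[b] && cpu_core_id[a] == cpu_core_id[b];
    }

    static int same_package(int a, int b)
    {
            return phys_proc_id[a] == phys_proc_id[b];
    }

    int main(void)
    {
            unsigned int sibling_map[NCPUS] = { 0 }, core_map[NCPUS] = { 0 };
            int booted_cores = 0;
            int cpu, i;

            for (cpu = 0; cpu < NCPUS; cpu++)
                    for (i = 0; i < NCPUS; i++) {
                            if (same_core(cpu, i))
                                    sibling_map[cpu] |= 1u << i;   /* HT siblings   */
                            if (same_package(cpu, i))
                                    core_map[cpu]    |= 1u << i;   /* core siblings */
                    }

            /* booted_cores: count the lowest-numbered thread of each core once. */
            for (cpu = 0; cpu < NCPUS; cpu++)
                    if ((sibling_map[cpu] & ((1u << cpu) - 1)) == 0)
                            booted_cores++;

            for (cpu = 0; cpu < NCPUS; cpu++)
                    printf("cpu%d: siblings=0x%x core_map=0x%x\n",
                           cpu, sibling_map[cpu], core_map[cpu]);
            printf("booted_cores in package 0: %d\n", booted_cores);
            return 0;
    }
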
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c  Wed Feb  1 18:00:19 2006
@@ -69,8 +69,6 @@
 
 #include <asm/arch_hooks.h>
 
-#include "io_ports.h"
-
 #include <xen/evtchn.h>
 
 #if defined (__i386__)
@@ -78,10 +76,6 @@
 #endif
 
 int pit_latch_buggy;              /* extern */
-
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
 
 #if defined(__x86_64__)
 unsigned long vxtime_hz = PIT_TICK_RATE;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c Wed Feb  1 18:00:19 2006
@@ -626,13 +626,6 @@
        nmi_enter();
 
        cpu = smp_processor_id();
-
-#ifdef CONFIG_HOTPLUG_CPU
-       if (!cpu_online(cpu)) {
-               nmi_exit();
-               return;
-       }
-#endif
 
        ++nmi_count(cpu);
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps.c     Wed Feb  1 18:00:19 2006
@@ -488,6 +488,7 @@
                                tss->io_bitmap_max - thread->io_bitmap_max);
                tss->io_bitmap_max = thread->io_bitmap_max;
                tss->io_bitmap_base = IO_BITMAP_OFFSET;
+               tss->io_bitmap_owner = thread;
                put_cpu();
                return;
        }
@@ -641,13 +642,6 @@
        nmi_enter();
 
        cpu = smp_processor_id();
-
-#ifdef CONFIG_HOTPLUG_CPU
-       if (!cpu_online(cpu)) {
-               nmi_exit();
-               return;
-       }
-#endif
 
        ++nmi_count(cpu);
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile
--- a/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile  Wed Feb  1 18:00:19 2006
@@ -3,5 +3,5 @@
 #
 
 obj-y                          := setup.o topology.o
-
+  
 topology-y                     := ../mach-default/topology.o
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c     Wed Feb  1 18:00:19 2006
@@ -22,7 +22,6 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
-#include <linux/percpu.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -30,8 +29,6 @@
 #include <asm/kdebug.h>
 
 extern void die(const char *,struct pt_regs *,long);
-
-DEFINE_PER_CPU(pgd_t *, cur_pgd);
 
 /*
  * Unlock any spinlocks which will prevent us from getting the
@@ -111,7 +108,7 @@
                desc = (void *)desc + (seg & ~7);
        } else {
                /* Must disable preemption while reading the GDT. */
-               desc = (u32 *)get_cpu_gdt_table(get_cpu());
+               desc = (u32 *)get_cpu_gdt_table(get_cpu());
                desc = (void *)desc + (seg & ~7);
        }
 
@@ -223,10 +220,7 @@
        unsigned long *p, page;
        unsigned long mfn; 
 
-       preempt_disable();
-       page = __pa(per_cpu(cur_pgd, smp_processor_id()));
-       preempt_enable();
-
+       page = read_cr3();
        p  = (unsigned long *)__va(page);
        p += (address >> 30) * 2;
        printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
@@ -256,13 +250,8 @@
 {
        unsigned long page;
 
-       preempt_disable();
-       page = ((unsigned long *) per_cpu(cur_pgd, smp_processor_id()))
-           [address >> 22];
-       preempt_enable();
-
-       page = ((unsigned long *) per_cpu(cur_pgd, get_cpu()))
-           [address >> 22];
+       page = read_cr3();
+       page = ((unsigned long *) __va(page))[address >> 22];
        printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
               machine_to_phys(page));
        /*
@@ -304,8 +293,8 @@
        unsigned long address;
        int write, si_code;
 
-       address = HYPERVISOR_shared_info->vcpu_info[
-               smp_processor_id()].arch.cr2;
+       /* get the address */
+        address = read_cr2();
 
        /* Set the "privileged fault" bit to something sane. */
        error_code &= ~4;
@@ -582,14 +571,14 @@
                 * an interrupt in the middle of a task switch..
                 */
                int index = pgd_index(address);
+               unsigned long pgd_paddr;
                pgd_t *pgd, *pgd_k;
                pud_t *pud, *pud_k;
                pmd_t *pmd, *pmd_k;
                pte_t *pte_k;
 
-               preempt_disable();
-               pgd = index + per_cpu(cur_pgd, smp_processor_id());
-               preempt_enable();
+               pgd_paddr = read_cr3();
+               pgd = index + (pgd_t *)__va(pgd_paddr);
                pgd_k = init_mm.pgd + index;
 
                if (!pgd_present(*pgd_k))
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Wed Feb  1 18:00:19 2006
@@ -27,6 +27,8 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/memory_hotplug.h>
+#include <linux/initrd.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -313,18 +315,47 @@
        pkmap_page_table = pte; 
 }
 
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+static void __devinit free_new_highpage(struct page *page, int pfn)
+{
+       set_page_count(page, 1);
+       if (pfn < xen_start_info->nr_pages)
+               __free_page(page);
+       totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
        if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                ClearPageReserved(page);
-               set_page_count(page, 1);
-               if (pfn < xen_start_info->nr_pages)
-                       __free_page(page);
-               totalhigh_pages++;
+               free_new_highpage(page, pfn);
        } else
                SetPageReserved(page);
 }
 
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+       free_new_highpage(page, pfn);
+       totalram_pages++;
+#ifdef CONFIG_FLATMEM
+       max_mapnr = max(pfn, max_mapnr);
+#endif
+       num_physpages++;
+       return 0;
+}
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+void online_page(struct page *page)
+{
+       ClearPageReserved(page);
+       add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
 #ifdef CONFIG_NUMA
 extern void set_highmem_pages_init(int);
 #else
@@ -332,7 +363,7 @@
 {
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
-               one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+               add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
        totalram_pages += totalhigh_pages;
 }
 #endif /* CONFIG_FLATMEM */
@@ -359,12 +390,9 @@
 {
        unsigned long vaddr;
        pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
-       int i;
 
        swapper_pg_dir = pgd_base;
        init_mm.pgd    = pgd_base;
-       for (i = 0; i < NR_CPUS; i++)
-               per_cpu(cur_pgd, i) = pgd_base;
 
        /* Enable PSE if available */
        if (cpu_has_pse) {
@@ -693,6 +721,28 @@
 
        set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
 }
+
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+       struct pglist_data *pgdata = &contig_page_data;
+       struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+       unsigned long start_pfn = start >> PAGE_SHIFT;
+       unsigned long nr_pages = size >> PAGE_SHIFT;
+
+       return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+       return -EINVAL;
+}
+#endif
 
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Wed Feb  1 18:00:19 2006
@@ -323,9 +323,15 @@
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
+/**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
 void iounmap(volatile void __iomem *addr)
 {
-       struct vm_struct *p;
+       struct vm_struct *p, *o;
 
        if ((void __force *)addr <= high_memory)
                return;
@@ -338,14 +344,27 @@
        if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
                return;
 
-       write_lock(&vmlist_lock);
-       p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
-       if (!p) { 
-               printk(KERN_WARNING "iounmap: bad address %p\n", addr);
+       addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
+
+       /* Use the vm area unlocked, assuming the caller
+          ensures there isn't another iounmap for the same address
+          in parallel. Reuse of the virtual address is prevented by
+          leaving it in the global lists until we're done with it.
+          cpa takes care of the direct mappings. */
+       read_lock(&vmlist_lock);
+       for (p = vmlist; p; p = p->next) {
+               if (p->addr == addr)
+                       break;
+       }
+       read_unlock(&vmlist_lock);
+
+       if (!p) {
+               printk("iounmap: bad address %p\n", addr);
                dump_stack();
-               goto out_unlock;
-       }
-
+               return;
+       }
+
+       /* Reset the direct mapping. Can block */
        if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
                /* p->size includes the guard page, but cpa doesn't like that */
                change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
@@ -353,8 +372,10 @@
                                 PAGE_KERNEL);
                global_flush_tlb();
        } 
-out_unlock:
-       write_unlock(&vmlist_lock);
+
+       /* Finally remove it */
+       o = remove_vm_area((void *)addr);
+       BUG_ON(p != o || o == NULL);
        kfree(p); 
 }
 EXPORT_SYMBOL(iounmap);
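[Editor's illustration] The new iounmap() kernel-doc above makes the locking change explicit: the vm area is now looked up under only a read lock, so the caller must guarantee exactly one iounmap() per successful ioremap(). A hedged driver-style sketch of that pairing (the MMIO base, size, and module names are made up; error handling is minimal):

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/errno.h>
    #include <asm/io.h>

    /* Made-up example resource; a real driver would get this from its device. */
    #define EXAMPLE_MMIO_BASE  0xfeb00000UL
    #define EXAMPLE_MMIO_SIZE  0x1000

    static void __iomem *example_regs;

    static int __init example_init(void)
    {
            example_regs = ioremap_nocache(EXAMPLE_MMIO_BASE, EXAMPLE_MMIO_SIZE);
            if (!example_regs)
                    return -ENOMEM;
            return 0;
    }

    static void __exit example_exit(void)
    {
            /* Exactly one iounmap() for the one successful ioremap() above. */
            iounmap(example_regs);
            example_regs = NULL;
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");
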
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Wed Feb  1 18:00:19 2006
@@ -39,11 +39,13 @@
        pg_data_t *pgdat;
        unsigned long i;
        struct page_state ps;
+       unsigned long flags;
 
        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", 
nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
+               pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat_page_nr(pgdat, i);
                        total++;
@@ -56,6 +58,7 @@
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
+               pgdat_resize_unlock(pgdat, &flags);
        }
        printk(KERN_INFO "%d pages of RAM\n", total);
        printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
@@ -267,19 +270,19 @@
        struct page *page = virt_to_page(pgd);
        page->index = (unsigned long)pgd_list;
        if (pgd_list)
-               pgd_list->private = (unsigned long)&page->index;
+               set_page_private(pgd_list, (unsigned long)&page->index);
        pgd_list = page;
-       page->private = (unsigned long)&pgd_list;
+       set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
        struct page *next, **pprev, *page = virt_to_page(pgd);
        next = (struct page *)page->index;
-       pprev = (struct page **)page->private;
+       pprev = (struct page **)page_private(page);
        *pprev = next;
        if (next)
-               next->private = (unsigned long)pprev;
+               set_page_private(next, (unsigned long)pprev);
 }
 
 void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/i386/pci/Makefile       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/pci/Makefile       Wed Feb  1 18:00:19 2006
@@ -1,7 +1,7 @@
 obj-y                          := i386.o
 
 obj-$(CONFIG_PCI_BIOS)         += pcbios.o
-obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig.o direct.o
 obj-$(CONFIG_PCI_DIRECT)       += direct.o
 
 pci-y                          := fixup.o
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c      Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c      Wed Feb  1 18:00:19 2006
@@ -550,31 +550,48 @@
        return 0;
 }
 
-static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+static __init int via_router_probe(struct irq_router *r,
+                               struct pci_dev *router, u16 device)
 {
        /* FIXME: We should move some of the quirk fixup stuff here */
 
-       if (router->device == PCI_DEVICE_ID_VIA_82C686 &&
-                       device == PCI_DEVICE_ID_VIA_82C586_0) {
-               /* Asus k7m bios wrongly reports 82C686A as 586-compatible */
-               device = PCI_DEVICE_ID_VIA_82C686;
-       }
-
-       switch(device)
-       {
-               case PCI_DEVICE_ID_VIA_82C586_0:
-                       r->name = "VIA";
-                       r->get = pirq_via586_get;
-                       r->set = pirq_via586_set;
-                       return 1;
-               case PCI_DEVICE_ID_VIA_82C596:
+       /*
+        * work arounds for some buggy BIOSes
+        */
+       if (device == PCI_DEVICE_ID_VIA_82C586_0) {
+               switch(router->device) {
                case PCI_DEVICE_ID_VIA_82C686:
-               case PCI_DEVICE_ID_VIA_8231:
+                       /*
+                        * Asus k7m bios wrongly reports 82C686A
+                        * as 586-compatible
+                        */
+                       device = PCI_DEVICE_ID_VIA_82C686;
+                       break;
+               case PCI_DEVICE_ID_VIA_8235:
+                       /**
+                        * Asus a7v-x bios wrongly reports 8235
+                        * as 586-compatible
+                        */
+                       device = PCI_DEVICE_ID_VIA_8235;
+                       break;
+               }
+       }
+
+       switch(device) {
+       case PCI_DEVICE_ID_VIA_82C586_0:
+               r->name = "VIA";
+               r->get = pirq_via586_get;
+               r->set = pirq_via586_set;
+               return 1;
+       case PCI_DEVICE_ID_VIA_82C596:
+       case PCI_DEVICE_ID_VIA_82C686:
+       case PCI_DEVICE_ID_VIA_8231:
+       case PCI_DEVICE_ID_VIA_8235:
                /* FIXME: add new ones for 8233/5 */
-                       r->name = "VIA";
-                       r->get = pirq_via_get;
-                       r->set = pirq_via_set;
-                       return 1;
+               r->name = "VIA";
+               r->get = pirq_via_get;
+               r->set = pirq_via_set;
+               return 1;
        }
        return 0;
 }
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/um/kernel/physmem.c
--- a/linux-2.6-xen-sparse/arch/um/kernel/physmem.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/um/kernel/physmem.c     Wed Feb  1 18:00:19 2006
@@ -248,7 +248,7 @@
 /* Changed during early boot */
 unsigned long high_physmem;
 
-extern unsigned long physmem_size;
+extern unsigned long long physmem_size;
 
 int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
 {
@@ -323,7 +323,7 @@
 extern int __syscall_stub_start, __binary_start;
 
 void setup_physmem(unsigned long start, unsigned long reserve_end,
-                  unsigned long len, unsigned long highmem)
+                  unsigned long len, unsigned long long highmem)
 {
        unsigned long reserve = reserve_end - start;
        int pfn = PFN_UP(__pa(reserve_end));
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig  Wed Feb  1 18:00:19 2006
@@ -242,22 +242,42 @@
 
 source "kernel/Kconfig.preempt"
 
-config K8_NUMA
-       bool "K8 NUMA support"
-       select NUMA
+config NUMA
+       bool "Non Uniform Memory Access (NUMA) Support"
        depends on SMP && !X86_64_XEN
        help
-         Enable NUMA (Non Unified Memory Architecture) support for
-         AMD Opteron Multiprocessor systems. The kernel will try to allocate
-         memory used by a CPU on the local memory controller of the CPU
-         and add some more NUMA awareness to the kernel.
-         This code is recommended on all multiprocessor Opteron systems
-         and normally doesn't hurt on others.
+        Enable NUMA (Non Uniform Memory Access) support. The kernel 
+        will try to allocate memory used by a CPU on the local memory 
+        controller of the CPU and add some more NUMA awareness to the kernel.
+        This code is recommended on all multiprocessor Opteron systems.
+        If the system is EM64T, you should say N unless your system is EM64T 
+        NUMA. 
+
+config K8_NUMA
+       bool "Old style AMD Opteron NUMA detection"
+       depends on NUMA
+       default y
+       help
+        Enable K8 NUMA node topology detection.  You should say Y here if
+        you have a multi processor AMD K8 system. This uses an old
+        method to read the NUMA configurtion directly from the builtin
+        Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+        instead, which also takes priority if both are compiled in.   
+
+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+
+config X86_64_ACPI_NUMA
+       bool "ACPI NUMA detection"
+       depends on NUMA
+       select ACPI 
+       select ACPI_NUMA
+       default y
+       help
+        Enable ACPI SRAT based node topology detection.
 
 config NUMA_EMU
-       bool "NUMA emulation support"
-       select NUMA
-       depends on SMP && !X86_64_XEN
+       bool "NUMA emulation"
+       depends on NUMA
        help
          Enable NUMA emulation. A flat machine will be split
          into virtual nodes when booted with "numa=fake=N", where N is the
@@ -268,9 +288,6 @@
        depends on NUMA
        default y
 
-config NUMA
-       bool
-       default n
 
 config ARCH_DISCONTIGMEM_ENABLE
        def_bool y
@@ -393,6 +410,14 @@
           Additional support for intel specific MCE features such as
           the thermal monitor.
 
+config X86_MCE_AMD
+       bool "AMD MCE features"
+       depends on X86_MCE && X86_LOCAL_APIC
+       default y
+       help
+          Additional support for AMD specific MCE features such as
+          the DRAM Error Threshold.
+
 config PHYSICAL_START
        hex "Physical address where the kernel is loaded" if EMBEDDED
        default "0x100000"
@@ -528,7 +553,7 @@
          left.
 
 config IA32_AOUT
-       bool "IA32 a.out support"
+       tristate "IA32 a.out support"
        depends on IA32_EMULATION
        help
          Support old a.out binaries in the 32bit emulation.
@@ -558,8 +583,21 @@
 
 source fs/Kconfig
 
+menu "Instrumentation Support"
+        depends on EXPERIMENTAL
+
 source "arch/x86_64/oprofile/Kconfig"
 
+config KPROBES
+       bool "Kprobes (EXPERIMENTAL)"
+       help
+         Kprobes allows you to trap at almost any kernel address and
+         execute a callback function.  register_kprobe() establishes
+         a probepoint and specifies the callback.  Kprobes is useful
+         for kernel debugging, non-intrusive instrumentation and testing.
+         If in doubt, say "N".
+endmenu
+
 source "arch/x86_64/Kconfig.debug"
 
 source "security/Kconfig"
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile  Wed Feb  1 18:00:19 2006
@@ -11,6 +11,7 @@
 
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)    += mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)      += mce_amd.o
 obj-$(CONFIG_MTRR)             += ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)             += acpi/
 obj-$(CONFIG_X86_MSR)          += msr.o
@@ -22,13 +23,13 @@
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o \
                genapic.o genapic_cluster.o genapic_flat.o
 obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o crash.o
-obj-$(CONFIG_PM)               += suspend.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
+obj-$(CONFIG_ACPI_SLEEP)       += suspend.o
 obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
 obj-$(CONFIG_GART_IOMMU)       += pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU)      += pci-nommu.o pci-dma.o
-obj-$(CONFIG_SWIOTLB)          += swiotlb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)     += pmtimer.o
 
@@ -42,7 +43,6 @@
 bootflag-y                     += ../../i386/kernel/bootflag.o
 cpuid-$(subst m,y,$(CONFIG_X86_CPUID))  += ../../i386/kernel/cpuid.o
 topology-y                     += ../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB)      += ../../ia64/lib/swiotlb.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE))  += ../../i386/kernel/microcode.o
 intel_cacheinfo-y              += ../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y                       += ../../i386/kernel/quirks.o
@@ -51,6 +51,7 @@
 
 ifdef CONFIG_XEN
 time-y                         += ../../i386/kernel/time-xen.o
+obj-$(CONFIG_SWIOTLB)          += swiotlb.o
 swiotlb-$(CONFIG_SWIOTLB)      := ../../i386/kernel/swiotlb.o
 pci-dma-y                      += ../../i386/kernel/pci-dma-xen.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE))  := ../../i386/kernel/microcode-xen.o
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c        Wed Feb  1 18:00:19 2006
@@ -33,8 +33,6 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
-
-#include "io_ports.h"
 
 /*
  * Debug level
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Wed Feb  1 18:00:19 2006
@@ -23,6 +23,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
+#include <asm/sections.h>
 #include <xen/interface/memory.h>
 
 unsigned long pci_mem_start = 0xaeedbabe;
@@ -54,7 +55,6 @@
 }
 
 #ifndef CONFIG_XEN
-extern char _end[];
 
 /* 
  * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S       Wed Feb  1 18:00:19 2006
@@ -609,7 +609,14 @@
        CFI_ENDPROC
        .endm
 
-#if 0
+#ifndef CONFIG_XEN
+ENTRY(thermal_interrupt)
+       apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+
+ENTRY(threshold_interrupt)
+       apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
+#ifdef CONFIG_SMP      
 ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 
@@ -630,6 +637,7 @@
 ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 #endif
+#endif /* !CONFIG_XEN */
 
 #ifdef CONFIG_X86_LOCAL_APIC   
 ENTRY(apic_timer_interrupt)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S        Wed Feb  1 18:00:19 2006
@@ -25,6 +25,7 @@
                 
       
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -243,6 +244,26 @@
        .quad   0x0000000000003007 + __PHYSICAL_START   /* -> level3_kernel_pgt */
 #endif
 
+#ifndef CONFIG_XEN
+#ifndef CONFIG_HOTPLUG_CPU
+       __INITDATA
+#endif
+       /*
+        * This default setting generates an ident mapping at address 0x100000
+        * and a mapping for the kernel that precisely maps virtual address
+        * 0xffffffff80000000 to physical address 0x000000. (always using
+        * 2Mbyte large pages provided by PAE mode)
+        */
+       .align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+       .quad   0x0000000000002007 + __PHYSICAL_START   /* -> level3_ident_pgt */
+       .fill   255,8,0
+       .quad   0x000000000000a007 + __PHYSICAL_START
+       .fill   254,8,0
+       /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+       .quad   0x0000000000003007 + __PHYSICAL_START   /* -> level3_kernel_pgt */
+#endif
+
        .data
 
        .align 16
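
The comment above the new boot_level4_pgt block leans on a small piece of
arithmetic: with 4-level paging each top-level (PGD) entry spans 2^39 bytes, so
the identity mapping around 0x100000 lives in slot 0, while the kernel mapping
at 0xffffffff80000000 (the top 2GB of the 48-bit canonical space) lands in slot
511, which is why the three .quad entries are separated by 255 and then 254
filler entries. A standalone check of that index calculation, with the constants
written out by hand rather than taken from kernel headers:

/* Verifies the PGD slot arithmetic quoted in the boot_level4_pgt comment. */
#include <stdio.h>
#include <stdint.h>

#define PGDIR_SHIFT  39                 /* each PGD entry covers 2^39 bytes */
#define PTRS_PER_PGD 512

static unsigned pgd_index(uint64_t vaddr)
{
	return (unsigned)((vaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1));
}

int main(void)
{
	/* early identity mapping used during boot */
	printf("0x0000000000100000 -> slot %u\n", pgd_index(0x0000000000100000ULL));
	/* kernel text mapping: top 2 GB of the 48-bit canonical space */
	printf("0xffffffff80000000 -> slot %u\n", pgd_index(0xffffffff80000000ULL));
	/* matches the comment: (2^48 - 2*1024*1024*1024) / 2^39 = 511 */
	return 0;
}
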
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c      Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c      Wed Feb  1 18:00:19 2006
@@ -22,6 +22,8 @@
 #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
 
 unsigned long start_pfn;
 
@@ -30,9 +32,8 @@
 #if 0
 static void __init clear_bss(void)
 {
-       extern char __bss_start[], __bss_end[];
        memset(__bss_start, 0,
-              (unsigned long) __bss_end - (unsigned long) __bss_start);
+              (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 #endif
 
@@ -84,8 +85,6 @@
        boot_cpu_data.x86_mask = eax & 0xf;
 }
 
-extern char _end[];
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
        int i;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c     Wed Feb  1 18:00:19 2006
@@ -57,7 +57,7 @@
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 /*
@@ -162,6 +162,7 @@
        static int first_free_entry = NR_IRQS;
        struct irq_pin_list *entry = irq_2_pin + irq;
 
+       BUG_ON(irq >= NR_IRQS);
        while (entry->next)
                entry = irq_2_pin + entry->next;
 
@@ -169,7 +170,7 @@
                entry->next = first_free_entry;
                entry = irq_2_pin + entry->next;
                if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: whoops");
+                       panic("io_apic.c: ran out of irq_2_pin entries!");
        }
        entry->apic = apic;
        entry->pin = pin;
@@ -182,6 +183,7 @@
        int pin;                                                        \
        struct irq_pin_list *entry = irq_2_pin + irq;                   \
                                                                        \
+       BUG_ON(irq >= NR_IRQS);                                         \
        for (;;) {                                                      \
                unsigned int reg;                                       \
                pin = entry->pin;                                       \
@@ -258,6 +260,8 @@
 }
 
 #endif /* !CONFIG_XEN */
+
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
 
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
@@ -470,6 +474,7 @@
                                best_guess = irq;
                }
        }
+       BUG_ON(best_guess >= NR_IRQS);
        return best_guess;
 }
 
@@ -660,6 +665,64 @@
        return MPBIOS_trigger(idx);
 }
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+       int i, tries, vector;
+
+       BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+       if (platform_legacy_irq(gsi))
+               return gsi;
+
+       if (gsi_2_irq[gsi] != 0xFF)
+               return (int)gsi_2_irq[gsi];
+
+       tries = NR_IRQS;
+  try_again:
+       vector = assign_irq_vector(gsi);
+
+       /*
+        * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+        * use of vector and if found, return that IRQ.  However, we never want
+        * to share legacy IRQs, which usually have a different trigger mode
+        * than PCI.
+        */
+       for (i = 0; i < NR_IRQS; i++)
+               if (IO_APIC_VECTOR(i) == vector)
+                       break;
+       if (platform_legacy_irq(i)) {
+               if (--tries >= 0) {
+                       IO_APIC_VECTOR(i) = 0;
+                       goto try_again;
+               }
+               panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X 
for GSI %d", vector, gsi);
+       }
+       if (i < NR_IRQS) {
+               gsi_2_irq[gsi] = i;
+               printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+                               gsi, vector, i);
+               return i;
+       }
+
+       i = next_irq++;
+       BUG_ON(i >= NR_IRQS);
+       gsi_2_irq[gsi] = i;
+       IO_APIC_VECTOR(i) = vector;
+       printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+                       gsi, vector, i);
+       return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
@@ -689,6 +752,7 @@
                        while (i < apic)
                                irq += nr_ioapic_registers[i++];
                        irq += pin;
+                       irq = gsi_irq_sharing(irq);
                        break;
                }
                default:
@@ -698,6 +762,7 @@
                        break;
                }
        }
+       BUG_ON(irq >= NR_IRQS);
 
        /*
         * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -713,6 +778,7 @@
                        }
                }
        }
+       BUG_ON(irq >= NR_IRQS);
        return irq;
 }
 
@@ -741,7 +807,7 @@
        static int current_vector = FIRST_DEVICE_VECTOR;
        physdev_op_t op;
   
-       BUG_ON(irq >= NR_IRQ_VECTORS);
+       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
        if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
                return IO_APIC_VECTOR(irq);
 
@@ -1990,6 +2056,7 @@
        entry.polarity = active_high_low;
        entry.mask = 1;                                  /* Disabled (masked) */
 
+       irq = gsi_irq_sharing(irq);
        /*
         * IRQs < 16 are already in the irq_2_pin[] map
         */
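
The gsi_irq_sharing() helper introduced above compacts the sparse ACPI GSI space
into a sequential run of Linux IRQ numbers, remembering each assignment in
gsi_2_irq[] so that a GSI always resolves to the same IRQ, and reusing vectors
where it can. A deliberately simplified user-space model of just the
compaction/caching part (the vector-sharing path and the real kernel data
structures are left out, and all numbers are illustrative):

/* Toy model of the GSI -> IRQ compaction done by gsi_irq_sharing(). */
#include <stdio.h>
#include <string.h>

#define NR_GSIS    1024         /* "can reach 800 in large boxen" */
#define UNASSIGNED 0xFF

static unsigned char gsi_2_irq[NR_GSIS];
static int next_irq = 16;       /* IRQs 0-15 stay reserved for legacy use */

static int map_gsi(int gsi)
{
	if (gsi < 16)                   /* legacy IRQs map 1:1 */
		return gsi;
	if (gsi_2_irq[gsi] != UNASSIGNED)
		return gsi_2_irq[gsi];  /* already compacted, return cached IRQ */
	gsi_2_irq[gsi] = (unsigned char)next_irq;  /* hand out the next IRQ */
	return next_irq++;
}

int main(void)
{
	memset(gsi_2_irq, UNASSIGNED, sizeof(gsi_2_irq));
	printf("GSI 9   -> IRQ %d\n", map_gsi(9));    /* legacy, unchanged */
	printf("GSI 300 -> IRQ %d\n", map_gsi(300));  /* first sparse GSI -> 16 */
	printf("GSI 700 -> IRQ %d\n", map_gsi(700));  /* next one -> 17 */
	printf("GSI 300 -> IRQ %d\n", map_gsi(300));  /* cached -> 16 again */
	return 0;
}
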
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c     Wed Feb  1 18:00:19 2006
@@ -42,7 +42,7 @@
  * Various Linux-internal data structures created from the
  * MP-table.
  */
-int apic_version [MAX_APICS];
+unsigned char apic_version [MAX_APICS];
 unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 
@@ -65,7 +65,9 @@
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
 /* Internal processor count */
-static unsigned int num_processors = 0;
+unsigned int num_processors __initdata = 0;
+
+unsigned disabled_cpus __initdata;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -107,11 +109,14 @@
 #ifndef CONFIG_XEN
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
-       int ver, cpu;
+       int cpu;
+       unsigned char ver;
        static int found_bsp=0;
 
-       if (!(m->mpc_cpuflag & CPU_ENABLED))
+       if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+               disabled_cpus++;
                return;
+       }
 
        printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
                m->mpc_apicid,
@@ -130,12 +135,14 @@
        }
 
        cpu = num_processors++;
-
-       if (m->mpc_apicid > MAX_APICS) {
+       
+#if MAX_APICS < 255    
+       if ((int)m->mpc_apicid > MAX_APICS) {
                printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
                        m->mpc_apicid, MAX_APICS);
                return;
        }
+#endif
        ver = m->mpc_apicver;
 
        physid_set(m->mpc_apicid, phys_cpu_present_map);
@@ -225,7 +232,7 @@
                        m->mpc_irqtype, m->mpc_irqflag & 3,
                        (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
                        m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+       if (++mp_irq_entries >= MAX_IRQ_SOURCES)
                panic("Max # of irq sources exceeded!!\n");
 }
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c   Wed Feb  1 18:00:19 2006
@@ -25,7 +25,7 @@
  */
 
 void *dma_alloc_coherent(struct device *hwdev, size_t size,
-                        dma_addr_t *dma_handle, unsigned gfp)
+                        dma_addr_t *dma_handle, gfp_t gfp)
 {
        void *ret;
        u64 mask;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c     Wed Feb  1 18:00:19 2006
@@ -96,10 +96,13 @@
        if (need_resched()) {
                local_irq_enable();
        } else {
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
                stop_hz_timer();
                /* Blocking includes an implicit local_irq_enable(). */
                HYPERVISOR_sched_op(SCHEDOP_block, 0);
                start_hz_timer();
+               set_thread_flag(TIF_POLLING_NRFLAG);
        }
 }
 
@@ -143,6 +146,8 @@
  */
 void cpu_idle (void)
 {
+       set_thread_flag(TIF_POLLING_NRFLAG);
+
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
@@ -156,7 +161,9 @@
                        xen_idle();
                }
 
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
        }
 }
 
@@ -180,7 +187,8 @@
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
-                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                       if (cpu_isset(cpu, map) &&
+                                       !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
@@ -208,7 +216,8 @@
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip); 
-       printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, 
regs->eflags);
+       printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+               regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -281,13 +290,6 @@
 {
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();
-
-       /*
-        * Remove function-return probe instances associated with this task
-        * and put them back on the free list. Do not insert an exit probe for
-        * this function, it will be disabled by kprobe_flush_task if you do.
-        */
-       kprobe_flush_task(tsk);
 
        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
@@ -361,15 +363,14 @@
        struct pt_regs * childregs;
        struct task_struct *me = current;
 
-       childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+       childregs = ((struct pt_regs *)
+                       (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
        *childregs = *regs;
 
        childregs->rax = 0;
        childregs->rsp = rsp;
-       if (rsp == ~0UL) {
+       if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;
-       }
 
        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -391,7 +392,8 @@
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
-               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+                               IO_BITMAP_BYTES);
        } 
 
        /*
@@ -432,7 +434,8 @@
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  */
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
@@ -530,7 +533,8 @@
        prev->userrsp = read_pda(oldrsp); 
        write_pda(oldrsp, next->userrsp); 
        write_pda(pcurrent, next_p); 
-       write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+       write_pda(kernelstack,
+           (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
 
        /*
         * Now maybe reload the debug registers
@@ -591,7 +595,9 @@
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
 }
 
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+         void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 {
        if (!newsp)
                newsp = regs->rsp;
@@ -627,7 +633,8 @@
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do { 
-               if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+               if (fp < (unsigned long)stack ||
+                   fp > (unsigned long)stack+THREAD_SIZE)
                        return 0; 
                rip = *(u64 *)(fp+8); 
                if (!in_sched_functions(rip))
@@ -662,8 +669,9 @@
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
-               load_gs_index(0);
-                ret = HYPERVISOR_set_segment_base(SEGBASE_GS_USER, addr);
+                               load_gs_index(0);
+                               ret = HYPERVISOR_set_segment_base(
+                                       SEGBASE_GS_USER, addr);
                        } 
                }
                put_cpu();
@@ -680,7 +688,7 @@
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) { 
                                load_TLS(&task->thread, cpu); 
-                               asm volatile("mov %0,%%fs" :: "r" (FS_TLS_SEL));
+                               asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
@@ -690,9 +698,9 @@
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
-               asm volatile("mov %0,%%fs" :: "r" (0));
-                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS, addr);
-
+                               asm volatile("movl %0,%%fs" :: "r" (0));
+                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
+                                                                 addr);
                        }
                }
                put_cpu();
@@ -701,9 +709,9 @@
                unsigned long base; 
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
-               else if (doit) {
+               else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
-               } else
+               else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr); 
                break; 
@@ -712,9 +720,9 @@
                unsigned long base;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
-               else if (doit) {
+               else if (doit)
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
-               } else
+               else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr); 
                break;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Wed Feb  1 18:00:19 2006
@@ -60,6 +60,7 @@
 #include <asm/setup.h>
 #include <asm/mach_apic.h>
 #include <asm/numa.h>
+#include <asm/sections.h>
 #ifdef CONFIG_XEN
 #include <linux/percpu.h>
 #include <xen/interface/physdev.h>
@@ -139,7 +140,6 @@
 struct e820map e820;
 
 extern int root_mountflags;
-extern char _text, _etext, _edata, _end;
 
 char command_line[COMMAND_LINE_SIZE];
 
@@ -473,7 +473,6 @@
 {
        unsigned long bootmap_size, bootmap;
 
-       memory_present(0, start_pfn, end_pfn);
        bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
        bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
        if (bootmap == -1L)
@@ -773,8 +772,6 @@
        }
 #endif
 
-       sparse_init();
-
        paging_init();
 #ifdef CONFIG_X86_LOCAL_APIC
        /*
@@ -855,6 +852,8 @@
        check_ioapic();
 #endif
 
+       zap_low_mappings(0);
+
 #ifdef CONFIG_ACPI
        /*
         * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -1032,7 +1031,7 @@
 #endif
 
        bits = 0;
-       while ((1 << bits) < c->x86_num_cores)
+       while ((1 << bits) < c->x86_max_cores)
                bits++;
 
        /* Low order bits define the core id (index of core in socket) */
@@ -1062,10 +1061,10 @@
                if (!node_online(node))
                        node = nearby_node(apicid);
        }
-       cpu_to_node[cpu] = node;
+       numa_set_node(cpu, node);
 
        printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
-                       cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
+                       cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
 #endif
 #endif
 }
@@ -1114,9 +1113,9 @@
        display_cacheinfo(c);
 
        if (c->extended_cpuid_level >= 0x80000008) {
-               c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-               if (c->x86_num_cores & (c->x86_num_cores - 1))
-                       c->x86_num_cores = 1;
+               c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+               if (c->x86_max_cores & (c->x86_max_cores - 1))
+                       c->x86_max_cores = 1;
 
                amd_detect_cmp(c);
        }
@@ -1128,54 +1127,44 @@
 {
 #ifdef CONFIG_SMP
        u32     eax, ebx, ecx, edx;
-       int     index_msb, tmp;
+       int     index_msb, core_bits;
        int     cpu = smp_processor_id();
-       
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+
+       c->apicid = phys_pkg_id(0);
+
        if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
 
-       cpuid(1, &eax, &ebx, &ecx, &edx);
        smp_num_siblings = (ebx & 0xff0000) >> 16;
-       
+
        if (smp_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-       } else if (smp_num_siblings > 1) {
-               index_msb = 31;
-               /*
-                * At this point we only support two siblings per
-                * processor package.
-                */
+       } else if (smp_num_siblings > 1 ) {
+
                if (smp_num_siblings > NR_CPUS) {
                        printk(KERN_WARNING "CPU: Unsupported number of the 
siblings %d", smp_num_siblings);
                        smp_num_siblings = 1;
                        return;
                }
-               tmp = smp_num_siblings;
-               while ((tmp & 0x80000000 ) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+
+               index_msb = get_count_order(smp_num_siblings);
                phys_proc_id[cpu] = phys_pkg_id(index_msb);
-               
+
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
 
-               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
-
-               tmp = smp_num_siblings;
-               index_msb = 31;
-               while ((tmp & 0x80000000) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
-
-               cpu_core_id[cpu] = phys_pkg_id(index_msb);
-
-               if (c->x86_num_cores > 1)
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+               index_msb = get_count_order(smp_num_siblings) ;
+
+               core_bits = get_count_order(c->x86_max_cores);
+
+               cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+                                              ((1 << core_bits) - 1);
+
+               if (c->x86_max_cores > 1)
                        printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
                               cpu_core_id[cpu]);
        }
@@ -1214,7 +1203,7 @@
        node = apicid_to_node[hard_smp_processor_id()];
        if (node == NUMA_NO_NODE)
                node = 0;
-       cpu_to_node[cpu] = node;
+       numa_set_node(cpu, node);
 
        if (acpi_numa > 0)
                printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
@@ -1232,13 +1221,18 @@
                unsigned eax = cpuid_eax(0x80000008);
                c->x86_virt_bits = (eax >> 8) & 0xff;
                c->x86_phys_bits = eax & 0xff;
+               /* CPUID workaround for Intel 0F34 CPU */
+               if (c->x86_vendor == X86_VENDOR_INTEL &&
+                   c->x86 == 0xF && c->x86_model == 0x3 &&
+                   c->x86_mask == 0x4)
+                       c->x86_phys_bits = 36;
        }
 
        if (c->x86 == 15)
                c->x86_cache_alignment = c->x86_clflush_size * 2;
        if (c->x86 >= 15)
                set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
-       c->x86_num_cores = intel_num_cpu_cores(c);
+       c->x86_max_cores = intel_num_cpu_cores(c);
 
        srat_detect_node();
 }
@@ -1276,7 +1270,7 @@
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_clflush_size = 64;
        c->x86_cache_alignment = c->x86_clflush_size;
-       c->x86_num_cores = 1;
+       c->x86_max_cores = 1;
        c->extended_cpuid_level = 0;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
@@ -1299,10 +1293,10 @@
                c->x86 = (tfms >> 8) & 0xf;
                c->x86_model = (tfms >> 4) & 0xf;
                c->x86_mask = tfms & 0xf;
-               if (c->x86 == 0xf) {
+               if (c->x86 == 0xf)
                        c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               } 
                if (c->x86_capability[0] & (1<<19)) 
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
        } else {
@@ -1452,7 +1446,7 @@
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 
                /* Intel-defined (#2) */
-               "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
+               "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
                "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1510,13 +1504,12 @@
                seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
        
 #ifdef CONFIG_SMP
-       if (smp_num_siblings * c->x86_num_cores > 1) {
+       if (smp_num_siblings * c->x86_max_cores > 1) {
                int cpu = c - cpu_data;
                seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
-               seq_printf(m, "siblings\t: %d\n",
-                               c->x86_num_cores * smp_num_siblings);
+               seq_printf(m, "siblings\t: %d\n", 
cpus_weight(cpu_core_map[cpu]));
                seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
-               seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+               seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
        }
 #endif 
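
The reworked sibling detection above replaces the old hand-rolled MSB scan with
get_count_order(): the thread count per core and the core count per package each
occupy a field of bits inside the initial APIC ID, and cpu_core_id/phys_proc_id
are extracted by shifting and masking those fields. A user-space rendering of
that bit slicing, with get_count_order() open-coded and a made-up topology of
two cores with two threads each (none of the values come from real hardware):

/* Illustrative APIC ID bit slicing, loosely mirroring the reworked detect_ht(). */
#include <stdio.h>

/* smallest n such that 2^n >= x, matching get_count_order() for x > 0 */
static int get_count_order(unsigned int x)
{
	int order = 0;
	while ((1u << order) < x)
		order++;
	return order;
}

int main(void)
{
	unsigned int apicid = 0x7;          /* sample initial APIC ID */
	unsigned int siblings_per_pkg = 4;  /* CPUID leaf 1, EBX[23:16] */
	unsigned int max_cores = 2;         /* cores per package */

	unsigned int threads_per_core = siblings_per_pkg / max_cores;
	int thread_bits = get_count_order(threads_per_core);
	int core_bits = get_count_order(max_cores);

	unsigned int core_id = (apicid >> thread_bits) & ((1u << core_bits) - 1);
	unsigned int phys_id = apicid >> (thread_bits + core_bits);

	printf("APIC ID 0x%x -> package %u, core %u, thread %u\n",
	       apicid, phys_id, core_id, apicid & ((1u << thread_bits) - 1));
	return 0;
}
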
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c     Wed Feb  1 18:00:19 2006
@@ -255,6 +255,7 @@
        /* CPU 0 is initialised in head64.c */
        if (cpu != 0) {
                pda_init(cpu);
+               zap_low_mappings(cpu);
        } else 
                estacks = boot_exception_stacks; 
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Feb  1 18:00:19 2006
@@ -70,8 +70,6 @@
    want false sharing in the per cpu data segment. */
 static DEFINE_PER_CPU(union smp_flush_state, flush_state);
 #endif
-
-#define __cpuinit __init
 
 /*
  * We cannot call mmdrop() because we are in interrupt context, 
@@ -478,15 +476,16 @@
 
 void smp_stop_cpu(void)
 {
+       unsigned long flags;
        /*
         * Remove this CPU:
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
-       local_irq_disable();
+       local_irq_save(flags);
 #ifndef CONFIG_XEN
        disable_local_APIC();
 #endif
-       local_irq_enable(); 
+       local_irq_restore(flags); 
 }
 
 static void smp_really_stop_cpu(void *dummy)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c       Wed Feb  1 18:00:19 2006
@@ -430,19 +430,6 @@
 {
        conditional_sti(regs);
 
-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("%s: wrong gs %lx expected %p rip %lx\n", str, 
gs, pda,
-                             regs->rip);
-               }
-       }
-#endif
-
        if (user_mode(regs)) {
                struct task_struct *tsk = current;
 
@@ -514,20 +501,6 @@
                                                long error_code)
 {
        conditional_sti(regs);
-
-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                      oops_in_progress++;
-                       printk("general protection handler: wrong gs %lx 
expected %p\n", gs, pda);
-                      oops_in_progress--;
-               }
-       }
-#endif
 
        if (user_mode(regs)) {
                struct task_struct *tsk = current;
@@ -670,19 +643,6 @@
        unsigned long condition;
        struct task_struct *tsk = current;
        siginfo_t info;
-
-#ifdef CONFIG_CHECKING
-       { 
-              /* RED-PEN interaction with debugger - could destroy gs */
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("debug handler: wrong gs %lx expected %p\n", gs, 
pda);
-               }
-       }
-#endif
 
        get_debugreg(condition, 6);
 
@@ -896,6 +856,10 @@
 }
 #endif
 
+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+{
+}
+
 /*
  *  'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c Wed Feb  1 18:00:19 2006
@@ -204,3 +204,6 @@
 #endif
 
 EXPORT_SYMBOL(cpu_khz);
+
+EXPORT_SYMBOL(load_gs_index);
+
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c   Wed Feb  1 18:00:19 2006
@@ -24,7 +24,6 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
-#include <linux/percpu.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -35,8 +34,6 @@
 #include <asm/kdebug.h>
 #include <asm-generic/sections.h>
 #include <asm/kdebug.h>
-
-DEFINE_PER_CPU(pgd_t *, cur_pgd);
 
 void bust_spinlocks(int yes)
 {
@@ -149,9 +146,10 @@
        pmd_t *pmd;
        pte_t *pte;
 
-       preempt_disable();
-       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
-       preempt_enable();
+       asm("movq %%cr3,%0" : "=r" (pgd));
+       pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
+
+       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); 
        pgd += pgd_index(address);
        printk("PGD %lx ", pgd_val(*pgd));
        if (bad_address(pgd)) goto bad;
@@ -252,9 +250,9 @@
 
        /* On Xen the line below does not always work. Needs investigating! */
        /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
-       preempt_disable();
-       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
-       preempt_enable();
+       asm("movq %%cr3,%0" : "=r" (pgd));
+       pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
+       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
        pgd += pgd_index(address);
        pgd_ref = pgd_offset_k(address);
        if (pgd_none(*pgd_ref))
@@ -330,22 +328,9 @@
        if (!user_mode(regs))
                error_code &= ~4; /* means kernel */
 
-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("page_fault: wrong gs %lx expected %p\n", gs, 
pda);
-               }
-       }
-#endif
-
        /* get the address */
        address = HYPERVISOR_shared_info->vcpu_info[
                smp_processor_id()].arch.cr2;
-
        if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
                                        SIGSEGV) == NOTIFY_STOP)
                return;
@@ -595,3 +580,10 @@
        force_sig_info(SIGBUS, &info, tsk);
        return;
 }
+
+static int __init enable_pagefaulttrace(char *str)
+{
+       page_fault_trace = 1;
+       return 0;
+}
+__setup("pagefaulttrace", enable_pagefaulttrace);
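
The dump_pagetable()/vmalloc_fault() changes above read CR3 directly and push it
through machine_to_phys() before calling __va(), because under Xen CR3 holds a
machine address while the kernel's direct mapping is indexed by guest
pseudo-physical addresses. A toy illustration of that address juggling; the
translation table, the CR3 value and the direct-map base below are invented
stand-ins, not real data:

/* Made-up machine-to-pseudophysical translation, mimicking the shape of the fix. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT  12
#define PAGE_OFFSET 0xffff810000000000ULL   /* assumed x86_64 direct-map base */

/* pretend machine-to-pseudophysical table: index = machine frame number */
static const uint64_t machine_to_phys[8] = {
	[3] = 5,   /* machine frame 3 belongs to guest pseudo-physical frame 5 */
};

static uint64_t maddr_to_vaddr(uint64_t maddr)
{
	uint64_t pfn = machine_to_phys[maddr >> PAGE_SHIFT];
	uint64_t paddr = (pfn << PAGE_SHIFT) | (maddr & ((1ULL << PAGE_SHIFT) - 1));
	return PAGE_OFFSET + paddr;             /* what __va() would hand back */
}

int main(void)
{
	uint64_t cr3 = 0x3000;                  /* made-up machine address from CR3 */
	printf("maddr %#llx -> kernel vaddr %#llx\n",
	       (unsigned long long)cr3,
	       (unsigned long long)maddr_to_vaddr(cr3));
	return 0;
}
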
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Wed Feb  1 18:00:19 2006
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
+#include <linux/pci.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -39,6 +40,7 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/smp.h>
+#include <asm/sections.h>
 
 #include <xen/features.h>
 
@@ -52,7 +54,7 @@
 extern void swiotlb_init(void);
 #endif
 
-extern char _stext[];
+static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 extern unsigned long start_pfn;
@@ -193,9 +195,6 @@
 }
 
 /* References to section boundaries */
-
-extern char _text, _etext, _edata, __bss_start, _end[];
-extern char __init_begin, __init_end;
 
 int after_bootmem;
 
@@ -492,10 +491,6 @@
 void __init xen_init_pt(void)
 {
        unsigned long addr, *page;
-       int i;
-
-       for (i = 0; i < NR_CPUS; i++)
-               per_cpu(cur_pgd, i) = init_mm.pgd;
 
        memset((void *)init_level4_pgt,   0, PAGE_SIZE);
        memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
@@ -628,9 +623,7 @@
        __flush_tlb_all();
 }
 
-extern struct x8664_pda cpu_pda[NR_CPUS];
-
-void zap_low_mappings(void)
+void __cpuinit zap_low_mappings(int cpu)
 {
        /* this is not required for Xen */
 #if 0
@@ -638,32 +631,74 @@
 #endif
 }
 
+/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+__init void
+size_zones(unsigned long *z, unsigned long *h,
+          unsigned long start_pfn, unsigned long end_pfn)
+{
+       int i;
+#ifndef CONFIG_XEN
+       unsigned long w;
+#endif
+
+       for (i = 0; i < MAX_NR_ZONES; i++)
+               z[i] = 0;
+
+#ifndef CONFIG_XEN
+       if (start_pfn < MAX_DMA_PFN)
+               z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+       if (start_pfn < MAX_DMA32_PFN) {
+               unsigned long dma32_pfn = MAX_DMA32_PFN;
+               if (dma32_pfn > end_pfn)
+                       dma32_pfn = end_pfn;
+               z[ZONE_DMA32] = dma32_pfn - start_pfn;
+       }
+       z[ZONE_NORMAL] = end_pfn - start_pfn;
+
+       /* Remove lower zones from higher ones. */
+       w = 0;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               if (z[i])
+                       z[i] -= w;
+               w += z[i];
+       }
+
+       /* Compute holes */
+       w = start_pfn;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               unsigned long s = w;
+               w += z[i];
+               h[i] = e820_hole_size(s, w);
+       }
+
+       /* Add the space needed for mem_map to the holes too. */
+       for (i = 0; i < MAX_NR_ZONES; i++)
+               h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
+
+       /* The 16MB DMA zone has the kernel and other misc mappings.
+          Account them too */
+       if (h[ZONE_DMA]) {
+               h[ZONE_DMA] += dma_reserve;
+               if (h[ZONE_DMA] >= z[ZONE_DMA]) {
+                       printk(KERN_WARNING
+                               "Kernel too large and filling up ZONE_DMA?\n");
+                       h[ZONE_DMA] = z[ZONE_DMA];
+               }
+       }
+#else
+       z[ZONE_DMA] = end_pfn;
+       for (i = 0; i < MAX_NR_ZONES; i++)
+               h[i] = 0;
+#endif
+}
+
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
-       {
-               unsigned long zones_size[MAX_NR_ZONES];
-               unsigned long holes[MAX_NR_ZONES];
-               /*      unsigned int max_dma; */
-
-               memset(zones_size, 0, sizeof(zones_size));
-               memset(holes, 0, sizeof(holes));
-
-               /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
-               /* if (end_pfn < max_dma) { */
-                       zones_size[ZONE_DMA] = end_pfn;
-#if 0
-                       holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
-               } else {
-                       zones_size[ZONE_DMA] = max_dma;
-                       holes[ZONE_DMA] = e820_hole_size(0, max_dma);
-                       zones_size[ZONE_NORMAL] = end_pfn - max_dma;
-                       holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
-               }
-#endif
-               free_area_init_node(0, NODE_DATA(0), zones_size,
-                                   __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
-       }
+       unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+       size_zones(zones, holes, 0, end_pfn);
+       free_area_init_node(0, NODE_DATA(0), zones,
+                           __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 
        set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
@@ -780,18 +815,17 @@
                datasize >> 10,
                initsize >> 10);
 
+#ifndef CONFIG_XEN
+#ifdef CONFIG_SMP
        /*
-        * Subtle. SMP is doing its boot stuff late (because it has to
-        * fork idle threads) - but it also needs low mappings for the
-        * protected-mode entry to work. We zap these entries only after
-        * the WP-bit has been tested.
+        * Sync boot_level4_pgt mappings with the init_level4_pgt
+        * except for the low identity mappings which are already zapped
+        * in init_level4_pgt. This sync-up is essential for AP's bringup
         */
-#ifndef CONFIG_SMP
-       zap_low_mappings();
-#endif
-}
-
-extern char __initdata_begin[], __initdata_end[];
+       memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
+#endif
+#endif
+}
 
 void free_initmem(void)
 {
@@ -819,7 +853,7 @@
                totalram_pages++;
        }
        memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
-       printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - 
&__init_begin) >> 10);
+       printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - 
__init_begin) >> 10);
 #endif
 }
 
@@ -847,6 +881,8 @@
 #else                  
        reserve_bootmem(phys, len);    
 #endif
+       if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+               dma_reserve += len / PAGE_SIZE;
 }
 
 int kern_addr_valid(unsigned long addr) 
@@ -888,10 +924,6 @@
 static ctl_table debug_table2[] = {
        { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
          proc_dointvec },
-#ifdef CONFIG_CHECKING
-       { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
-         proc_dointvec },
-#endif
        { 0, }
 }; 
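
The size_zones() helper added above first records cumulative upper bounds for
ZONE_DMA (below 16MB), ZONE_DMA32 (below 4GB) and ZONE_NORMAL, then walks the
array subtracting every lower zone from the ones above it so that each slot ends
up holding only its own span; the hole and mem_map accounting is layered on top
of that, and the CONFIG_XEN branch simply lumps everything into ZONE_DMA. A
standalone sketch of just the size/subtraction step, using made-up pfn limits:

/* Sketch of the zone bookkeeping in size_zones(); pfn values are illustrative. */
#include <stdio.h>

enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

#define MAX_DMA_PFN    (16UL << (20 - 12))  /* first 16 MB, in 4 KB pages */
#define MAX_DMA32_PFN  (4UL  << (30 - 12))  /* first 4 GB, in 4 KB pages */

int main(void)
{
	const char *name[MAX_NR_ZONES] = { "DMA", "DMA32", "NORMAL" };
	unsigned long z[MAX_NR_ZONES] = { 0 };
	unsigned long start_pfn = 0;
	unsigned long end_pfn = 8UL << (30 - 12);  /* pretend 8 GB of RAM */
	unsigned long w;
	int i;

	/* cumulative limits, as in the !CONFIG_XEN branch of size_zones() */
	if (start_pfn < MAX_DMA_PFN)
		z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
	if (start_pfn < MAX_DMA32_PFN)
		z[ZONE_DMA32] = (end_pfn < MAX_DMA32_PFN ? end_pfn
							 : MAX_DMA32_PFN) - start_pfn;
	z[ZONE_NORMAL] = end_pfn - start_pfn;

	/* remove lower zones from higher ones so each entry is a span */
	w = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		if (z[i])
			z[i] -= w;
		w += z[i];
	}

	for (i = 0; i < MAX_NR_ZONES; i++)
		printf("ZONE_%-6s %lu pages\n", name[i], z[i]);
	return 0;
}
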
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/x86_64/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile     Wed Feb  1 18:00:19 2006
@@ -11,7 +11,7 @@
 obj-$(CONFIG_ACPI)     += acpi.o
 obj-y                  += legacy.o irq.o common.o
 # mmconfig has a 64bit special
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
 
 obj-$(CONFIG_NUMA)     += k8-bus.o
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/Makefile
--- a/linux-2.6-xen-sparse/drivers/Makefile     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/Makefile     Wed Feb  1 18:00:19 2006
@@ -7,6 +7,7 @@
 
 obj-$(CONFIG_PCI)              += pci/
 obj-$(CONFIG_PARISC)           += parisc/
+obj-$(CONFIG_RAPIDIO)          += rapidio/
 obj-y                          += video/
 obj-$(CONFIG_ACPI)             += acpi/
 # PnP must come after ACPI since it will eventually need to check if acpi
@@ -49,6 +50,7 @@
 obj-$(CONFIG_PARIDE)           += block/paride/
 obj-$(CONFIG_TC)               += tc/
 obj-$(CONFIG_USB)              += usb/
+obj-$(CONFIG_PCI)              += usb/
 obj-$(CONFIG_USB_GADGET)       += usb/gadget/
 obj-$(CONFIG_GAMEPORT)         += input/gameport/
 obj-$(CONFIG_INPUT)            += input/
@@ -68,3 +70,4 @@
 obj-$(CONFIG_SGI_IOC4)         += sn/
 obj-y                          += firmware/
 obj-$(CONFIG_CRYPTO)           += crypto/
+obj-$(CONFIG_SUPERH)           += sh/
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/acpi/Kconfig
--- a/linux-2.6-xen-sparse/drivers/acpi/Kconfig Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/acpi/Kconfig Wed Feb  1 18:00:19 2006
@@ -197,7 +197,6 @@
 config ACPI_IBM
        tristate "IBM ThinkPad Laptop Extras"
        depends on X86
-       default y
        ---help---
          This is a Linux ACPI driver for the IBM ThinkPad laptops. It adds
          support for Fn-Fx key combinations, Bluetooth control, video
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c   Wed Feb  1 18:00:19 2006
@@ -233,9 +233,7 @@
 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 {
 #if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-
-       vma->vm_page_prot = phys_mem_access_prot(file, offset,
+       vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                                                 vma->vm_end - vma->vm_start,
                                                 vma->vm_page_prot);
 #elif defined(pgprot_noncached)
@@ -926,7 +924,8 @@
 
        mem_class = class_create(THIS_MODULE, "mem");
        for (i = 0; i < ARRAY_SIZE(devlist); i++) {
-               class_device_create(mem_class, MKDEV(MEM_MAJOR, devlist[i].minor),
+               class_device_create(mem_class, NULL,
+                                       MKDEV(MEM_MAJOR, devlist[i].minor),
                                        NULL, devlist[i].name);
                devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
                                S_IFCHR | devlist[i].mode, devlist[i].name);
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/char/tpm/Kconfig
--- a/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig     Wed Feb  1 18:00:19 2006
@@ -6,7 +6,7 @@
 
 config TCG_TPM
        tristate "TPM Hardware Support"
-       depends on EXPERIMENTAL && (PCI || XEN)
+       depends on EXPERIMENTAL
        ---help---
          If you have a TPM security chip in your system, which
          implements the Trusted Computing Group's specification,
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/char/tpm/tpm.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c       Wed Feb  1 18:00:19 2006
@@ -47,6 +47,13 @@
 {
        struct tpm_chip *chip = (struct tpm_chip *) ptr;
 
+       schedule_work(&chip->work);
+}
+
+static void timeout_work(void * ptr)
+{
+       struct tpm_chip *chip = ptr;
+
        down(&chip->buffer_mutex);
        atomic_set(&chip->data_pending, 0);
        memset(chip->data_buffer, 0, chip->vendor->buffersize);
@@ -146,8 +153,7 @@
        __be32 index;
        char *str = buf;
 
-       struct tpm_chip *chip =
-           pci_get_drvdata(to_pci_dev(dev));
+       struct tpm_chip *chip = dev_get_drvdata(dev);
        if (chip == NULL)
                return -ENODEV;
 
@@ -170,7 +176,8 @@
                    < READ_PCR_RESULT_SIZE){
                        dev_dbg(chip->dev, "A TPM error (%d) occurred"
                                " attempting to read PCR %d of %d\n",
-                               be32_to_cpu(*((__be32 *) (data + 6))), i, num_pcrs);
+                               be32_to_cpu(*((__be32 *) (data + 6))),
+                               i, num_pcrs);
                        goto out;
                }
                str += sprintf(str, "PCR-%02d: ", i);
@@ -198,17 +205,15 @@
        int i, rc;
        char *str = buf;
 
-       struct tpm_chip *chip =
-           pci_get_drvdata(to_pci_dev(dev));
+       struct tpm_chip *chip = dev_get_drvdata(dev);
        if (chip == NULL)
                return -ENODEV;
 
-       data = kmalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
+       data = kzalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
        memcpy(data, readpubek, sizeof(readpubek));
-       memset(data + sizeof(readpubek), 0, 20);        /* zero nonce */
 
        if ((len = tpm_transmit(chip, data, READ_PUBEK_RESULT_SIZE)) <
            READ_PUBEK_RESULT_SIZE) {
@@ -252,7 +257,6 @@
        kfree(data);
        return rc;
 }
-
 EXPORT_SYMBOL_GPL(tpm_show_pubek);
 
 #define CAP_VER_RESULT_SIZE 18
@@ -281,8 +285,7 @@
        ssize_t len;
        char *str = buf;
 
-       struct tpm_chip *chip =
-           pci_get_drvdata(to_pci_dev(dev));
+       struct tpm_chip *chip = dev_get_drvdata(dev);
        if (chip == NULL)
                return -ENODEV;
 
@@ -321,7 +324,6 @@
        return count;
 }
 EXPORT_SYMBOL_GPL(tpm_store_cancel);
-
 
 /*
  * Device file system interface to the TPM
@@ -346,8 +348,7 @@
        }
 
        if (chip->num_opens) {
-               dev_dbg(chip->dev,
-                       "Another process owns this TPM\n");
+               dev_dbg(chip->dev, "Another process owns this TPM\n");
                rc = -EBUSY;
                goto err_out;
        }
@@ -373,7 +374,6 @@
        spin_unlock(&driver_lock);
        return rc;
 }
-
 EXPORT_SYMBOL_GPL(tpm_open);
 
 int tpm_release(struct inode *inode, struct file *file)
@@ -384,16 +384,16 @@
        file->private_data = NULL;
        chip->num_opens--;
        del_singleshot_timer_sync(&chip->user_read_timer);
+       flush_scheduled_work();
        atomic_set(&chip->data_pending, 0);
        put_device(chip->dev);
        kfree(chip->data_buffer);
        spin_unlock(&driver_lock);
        return 0;
 }
-
 EXPORT_SYMBOL_GPL(tpm_release);
 
-ssize_t tpm_write(struct file * file, const char __user * buf,
+ssize_t tpm_write(struct file *file, const char __user *buf,
                  size_t size, loff_t * off)
 {
        struct tpm_chip *chip = file->private_data;
@@ -431,13 +431,14 @@
 
 EXPORT_SYMBOL_GPL(tpm_write);
 
-ssize_t tpm_read(struct file * file, char __user * buf,
+ssize_t tpm_read(struct file * file, char __user *buf,
                 size_t size, loff_t * off)
 {
        struct tpm_chip *chip = file->private_data;
        int ret_size;
        int pos, pending = 0;
 
+       flush_scheduled_work();
        ret_size = atomic_read(&chip->data_pending);
        if (ret_size > 0) {     /* relay data */
                if (size < ret_size)
@@ -446,8 +447,7 @@
                pos = atomic_read(&chip->data_position);
 
                down(&chip->buffer_mutex);
-               if (copy_to_user
-                   ((void __user *) buf, &chip->data_buffer[pos], ret_size)) {
+               if (copy_to_user(buf, &chip->data_buffer[pos], ret_size)) {
                        ret_size = -EFAULT;
                } else {
                        pending = atomic_read(&chip->data_pending) - ret_size;
@@ -466,7 +466,6 @@
 
        return ret_size;
 }
-
 EXPORT_SYMBOL_GPL(tpm_read);
 
 void tpm_remove_hardware(struct device *dev)
@@ -490,13 +489,13 @@
 
        sysfs_remove_group(&dev->kobj, chip->vendor->attr_group);
 
-       dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &= !(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
+       dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &=
+               ~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
 
        kfree(chip);
 
        put_device(dev);
 }
-
 EXPORT_SYMBOL_GPL(tpm_remove_hardware);
 
 static u8 savestate[] = {
@@ -509,32 +508,30 @@
  * We are about to suspend. Save the TPM state
  * so that it can be restored.
  */
-int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state)
-{
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+int tpm_pm_suspend(struct device *dev, pm_message_t pm_state)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
        if (chip == NULL)
                return -ENODEV;
 
        tpm_transmit(chip, savestate, sizeof(savestate));
        return 0;
 }
-
 EXPORT_SYMBOL_GPL(tpm_pm_suspend);
 
 /*
  * Resume from a power save. The BIOS already restored
  * the TPM state.
  */
-int tpm_pm_resume(struct pci_dev *pci_dev)
-{
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+int tpm_pm_resume(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
 
        if (chip == NULL)
                return -ENODEV;
 
        return 0;
 }
-
 EXPORT_SYMBOL_GPL(tpm_pm_resume);
 
 /*
@@ -544,8 +541,7 @@
  * upon errant exit from this function specific probe function should call
  * pci_disable_device
  */
-int tpm_register_hardware(struct device *dev,
-                         struct tpm_vendor_specific *entry)
+int tpm_register_hardware(struct device *dev, struct tpm_vendor_specific *entry)
 {
 #define DEVNAME_SIZE 7
 
@@ -554,15 +550,15 @@
        int i, j;
 
        /* Driver specific per-device data */
-       chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
        if (chip == NULL)
                return -ENOMEM;
-
-       memset(chip, 0, sizeof(struct tpm_chip));
 
        init_MUTEX(&chip->buffer_mutex);
        init_MUTEX(&chip->tpm_mutex);
        INIT_LIST_HEAD(&chip->list);
+
+       INIT_WORK(&chip->work, timeout_work, chip);
 
        init_timer(&chip->user_read_timer);
        chip->user_read_timer.function = user_reader_timeout;
@@ -589,8 +585,7 @@
 
 dev_num_search_complete:
        if (chip->dev_num < 0) {
-               dev_err(dev,
-                       "No available tpm device numbers\n");
+               dev_err(dev, "No available tpm device numbers\n");
                kfree(chip);
                return -ENODEV;
        } else if (chip->dev_num == 0)
@@ -628,7 +623,6 @@
 
        return 0;
 }
-
 EXPORT_SYMBOL_GPL(tpm_register_hardware);
 
 MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/char/tpm/tpm.h
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h       Wed Feb  1 18:00:19 2006
@@ -19,11 +19,11 @@
  * 
  */
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
+#include <linux/platform_device.h>
 
 enum tpm_timeout {
        TPM_TIMEOUT = 5,        /* msecs */
@@ -76,6 +76,7 @@
        struct semaphore buffer_mutex;
 
        struct timer_list user_read_timer;      /* user needs to claim result */
+       struct work_struct work;
        struct semaphore tpm_mutex;     /* tpm is processing */
 
        struct tpm_vendor_specific *vendor;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c Wed Feb  1 18:00:19 2006
@@ -20,12 +20,7 @@
  */
 
 #include "tpm.h"
-
-/* Atmel definitions */
-enum tpm_atmel_addr {
-       TPM_ATMEL_BASE_ADDR_LO = 0x08,
-       TPM_ATMEL_BASE_ADDR_HI = 0x09
-};
+#include "tpm_atmel.h"
 
 /* write status bits */
 enum tpm_atmel_write_status {
@@ -40,7 +35,7 @@
        ATML_STATUS_READY = 0x08
 };
 
-static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
+static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        u8 status, *hdr = buf;
        u32 size;
@@ -52,13 +47,12 @@
                return -EIO;
 
        for (i = 0; i < 6; i++) {
-               status = inb(chip->vendor->base + 1);
+               status = atmel_getb(chip, 1);
                if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                       dev_err(chip->dev,
-                               "error reading header\n");
+                       dev_err(chip->dev, "error reading header\n");
                        return -EIO;
                }
-               *buf++ = inb(chip->vendor->base);
+               *buf++ = atmel_getb(chip, 0);
        }
 
        /* size of the data received */
@@ -69,10 +63,9 @@
                dev_err(chip->dev,
                        "Recv size(%d) less than available space\n", size);
                for (; i < size; i++) { /* clear the waiting data anyway */
-                       status = inb(chip->vendor->base + 1);
+                       status = atmel_getb(chip, 1);
                        if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                               dev_err(chip->dev,
-                                       "error reading data\n");
+                               dev_err(chip->dev, "error reading data\n");
                                return -EIO;
                        }
                }
@@ -81,17 +74,16 @@
 
        /* read all the data available */
        for (; i < size; i++) {
-               status = inb(chip->vendor->base + 1);
+               status = atmel_getb(chip, 1);
                if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                       dev_err(chip->dev,
-                               "error reading data\n");
+                       dev_err(chip->dev, "error reading data\n");
                        return -EIO;
                }
-               *buf++ = inb(chip->vendor->base);
+               *buf++ = atmel_getb(chip, 0);
        }
 
        /* make sure data available is gone */
-       status = inb(chip->vendor->base + 1);
+       status = atmel_getb(chip, 1);
        if (status & ATML_STATUS_DATA_AVAIL) {
                dev_err(chip->dev, "data available is stuck\n");
                return -EIO;
@@ -100,14 +92,14 @@
        return size;
 }
 
-static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count)
+static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        int i;
 
        dev_dbg(chip->dev, "tpm_atml_send:\n");
        for (i = 0; i < count; i++) {
                dev_dbg(chip->dev, "%d 0x%x(%d)\n",  i, buf[i], buf[i]);
-               outb(buf[i], chip->vendor->base);
+               atmel_putb(buf[i], chip, 0);
        }
 
        return count;
@@ -115,12 +107,12 @@
 
 static void tpm_atml_cancel(struct tpm_chip *chip)
 {
-       outb(ATML_STATUS_ABORT, chip->vendor->base + 1);
+       atmel_putb(ATML_STATUS_ABORT, chip, 1);
 }
 
 static u8 tpm_atml_status(struct tpm_chip *chip)
 {
-       return inb(chip->vendor->base + 1);
+       return atmel_getb(chip, 1);
 }
 
 static struct file_operations atmel_ops = {
@@ -142,7 +134,7 @@
        &dev_attr_pcrs.attr,
        &dev_attr_caps.attr,
        &dev_attr_cancel.attr,
-       0,
+       NULL,
 };
 
 static struct attribute_group atmel_attr_grp = { .attrs = atmel_attrs };
@@ -159,27 +151,39 @@
        .miscdev = { .fops = &atmel_ops, },
 };
 
-static int __devinit tpm_atml_init(struct pci_dev *pci_dev,
-                                  const struct pci_device_id *pci_id)
-{
-       u8 version[4];
+static struct platform_device *pdev;
+
+static void atml_plat_remove(void)
+{
+       struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
+
+       if (chip) {
+               if (chip->vendor->have_region)
+                       atmel_release_region(chip->vendor->base,
+                                            chip->vendor->region_size);
+               atmel_put_base_addr(chip->vendor);
+               tpm_remove_hardware(chip->dev);
+               platform_device_unregister(pdev);
+       }
+}
+
+static struct device_driver atml_drv = {
+       .name = "tpm_atmel",
+       .bus = &platform_bus_type,
+       .owner = THIS_MODULE,
+       .suspend = tpm_pm_suspend,
+       .resume = tpm_pm_resume,
+};
+
+static int __init init_atmel(void)
+{
        int rc = 0;
-       int lo, hi;
-
-       if (pci_enable_device(pci_dev))
-               return -EIO;
-
-       lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO);
-       hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI);
-
-       tpm_atmel.base = (hi<<8)|lo;
-       dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
-
-       /* verify that it is an Atmel part */
-       if (tpm_read_index(TPM_ADDR, 4) != 'A' || tpm_read_index(TPM_ADDR, 5) != 'T'
-           || tpm_read_index(TPM_ADDR, 6) != 'M' || tpm_read_index(TPM_ADDR, 7) != 'L') {
+
+       driver_register(&atml_drv);
+
+       if ((tpm_atmel.iobase = atmel_get_base_addr(&tpm_atmel)) == NULL) {
                rc = -ENODEV;
-               goto out_err;
+               goto err_unreg_drv;
        }
 
        /* query chip for its version number */
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/char/tty_io.c
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c        Wed Feb  1 18:00:19 2006
@@ -811,7 +811,7 @@
        check_tty_count(tty, "do_tty_hangup");
        file_list_lock();
        /* This breaks for file handles being sent over AF_UNIX sockets ? */
-       list_for_each_entry(filp, &tty->tty_files, f_list) {
+       list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
                if (filp->f_op->write == redirected_tty_write)
                        cons_filp = filp;
                if (filp->f_op->write != tty_write)
@@ -1418,14 +1418,11 @@
 
        /* Release locally allocated memory ... nothing placed in slots */
 free_mem_out:
-       if (o_tp)
-               kfree(o_tp);
+       kfree(o_tp);
        if (o_tty)
                free_tty_struct(o_tty);
-       if (ltp)
-               kfree(ltp);
-       if (tp)
-               kfree(tp);
+       kfree(ltp);
+       kfree(tp);
        free_tty_struct(tty);
 
 fail_no_mem:
@@ -2730,7 +2727,7 @@
                pty_line_name(driver, index, name);
        else
                tty_line_name(driver, index, name);
-       class_device_create(tty_class, dev, device, name);
+       class_device_create(tty_class, NULL, dev, device, "%s", name);
 }
 
 /**
@@ -2985,14 +2982,14 @@
            register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
                panic("Couldn't register /dev/tty driver\n");
        devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 0), S_IFCHR|S_IRUGO|S_IWUGO, "tty");
-       class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
+       class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
 
        cdev_init(&console_cdev, &console_fops);
        if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
            register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 
0)
                panic("Couldn't register /dev/console driver\n");
        devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 1), S_IFCHR|S_IRUSR|S_IWUSR, "console");
-       class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 1), NULL, "console");
+       class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL, "console");
 
 #ifdef CONFIG_UNIX98_PTYS
        cdev_init(&ptmx_cdev, &ptmx_fops);
@@ -3000,7 +2997,7 @@
            register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0)
                panic("Couldn't register /dev/ptmx driver\n");
        devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 2), S_IFCHR|S_IRUGO|S_IWUGO, "ptmx");
-       class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
+       class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
 #endif
 
 #ifdef CONFIG_VT
@@ -3011,7 +3008,7 @@
            register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
                panic("Couldn't register /dev/tty0 driver\n");
        devfs_mk_cdev(MKDEV(TTY_MAJOR, 0), S_IFCHR|S_IRUSR|S_IWUSR, "vc/0");
-       class_device_create(tty_class, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
+       class_device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
 
        vty_init();
  out_vt:
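
In the tty_io.c hunks above, the error path drops its per-pointer `if (p) kfree(p)` tests because kfree() ignores a NULL pointer, and the class_device_create() calls gain the extra parent argument and format string that the 2.6.15 driver core expects. A user-space sketch of the simplified cleanup, relying on free(NULL) being a no-op just as kfree(NULL) is; the names below are illustrative, not the tty layer's:

#include <stdlib.h>

/* Unconditional cleanup: free(NULL) is defined to do nothing, so the
 * per-pointer NULL checks the old code carried added no safety. */
static void release_all(char *termios, char *termios_locked, char *other)
{
        free(termios);
        free(termios_locked);
        free(other);
}
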
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/firmware/Kconfig
--- a/linux-2.6-xen-sparse/drivers/firmware/Kconfig     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/firmware/Kconfig     Wed Feb  1 18:00:19 2006
@@ -60,6 +60,7 @@
 
 config DELL_RBU
        tristate "BIOS update support for DELL systems via sysfs"
+       depends on X86
        select FW_LOADER
        help
         Say m if you want to have the option of updating the BIOS for your
@@ -70,8 +71,7 @@
 
 config DCDBAS
        tristate "Dell Systems Management Base Driver"
-       depends on X86 || X86_64
-       default m
+       depends on X86
        help
          The Dell Systems Management Base Driver provides a sysfs interface
          for systems management software to perform System Management
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/serial/Kconfig
--- a/linux-2.6-xen-sparse/drivers/serial/Kconfig       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig       Wed Feb  1 18:00:19 2006
@@ -10,7 +10,8 @@
 # The new 8250/16550 serial drivers
 config SERIAL_8250
        tristate "8250/16550 and compatible serial support"
-       depends on (BROKEN || !(SPARC64 || SPARC32 || XEN_DISABLE_SERIAL))
+       depends on (BROKEN || !SPARC)
+       depends on !XEN_DISABLE_SERIAL
        select SERIAL_CORE
        ---help---
          This selects whether you want to include the driver for the standard
@@ -207,6 +208,14 @@
          system, say Y to this option.  The driver can handle 1, 2, or 3 port
          cards.  If unsure, say N.
 
+config SERIAL_8250_AU1X00
+       bool "AU1X00 serial port support"
+       depends on SERIAL_8250 != n && SOC_AU1X00
+       help
+         If you have an Au1x00 board and want to use the serial port, say Y
+         to this option.  The driver can handle 1 or 2 serial ports.
+         If unsure, say N.
+
 comment "Non-8250 serial port support"
 
 config SERIAL_AMBA_PL010
@@ -461,14 +470,14 @@
 
 config SERIAL_SUNCORE
        bool
-       depends on SPARC32 || SPARC64
+       depends on SPARC
        select SERIAL_CORE
        select SERIAL_CORE_CONSOLE
        default y
 
 config SERIAL_SUNZILOG
        tristate "Sun Zilog8530 serial support"
-       depends on SPARC32 || SPARC64
+       depends on SPARC
        help
          This driver supports the Zilog8530 serial ports found on many Sparc
          systems.  Say Y or M if you want to be able to these serial ports.
@@ -483,7 +492,7 @@
 
 config SERIAL_SUNSU
        tristate "Sun SU serial support"
-       depends on (SPARC32 || SPARC64) && PCI
+       depends on SPARC && PCI
        help
          This driver supports the 8250 serial ports that run the keyboard and
          mouse on (PCI) UltraSPARC systems.  Say Y or M if you want to be able
@@ -499,7 +508,7 @@
 
 config SERIAL_MUX
        tristate "Serial MUX support"
-       depends on PARISC
+       depends on GSC
        select SERIAL_CORE
        default y
        ---help---
@@ -539,7 +548,7 @@
 
 config SERIAL_SUNSAB
        tristate "Sun Siemens SAB82532 serial support"
-       depends on (SPARC32 || SPARC64) && PCI
+       depends on SPARC && PCI
        help
          This driver supports the Siemens SAB82532 DUSCC serial ports on newer
          (PCI) UltraSPARC systems.  Say Y or M if you want to be able to these
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Wed Feb  1 18:00:19 2006
@@ -92,14 +92,14 @@
 static struct timer_list balloon_timer;
 
 /* Use the private and mapping fields of struct page as a list. */
-#define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
+#define PAGE_TO_LIST(p) ((struct list_head *)&p->u.private)
 #define LIST_TO_PAGE(l)                                \
-       (list_entry(((unsigned long *)l), struct page, private))
+       (list_entry(((unsigned long *)l), struct page, u.private))
 #define UNLIST_PAGE(p)                         \
        do {                                    \
                list_del(PAGE_TO_LIST(p));      \
                p->mapping = NULL;              \
-               p->private = 0;                 \
+               p->u.private = 0;               \
        } while(0)
 
 #define IPRINTK(fmt, args...) \
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Wed Feb  1 18:00:19 2006
@@ -126,8 +126,10 @@
 
 static void cpu_bringup(void)
 {
-       if (!cpu_isset(smp_processor_id(), cpu_initialized))
+       if (!cpu_isset(smp_processor_id(), cpu_initialized)) {
                cpu_init();
+               preempt_disable();
+       }
        local_irq_enable();
        cpu_idle();
 }
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/fs/Kconfig
--- a/linux-2.6-xen-sparse/fs/Kconfig   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/fs/Kconfig   Wed Feb  1 18:00:19 2006
@@ -810,7 +810,7 @@
 
 config HUGETLBFS
        bool "HugeTLB file system support"
-       depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
+       depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
        depends !XEN
 
 config HUGETLB_PAGE
@@ -899,6 +899,7 @@
 config HFS_FS
        tristate "Apple Macintosh file system support (EXPERIMENTAL)"
        depends on EXPERIMENTAL
+       select NLS
        help
          If you say Y here, you will be able to mount Macintosh-formatted
          floppy disks and hard drive partitions with full read-write access.
@@ -1051,6 +1052,19 @@
            - NOR flash with transparent ECC
            - DataFlash
 
+config JFFS2_SUMMARY
+       bool "JFFS2 summary support (EXPERIMENTAL)"
+       depends on JFFS2_FS && EXPERIMENTAL
+       default n
+       help
+         This feature makes it possible to use summary information
+         for faster filesystem mount.
+
+         The summary information can be inserted into a filesystem image
+         by the utility 'sumtool'.
+
+         If unsure, say 'N'.
+
 config JFFS2_COMPRESSION_OPTIONS
        bool "Advanced compression options for JFFS2"
        depends on JFFS2_FS
@@ -1072,10 +1086,10 @@
        default y
         help
           Zlib is designed to be a free, general-purpose, legally unencumbered,
-          lossless data-compression library for use on virtually any computer 
+          lossless data-compression library for use on virtually any computer
           hardware and operating system. See <http://www.gzip.org/zlib/> for
           further information.
-          
+
           Say 'Y' if unsure.
 
 config JFFS2_RTIME
@@ -1097,7 +1111,7 @@
         default JFFS2_CMODE_PRIORITY
         depends on JFFS2_FS
         help
-          You can set here the default compression mode of JFFS2 from 
+          You can set here the default compression mode of JFFS2 from
           the available compression modes. Don't touch if unsure.
 
 config JFFS2_CMODE_NONE
@@ -1108,13 +1122,13 @@
 config JFFS2_CMODE_PRIORITY
         bool "priority"
         help
-          Tries the compressors in a predefinied order and chooses the first 
+          Tries the compressors in a predefinied order and chooses the first
           successful one.
 
 config JFFS2_CMODE_SIZE
         bool "size (EXPERIMENTAL)"
         help
-          Tries all compressors and chooses the one which has the smallest 
+          Tries all compressors and chooses the one which has the smallest
           result.
 
 endchoice
@@ -1588,9 +1602,10 @@
          PC operating systems.  The CIFS protocol is fully supported by 
          file servers such as Windows 2000 (including Windows 2003, NT 4  
          and Windows XP) as well by Samba (which provides excellent CIFS
-         server support for Linux and many other operating systems). Currently
-         you must use the smbfs client filesystem to access older SMB servers
-         such as Windows 9x and OS/2.
+         server support for Linux and many other operating systems). Limited
+         support for Windows ME and similar servers is provided as well. 
+         You must use the smbfs client filesystem to access older SMB servers
+         such as OS/2 and DOS.
 
          The intent of the cifs module is to provide an advanced
          network file system client for mounting to CIFS compliant servers, 
@@ -1601,7 +1616,7 @@
          cifs if running only a (Samba) server. It is possible to enable both
          smbfs and cifs (e.g. if you are using CIFS for accessing Windows 2003
          and Samba 3 servers, and smbfs for accessing old servers). If you need 
-         to mount to Samba or Windows 2003 servers from this machine, say Y.
+         to mount to Samba or Windows from this machine, say Y.
 
 config CIFS_STATS
         bool "CIFS statistics"
@@ -1610,8 +1625,22 @@
           Enabling this option will cause statistics for each server share
          mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
 
+config CIFS_STATS2
+       bool "CIFS extended statistics"
+       depends on CIFS_STATS
+       help
+         Enabling this option will allow more detailed statistics on SMB
+         request timing to be displayed in /proc/fs/cifs/DebugData and also
+         allow optional logging of slow responses to dmesg (depending on the
+         value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+         These additional statistics may have a minor effect on performance
+         and memory utilization.
+
+         Unless you are a developer or are doing network performance analysis
+         or tuning, say N.
+
 config CIFS_XATTR
-        bool "CIFS extended attributes (EXPERIMENTAL)"
+        bool "CIFS extended attributes"
         depends on CIFS
         help
           Extended attributes are name:value pairs associated with inodes by
@@ -1623,11 +1652,11 @@
           prefaced by the user namespace prefix. The system namespace
           (used by some filesystems to store ACLs) is not supported at
           this time.
-                                                                               
                     
+
           If unsure, say N.
 
 config CIFS_POSIX
-        bool "CIFS POSIX Extensions (EXPERIMENTAL)"
+        bool "CIFS POSIX Extensions"
         depends on CIFS_XATTR
         help
           Enabling this option will cause the cifs client to attempt to
@@ -1640,10 +1669,28 @@
 
 config CIFS_EXPERIMENTAL
          bool "CIFS Experimental Features (EXPERIMENTAL)"
-         depends on CIFS
+         depends on CIFS && EXPERIMENTAL
          help
-           Enables cifs features under testing. These features
-           are highly experimental.  If unsure, say N.
+           Enables cifs features under testing. These features are
+           experimental and currently include support for writepages
+           (multipage writebehind performance improvements) and directory
+           change notification ie fcntl(F_DNOTIFY) as well as some security
+           improvements.  Some also depend on setting at runtime the
+           pseudo-file /proc/fs/cifs/Experimental (which is disabled by
+           default). See the file fs/cifs/README for more details.
+
+           If unsure, say N.
+
+config CIFS_UPCALL
+         bool "CIFS Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
+         depends on CIFS_EXPERIMENTAL
+         select CONNECTOR
+         help
+           Enables an upcall mechanism for CIFS which will be used to contact
+           userspace helper utilities to provide SPNEGO packaged Kerberos
+           tickets which are needed to mount to certain secure servers
+           (for which more secure Kerberos authentication is required). If
+           unsure, say N.
 
 config NCP_FS
        tristate "NCP file system support (to mount NetWare volumes)"
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/atomic.h
--- a/linux-2.6-xen-sparse/include/asm-i386/atomic.h    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/atomic.h    Wed Feb  1 18:00:19 2006
@@ -210,6 +210,27 @@
        return atomic_add_return(-i,v);
 }
 
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)                             \
+({                                                             \
+       int c, old;                                             \
+       c = atomic_read(v);                                     \
+       while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+               c = old;                                        \
+       c != (u);                                               \
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_inc_return(v)  (atomic_add_return(1,v))
 #define atomic_dec_return(v)  (atomic_sub_return(1,v))
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Wed Feb  1 18:00:19 2006
@@ -14,6 +14,8 @@
 #include <asm/mmu.h>
 
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
 
 DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 
@@ -37,8 +39,6 @@
 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
 #define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
 #define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
-
-#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
 
 /*
  * This is the ldt that every process will get unless we need
@@ -68,8 +68,7 @@
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-       _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
-           (int)addr, ((size << 3)-1), 0x82);
+       _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
 #define LDT_entry_a(info) \
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h  Wed Feb  1 18:00:19 2006
@@ -63,7 +63,6 @@
                cpu_set(cpu, next->cpu_vm_mask);
 
                /* Re-load page tables: load_cr3(next->pgd) */
-               per_cpu(cur_pgd, cpu) = next->pgd;
                op->cmd = MMUEXT_NEW_BASEPTR;
                op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
                op++;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h        Wed Feb  1 18:00:19 2006
@@ -1,9 +1,8 @@
-#include <linux/config.h>
-
 #ifndef _ASMi386_PARAM_H
 #define _ASMi386_PARAM_H
 
 #ifdef __KERNEL__
+# include <linux/config.h>
 # define HZ            CONFIG_HZ       /* Internal kernel timer frequency */
 # define USER_HZ       100             /* .. some user interfaces are in "ticks" */
 # define CLOCKS_PER_SEC                (USER_HZ)       /* like times() */
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h       Wed Feb  1 18:00:19 2006
@@ -76,11 +76,6 @@
 #define pfn_pte_ma(pfn, prot)  __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 #define pfn_pmd(pfn, prot)     __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
 /*
  * All present user pages are user-executable:
  */
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h       Wed Feb  1 18:00:19 2006
@@ -103,11 +103,6 @@
  */
 static inline void pud_clear (pud_t * pud) { }
 
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
 #define pud_page(pud) \
 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      Wed Feb  1 18:00:19 2006
@@ -25,6 +25,9 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+
+struct mm_struct;
+struct vm_area_struct;
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
@@ -204,7 +207,8 @@
 #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
 #define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
-#define pmd_none(x)    (!pmd_val(x))
+/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+#define pmd_none(x)    (!(unsigned long)pmd_val(x))
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    can temporarily clear it. */
 #define pmd_present(x) (pmd_val(x))
@@ -326,8 +330,6 @@
        return pte;
 }
 
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
 #define pmd_large(pmd) \
 ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
 
@@ -371,6 +373,11 @@
                (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 #define pte_offset_kernel(dir, address) \
        ((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
+
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+#define pmd_page_kernel(pmd) \
+               ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
 
 /*
  * Helper function that returns the kernel pagetable entry controlling
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h    Wed Feb  1 18:00:19 2006
@@ -66,7 +66,9 @@
        int     f00f_bug;
        int     coma_bug;
        unsigned long loops_per_jiffy;
-       unsigned char x86_num_cores;
+       unsigned char x86_max_cores;    /* cpuid returned max cores value */
+       unsigned char booted_cores;     /* number of cores as seen by OS */
+       unsigned char apicid;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL 0
@@ -89,7 +91,6 @@
 extern struct cpuinfo_x86 new_cpu_data;
 extern struct tss_struct doublefault_tss;
 DECLARE_PER_CPU(struct tss_struct, init_tss);
-DECLARE_PER_CPU(pgd_t *, cur_pgd);
 
 #ifdef CONFIG_SMP
 extern struct cpuinfo_x86 cpu_data[];
@@ -724,4 +725,10 @@
 #define mtrr_bp_init() do {} while (0)
 #endif
 
+#ifdef CONFIG_X86_MCE
+extern void mcheck_init(struct cpuinfo_x86 *c);
+#else
+#define mcheck_init(c) do {} while(0)
+#endif
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h  Wed Feb  1 18:00:19 2006
@@ -45,6 +45,8 @@
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
 
+#define cpu_physical_id(cpu)   x86_cpu_to_apicid[cpu]
+
 #ifdef CONFIG_HOTPLUG_CPU
 extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
@@ -91,6 +93,10 @@
 extern void __cpu_die(unsigned int cpu);
 #endif /* !__ASSEMBLY__ */
 
+#else /* CONFIG_SMP */
+
+#define cpu_physical_id(cpu)           boot_cpu_physical_apicid
+
 #define NO_PROC_ID             0xFF            /* No processor magic marker */
 
 #endif
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Wed Feb  1 18:00:19 2006
@@ -122,21 +122,22 @@
 #define write_cr0(x) \
        __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
 
-#define read_cr2() ({ \
-       unsigned int __dummy; \
-       __asm__ __volatile__( \
-               "movl %%cr2,%0\n\t" \
-               :"=r" (__dummy)); \
-       __dummy; \
-})
+#define read_cr2() \
+       (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
 #define write_cr2(x) \
        __asm__ __volatile__("movl %0,%%cr2": :"r" (x));
 
-#define read_cr3() per_cpu(cur_pgd, smp_processor_id())
-#define write_cr3(x) do {                              \
-       xen_pt_switch((x));                             \
-       per_cpu(cur_pgd, smp_processor_id()) = (x);     \
-} while (/* CONSTCOND */0)
+#define read_cr3() ({ \
+       unsigned int __dummy; \
+       __asm__ ( \
+               "movl %%cr3,%0\n\t" \
+               :"=r" (__dummy)); \
+       machine_to_phys(__dummy); \
+})
+#define write_cr3(x) ({                                                \
+       maddr_t __dummy = phys_to_machine(x);                   \
+       __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy));  \
+})
 
 #define read_cr4() ({ \
        unsigned int __dummy; \
@@ -147,7 +148,6 @@
 })
 #define write_cr4(x) \
        __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
-
 #define stts() (HYPERVISOR_fpu_taskswitch(1))
 
 #endif /* __KERNEL__ */
@@ -172,6 +172,8 @@
 struct __xchg_dummy { unsigned long a[100]; };
 #define __xg(x) ((struct __xchg_dummy *)(x))
 
+
+#ifdef CONFIG_X86_CMPXCHG64
 
 /*
  * The semantics of XCHGCMP8B are a bit strange, this is why
@@ -226,6 +228,8 @@
 (__builtin_constant_p(value) ? \
  __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
  __set_64bit(ptr, ll_low(value), ll_high(value)) )
+
+#endif
 
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
@@ -265,6 +269,9 @@
 
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
 #endif
 
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -281,22 +288,78 @@
        case 2:
                __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
                                     : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
                return prev;
        case 4:
                __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
                                     : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
                return prev;
        }
        return old;
 }
 
-#define cmpxchg(ptr,o,n)\
-       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-                                       (unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       switch (size) {
+       case 1:
+               return cmpxchg_386_u8(ptr, old, new);
+       case 2:
+               return cmpxchg_386_u16(ptr, old, new);
+       case 4:
+               return cmpxchg_386_u32(ptr, old, new);
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)                                               \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       if (likely(boot_cpu_data.x86 > 3))                              \
+               __ret = __cmpxchg((ptr), (unsigned long)(o),            \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       else                                                            \
+               __ret = cmpxchg_386((ptr), (unsigned long)(o),          \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       __ret;                                                          \
+})
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+
+static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+                                     unsigned long long new)
+{
+       unsigned long long prev;
+       __asm__ __volatile__(LOCK "cmpxchg8b %3"
+                            : "=A"(prev)
+                            : "b"((unsigned long)new),
+                              "c"((unsigned long)(new >> 32)),
+                              "m"(*__xg(ptr)),
+                              "0"(old)
+                            : "memory");
+       return prev;
+}
+
+#define cmpxchg64(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+                                       (unsigned long long)(n)))
+
+#endif
     
 #ifdef __KERNEL__
 struct alt_instr { 
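
The system.h hunks above make cmpxchg() a public macro, relax the inline-asm constraints from "q" to "r", and, when CONFIG_X86_CMPXCHG is unset, dispatch at run time between the native instruction and the cmpxchg_386_u8/u16/u32 emulation helpers. A user-space sketch of the size-dispatch shape, using GCC's __sync_val_compare_and_swap() for every width so it compiles anywhere; cas_any() and the fixed widths are illustrative stand-ins, not the kernel's helpers:

#include <stdint.h>
#include <stddef.h>

/* Dispatch a compare-and-swap by operand size, the same shape as
 * __cmpxchg()/cmpxchg_386() above; here every size shares one portable
 * primitive instead of per-CPU asm or 386 emulation. */
static unsigned long cas_any(volatile void *ptr, unsigned long old,
                             unsigned long new_val, size_t size)
{
        switch (size) {
        case 1:
                return __sync_val_compare_and_swap((volatile uint8_t *)ptr,
                                                   (uint8_t)old, (uint8_t)new_val);
        case 2:
                return __sync_val_compare_and_swap((volatile uint16_t *)ptr,
                                                   (uint16_t)old, (uint16_t)new_val);
        case 4:
                return __sync_val_compare_and_swap((volatile uint32_t *)ptr,
                                                   (uint32_t)old, (uint32_t)new_val);
        }
        return old;     /* unsupported size: report "no change", as the kernel helper does */
}
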
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/rwsem.h
--- a/linux-2.6-xen-sparse/include/asm-i386/rwsem.h     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/rwsem.h     Wed Feb  1 18:00:19 2006
@@ -285,5 +285,10 @@
        return tmp+delta;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+       return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _I386_RWSEM_H */
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-i386/system.h    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/system.h    Wed Feb  1 18:00:19 2006
@@ -167,6 +167,8 @@
 #define __xg(x) ((struct __xchg_dummy *)(x))
 
 
+#ifdef CONFIG_X86_CMPXCHG64
+
 /*
  * The semantics of XCHGCMP8B are a bit strange, this is why
  * there is a loop and the loading of %%eax and %%edx has to
@@ -220,6 +222,8 @@
 (__builtin_constant_p(value) ? \
  __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
  __set_64bit(ptr, ll_low(value), ll_high(value)) )
+
+#endif
 
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
@@ -259,6 +263,9 @@
 
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
 #endif
 
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -275,22 +282,78 @@
        case 2:
                __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
                                     : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
                return prev;
        case 4:
                __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
                                     : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
                return prev;
        }
        return old;
 }
 
-#define cmpxchg(ptr,o,n)\
-       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-                                       (unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       switch (size) {
+       case 1:
+               return cmpxchg_386_u8(ptr, old, new);
+       case 2:
+               return cmpxchg_386_u16(ptr, old, new);
+       case 4:
+               return cmpxchg_386_u32(ptr, old, new);
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)                                               \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       if (likely(boot_cpu_data.x86 > 3))                              \
+               __ret = __cmpxchg((ptr), (unsigned long)(o),            \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       else                                                            \
+               __ret = cmpxchg_386((ptr), (unsigned long)(o),          \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       __ret;                                                          \
+})
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+
+static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+                                     unsigned long long new)
+{
+       unsigned long long prev;
+       __asm__ __volatile__(LOCK "cmpxchg8b %3"
+                            : "=A"(prev)
+                            : "b"((unsigned long)new),
+                              "c"((unsigned long)(new >> 32)),
+                              "m"(*__xg(ptr)),
+                              "0"(old)
+                            : "memory");
+       return prev;
+}
+
+#define cmpxchg64(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+                                       (unsigned long long)(n)))
+
+#endif
     
 #ifdef __KERNEL__
 struct alt_instr { 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-um/page.h
--- a/linux-2.6-xen-sparse/include/asm-um/page.h        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-um/page.h        Wed Feb  1 18:00:19 2006
@@ -115,7 +115,7 @@
 #define pfn_valid(pfn) ((pfn) < max_mapnr)
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
 
-extern struct page *arch_validate(struct page *page, int mask, int order);
+extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
 #define HAVE_ARCH_VALIDATE
 
 extern int arch_free_page(struct page *page, int order);
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h       Wed Feb  1 18:00:19 2006
@@ -114,16 +114,19 @@
 
 static inline void set_intr_gate(int nr, void *func) 
 { 
+       BUG_ON((unsigned)nr > 0xFF);
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 
 } 
 
 static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
 { 
+       BUG_ON((unsigned)nr > 0xFF);
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 
ist); 
 } 
 
 static inline void set_system_gate(int nr, void *func) 
 { 
+       BUG_ON((unsigned)nr > 0xFF);
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
 } 
 
@@ -145,10 +148,16 @@
 
 static inline void set_tss_desc(unsigned cpu, void *addr)
 { 
+       /*
+        * sizeof(unsigned long) coming from an extra "long" at the end
+        * of the iobitmap. See tss_struct definition in processor.h
+        *
+        * -1? seg base+limit should be pointing to the address of the
+        * last valid byte
+        */
         set_tssldt_descriptor((struct ldttss_desc *)&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS], 
-                              (unsigned long)addr, 
-                              DESC_TSS,
-                              sizeof(struct tss_struct) - 1);
+               (unsigned long)addr, DESC_TSS,
+               IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
 } 
 
 static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h     Wed Feb  1 18:00:19 2006
@@ -56,7 +56,7 @@
 #define CALL_FUNCTION_VECTOR   0xfc
 #define KDB_VECTOR             0xfb    /* reserved for KDB */
 #define THERMAL_APIC_VECTOR    0xfa
-/* 0xf9 free */
+#define THRESHOLD_APIC_VECTOR   0xf9
 #define INVALIDATE_TLB_VECTOR_END      0xf8
 #define INVALIDATE_TLB_VECTOR_START    0xf0    /* f0-f8 used for TLB flush */
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h        Wed Feb  1 18:00:19 2006
@@ -60,6 +60,12 @@
 extern void mm_unpin(struct mm_struct *mm);
 void mm_pin_all(void);
 
+static inline void load_cr3(pgd_t *pgd)
+{
+       asm volatile("movq %0,%%cr3" :: "r" (phys_to_machine(__pa(pgd))) :
+                    "memory");
+}
+
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
                             struct task_struct *tsk)
 {
@@ -79,7 +85,6 @@
                set_bit(cpu, &next->cpu_vm_mask);
 
                /* load_cr3(next->pgd) */
-               per_cpu(cur_pgd, smp_processor_id()) = next->pgd;
                op->cmd = MMUEXT_NEW_BASEPTR;
                op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
                op++;
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h       Wed Feb  1 18:00:19 2006
@@ -31,7 +31,7 @@
 #define PAGE_SIZE      (1UL << PAGE_SHIFT)
 #endif
 #define PAGE_MASK      (~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK     (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
+#define PHYSICAL_PAGE_MASK     (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
 
 #define THREAD_ORDER 1 
 #ifdef __ASSEMBLY__
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h      Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h      Wed Feb  1 18:00:19 2006
@@ -1,9 +1,8 @@
-#include <linux/config.h>
-
 #ifndef _ASMx86_64_PARAM_H
 #define _ASMx86_64_PARAM_H
 
 #ifdef __KERNEL__
+# include <linux/config.h>
 # define HZ            CONFIG_HZ       /* Internal kernel timer frequency */
 # define USER_HZ       100             /* .. some user interfaces are in "ticks */
 # define CLOCKS_PER_SEC                (USER_HZ)       /* like times() */
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Wed Feb  1 18:00:19 2006
@@ -39,6 +39,7 @@
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pgd_t init_level4_pgt[];
+extern pgd_t boot_level4_pgt[];
 extern unsigned long __supported_pte_mask;
 
 #define swapper_pg_dir init_level4_pgt
@@ -142,6 +143,8 @@
         return pte;
 }
 #endif
+
+struct mm_struct;
 
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
 {
@@ -405,8 +408,6 @@
  * and a page entry and page directory to the page they refer to.
  */
 
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
 /*
  * Level 4 access.
  * Never use these in the common code.
@@ -450,7 +451,7 @@
 #define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
 #define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
-#define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
 #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h  Wed Feb  1 18:00:19 2006
@@ -61,10 +61,12 @@
        int     x86_cache_alignment;
        int     x86_tlbsize;    /* number of 4K pages in DTLB/ITLB combined(in pages)*/
         __u8    x86_virt_bits, x86_phys_bits;
-       __u8    x86_num_cores;
+       __u8    x86_max_cores;  /* cpuid returned max cores value */
         __u32   x86_power;     
        __u32   extended_cpuid_level;   /* Max extended CPUID function supported */
        unsigned long loops_per_jiffy;
+       __u8    apicid;
+       __u8    booted_cores;   /* number of cores as seen by OS */
 } ____cacheline_aligned;
 
 #define X86_VENDOR_INTEL 0
@@ -167,11 +169,6 @@
 #endif
 }
 
-
-#define load_cr3(pgdir) do {                           \
-       xen_pt_switch(__pa(pgdir));                     \
-       per_cpu(cur_pgd, smp_processor_id()) = pgdir;   \
-} while (/* CONSTCOND */0)
 
 /*
  * Bus types
@@ -247,7 +244,6 @@
 
 extern struct cpuinfo_x86 boot_cpu_data;
 DECLARE_PER_CPU(struct tss_struct,init_tss);
-DECLARE_PER_CPU(pgd_t *, cur_pgd);
 
 #define ARCH_MIN_TASKALIGN     16
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h        Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h        Wed Feb  1 18:00:19 2006
@@ -46,7 +46,6 @@
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
-extern void zap_low_mappings(void);
 void smp_stop_cpu(void);
 extern int smp_call_function_single(int cpuid, void (*func) (void *info),
                                void *info, int retry, int wait);
@@ -83,6 +82,8 @@
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void prefill_possible_map(void);
+extern unsigned num_processors;
+extern unsigned disabled_cpus;
 
 #endif /* !ASSEMBLY */
 
@@ -139,5 +140,11 @@
 #endif
 #endif
 
+#ifdef CONFIG_SMP
+#define cpu_physical_id(cpu)           x86_cpu_to_apicid[cpu]
+#else
+#define cpu_physical_id(cpu)           boot_cpu_id
 #endif
 
+#endif
+
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h     Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h     Wed Feb  1 18:00:19 2006
@@ -9,6 +9,12 @@
 #include <xen/interface/arch-x86_64.h>
 
 #ifdef __KERNEL__
+
+#ifdef CONFIG_SMP
+#define __vcpu_id smp_processor_id()
+#else
+#define __vcpu_id 0
+#endif
 
 #ifdef CONFIG_SMP
 #define LOCK_PREFIX "lock ; "
@@ -325,7 +331,7 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
        _vcpu->evtchn_upcall_mask = 1;                                  \
        preempt_enable_no_resched();                                    \
        barrier();                                                      \
@@ -336,7 +342,7 @@
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
        _vcpu->evtchn_upcall_mask = 0;                                  \
        barrier(); /* unmask then check (avoid races) */                \
        if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
@@ -348,7 +354,7 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
        (x) = _vcpu->evtchn_upcall_mask;                                \
        preempt_enable();                                               \
 } while (0)
@@ -358,7 +364,7 @@
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
        if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
                barrier(); /* unmask then check (avoid races) */        \
                if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
@@ -374,7 +380,7 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
        (x) = _vcpu->evtchn_upcall_mask;                                \
        _vcpu->evtchn_upcall_mask = 1;                                  \
        preempt_enable_no_resched();                                    \
@@ -394,7 +400,7 @@
 ({     int ___x;                                                       \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
        ___x = (_vcpu->evtchn_upcall_mask != 0);                        \
        preempt_enable_no_resched();                                    \
        ___x; })
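
The hunks above route every vcpu_info[] lookup through a __vcpu_id macro that is smp_processor_id() on SMP builds and the constant 0 otherwise, so uniprocessor kernels index the shared-info array without pulling in the SMP machinery. A tiny sketch of the pattern; CONFIG_SMP, this_cpu() and the structure below are illustrative stand-ins and compile as a user-space program in the default (non-SMP) case:

#include <stdio.h>

#ifdef CONFIG_SMP
extern int this_cpu(void);              /* illustrative; the kernel uses smp_processor_id() */
#define vcpu_id()  this_cpu()
#else
#define vcpu_id()  0                    /* UP build: the lookup folds to vcpu_info[0] */
#endif

struct vcpu_info_like { int evtchn_upcall_mask; };
static struct vcpu_info_like vcpu_info[1];

int main(void)
{
        vcpu_info[vcpu_id()].evtchn_upcall_mask = 1;    /* "mask events for this vcpu" */
        printf("mask=%d\n", vcpu_info[vcpu_id()].evtchn_upcall_mask);
        return 0;
}
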
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/linux/gfp.h
--- a/linux-2.6-xen-sparse/include/linux/gfp.h  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h  Wed Feb  1 18:00:19 2006
@@ -11,9 +11,16 @@
 /*
  * GFP bitmasks..
  */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA      0x01u
-#define __GFP_HIGHMEM  0x02u
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
+#define __GFP_DMA      ((__force gfp_t)0x01u)
+#define __GFP_HIGHMEM  ((__force gfp_t)0x02u)
+#ifdef CONFIG_DMA_IS_DMA32
+#define __GFP_DMA32    ((__force gfp_t)0x01)   /* ZONE_DMA is ZONE_DMA32 */
+#elif BITS_PER_LONG < 64
+#define __GFP_DMA32    ((__force gfp_t)0x00)   /* ZONE_NORMAL is ZONE_DMA32 */
+#else
+#define __GFP_DMA32    ((__force gfp_t)0x04)   /* Has own ZONE_DMA32 */
+#endif
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -26,30 +33,29 @@
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  */
-#define __GFP_WAIT     0x10u   /* Can wait and reschedule? */
-#define __GFP_HIGH     0x20u   /* Should access emergency pools? */
-#define __GFP_IO       0x40u   /* Can start physical IO? */
-#define __GFP_FS       0x80u   /* Can call down to low-level FS? */
-#define __GFP_COLD     0x100u  /* Cache-cold page required */
-#define __GFP_NOWARN   0x200u  /* Suppress page allocation failure warning */
-#define __GFP_REPEAT   0x400u  /* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL   0x800u  /* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY  0x1000u /* Do not retry.  Might fail */
-#define __GFP_NO_GROW  0x2000u /* Slab internal usage */
-#define __GFP_COMP     0x4000u /* Add compound page metadata */
-#define __GFP_ZERO     0x8000u /* Return zeroed page on success */
-#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
-#define __GFP_HARDWALL   0x40000u /* Enforce hardwall cpuset memory allocs */
+#define __GFP_WAIT     ((__force gfp_t)0x10u)  /* Can wait and reschedule? */
+#define __GFP_HIGH     ((__force gfp_t)0x20u)  /* Should access emergency pools? */
+#define __GFP_IO       ((__force gfp_t)0x40u)  /* Can start physical IO? */
+#define __GFP_FS       ((__force gfp_t)0x80u)  /* Can call down to low-level FS? */
+#define __GFP_COLD     ((__force gfp_t)0x100u) /* Cache-cold page required */
+#define __GFP_NOWARN   ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */
+#define __GFP_REPEAT   ((__force gfp_t)0x400u) /* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL   ((__force gfp_t)0x800u) /* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY  ((__force gfp_t)0x1000u)/* Do not retry.  Might fail */
+#define __GFP_NO_GROW  ((__force gfp_t)0x2000u)/* Slab internal usage */
+#define __GFP_COMP     ((__force gfp_t)0x4000u)/* Add compound page metadata */
+#define __GFP_ZERO     ((__force gfp_t)0x8000u)/* Return zeroed page on success */
+#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+#define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 
 #define __GFP_BITS_SHIFT 20    /* Room for 20 __GFP_FOO bits */
-#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
                        __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
                        __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-                       __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL)
+                       __GFP_NOMEMALLOC|__GFP_HARDWALL)
 
 #define GFP_ATOMIC     (__GFP_HIGH)
 #define GFP_NOIO       (__GFP_WAIT)
@@ -64,6 +70,16 @@
 
 #define GFP_DMA                __GFP_DMA
 
+/* 4GB DMA on some platforms */
+#define GFP_DMA32      __GFP_DMA32
+
+
+static inline int gfp_zone(gfp_t gfp)
+{
+       int zone = GFP_ZONEMASK & (__force int) gfp;
+       BUG_ON(zone >= GFP_ZONETYPES);
+       return zone;
+}
 
 /*
  * There is only one page-allocator function, and two main namespaces to
@@ -98,7 +114,7 @@
                return NULL;
 
        return __alloc_pages(gfp_mask, order,
-               NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+               NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
 #ifdef CONFIG_NUMA
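
The gfp.h changes above tag every flag with the new gfp_t type (the __force casts keep sparse quiet) and add gfp_zone(), which keeps only the low zone-selector bits of the flags and uses them to pick a zonelist. A small user-space model of that masking step; the mask and zone-count values are illustrative assumptions, not the kernel's constants:

#include <assert.h>
#include <stdio.h>

#define MY_ZONEMASK   0x07      /* low bits select the zone (illustrative value) */
#define MY_ZONETYPES  4         /* number of zones in this model */

/* Model of gfp_zone(): strip the behaviour-modifier bits and keep only
 * the zone index used to select node_zonelists[]. */
static int my_gfp_zone(unsigned int gfp_flags)
{
        int zone = gfp_flags & MY_ZONEMASK;

        assert(zone < MY_ZONETYPES);
        return zone;
}

int main(void)
{
        /* 0x01 models a DMA-zone selector; 0x10 models an action modifier. */
        printf("zone=%d\n", my_gfp_zone(0x01 | 0x10));  /* prints zone=1 */
        return 0;
}
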
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/linux/irq.h
--- a/linux-2.6-xen-sparse/include/linux/irq.h  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/linux/irq.h  Wed Feb  1 18:00:19 2006
@@ -10,6 +10,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/smp.h>
 
 #if !defined(CONFIG_ARCH_S390)
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/linux/mm.h   Wed Feb  1 18:00:19 2006
@@ -144,7 +144,8 @@
 
 #define VM_GROWSDOWN   0x00000100      /* general info on the segment */
 #define VM_GROWSUP     0x00000200
-#define VM_SHM         0x00000400      /* shared memory area, don't swap out */
+#define VM_SHM         0x00000000      /* Means nothing: delete it later */
+#define VM_PFNMAP      0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE   0x00000800      /* ETXTBSY on write attempts.. */
 
 #define VM_EXECUTABLE  0x00001000
@@ -157,13 +158,14 @@
 
 #define VM_DONTCOPY    0x00020000      /* Do not copy this vma on fork */
 #define VM_DONTEXPAND  0x00040000      /* Cannot expand with mremap() */
-#define VM_RESERVED    0x00080000      /* Don't unmap it from swap_out */
+#define VM_RESERVED    0x00080000      /* Count as reserved_vm like IO */
 #define VM_ACCOUNT     0x00100000      /* Is a VM accounted object */
 #define VM_HUGETLB     0x00400000      /* Huge TLB Page VM */
 #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY 0x01000000      /* T if mapped copy of data (nommu mmap) */
+#define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
 #ifdef CONFIG_XEN
-#define VM_FOREIGN     0x02000000      /* Has pages belonging to another VM */
+#define VM_FOREIGN     0x04000000      /* Has pages belonging to another VM */
 #endif
 
 #ifndef VM_STACK_DEFAULT_FLAGS         /* arch can override this */
@@ -209,12 +211,6 @@
 struct mmu_gather;
 struct inode;
 
-#ifdef ARCH_HAS_ATOMIC_UNSIGNED
-typedef unsigned page_flags_t;
-#else
-typedef unsigned long page_flags_t;
-#endif
-
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -222,20 +218,25 @@
  * a page.
  */
 struct page {
-       page_flags_t flags;             /* Atomic flags, some possibly
+       unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
        atomic_t _count;                /* Usage count, see below. */
        atomic_t _mapcount;             /* Count of ptes mapped in mms,
                                         * to show when page is mapped
                                         * & limit reverse map searches.
                                         */
-       unsigned long private;          /* Mapping-private opaque data:
+       union {
+               unsigned long private;  /* Mapping-private opaque data:
                                         * usually used for buffer_heads
                                         * if PagePrivate set; used for
                                         * swp_entry_t if PageSwapCache
                                         * When page is free, this indicates
                                         * order in the buddy system.
                                         */
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+               spinlock_t ptl;
+#endif
+       } u;
        struct address_space *mapping;  /* If low bit clear, points to
                                         * inode address_space, or NULL.
                                         * If page mapped as anonymous
@@ -263,6 +264,9 @@
 #endif /* WANT_PAGE_VIRTUAL */
 };
 
+#define page_private(page)             ((page)->u.private)
+#define set_page_private(page, v)      ((page)->u.private = (v))
+
 /*
  * FIXME: take this include out, include page-flags.h in
  * files which need it (119 of them)
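A side note, not from the patch: since ->private now sits in a union next to the split page-table lock, callers are expected to go through the page_private()/set_page_private() accessors defined above. A minimal kernel-style sketch with hypothetical helper names:

static inline void demo_stash_order(struct page *page, unsigned long order)
{
        set_page_private(page, order);  /* e.g. buddy order while the page is free */
}

static inline unsigned long demo_read_order(struct page *page)
{
        return page_private(page);
}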
@@ -312,40 +316,21 @@
 
 extern void FASTCALL(__page_cache_release(struct page *));
 
-#ifdef CONFIG_HUGETLB_PAGE
-
-static inline int page_count(struct page *p)
-{
-       if (PageCompound(p))
-               p = (struct page *)p->private;
-       return atomic_read(&(p)->_count) + 1;
+static inline int page_count(struct page *page)
+{
+       if (PageCompound(page))
+               page = (struct page *)page_private(page);
+       return atomic_read(&page->_count) + 1;
 }
 
 static inline void get_page(struct page *page)
 {
        if (unlikely(PageCompound(page)))
-               page = (struct page *)page->private;
+               page = (struct page *)page_private(page);
        atomic_inc(&page->_count);
 }
 
 void put_page(struct page *page);
-
-#else          /* CONFIG_HUGETLB_PAGE */
-
-#define page_count(p)          (atomic_read(&(p)->_count) + 1)
-
-static inline void get_page(struct page *page)
-{
-       atomic_inc(&page->_count);
-}
-
-static inline void put_page(struct page *page)
-{
-       if (!PageReserved(page) && put_page_testzero(page))
-               __page_cache_release(page);
-}
-
-#endif         /* CONFIG_HUGETLB_PAGE */
 
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
@@ -430,7 +415,7 @@
 #endif
 
 /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
-#define SECTIONS_PGOFF         ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+#define SECTIONS_PGOFF         ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF            (SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF            (NODES_PGOFF - ZONES_WIDTH)
 
@@ -590,7 +575,7 @@
 static inline pgoff_t page_index(struct page *page)
 {
        if (unlikely(PageSwapCache(page)))
-               return page->private;
+               return page_private(page);
        return page->index;
 }
 
@@ -683,9 +668,10 @@
        unsigned long truncate_count;           /* Compare vm_truncate_count */
 };
 
+struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
                struct vm_area_struct *start_vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *);
@@ -707,10 +693,6 @@
 }
 
 extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
@@ -726,6 +708,7 @@
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
                int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
 
 int __set_page_dirty_buffers(struct page *page);
 int __set_page_dirty_nobuffers(struct page *page);
@@ -750,7 +733,7 @@
  * The callback will be passed nr_to_scan == 0 when the VM is querying the
  * cache size, so a fastpath for that case is appropriate.
  */
-typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
 
 /*
  * Add an aging callback.  The int is the number of 'seeks' it takes
@@ -762,38 +745,85 @@
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
-/*
- * On a two-level or three-level page table, this ends up being trivial. Thus
- * the inlining and the symmetry break with pte_alloc_map() that does all
- * of this out-of-line.
- */
+extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
+
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
  * Remove it when 4level-fixup.h has been removed.
  */
-#ifdef CONFIG_MMU
-#ifndef __ARCH_HAS_4LEVEL_HACK 
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
-       if (pgd_none(*pgd))
-               return __pud_alloc(mm, pgd, address);
-       return pud_offset(pgd, address);
+       return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+               NULL: pud_offset(pgd, address);
 }
 
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
-       if (pud_none(*pud))
-               return __pmd_alloc(mm, pud, address);
-       return pmd_offset(pud, address);
-}
-#endif
-#endif /* CONFIG_MMU */
+       return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+               NULL: pmd_offset(pud, address);
+}
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page)    &((page)->u.ptl)
+#define pte_lock_init(_page)   do {                                    \
+       spin_lock_init(__pte_lockptr(_page));                           \
+} while (0)
+#define pte_lock_deinit(page)  ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd)   ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page)    do {} while (0)
+#define pte_lock_deinit(page)  do {} while (0)
+#define pte_lockptr(mm, pmd)   ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp)    \
+({                                                     \
+       spinlock_t *__ptl = pte_lockptr(mm, pmd);       \
+       pte_t *__pte = pte_offset_map(pmd, address);    \
+       *(ptlp) = __ptl;                                \
+       spin_lock(__ptl);                               \
+       __pte;                                          \
+})
+
+#define pte_unmap_unlock(pte, ptl)     do {            \
+       spin_unlock(ptl);                               \
+       pte_unmap(pte);                                 \
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address)                        \
+       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+               NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp)     \
+       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+               NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address)                 \
+       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+               NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, pg_data_t *pgdat,
        unsigned long * zones_size, unsigned long zone_start_pfn, 
        unsigned long *zholes_size);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
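Not part of the changeset: the intended calling pattern for the new pte locking is map-and-lock, operate, then unmap-and-unlock, without ever naming mm->page_table_lock directly. A kernel-style sketch (hypothetical function name):

static int demo_pte_present(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
        spinlock_t *ptl;
        pte_t *pte;
        int ret;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl); /* split lock or mm lock */
        ret = pte_present(*pte);
        pte_unmap_unlock(pte, ptl);
        return ret;
}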
@@ -837,6 +867,7 @@
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
        struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
        unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
@@ -884,20 +915,23 @@
                                         * turning readahead off */
 
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-                       unsigned long offset, unsigned long nr_to_read);
+                       pgoff_t offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
-                       unsigned long offset, unsigned long nr_to_read);
-unsigned long  page_cache_readahead(struct address_space *mapping,
+                       pgoff_t offset, unsigned long nr_to_read);
+unsigned long page_cache_readahead(struct address_space *mapping,
                          struct file_ra_state *ra,
                          struct file *filp,
-                         unsigned long offset,
+                         pgoff_t offset,
                          unsigned long size);
 void handle_ra_miss(struct address_space *mapping, 
                    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
 
 /* Do stack extension */
-extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
+extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+#ifdef CONFIG_IA64
+extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+#endif
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -920,15 +954,19 @@
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
-
-extern struct page * vmalloc_to_page(void *addr);
-extern unsigned long vmalloc_to_pfn(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
-               int write);
-extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
-int remap_pfn_range(struct vm_area_struct *, unsigned long,
-               unsigned long, unsigned long, pgprot_t);
+struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
+struct page *vmalloc_to_page(void *addr);
+unsigned long vmalloc_to_pfn(void *addr);
+int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+                       unsigned long pfn, unsigned long size, pgprot_t);
+int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+
+struct page *follow_page(struct vm_area_struct *, unsigned long address,
+                       unsigned int foll_flags);
+#define FOLL_WRITE     0x01    /* check pte is writable */
+#define FOLL_TOUCH     0x02    /* mark page accessed */
+#define FOLL_GET       0x04    /* do get_page on page */
+#define FOLL_ANON      0x08    /* give ZERO_PAGE if no pgtable */
 
 #ifdef CONFIG_XEN
 typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr, 
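For illustration only (not from the patch): follow_page() now takes the vma and a FOLL_* bitmask instead of a bare write flag. A hypothetical caller that wants a referenced, touched page might look like:

static struct page *demo_lookup_page(struct vm_area_struct *vma,
                                     unsigned long addr, int write)
{
        unsigned int flags = FOLL_GET | FOLL_TOUCH;

        if (write)
                flags |= FOLL_WRITE;
        return follow_page(vma, addr, flags);   /* NULL if nothing is mapped */
}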
@@ -938,28 +976,13 @@
 #endif
 
 #ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 #else
-static inline void __vm_stat_account(struct mm_struct *mm,
+static inline void vm_stat_account(struct mm_struct *mm,
                        unsigned long flags, struct file *file, long pages)
 {
 }
 #endif /* CONFIG_PROC_FS */
-
-static inline void vm_stat_account(struct vm_area_struct *vma)
-{
-       __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-                                                       vma_pages(vma));
-}
-
-static inline void vm_stat_unaccount(struct vm_area_struct *vma)
-{
-       __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-                                                       -vma_pages(vma));
-}
-
-/* update per process rss and vm hiwater data */
-extern void update_mem_hiwater(struct task_struct *tsk);
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/linux/skbuff.h
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h       Wed Feb  1 18:00:19 2006
@@ -137,6 +137,8 @@
        unsigned int    nr_frags;
        unsigned short  tso_size;
        unsigned short  tso_segs;
+       unsigned short  ufo_size;
+       unsigned int    ip6_frag_id;
        struct sk_buff  *frag_list;
        skb_frag_t      frags[MAX_SKB_FRAGS];
 };
@@ -171,7 +173,6 @@
  *     struct sk_buff - socket buffer
  *     @next: Next buffer in list
  *     @prev: Previous buffer in list
- *     @list: List we are on
  *     @sk: Socket we are owned by
  *     @tstamp: Time we arrived
  *     @dev: Device we arrived on/are leaving by
@@ -192,6 +193,7 @@
  *     @proto_csum_valid: Protocol csum validated since arriving at localhost
  *     @proto_csum_blank: Protocol csum must be added before leaving localhost
  *     @pkt_type: Packet class
+ *     @fclone: skbuff clone status
  *     @ip_summed: Driver fed us an IP checksum
  *     @priority: Packet queueing priority
  *     @users: User count - see {datagram,tcp}.c
@@ -204,7 +206,9 @@
  *     @destructor: Destruct function
  *     @nfmark: Can be used for communication between hooks
  *     @nfct: Associated connection, if any
+ *     @ipvs_property: skbuff is owned by ipvs
  *     @nfctinfo: Relationship of this skb to the connection
+ *     @nfct_reasm: netfilter conntrack re-assembly pointer
  *     @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *     @tc_index: Traffic control index
  *     @tc_verd: traffic control verdict
@@ -263,13 +267,13 @@
                                nohdr:1,
                                nfctinfo:3;
        __u8                    pkt_type:3,
+                               fclone:2,
 #ifndef CONFIG_XEN
-                               fclone:2;
+                               ipvs_property:1;
 #else
-                               fclone:2,
+                               ipvs_property:1,
                                proto_csum_valid:1,
                                proto_csum_blank:1;
-                               /* 1 bit spare */
 #endif
        __be16                  protocol;
 
@@ -277,8 +281,8 @@
 #ifdef CONFIG_NETFILTER
        __u32                   nfmark;
        struct nf_conntrack     *nfct;
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-       __u8                    ipvs_property:1;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       struct sk_buff          *nfct_reasm;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        struct nf_bridge_info   *nf_bridge;
@@ -350,6 +354,11 @@
 extern void          skb_under_panic(struct sk_buff *skb, int len,
                                      void *here);
 
+extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+                       int getfrag(void *from, char *to, int offset,
+                       int len,int odd, struct sk_buff *skb),
+                       void *from, int length);
+
 struct skb_seq_state
 {
        __u32           lower_offset;
@@ -605,27 +614,44 @@
  */
 
 /**
- *     __skb_queue_head - queue a buffer at the list head
+ *     __skb_queue_after - queue a buffer after another buffer in a list
  *     @list: list to use
+ *     @prev: place after this buffer
  *     @newsk: buffer to queue
  *
- *     Queue a buffer at the start of a list. This function takes no locks
+ *     Queue a buffer in the middle of a list. This function takes no locks
  *     and you must therefore hold required locks before calling it.
  *
  *     A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
-static inline void __skb_queue_head(struct sk_buff_head *list,
-                                   struct sk_buff *newsk)
-{
-       struct sk_buff *prev, *next;
-
+static inline void __skb_queue_after(struct sk_buff_head *list,
+                                    struct sk_buff *prev,
+                                    struct sk_buff *newsk)
+{
+       struct sk_buff *next;
        list->qlen++;
-       prev = (struct sk_buff *)list;
+
        next = prev->next;
        newsk->next = next;
        newsk->prev = prev;
        next->prev  = prev->next = newsk;
+}
+
+/**
+ *     __skb_queue_head - queue a buffer at the list head
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the start of a list. This function takes no locks
+ *     and you must therefore hold required locks before calling it.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_head(struct sk_buff_head *list,
+                                   struct sk_buff *newsk)
+{
+       __skb_queue_after(list, (struct sk_buff *)list, newsk);
 }
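A side note, not from the patch: __skb_queue_after() above is ordinary circular doubly-linked-list insertion; the same four pointer updates in standalone C with a hypothetical node type:

struct demo_node {
        struct demo_node *next, *prev;
};

static void demo_insert_after(struct demo_node *prev, struct demo_node *newn)
{
        struct demo_node *next = prev->next;

        newn->next = next;
        newn->prev = prev;
        next->prev = newn;
        prev->next = newn;
}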
 
 /**
@@ -1205,6 +1231,11 @@
                     prefetch(skb->next), (skb != (struct sk_buff *)(queue));   \
                     skb = skb->next)
 
+#define skb_queue_reverse_walk(queue, skb) \
+               for (skb = (queue)->prev;                                       \
+                    prefetch(skb->prev), (skb != (struct sk_buff *)(queue));   \
+                    skb = skb->prev)
+
 
 extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
                                         int noblock, int *err);
@@ -1213,8 +1244,7 @@
 extern int            skb_copy_datagram_iovec(const struct sk_buff *from,
                                               int offset, struct iovec *to,
                                               int size);
-extern int            skb_copy_and_csum_datagram_iovec(const
-                                                       struct sk_buff *skb,
+extern int            skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
                                                        int hlen,
                                                        struct iovec *iov);
 extern void           skb_free_datagram(struct sock *sk, struct sk_buff *skb);
@@ -1282,6 +1312,30 @@
 
 extern void __net_timestamp(struct sk_buff *skb);
 
+extern unsigned int __skb_checksum_complete(struct sk_buff *skb);
+
+/**
+ *     skb_checksum_complete - Calculate checksum of an entire packet
+ *     @skb: packet to process
+ *
+ *     This function calculates the checksum over the entire packet plus
+ *     the value of skb->csum.  The latter can be used to supply the
+ *     checksum of a pseudo header as used by TCP/UDP.  It returns the
+ *     checksum.
+ *
+ *     For protocols that contain complete checksums such as ICMP/TCP/UDP,
+ *     this function can be used to verify that checksum on received
+ *     packets.  In that case the function should return zero if the
+ *     checksum is correct.  In particular, this function will return zero
+ *     if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
+ *     hardware has already verified the correctness of the checksum.
+ */
+static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
+{
+       return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+               __skb_checksum_complete(skb);
+}
+
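Not part of the changeset: a receive handler would typically drop the packet when the new helper returns non-zero, along these lines (hypothetical handler):

static int demo_rcv(struct sk_buff *skb)
{
        if (skb_checksum_complete(skb)) {
                kfree_skb(skb);         /* software checksum over the packet failed */
                return -1;
        }
        /* ... deliver the packet ... */
        return 0;
}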
 #ifdef CONFIG_NETFILTER
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
@@ -1293,10 +1347,26 @@
        if (nfct)
                atomic_inc(&nfct->use);
 }
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
+{
+       if (skb)
+               atomic_inc(&skb->users);
+}
+static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
+{
+       if (skb)
+               kfree_skb(skb);
+}
+#endif
 static inline void nf_reset(struct sk_buff *skb)
 {
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+       skb->nfct_reasm = NULL;
+#endif
 }
 
 #ifdef CONFIG_BRIDGE_NETFILTER
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/kernel/irq/manage.c
--- a/linux-2.6-xen-sparse/kernel/irq/manage.c  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/kernel/irq/manage.c  Wed Feb  1 18:00:19 2006
@@ -24,6 +24,7 @@
 
 /**
  *     synchronize_irq - wait for pending IRQ handlers (on other CPUs)
+ *     @irq: interrupt number to wait for
  *
  *     This function waits for any pending IRQ handlers for this interrupt
  *     to complete before returning. If you use this function while
@@ -34,6 +35,9 @@
 void synchronize_irq(unsigned int irq)
 {
        struct irq_desc *desc = irq_desc + irq;
+
+       if (irq >= NR_IRQS)
+               return;
 
        while (desc->status & IRQ_INPROGRESS)
                cpu_relax();
@@ -58,6 +62,9 @@
 {
        irq_desc_t *desc = irq_desc + irq;
        unsigned long flags;
+
+       if (irq >= NR_IRQS)
+               return;
 
        spin_lock_irqsave(&desc->lock, flags);
        if (!desc->depth++) {
@@ -85,6 +92,9 @@
 {
        irq_desc_t *desc = irq_desc + irq;
 
+       if (irq >= NR_IRQS)
+               return;
+
        disable_irq_nosync(irq);
        if (desc->action)
                synchronize_irq(irq);
@@ -106,6 +116,9 @@
 {
        irq_desc_t *desc = irq_desc + irq;
        unsigned long flags;
+
+       if (irq >= NR_IRQS)
+               return;
 
        spin_lock_irqsave(&desc->lock, flags);
        switch (desc->depth) {
@@ -166,6 +179,9 @@
        struct irqaction *old, **p;
        unsigned long flags;
        int shared = 0;
+
+       if (irq >= NR_IRQS)
+               return -EINVAL;
 
        if (desc->handler == &no_irq_type)
                return -ENOSYS;
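As an aside (not part of the changeset): every hunk above adds the same guard, rejecting an out-of-range vector before it indexes the fixed-size irq_desc[] table. The shape of the check in standalone C (hypothetical table and size):

#define DEMO_NR_IRQS 256

static int demo_depth[DEMO_NR_IRQS];

static int demo_irq_depth(unsigned int irq)
{
        if (irq >= DEMO_NR_IRQS)        /* out of range: fail early */
                return -1;
        return demo_depth[irq];
}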
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/lib/Kconfig.debug
--- a/linux-2.6-xen-sparse/lib/Kconfig.debug    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/lib/Kconfig.debug    Wed Feb  1 18:00:19 2006
@@ -128,7 +128,7 @@
 config DEBUG_BUGVERBOSE
        bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
        depends on BUG
-       depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || (X86 && !X86_64) || FRV
+       depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
        default !EMBEDDED
        help
          Say Y here to make BUG() panics output the file name and line number
@@ -168,13 +168,34 @@
 
          If unsure, say N.
 
+config DEBUG_VM
+       bool "Debug VM"
+       depends on DEBUG_KERNEL
+       help
+         Enable this to debug the virtual-memory system.
+
+         If unsure, say N.
+
 config FRAME_POINTER
        bool "Compile the kernel with frame pointers"
        depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML)
        default y if DEBUG_INFO && UML
        help
          If you say Y here the resulting kernel image will be slightly larger
-         and slower, but it might give very useful debugging information
-         on some architectures or you use external debuggers.
+         and slower, but it might give very useful debugging information on
+         some architectures or if you use external debuggers.
          If you don't debug the kernel, you can say N.
 
+config RCU_TORTURE_TEST
+       tristate "torture tests for RCU"
+       depends on DEBUG_KERNEL
+       default n
+       help
+         This option provides a kernel module that runs torture tests
+         on the RCU infrastructure.  The kernel module may be built
+         after the fact on the running kernel to be tested, if desired.
+
+         Say Y here if you want RCU torture tests to start automatically
+         at boot time (you probably don't).
+         Say M if you want the RCU torture tests to build as a module.
+         Say N if you are unsure.
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/mm/highmem.c
--- a/linux-2.6-xen-sparse/mm/highmem.c Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/mm/highmem.c Wed Feb  1 18:00:19 2006
@@ -30,11 +30,9 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
-{
-       unsigned int gfp = gfp_mask | (unsigned int) (long) data;
-
-       return alloc_page(gfp);
+static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
+{
+       return alloc_page(gfp_mask | GFP_DMA);
 }
 
 static void page_pool_free(void *page, void *data)
@@ -51,6 +49,12 @@
  *  n means that there are (n-1) current users of it.
  */
 #ifdef CONFIG_HIGHMEM
+
+static void *page_pool_alloc(gfp_t gfp_mask, void *data)
+{
+       return alloc_page(gfp_mask);
+}
+
 static int pkmap_count[LAST_PKMAP];
 static unsigned int last_pkmap_nr;
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
@@ -278,7 +282,7 @@
        if (isa_page_pool)
                return 0;
 
-       isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
+       isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
        if (!isa_page_pool)
                BUG();
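For illustration only (not from the patch): with page_pool_alloc_isa() the GFP_DMA restriction is baked into the allocation callback instead of being smuggled through the pool's opaque data pointer. A sketch of creating a similar pool (hypothetical size and names):

static void *demo_alloc_dma_page(gfp_t gfp_mask, void *data)
{
        return alloc_page(gfp_mask | GFP_DMA);  /* data is unused */
}

static mempool_t *demo_make_pool(void)
{
        return mempool_create(16, demo_alloc_dma_page, page_pool_free, NULL);
}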
 
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/mm/memory.c  Wed Feb  1 18:00:19 2006
@@ -114,6 +114,7 @@
 {
        struct page *page = pmd_page(*pmd);
        pmd_clear(pmd);
+       pte_lock_deinit(page);
        pte_free_tlb(tlb, page);
        dec_page_state(nr_page_table_pages);
        tlb->mm->nr_ptes--;
@@ -249,7 +250,7 @@
                free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
        } while (pgd++, addr = next, addr != end);
 
-       if (!tlb_is_full_mm(*tlb))
+       if (!(*tlb)->fullmm)
                flush_tlb_pgtables((*tlb)->mm, start, end);
 }
 
@@ -259,6 +260,12 @@
        while (vma) {
                struct vm_area_struct *next = vma->vm_next;
                unsigned long addr = vma->vm_start;
+
+               /*
+                * Hide vma from rmap and vmtruncate before freeing pgtables
+                */
+               anon_vma_unlink(vma);
+               unlink_file_vma(vma);
 
                if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
                        hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
@@ -272,6 +279,8 @@
                                                        HPAGE_SIZE)) {
                                vma = next;
                                next = vma->vm_next;
+                               anon_vma_unlink(vma);
+                               unlink_file_vma(vma);
                        }
                        free_pgd_range(tlb, addr, vma->vm_end,
                                floor, next? next->vm_start: ceiling);
@@ -280,75 +289,141 @@
        }
 }
 
-pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
-                               unsigned long address)
-{
-       if (!pmd_present(*pmd)) {
-               struct page *new;
-
-               spin_unlock(&mm->page_table_lock);
-               new = pte_alloc_one(mm, address);
-               spin_lock(&mm->page_table_lock);
-               if (!new)
-                       return NULL;
-               /*
-                * Because we dropped the lock, we should re-check the
-                * entry, as somebody else could have populated it..
-                */
-               if (pmd_present(*pmd)) {
-                       pte_free(new);
-                       goto out;
-               }
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+{
+       struct page *new = pte_alloc_one(mm, address);
+       if (!new)
+               return -ENOMEM;
+
+       pte_lock_init(new);
+       spin_lock(&mm->page_table_lock);
+       if (pmd_present(*pmd)) {        /* Another has populated it */
+               pte_lock_deinit(new);
+               pte_free(new);
+       } else {
                mm->nr_ptes++;
                inc_page_state(nr_page_table_pages);
                pmd_populate(mm, pmd, new);
        }
-out:
-       return pte_offset_map(pmd, address);
-}
-
-pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
-{
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               spin_unlock(&mm->page_table_lock);
-               new = pte_alloc_one_kernel(mm, address);
-               spin_lock(&mm->page_table_lock);
-               if (!new)
+       spin_unlock(&mm->page_table_lock);
+       return 0;
+}
+
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+{
+       pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+       if (!new)
+               return -ENOMEM;
+
+       spin_lock(&init_mm.page_table_lock);
+       if (pmd_present(*pmd))          /* Another has populated it */
+               pte_free_kernel(new);
+       else
+               pmd_populate_kernel(&init_mm, pmd, new);
+       spin_unlock(&init_mm.page_table_lock);
+       return 0;
+}
+
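Not part of the changeset: __pte_alloc() now allocates with no lock held and re-checks pmd_present() under the lock, discarding its page if another thread won the race. The same pattern in standalone pthread-based C (hypothetical names):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static void *demo_slot;                 /* stands in for the pmd entry */

static int demo_install_slot(void)
{
        void *new_obj = malloc(64);     /* allocate before taking the lock */

        if (!new_obj)
                return -1;              /* -ENOMEM in the kernel version */
        pthread_mutex_lock(&demo_lock);
        if (demo_slot)                  /* somebody else populated it first */
                free(new_obj);
        else
                demo_slot = new_obj;
        pthread_mutex_unlock(&demo_lock);
        return 0;
}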
+static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+{
+       if (file_rss)
+               add_mm_counter(mm, file_rss, file_rss);
+       if (anon_rss)
+               add_mm_counter(mm, anon_rss, anon_rss);
+}
+
+/*
+ * This function is called to print an error when a bad pte
+ * is found. For example, we might have a PFN-mapped pte in
+ * a region that doesn't allow it.
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+       printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+                       "vm_flags = %lx, vaddr = %lx\n",
+               (long long)pte_val(pte),
+               (vma->vm_mm == current->mm ? current->comm : "???"),
+               vma->vm_flags, vaddr);
+       dump_stack();
+}
+
+static inline int is_cow_mapping(unsigned int flags)
+{
+       return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
+/*
+ * This function gets the "struct page" associated with a pte.
+ *
+ * NOTE! Some mappings do not have "struct pages". A raw PFN mapping
+ * will have each page table entry just pointing to a raw page frame
+ * number, and as far as the VM layer is concerned, those do not have
+ * pages associated with them - even if the PFN might point to memory
+ * that otherwise is perfectly fine and has a "struct page".
+ *
+ * The way we recognize those mappings is through the rules set up
+ * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set,
+ * and the vm_pgoff will point to the first PFN mapped: thus every
+ * page that is a raw mapping will always honor the rule
+ *
+ *     pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
+ *
+ * and if that isn't true, the page has been COW'ed (in which case it
+ * _does_ have a "struct page" associated with it even if it is in a
+ * VM_PFNMAP range).
+ */
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+       unsigned long pfn = pte_pfn(pte);
+
+       if (vma->vm_flags & VM_PFNMAP) {
+               unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
+               if (pfn == vma->vm_pgoff + off)
                        return NULL;
-
-               /*
-                * Because we dropped the lock, we should re-check the
-                * entry, as somebody else could have populated it..
-                */
-               if (pmd_present(*pmd)) {
-                       pte_free_kernel(new);
-                       goto out;
-               }
-               pmd_populate_kernel(mm, pmd, new);
-       }
-out:
-       return pte_offset_kernel(pmd, address);
+               if (!is_cow_mapping(vma->vm_flags))
+                       return NULL;
+       }
+
+       /*
+        * Add some anal sanity checks for now. Eventually,
+        * we should just do "return pfn_to_page(pfn)", but
+        * in the meantime we check that we get a valid pfn,
+        * and that the resulting page looks ok.
+        *
+        * Remove this test eventually!
+        */
+       if (unlikely(!pfn_valid(pfn))) {
+               if (!(vma->vm_flags & VM_RESERVED))
+                       print_bad_pte(vma, pte, addr);
+               return NULL;
+       }
+
+       /*
+        * NOTE! We still have PageReserved() pages in the page 
+        * tables. 
+        *
+        * The PAGE_ZERO() pages and various VDSO mappings can
+        * cause them to exist.
+        */
+       return pfn_to_page(pfn);
 }
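As an aside (not part of the changeset): the VM_PFNMAP rule quoted in the comment above is plain arithmetic and can be checked in isolation (hypothetical stand-ins for the vma fields, PAGE_SHIFT assumed to be 12):

#include <stdbool.h>

#define DEMO_PAGE_SHIFT 12

static bool demo_is_raw_pfn_mapping(unsigned long pfn, unsigned long vm_start,
                                    unsigned long vm_pgoff, unsigned long addr)
{
        unsigned long off = (addr - vm_start) >> DEMO_PAGE_SHIFT;

        return pfn == vm_pgoff + off;   /* equal: raw pfn; else it was COWed */
}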
 
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
- *
- * dst->page_table_lock is held on entry and exit,
- * but may be dropped within p[mg]d_alloc() and pte_alloc_map().
  */
 
 static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-               pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
-               unsigned long addr)
-{
+               pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+               unsigned long addr, int *rss)
+{
+       unsigned long vm_flags = vma->vm_flags;
        pte_t pte = *src_pte;
        struct page *page;
-       unsigned long pfn;
 
        /* pte contains position in swap or file, so copy. */
        if (unlikely(!pte_present(pte))) {
@@ -357,34 +432,20 @@
                        /* make sure dst_mm is on swapoff's mmlist. */
                        if (unlikely(list_empty(&dst_mm->mmlist))) {
                                spin_lock(&mmlist_lock);
-                               list_add(&dst_mm->mmlist, &src_mm->mmlist);
+                               if (list_empty(&dst_mm->mmlist))
+                                       list_add(&dst_mm->mmlist,
+                                                &src_mm->mmlist);
                                spin_unlock(&mmlist_lock);
                        }
                }
-               set_pte_at(dst_mm, addr, dst_pte, pte);
-               return;
-       }
-
-       pfn = pte_pfn(pte);
-       /* the pte points outside of valid memory, the
-        * mapping is assumed to be good, meaningful
-        * and not mapped via rmap - duplicate the
-        * mapping as is.
-        */
-       page = NULL;
-       if (pfn_valid(pfn))
-               page = pfn_to_page(pfn);
-
-       if (!page || PageReserved(page)) {
-               set_pte_at(dst_mm, addr, dst_pte, pte);
-               return;
+               goto out_set_pte;
        }
 
        /*
         * If it's a COW mapping, write protect it both
         * in the parent and the child
         */
-       if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+       if (is_cow_mapping(vm_flags)) {
                ptep_set_wrprotect(src_mm, addr, src_pte);
                pte = *src_pte;
        }
@@ -396,12 +457,16 @@
        if (vm_flags & VM_SHARED)
                pte = pte_mkclean(pte);
        pte = pte_mkold(pte);
-       get_page(page);
-       inc_mm_counter(dst_mm, rss);
-       if (PageAnon(page))
-               inc_mm_counter(dst_mm, anon_rss);
+
+       page = vm_normal_page(vma, addr, pte);
+       if (page) {
+               get_page(page);
+               page_dup_rmap(page);
+               rss[!!PageAnon(page)]++;
+       }
+
+out_set_pte:
        set_pte_at(dst_mm, addr, dst_pte, pte);
-       page_dup_rmap(page);
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -409,38 +474,44 @@
                unsigned long addr, unsigned long end)
 {
        pte_t *src_pte, *dst_pte;
-       unsigned long vm_flags = vma->vm_flags;
-       int progress;
+       spinlock_t *src_ptl, *dst_ptl;
+       int progress = 0;
+       int rss[2];
 
 again:
-       dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+       rss[1] = rss[0] = 0;
+       dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
        if (!dst_pte)
                return -ENOMEM;
        src_pte = pte_offset_map_nested(src_pmd, addr);
-
-       progress = 0;
-       spin_lock(&src_mm->page_table_lock);
+       src_ptl = pte_lockptr(src_mm, src_pmd);
+       spin_lock(src_ptl);
+
        do {
                /*
                 * We are holding two locks at this point - either of them
                 * could generate latencies in another task on another CPU.
                 */
-               if (progress >= 32 && (need_resched() ||
-                   need_lockbreak(&src_mm->page_table_lock) ||
-                   need_lockbreak(&dst_mm->page_table_lock)))
-                       break;
+               if (progress >= 32) {
+                       progress = 0;
+                       if (need_resched() ||
+                           need_lockbreak(src_ptl) ||
+                           need_lockbreak(dst_ptl))
+                               break;
+               }
                if (pte_none(*src_pte)) {
                        progress++;
                        continue;
                }
-               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
                progress += 8;
        } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
-       spin_unlock(&src_mm->page_table_lock);
-
+
+       spin_unlock(src_ptl);
        pte_unmap_nested(src_pte - 1);
-       pte_unmap(dst_pte - 1);
-       cond_resched_lock(&dst_mm->page_table_lock);
+       add_mm_rss(dst_mm, rss[0], rss[1]);
+       pte_unmap_unlock(dst_pte - 1, dst_ptl);
+       cond_resched();
        if (addr != end)
                goto again;
        return 0;
@@ -504,7 +575,7 @@
         * readonly mappings. The tradeoff is that copy_page_range is more
         * efficient than faulting.
         */
-       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
                if (!vma->anon_vma)
                        return 0;
        }
@@ -525,25 +596,30 @@
        return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
-                               struct zap_details *details)
-{
+                               long *zap_work, struct zap_details *details)
+{
+       struct mm_struct *mm = tlb->mm;
        pte_t *pte;
-
-       pte = pte_offset_map(pmd, addr);
+       spinlock_t *ptl;
+       int file_rss = 0;
+       int anon_rss = 0;
+
+       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        do {
                pte_t ptent = *pte;
-               if (pte_none(ptent))
+               if (pte_none(ptent)) {
+                       (*zap_work)--;
                        continue;
+               }
                if (pte_present(ptent)) {
-                       struct page *page = NULL;
-                       unsigned long pfn = pte_pfn(ptent);
-                       if (pfn_valid(pfn)) {
-                               page = pfn_to_page(pfn);
-                               if (PageReserved(page))
-                                       page = NULL;
-                       }
+                       struct page *page;
+
+                       (*zap_work) -= PAGE_SIZE;
+
+                       page = vm_normal_page(vma, addr, ptent);
                        if (unlikely(details) && page) {
                                /*
                                 * unmap_shared_mapping_pages() wants to
@@ -562,7 +638,7 @@
                                     page->index > details->last_index))
                                        continue;
                        }
-                       ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+                       ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
@@ -570,15 +646,17 @@
                        if (unlikely(details) && details->nonlinear_vma
                            && linear_page_index(details->nonlinear_vma,
                                                addr) != page->index)
-                               set_pte_at(tlb->mm, addr, pte,
+                               set_pte_at(mm, addr, pte,
                                           pgoff_to_pte(page->index));
-                       if (pte_dirty(ptent))
-                               set_page_dirty(page);
                        if (PageAnon(page))
-                               dec_mm_counter(tlb->mm, anon_rss);
-                       else if (pte_young(ptent))
-                               mark_page_accessed(page);
-                       tlb->freed++;
+                               anon_rss--;
+                       else {
+                               if (pte_dirty(ptent))
+                                       set_page_dirty(page);
+                               if (pte_young(ptent))
+                                       mark_page_accessed(page);
+                               file_rss--;
+                       }
                        page_remove_rmap(page);
                        tlb_remove_page(tlb, page);
                        continue;
@@ -591,14 +669,19 @@
                        continue;
                if (!pte_file(ptent))
                        free_swap_and_cache(pte_to_swp_entry(ptent));
-               pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
-       } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(pte - 1);
-}
-
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+               pte_clear_full(mm, addr, pte, tlb->fullmm);
+       } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+
+       add_mm_rss(mm, file_rss, anon_rss);
+       pte_unmap_unlock(pte - 1, ptl);
+
+       return addr;
+}
+
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pud_t *pud,
                                unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
 {
        pmd_t *pmd;
        unsigned long next;
@@ -606,15 +689,21 @@
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_none_or_clear_bad(pmd))
+               if (pmd_none_or_clear_bad(pmd)) {
+                       (*zap_work)--;
                        continue;
-               zap_pte_range(tlb, pmd, addr, next, details);
-       } while (pmd++, addr = next, addr != end);
-}
-
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+               }
+               next = zap_pte_range(tlb, vma, pmd, addr, next,
+                                               zap_work, details);
+       } while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+       return addr;
+}
+
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pgd_t *pgd,
                                unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
 {
        pud_t *pud;
        unsigned long next;
@@ -622,15 +711,21 @@
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
-               if (pud_none_or_clear_bad(pud))
+               if (pud_none_or_clear_bad(pud)) {
+                       (*zap_work)--;
                        continue;
-               zap_pmd_range(tlb, pud, addr, next, details);
-       } while (pud++, addr = next, addr != end);
-}
-
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+               }
+               next = zap_pmd_range(tlb, vma, pud, addr, next,
+                                               zap_work, details);
+       } while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+       return addr;
+}
+
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma,
                                unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
 {
        pgd_t *pgd;
        unsigned long next;
@@ -643,11 +738,16 @@
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
                next = pgd_addr_end(addr, end);
-               if (pgd_none_or_clear_bad(pgd))
+               if (pgd_none_or_clear_bad(pgd)) {
+                       (*zap_work)--;
                        continue;
-               zap_pud_range(tlb, pgd, addr, next, details);
-       } while (pgd++, addr = next, addr != end);
+               }
+               next = zap_pud_range(tlb, vma, pgd, addr, next,
+                                               zap_work, details);
+       } while (pgd++, addr = next, (addr != end && *zap_work > 0));
        tlb_end_vma(tlb, vma);
+
+       return addr;
 }
 
 #ifdef CONFIG_PREEMPT
@@ -660,7 +760,6 @@
 /**
  * unmap_vmas - unmap a range of memory covered by a list of vma's
  * @tlbp: address of the caller's struct mmu_gather
- * @mm: the controlling mm_struct
  * @vma: the starting vma
  * @start_addr: virtual address at which to start unmapping
  * @end_addr: virtual address at which to end unmapping
@@ -669,10 +768,10 @@
  *
  * Returns the end address of the unmapping (restart addr if interrupted).
  *
- * Unmap all pages in the vma list.  Called under page_table_lock.
- *
- * We aim to not hold page_table_lock for too long (for scheduling latency
- * reasons).  So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
+ * Unmap all pages in the vma list.
+ *
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
  * return the ending mmu_gather to the caller.
  *
  * Only addresses between `start' and `end' will be unmapped.
@@ -684,17 +783,17 @@
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
                struct vm_area_struct *vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *details)
 {
-       unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+       long zap_work = ZAP_BLOCK_SIZE;
        unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
        int tlb_start_valid = 0;
        unsigned long start = start_addr;
        spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
-       int fullmm = tlb_is_full_mm(*tlbp);
+       int fullmm = (*tlbp)->fullmm;
 
        for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
                unsigned long end;
@@ -710,45 +809,39 @@
                        *nr_accounted += (end - start) >> PAGE_SHIFT;
 
                while (start != end) {
-                       unsigned long block;
-
                        if (!tlb_start_valid) {
                                tlb_start = start;
                                tlb_start_valid = 1;
                        }
 
-                       if (is_vm_hugetlb_page(vma)) {
-                               block = end - start;
+                       if (unlikely(is_vm_hugetlb_page(vma))) {
                                unmap_hugepage_range(vma, start, end);
-                       } else {
-                               block = min(zap_bytes, end - start);
-                               unmap_page_range(*tlbp, vma, start,
-                                               start + block, details);
+                               zap_work -= (end - start) /
+                                               (HPAGE_SIZE / PAGE_SIZE);
+                               start = end;
+                       } else
+                               start = unmap_page_range(*tlbp, vma,
+                                               start, end, &zap_work, details);
+
+                       if (zap_work > 0) {
+                               BUG_ON(start != end);
+                               break;
                        }
 
-                       start += block;
-                       zap_bytes -= block;
-                       if ((long)zap_bytes > 0)
-                               continue;
-
                        tlb_finish_mmu(*tlbp, tlb_start, start);
 
                        if (need_resched() ||
-                               need_lockbreak(&mm->page_table_lock) ||
                                (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
                                if (i_mmap_lock) {
-                                       /* must reset count of rss freed */
-                                       *tlbp = tlb_gather_mmu(mm, fullmm);
+                                       *tlbp = NULL;
                                        goto out;
                                }
-                               spin_unlock(&mm->page_table_lock);
                                cond_resched();
-                               spin_lock(&mm->page_table_lock);
                        }
 
-                       *tlbp = tlb_gather_mmu(mm, fullmm);
+                       *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
                        tlb_start_valid = 0;
-                       zap_bytes = ZAP_BLOCK_SIZE;
+                       zap_work = ZAP_BLOCK_SIZE;
                }
        }
 out:
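A side note, not from the patch: unmap_vmas() now charges progress against a signed zap_work budget and returns a restart address instead of counting raw bytes. The control flow reduces to a loop like this standalone sketch (hypothetical unit of work):

static unsigned long demo_zap_range(unsigned long addr, unsigned long end,
                                    long *zap_work)
{
        while (addr != end && *zap_work > 0) {
                /* ... tear down one page's worth of mappings here ... */
                (*zap_work)--;
                addr++;                 /* one unit of progress */
        }
        return addr;    /* caller drops locks, resets the budget, resumes here */
}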
@@ -770,123 +863,92 @@
        unsigned long end = address + size;
        unsigned long nr_accounted = 0;
 
-       if (is_vm_hugetlb_page(vma)) {
-               zap_hugepage_range(vma, address, size);
-               return end;
-       }
-
        lru_add_drain();
-       spin_lock(&mm->page_table_lock);
        tlb = tlb_gather_mmu(mm, 0);
-       end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
-       tlb_finish_mmu(tlb, address, end);
-       spin_unlock(&mm->page_table_lock);
+       update_hiwater_rss(mm);
+       end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+       if (tlb)
+               tlb_finish_mmu(tlb, address, end);
        return end;
 }
 
 /*
  * Do a quick page-table lookup for a single page.
- * mm->page_table_lock must be held.
- */
-static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
-                       int read, int write, int accessed)
+ */
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+                       unsigned int flags)
 {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
-       unsigned long pfn;
+       spinlock_t *ptl;
        struct page *page;
-
-       page = follow_huge_addr(mm, address, write);
-       if (! IS_ERR(page))
-               return page;
-
+       struct mm_struct *mm = vma->vm_mm;
+
+       page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+       if (!IS_ERR(page)) {
+               BUG_ON(flags & FOLL_GET);
+               goto out;
+       }
+
+       page = NULL;
        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-               goto out;
+               goto no_page_table;
 
        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-               goto out;
+               goto no_page_table;
        
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+               goto no_page_table;
+
+       if (pmd_huge(*pmd)) {
+               BUG_ON(flags & FOLL_GET);
+               page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
                goto out;
-       if (pmd_huge(*pmd))
-               return follow_huge_pmd(mm, address, pmd, write);
-
-       ptep = pte_offset_map(pmd, address);
+       }
+
+       ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (!ptep)
                goto out;
 
        pte = *ptep;
-       pte_unmap(ptep);
-       if (pte_present(pte)) {
-               if (write && !pte_write(pte))
-                       goto out;
-               if (read && !pte_read(pte))
-                       goto out;
-               pfn = pte_pfn(pte);
-               if (pfn_valid(pfn)) {
-                       page = pfn_to_page(pfn);
-                       if (accessed) {
-                               if (write && !pte_dirty(pte) &&!PageDirty(page))
-                                       set_page_dirty(page);
-                               mark_page_accessed(page);
-                       }
-                       return page;
-               }
-       }
-
+       if (!pte_present(pte))
+               goto unlock;
+       if ((flags & FOLL_WRITE) && !pte_write(pte))
+               goto unlock;
+       page = vm_normal_page(vma, address, pte);
+       if (unlikely(!page))
+               goto unlock;
+
+       if (flags & FOLL_GET)
+               get_page(page);
+       if (flags & FOLL_TOUCH) {
+               if ((flags & FOLL_WRITE) &&
+                   !pte_dirty(pte) && !PageDirty(page))
+                       set_page_dirty(page);
+               mark_page_accessed(page);
+       }
+unlock:
+       pte_unmap_unlock(ptep, ptl);
 out:
-       return NULL;
-}
-
-inline struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write)
-{
-       return __follow_page(mm, address, 0, write, 1);
-}
-
-/*
- * check_user_page_readable() can be called from interrupt context by oprofile,
- * so we need to avoid taking any non-irq-safe locks
- */
-int check_user_page_readable(struct mm_struct *mm, unsigned long address)
-{
-       return __follow_page(mm, address, 1, 0, 0) != NULL;
-}
-EXPORT_SYMBOL(check_user_page_readable);
-
-static inline int
-untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
-                        unsigned long address)
-{
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       /* Check if the vma is for an anonymous mapping. */
-       if (vma->vm_ops && vma->vm_ops->nopage)
-               return 0;
-
-       /* Check if page directory entry exists. */
-       pgd = pgd_offset(mm, address);
-       if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-               return 1;
-
-       pud = pud_offset(pgd, address);
-       if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-               return 1;
-
-       /* Check if page middle directory entry exists. */
-       pmd = pmd_offset(pud, address);
-       if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-               return 1;
-
-       /* There is a pte slot for 'address' in 'mm'. */
-       return 0;
+       return page;
+
+no_page_table:
+       /*
+        * When core dumping an enormous anonymous area that nobody
+        * has touched so far, we don't want to allocate page tables.
+        */
+       if (flags & FOLL_ANON) {
+               page = ZERO_PAGE(address);
+               if (flags & FOLL_GET)
+                       get_page(page);
+               BUG_ON(flags & FOLL_WRITE);
+       }
+       return page;
 }
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -894,18 +956,19 @@
                struct page **pages, struct vm_area_struct **vmas)
 {
        int i;
-       unsigned int flags;
+       unsigned int vm_flags;
 
        /* 
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
         */
-       flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
-       flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+       vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+       vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;
 
        do {
-               struct vm_area_struct * vma;
+               struct vm_area_struct *vma;
+               unsigned int foll_flags;
 
                vma = find_extend_vma(mm, start);
                if (!vma && in_gate_area(tsk, start)) {
@@ -933,8 +996,10 @@
                                return i ? : -EFAULT;
                        }
                        if (pages) {
-                               pages[i] = pte_page(*pte);
-                               get_page(pages[i]);
+                               struct page *page = vm_normal_page(gate_vma, start, *pte);
+                               pages[i] = page;
+                               if (page)
+                                       get_page(page);
                        }
                        pte_unmap(pte);
                        if (vmas)
@@ -962,8 +1027,8 @@
                        }
                 }
 #endif
-               if (!vma || (vma->vm_flags & VM_IO)
-                               || !(flags & vma->vm_flags))
+               if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
+                               || !(vm_flags & vma->vm_flags))
                        return i ? : -EFAULT;
 
                if (is_vm_hugetlb_page(vma)) {
@@ -971,29 +1036,25 @@
                                                &start, &len, i);
                        continue;
                }
-               spin_lock(&mm->page_table_lock);
+
+               foll_flags = FOLL_TOUCH;
+               if (pages)
+                       foll_flags |= FOLL_GET;
+               if (!write && !(vma->vm_flags & VM_LOCKED) &&
+                   (!vma->vm_ops || !vma->vm_ops->nopage))
+                       foll_flags |= FOLL_ANON;
+
                do {
-                       int write_access = write;
                        struct page *page;
 
-                       cond_resched_lock(&mm->page_table_lock);
-                       while (!(page = follow_page(mm, start, write_access))) {
+                       if (write)
+                               foll_flags |= FOLL_WRITE;
+
+                       cond_resched();
+                       while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
-
-                               /*
-                                * Shortcut for anonymous pages. We don't want
-                                * to force the creation of pages tables for
-                                * insanely big anonymously mapped areas that
-                                * nobody touched so far. This is important
-                                * for doing a core dump for these mappings.
-                                */
-                               if (!write && untouched_anonymous_page(mm,vma,start)) {
-                                       page = ZERO_PAGE(start);
-                                       break;
-                               }
-                               spin_unlock(&mm->page_table_lock);
-                               ret = __handle_mm_fault(mm, vma, start, write_access);
-
+                               ret = __handle_mm_fault(mm, vma, start,
+                                               foll_flags & FOLL_WRITE);
                                /*
                                 * The VM_FAULT_WRITE bit tells us that do_wp_page has
                                 * broken COW when necessary, even if maybe_mkwrite
@@ -1001,7 +1062,7 @@
                                 * subsequent page lookups as if they were reads.
                                 */
                                if (ret & VM_FAULT_WRITE)
-                                       write_access = 0;
+                                       foll_flags &= ~FOLL_WRITE;
                                
                                switch (ret & ~VM_FAULT_WRITE) {
                                case VM_FAULT_MINOR:
@@ -1017,13 +1078,10 @@
                                default:
                                        BUG();
                                }
-                               spin_lock(&mm->page_table_lock);
                        }
                        if (pages) {
                                pages[i] = page;
                                flush_dcache_page(page);
-                               if (!PageReserved(page))
-                                       page_cache_get(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
@@ -1031,7 +1089,6 @@
                        start += PAGE_SIZE;
                        len--;
                } while (len && start < vma->vm_end);
-               spin_unlock(&mm->page_table_lock);
        } while (len);
        return i;
 }
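
As an aside, here is a minimal, hedged sketch (C, not part of this changeset) of how an in-kernel caller would typically pin a user buffer through the 2.6.15 get_user_pages() interface shown above. The helper names are hypothetical and error/partial-pin handling is simplified.

/*
 * Hypothetical caller of get_user_pages(): pin 'npages' pages of a user
 * buffer, then drop the references again.  mmap_sem is taken for read
 * around the call, as get_user_pages() requires.
 */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/pagemap.h>

static int example_pin_user_buffer(unsigned long uaddr, int npages,
                                   struct page **pages)
{
        int got;

        down_read(&current->mm->mmap_sem);
        got = get_user_pages(current, current->mm, uaddr & PAGE_MASK,
                             npages, 1 /* write */, 0 /* force */,
                             pages, NULL);
        up_read(&current->mm->mmap_sem);

        return got;     /* pages actually pinned, or -errno */
}

static void example_unpin_user_buffer(struct page **pages, int npages)
{
        int i;

        for (i = 0; i < npages; i++)
                page_cache_release(pages[i]);
}
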
@@ -1041,16 +1098,21 @@
                        unsigned long addr, unsigned long end, pgprot_t prot)
 {
        pte_t *pte;
-
-       pte = pte_alloc_map(mm, pmd, addr);
+       spinlock_t *ptl;
+
+       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
                return -ENOMEM;
        do {
-               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+               struct page *page = ZERO_PAGE(addr);
+               pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+               page_cache_get(page);
+               page_add_file_rmap(page);
+               inc_mm_counter(mm, file_rss);
                BUG_ON(!pte_none(*pte));
                set_pte_at(mm, addr, pte, zero_pte);
        } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(pte - 1);
+       pte_unmap_unlock(pte - 1, ptl);
        return 0;
 }
 
@@ -1100,16 +1162,94 @@
        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
-       spin_lock(&mm->page_table_lock);
        do {
                next = pgd_addr_end(addr, end);
                err = zeromap_pud_range(mm, pgd, addr, next, prot);
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
-       spin_unlock(&mm->page_table_lock);
        return err;
 }
+
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+       pgd_t * pgd = pgd_offset(mm, addr);
+       pud_t * pud = pud_alloc(mm, pgd, addr);
+       if (pud) {
+               pmd_t * pmd = pmd_alloc(mm, pud, addr);
+               if (pmd)
+                       return pte_alloc_map_lock(mm, pmd, addr, ptl);
+       }
+       return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+       int retval;
+       pte_t *pte;
+       spinlock_t *ptl;  
+
+       retval = -EINVAL;
+       if (PageAnon(page))
+               goto out;
+       retval = -ENOMEM;
+       flush_dcache_page(page);
+       pte = get_locked_pte(mm, addr, &ptl);
+       if (!pte)
+               goto out;
+       retval = -EBUSY;
+       if (!pte_none(*pte))
+               goto out_unlock;
+
+       /* Ok, finally just insert the thing.. */
+       get_page(page);
+       inc_mm_counter(mm, file_rss);
+       page_add_file_rmap(page);
+       set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+       retval = 0;
+out_unlock:
+       pte_unmap_unlock(pte, ptl);
+out:
+       return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return -EFAULT;
+       if (!page_count(page))
+               return -EINVAL;
+       vma->vm_flags |= VM_INSERTPAGE;
+       return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
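
As an illustrative aside, a minimal sketch of the driver-side usage the comment above describes; example_page and example_mmap are hypothetical, and the page is assumed to be a single, non-compound page obtained with alloc_page(GFP_KERNEL).

/*
 * Hypothetical driver ->mmap() method built on vm_insert_page(): map one
 * kernel-allocated page into the caller's vma.  vma->vm_page_prot is left
 * exactly as the caller requested, per the NOTE above.
 */
#include <linux/mm.h>
#include <linux/fs.h>

static struct page *example_page;       /* assumed: set up at init time */

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;
        return vm_insert_page(vma, vma->vm_start, example_page);
}
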
 
 /*
  * maps a range of physical memory into the requested pages. the old
@@ -1121,17 +1261,17 @@
                        unsigned long pfn, pgprot_t prot)
 {
        pte_t *pte;
-
-       pte = pte_alloc_map(mm, pmd, addr);
+       spinlock_t *ptl;
+
+       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
                return -ENOMEM;
        do {
                BUG_ON(!pte_none(*pte));
-               if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-                       set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+               set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(pte - 1);
+       pte_unmap_unlock(pte - 1, ptl);
        return 0;
 }
 
@@ -1190,16 +1330,31 @@
         * rest of the world about it:
         *   VM_IO tells people not to look at these pages
         *      (accesses can have side effects).
-        *   VM_RESERVED tells swapout not to try to touch
-        *      this region.
+        *   VM_RESERVED is specified all over the place, because
+        *      in 2.4 it kept swapout's vma scan off this vma; but
+        *      in 2.6 the LRU scan won't even find its pages, so this
+        *      flag means no more than count its pages in reserved_vm,
+        *      and omit it from core dump, even when VM_IO turned off.
+        *   VM_PFNMAP tells the core MM that the base pages are just
+        *      raw PFN mappings, and do not have a "struct page" associated
+        *      with them.
+        *
+        * There's a horrible special case to handle copy-on-write
+        * behaviour that some programs depend on. We mark the "original"
+        * un-COW'ed pages by matching them up with "vma->vm_pgoff".
         */
-       vma->vm_flags |= VM_IO | VM_RESERVED;
+       if (is_cow_mapping(vma->vm_flags)) {
+               if (addr != vma->vm_start || end != vma->vm_end)
+                       return -EINVAL;
+               vma->vm_pgoff = pfn;
+       }
+
+       vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
        BUG_ON(addr >= end);
        pfn -= addr >> PAGE_SHIFT;
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
-       spin_lock(&mm->page_table_lock);
        do {
                next = pgd_addr_end(addr, end);
                err = remap_pud_range(mm, pgd, addr, next,
@@ -1207,7 +1362,6 @@
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
-       spin_unlock(&mm->page_table_lock);
        return err;
 }
 EXPORT_SYMBOL(remap_pfn_range);
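
For context, a hedged sketch of the usual driver-side call into remap_pfn_range() under the new VM_PFNMAP rules; example_phys_base and example_phys_mmap are hypothetical. The whole vma is mapped in a single call, which also satisfies the is_cow_mapping() restriction added above for private mappings.

/*
 * Hypothetical driver ->mmap() built on remap_pfn_range(): expose a
 * physical region starting at example_phys_base (assumed to have been
 * discovered at probe time) to user space.
 */
#include <linux/mm.h>
#include <linux/fs.h>

static unsigned long example_phys_base;         /* assumed: bus/phys address */

static int example_phys_mmap(struct file *file, struct vm_area_struct *vma)
{
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long pfn = (example_phys_base >> PAGE_SHIFT) + vma->vm_pgoff;

        return remap_pfn_range(vma, vma->vm_start, pfn, size,
                               vma->vm_page_prot);
}
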
@@ -1224,7 +1378,7 @@
         struct page *pte_page;
 
         pte = (mm == &init_mm) ?
-                pte_alloc_kernel(mm, pmd, addr) :
+                pte_alloc_kernel(pmd, addr) :
                 pte_alloc_map(mm, pmd, addr);
         if (!pte)
                 return -ENOMEM;
@@ -1300,17 +1454,40 @@
 
        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
-       spin_lock(&mm->page_table_lock);
        do {
                next = pgd_addr_end(addr, end);
                err = generic_pud_range(mm, pgd, addr, next, fn, data);
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
-       spin_unlock(&mm->page_table_lock);
        return err;
 }
 #endif
+
+/*
+ * handle_pte_fault chooses page fault handler according to an entry
+ * which was read non-atomically.  Before making any commitment, on
+ * those architectures or configurations (e.g. i386 with PAE) which
+ * might give a mix of unmatched parts, do_swap_page and do_file_page
+ * must check under lock before unmapping the pte and proceeding
+ * (but do_wp_page is only called after already making such a check;
+ * and do_anonymous_page and do_no_page can safely check later on).
+ */
+static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
+                               pte_t *page_table, pte_t orig_pte)
+{
+       int same = 1;
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+       if (sizeof(pte_t) > sizeof(unsigned long)) {
+               spinlock_t *ptl = pte_lockptr(mm, pmd);
+               spin_lock(ptl);
+               same = pte_same(*page_table, orig_pte);
+               spin_unlock(ptl);
+       }
+#endif
+       pte_unmap(page_table);
+       return same;
+}
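
A small illustration (plain C, not kernel code) of the torn read that pte_unmap_same() guards against: on 32-bit PAE a 64-bit pte is fetched as two 32-bit loads, so a concurrent update can be observed half old, half new.

#include <stdint.h>

/* Reads a 64-bit value as two separate 32-bit halves, the way a PAE pte
 * is read without the pte lock; the two halves may come from different
 * versions of the entry. */
static uint64_t nonatomic_read_u64(const volatile uint32_t half[2])
{
        uint32_t lo = half[0];  /* may observe the old low word...      */
        uint32_t hi = half[1];  /* ...paired with a newer high word     */
        return ((uint64_t)hi << 32) | lo;
}
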
 
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
@@ -1325,28 +1502,37 @@
        return pte;
 }
 
-/*
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
- */
-static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
-               pte_t *page_table)
-{
-       pte_t entry;
-
-       entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
-                             vma);
-       ptep_establish(vma, address, page_table, entry);
-       update_mmu_cache(vma, address, entry);
-       lazy_mmu_prot_update(entry);
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+{
+       /*
+        * If the source page was a PFN mapping, we don't have
+        * a "struct page" for it. We do a best-effort copy by
+        * just copying from the original user address. If that
+        * fails, we just zero-fill it. Live with it.
+        */
+       if (unlikely(!src)) {
+               void *kaddr = kmap_atomic(dst, KM_USER0);
+               void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+               /*
+                * This really shouldn't fail, because the page is there
+                * in the page tables. But it might just be unreadable,
+                * in which case we just give up and fill the result with
+                * zeroes.
+                */
+               if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+                       memset(kaddr, 0, PAGE_SIZE);
+               kunmap_atomic(kaddr, KM_USER0);
+               return;
+               
+       }
+       copy_user_highpage(dst, src, va);
 }
 
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
  * and decrementing the shared-page counter for the old page.
- *
- * Goto-purists beware: the only reason for goto's here is that it results
- * in better assembly code.. The "default" path will see no jumps at all.
  *
  * Note that this routine assumes that the protection checks have been
  * done by the caller (the low-level page fault routine in most cases).
@@ -1357,105 +1543,76 @@
  * change only once the write actually happens. This avoids a few races,
  * and potentially makes it more efficient.
  *
- * We hold the mm semaphore and the page_table_lock on entry and exit
- * with the page_table_lock released.
- */
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
-       unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), with pte both mapped and locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               spinlock_t *ptl, pte_t orig_pte)
 {
        struct page *old_page, *new_page;
-       unsigned long pfn = pte_pfn(pte);
        pte_t entry;
-       int ret;
-#ifdef CONFIG_XEN
-       struct page invalid_page;
-#endif
-
-       if (unlikely(!pfn_valid(pfn))) {
-#ifdef CONFIG_XEN
-               /* This can happen with /dev/mem (PROT_WRITE, MAP_PRIVATE). */
-               invalid_page.flags = (1<<PG_reserved) | (1<<PG_locked);
-               old_page = &invalid_page;
-       } else {
-               old_page = pfn_to_page(pfn);
-#else
-               /*
-                * This should really halt the system so it can be debugged or
-                * at least the kernel stops what it's doing before it corrupts
-                * data, but for the moment just pretend this is OOM.
-                */
-               pte_unmap(page_table);
-               printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
-                               address);
-               spin_unlock(&mm->page_table_lock);
-               return VM_FAULT_OOM;
-#endif
-       }
-#ifndef CONFIG_XEN
-       old_page = pfn_to_page(pfn);
-#endif
+       int ret = VM_FAULT_MINOR;
+
+       old_page = vm_normal_page(vma, address, orig_pte);
+       if (!old_page)
+               goto gotten;
 
        if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
                int reuse = can_share_swap_page(old_page);
                unlock_page(old_page);
                if (reuse) {
-                       flush_cache_page(vma, address, pfn);
-                       entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
-                                             vma);
+                       flush_cache_page(vma, address, pte_pfn(orig_pte));
+                       entry = pte_mkyoung(orig_pte);
+                       entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                        ptep_set_access_flags(vma, address, page_table, entry, 1);
                        update_mmu_cache(vma, address, entry);
                        lazy_mmu_prot_update(entry);
-                       pte_unmap(page_table);
-                       spin_unlock(&mm->page_table_lock);
-                       return VM_FAULT_MINOR|VM_FAULT_WRITE;
+                       ret |= VM_FAULT_WRITE;
+                       goto unlock;
                }
        }
-       pte_unmap(page_table);
 
        /*
         * Ok, we need to copy. Oh, well..
         */
-       if (!PageReserved(old_page))
-               page_cache_get(old_page);
-       spin_unlock(&mm->page_table_lock);
+       page_cache_get(old_page);
+gotten:
+       pte_unmap_unlock(page_table, ptl);
 
        if (unlikely(anon_vma_prepare(vma)))
-               goto no_new_page;
+               goto oom;
        if (old_page == ZERO_PAGE(address)) {
                new_page = alloc_zeroed_user_highpage(vma, address);
                if (!new_page)
-                       goto no_new_page;
+                       goto oom;
        } else {
                new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
                if (!new_page)
-                       goto no_new_page;
-#ifndef CONFIG_XEN
-               copy_user_highpage(new_page, old_page, address);
-#else
-               if (old_page == &invalid_page) {
-                       char *vto = kmap_atomic(new_page, KM_USER1);
-                       copy_page(vto, (void *)(address & PAGE_MASK));
-                       kunmap_atomic(vto, KM_USER1);
-               } else {
-                       copy_user_highpage(new_page, old_page, address);
-               }
-#endif
-       }
+                       goto oom;
+               cow_user_page(new_page, old_page, address);
+       }
+
        /*
         * Re-check the pte - we dropped the lock
         */
-       ret = VM_FAULT_MINOR;
-       spin_lock(&mm->page_table_lock);
-       page_table = pte_offset_map(pmd, address);
-       if (likely(pte_same(*page_table, pte))) {
-               if (PageAnon(old_page))
-                       dec_mm_counter(mm, anon_rss);
-               if (PageReserved(old_page))
-                       inc_mm_counter(mm, rss);
-               else
+       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (likely(pte_same(*page_table, orig_pte))) {
+               if (old_page) {
                        page_remove_rmap(old_page);
-               flush_cache_page(vma, address, pfn);
-               break_cow(vma, new_page, address, page_table);
+                       if (!PageAnon(old_page)) {
+                               dec_mm_counter(mm, file_rss);
+                               inc_mm_counter(mm, anon_rss);
+                       }
+               } else
+                       inc_mm_counter(mm, anon_rss);
+               flush_cache_page(vma, address, pte_pfn(orig_pte));
+               entry = mk_pte(new_page, vma->vm_page_prot);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+               ptep_establish(vma, address, page_table, entry);
+               update_mmu_cache(vma, address, entry);
+               lazy_mmu_prot_update(entry);
                lru_cache_add_active(new_page);
                page_add_anon_rmap(new_page, vma, address);
 
@@ -1463,14 +1620,16 @@
                new_page = old_page;
                ret |= VM_FAULT_WRITE;
        }
-       pte_unmap(page_table);
-       page_cache_release(new_page);
-       page_cache_release(old_page);
-       spin_unlock(&mm->page_table_lock);
+       if (new_page)
+               page_cache_release(new_page);
+       if (old_page)
+               page_cache_release(old_page);
+unlock:
+       pte_unmap_unlock(page_table, ptl);
        return ret;
-
-no_new_page:
-       page_cache_release(old_page);
+oom:
+       if (old_page)
+               page_cache_release(old_page);
        return VM_FAULT_OOM;
 }
 
@@ -1539,13 +1698,6 @@
 
        restart_addr = zap_page_range(vma, start_addr,
                                        end_addr - start_addr, details);
-
-       /*
-        * We cannot rely on the break test in unmap_vmas:
-        * on the one hand, we don't want to restart our loop
-        * just because that broke out for the page_table_lock;
-        * on the other hand, it does no test when vma is small.
-        */
        need_break = need_resched() ||
                        need_lockbreak(details->i_mmap_lock);
 
@@ -1794,38 +1946,37 @@
 }
 
 /*
- * We hold the mm semaphore and the page_table_lock on entry and
- * should release the pagetable lock on exit..
- */
-static int do_swap_page(struct mm_struct * mm,
-       struct vm_area_struct * vma, unsigned long address,
-       pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
-{
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access, pte_t orig_pte)
+{
+       spinlock_t *ptl;
        struct page *page;
-       swp_entry_t entry = pte_to_swp_entry(orig_pte);
+       swp_entry_t entry;
        pte_t pte;
        int ret = VM_FAULT_MINOR;
 
-       pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+       if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+               goto out;
+
+       entry = pte_to_swp_entry(orig_pte);
        page = lookup_swap_cache(entry);
        if (!page) {
                swapin_readahead(entry, address, vma);
                page = read_swap_cache_async(entry, vma, address);
                if (!page) {
                        /*
-                        * Back out if somebody else faulted in this pte while
-                        * we released the page table lock.
+                        * Back out if somebody else faulted in this pte
+                        * while we released the pte lock.
                         */
-                       spin_lock(&mm->page_table_lock);
-                       page_table = pte_offset_map(pmd, address);
+                       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
                        if (likely(pte_same(*page_table, orig_pte)))
                                ret = VM_FAULT_OOM;
-                       else
-                               ret = VM_FAULT_MINOR;
-                       pte_unmap(page_table);
-                       spin_unlock(&mm->page_table_lock);
-                       goto out;
+                       goto unlock;
                }
 
                /* Had to read the page from swap area: Major fault */
@@ -1838,15 +1989,11 @@
        lock_page(page);
 
        /*
-        * Back out if somebody else faulted in this pte while we
-        * released the page table lock.
+        * Back out if somebody else already faulted in this pte.
         */
-       spin_lock(&mm->page_table_lock);
-       page_table = pte_offset_map(pmd, address);
-       if (unlikely(!pte_same(*page_table, orig_pte))) {
-               ret = VM_FAULT_MINOR;
+       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (unlikely(!pte_same(*page_table, orig_pte)))
                goto out_nomap;
-       }
 
        if (unlikely(!PageUptodate(page))) {
                ret = VM_FAULT_SIGBUS;
@@ -1855,7 +2002,7 @@
 
        /* The page isn't present yet, go ahead with the fault. */
 
-       inc_mm_counter(mm, rss);
+       inc_mm_counter(mm, anon_rss);
        pte = mk_pte(page, vma->vm_page_prot);
        if (write_access && can_share_swap_page(page)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1873,7 +2020,7 @@
 
        if (write_access) {
                if (do_wp_page(mm, vma, address,
-                               page_table, pmd, pte) == VM_FAULT_OOM)
+                               page_table, pmd, ptl, pte) == VM_FAULT_OOM)
                        ret = VM_FAULT_OOM;
                goto out;
        }
@@ -1881,74 +2028,76 @@
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, address, pte);
        lazy_mmu_prot_update(pte);
-       pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+unlock:
+       pte_unmap_unlock(page_table, ptl);
 out:
        return ret;
 out_nomap:
-       pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+       pte_unmap_unlock(page_table, ptl);
        unlock_page(page);
        page_cache_release(page);
-       goto out;
-}
-
-/*
- * We are called with the MM semaphore and page_table_lock
- * spinlock held to protect against concurrent faults in
- * multithreaded programs. 
- */
-static int
-do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
-               pte_t *page_table, pmd_t *pmd, int write_access,
-               unsigned long addr)
-{
+       return ret;
+}
+
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access)
+{
+       struct page *page;
+       spinlock_t *ptl;
        pte_t entry;
-       struct page * page = ZERO_PAGE(addr);
-
-       /* Read-only mapping of ZERO_PAGE. */
-       entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
-
-       /* ..except if it's a write access */
+
        if (write_access) {
                /* Allocate our own private page. */
                pte_unmap(page_table);
-               spin_unlock(&mm->page_table_lock);
 
                if (unlikely(anon_vma_prepare(vma)))
-                       goto no_mem;
-               page = alloc_zeroed_user_highpage(vma, addr);
+                       goto oom;
+               page = alloc_zeroed_user_highpage(vma, address);
                if (!page)
-                       goto no_mem;
-
-               spin_lock(&mm->page_table_lock);
-               page_table = pte_offset_map(pmd, addr);
-
-               if (!pte_none(*page_table)) {
-                       pte_unmap(page_table);
-                       page_cache_release(page);
-                       spin_unlock(&mm->page_table_lock);
-                       goto out;
-               }
-               inc_mm_counter(mm, rss);
-               entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
-                                                        vma->vm_page_prot)),
-                                     vma);
+                       goto oom;
+
+               entry = mk_pte(page, vma->vm_page_prot);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+
+               page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+               if (!pte_none(*page_table))
+                       goto release;
+               inc_mm_counter(mm, anon_rss);
                lru_cache_add_active(page);
                SetPageReferenced(page);
-               page_add_anon_rmap(page, vma, addr);
-       }
-
-       set_pte_at(mm, addr, page_table, entry);
-       pte_unmap(page_table);
+               page_add_anon_rmap(page, vma, address);
+       } else {
+               /* Map the ZERO_PAGE - vm_page_prot is readonly */
+               page = ZERO_PAGE(address);
+               page_cache_get(page);
+               entry = mk_pte(page, vma->vm_page_prot);
+
+               ptl = pte_lockptr(mm, pmd);
+               spin_lock(ptl);
+               if (!pte_none(*page_table))
+                       goto release;
+               inc_mm_counter(mm, file_rss);
+               page_add_file_rmap(page);
+       }
+
+       set_pte_at(mm, address, page_table, entry);
 
        /* No need to invalidate - it was non-present before */
-       update_mmu_cache(vma, addr, entry);
+       update_mmu_cache(vma, address, entry);
        lazy_mmu_prot_update(entry);
-       spin_unlock(&mm->page_table_lock);
-out:
+unlock:
+       pte_unmap_unlock(page_table, ptl);
        return VM_FAULT_MINOR;
-no_mem:
+release:
+       page_cache_release(page);
+       goto unlock;
+oom:
        return VM_FAULT_OOM;
 }
 
@@ -1961,25 +2110,24 @@
  * As this is called only for pages that do not currently exist, we
  * do not need to flush old virtual caches or the TLB.
  *
- * This is called with the MM semaphore held and the page table
- * spinlock held. Exit with the spinlock released.
- */
-static int
-do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-       unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
-{
-       struct page * new_page;
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access)
+{
+       spinlock_t *ptl;
+       struct page *new_page;
        struct address_space *mapping = NULL;
        pte_t entry;
        unsigned int sequence = 0;
        int ret = VM_FAULT_MINOR;
        int anon = 0;
 
-       if (!vma->vm_ops || !vma->vm_ops->nopage)
-               return do_anonymous_page(mm, vma, page_table,
-                                       pmd, write_access, address);
        pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+       BUG_ON(vma->vm_flags & VM_PFNMAP);
 
        if (vma->vm_file) {
                mapping = vma->vm_file->f_mapping;
@@ -1987,7 +2135,6 @@
                smp_rmb(); /* serializes i_size against truncate_count */
        }
 retry:
-       cond_resched();
        new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
        /*
         * No smp_rmb is needed here as long as there's a full
@@ -2020,19 +2167,20 @@
                anon = 1;
        }
 
-       spin_lock(&mm->page_table_lock);
+       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
        /*
         * For a file-backed vma, someone could have truncated or otherwise
         * invalidated this page.  If unmap_mapping_range got called,
         * retry getting the page.
         */
        if (mapping && unlikely(sequence != mapping->truncate_count)) {
+               pte_unmap_unlock(page_table, ptl);
+               page_cache_release(new_page);
+               cond_resched();
                sequence = mapping->truncate_count;
-               spin_unlock(&mm->page_table_lock);
-               page_cache_release(new_page);
+               smp_rmb();
                goto retry;
        }
-       page_table = pte_offset_map(pmd, address);
 
        /*
         * This silly early PAGE_DIRTY setting removes a race
@@ -2046,68 +2194,67 @@
         */
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
-               if (!PageReserved(new_page))
-                       inc_mm_counter(mm, rss);
-
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                set_pte_at(mm, address, page_table, entry);
                if (anon) {
+                       inc_mm_counter(mm, anon_rss);
                        lru_cache_add_active(new_page);
                        page_add_anon_rmap(new_page, vma, address);
-               } else
+               } else {
+                       inc_mm_counter(mm, file_rss);
                        page_add_file_rmap(new_page);
-               pte_unmap(page_table);
+               }
        } else {
                /* One of our sibling threads was faster, back out. */
-               pte_unmap(page_table);
                page_cache_release(new_page);
-               spin_unlock(&mm->page_table_lock);
-               goto out;
+               goto unlock;
        }
 
        /* no need to invalidate: a not-present page shouldn't be cached */
        update_mmu_cache(vma, address, entry);
        lazy_mmu_prot_update(entry);
-       spin_unlock(&mm->page_table_lock);
-out:
+unlock:
+       pte_unmap_unlock(page_table, ptl);
        return ret;
 oom:
        page_cache_release(new_page);
-       ret = VM_FAULT_OOM;
-       goto out;
+       return VM_FAULT_OOM;
 }
 
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
  * nonlinear vmas.
- */
-static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
-       unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
-{
-       unsigned long pgoff;
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access, pte_t orig_pte)
+{
+       pgoff_t pgoff;
        int err;
 
-       BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
-       /*
-        * Fall back to the linear mapping if the fs does not support
-        * ->populate:
-        */
-       if (!vma->vm_ops->populate ||
-                       (write_access && !(vma->vm_flags & VM_SHARED))) {
-               pte_clear(mm, address, pte);
-               return do_no_page(mm, vma, address, write_access, pte, pmd);
-       }
-
-       pgoff = pte_to_pgoff(*pte);
-
-       pte_unmap(pte);
-       spin_unlock(&mm->page_table_lock);
-
-       err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
+       if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+               return VM_FAULT_MINOR;
+
+       if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+               /*
+                * Page table corrupted: show pte and kill process.
+                */
+               print_bad_pte(vma, orig_pte, address);
+               return VM_FAULT_OOM;
+       }
+       /* We can then assume vma->vm_ops && vma->vm_ops->populate */
+
+       pgoff = pte_to_pgoff(orig_pte);
+       err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+                                       vma->vm_page_prot, pgoff, 0);
        if (err == -ENOMEM)
                return VM_FAULT_OOM;
        if (err)
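
For context, a hedged userspace sketch of the nonlinear mappings this path serves: remap_file_pages() rewrites the file offset behind part of an existing MAP_SHARED mapping, leaving a file pte for do_file_page() to repopulate on the next fault. The file path and sizes below are illustrative only.

#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        long pg = sysconf(_SC_PAGESIZE);
        int fd = open("/tmp/example-data", O_RDWR);     /* assumed >= 4 pages */
        char *map;

        if (fd < 0)
                return 1;
        map = mmap(NULL, 4 * pg, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED)
                return 1;

        /* Make page 0 of the mapping show page 3 of the file instead. */
        remap_file_pages(map, pg, 0, 3, 0);

        munmap(map, 4 * pg);
        close(fd);
        return 0;
}
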
@@ -2124,56 +2271,68 @@
  * with external mmu caches can use to update those (ie the Sparc or
  * PowerPC hashed page tables that act as extended TLBs).
  *
- * Note the "page_table_lock". It is to protect against kswapd removing
- * pages from under us. Note that kswapd only ever _removes_ pages, never
- * adds them. As such, once we have noticed that the page is not present,
- * we can drop the lock early.
- *
- * The adding of pages is protected by the MM semaphore (which we hold),
- * so we don't need to worry about a page being suddenly been added into
- * our VM.
- *
- * We enter with the pagetable spinlock held, we are supposed to
- * release it when done.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
 static inline int handle_pte_fault(struct mm_struct *mm,
-       struct vm_area_struct * vma, unsigned long address,
-       int write_access, pte_t *pte, pmd_t *pmd)
+               struct vm_area_struct *vma, unsigned long address,
+               pte_t *pte, pmd_t *pmd, int write_access)
 {
        pte_t entry;
-
-       entry = *pte;
+       pte_t old_entry;
+       spinlock_t *ptl;
+
+       old_entry = entry = *pte;
        if (!pte_present(entry)) {
-               /*
-                * If it truly wasn't present, we know that kswapd
-                * and the PTE updates will not touch it later. So
-                * drop the lock.
-                */
-               if (pte_none(entry))
-                       return do_no_page(mm, vma, address, write_access, pte, pmd);
+               if (pte_none(entry)) {
+                       if (!vma->vm_ops || !vma->vm_ops->nopage)
+                               return do_anonymous_page(mm, vma, address,
+                                       pte, pmd, write_access);
+                       return do_no_page(mm, vma, address,
+                                       pte, pmd, write_access);
+               }
                if (pte_file(entry))
-                       return do_file_page(mm, vma, address, write_access, pte, pmd);
-               return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
-       }
-
+                       return do_file_page(mm, vma, address,
+                                       pte, pmd, write_access, entry);
+               return do_swap_page(mm, vma, address,
+                                       pte, pmd, write_access, entry);
+       }
+
+       ptl = pte_lockptr(mm, pmd);
+       spin_lock(ptl);
+       if (unlikely(!pte_same(*pte, entry)))
+               goto unlock;
        if (write_access) {
                if (!pte_write(entry))
-                       return do_wp_page(mm, vma, address, pte, pmd, entry);
+                       return do_wp_page(mm, vma, address,
+                                       pte, pmd, ptl, entry);
                entry = pte_mkdirty(entry);
        }
        entry = pte_mkyoung(entry);
-       ptep_set_access_flags(vma, address, pte, entry, write_access);
-       update_mmu_cache(vma, address, entry);
-       lazy_mmu_prot_update(entry);
-       pte_unmap(pte);
-       spin_unlock(&mm->page_table_lock);
+       if (!pte_same(old_entry, entry)) {
+               ptep_set_access_flags(vma, address, pte, entry, write_access);
+               update_mmu_cache(vma, address, entry);
+               lazy_mmu_prot_update(entry);
+       } else {
+               /*
+                * This is needed only for protection faults but the arch code
+                * is not yet telling us if this is a protection fault or not.
+                * This still avoids useless tlb flushes for .text page faults
+                * with threads.
+                */
+               if (write_access)
+                       flush_tlb_page(vma, address);
+       }
+unlock:
+       pte_unmap_unlock(pte, ptl);
        return VM_FAULT_MINOR;
 }
 
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, int write_access)
 {
        pgd_t *pgd;
@@ -2188,100 +2347,78 @@
        if (unlikely(is_vm_hugetlb_page(vma)))
                return hugetlb_fault(mm, vma, address, write_access);
 
-       /*
-        * We need the page table lock to synchronize with kswapd
-        * and the SMP-safe atomic PTE updates.
-        */
        pgd = pgd_offset(mm, address);
-       spin_lock(&mm->page_table_lock);
-
        pud = pud_alloc(mm, pgd, address);
        if (!pud)
-               goto oom;
-
+               return VM_FAULT_OOM;
        pmd = pmd_alloc(mm, pud, address);
        if (!pmd)
-               goto oom;
-
+               return VM_FAULT_OOM;
        pte = pte_alloc_map(mm, pmd, address);
        if (!pte)
-               goto oom;
-       
-       return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
-
- oom:
+               return VM_FAULT_OOM;
+
+       return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+}
+
+#ifndef __PAGETABLE_PUD_FOLDED
+/*
+ * Allocate page upper directory.
+ * We've already handled the fast-path in-line.
+ */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+       pud_t *new = pud_alloc_one(mm, address);
+       if (!new)
+               return -ENOMEM;
+
+       spin_lock(&mm->page_table_lock);
+       if (pgd_present(*pgd))          /* Another has populated it */
+               pud_free(new);
+       else
+               pgd_populate(mm, pgd, new);
        spin_unlock(&mm->page_table_lock);
-       return VM_FAULT_OOM;
-}
-
-#ifndef __PAGETABLE_PUD_FOLDED
-/*
- * Allocate page upper directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
- */
-pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
-{
-       pud_t *new;
-
+       return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+       return 0;
+}
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+/*
+ * Allocate page middle directory.
+ * We've already handled the fast-path in-line.
+ */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+       pmd_t *new = pmd_alloc_one(mm, address);
+       if (!new)
+               return -ENOMEM;
+
+       spin_lock(&mm->page_table_lock);
+#ifndef __ARCH_HAS_4LEVEL_HACK
+       if (pud_present(*pud))          /* Another has populated it */
+               pmd_free(new);
+       else
+               pud_populate(mm, pud, new);
+#else
+       if (pgd_present(*pud))          /* Another has populated it */
+               pmd_free(new);
+       else
+               pgd_populate(mm, pud, new);
+#endif /* __ARCH_HAS_4LEVEL_HACK */
        spin_unlock(&mm->page_table_lock);
-       new = pud_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
-       if (!new)
-               return NULL;
-
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
-       if (pgd_present(*pgd)) {
-               pud_free(new);
-               goto out;
-       }
-       pgd_populate(mm, pgd, new);
- out:
-       return pud_offset(pgd, address);
-}
-#endif /* __PAGETABLE_PUD_FOLDED */
-
-#ifndef __PAGETABLE_PMD_FOLDED
-/*
- * Allocate page middle directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
- */
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
-       pmd_t *new;
-
-       spin_unlock(&mm->page_table_lock);
-       new = pmd_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
-       if (!new)
-               return NULL;
-
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
-#ifndef __ARCH_HAS_4LEVEL_HACK
-       if (pud_present(*pud)) {
-               pmd_free(new);
-               goto out;
-       }
-       pud_populate(mm, pud, new);
+       return 0;
+}
 #else
-       if (pgd_present(*pud)) {
-               pmd_free(new);
-               goto out;
-       }
-       pgd_populate(mm, pud, new);
-#endif /* __ARCH_HAS_4LEVEL_HACK */
-
- out:
-       return pmd_offset(pud, address);
+/* Workaround for gcc 2.96 */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+       return 0;
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
@@ -2346,22 +2483,6 @@
 
 EXPORT_SYMBOL(vmalloc_to_pfn);
 
-/*
- * update_mem_hiwater
- *     - update per process rss and vm high water data
- */
-void update_mem_hiwater(struct task_struct *tsk)
-{
-       if (tsk->mm) {
-               unsigned long rss = get_mm_counter(tsk->mm, rss);
-
-               if (tsk->mm->hiwater_rss < rss)
-                       tsk->mm->hiwater_rss = rss;
-               if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
-                       tsk->mm->hiwater_vm = tsk->mm->total_vm;
-       }
-}
-
 #if !defined(__HAVE_ARCH_GATE_AREA)
 
 #if defined(AT_SYSINFO_EHDR)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/mm/mmap.c
--- a/linux-2.6-xen-sparse/mm/mmap.c    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/mm/mmap.c    Wed Feb  1 18:00:19 2006
@@ -155,10 +155,6 @@
        return -ENOMEM;
 }
 
-EXPORT_SYMBOL(sysctl_overcommit_memory);
-EXPORT_SYMBOL(sysctl_overcommit_ratio);
-EXPORT_SYMBOL(sysctl_max_map_count);
-EXPORT_SYMBOL(vm_committed_space);
 EXPORT_SYMBOL(__vm_enough_memory);
 
 /*
@@ -181,26 +177,36 @@
 }
 
 /*
- * Remove one vm structure and free it.
- */
-static void remove_vm_struct(struct vm_area_struct *vma)
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
+ */
+void unlink_file_vma(struct vm_area_struct *vma)
 {
        struct file *file = vma->vm_file;
 
-       might_sleep();
        if (file) {
                struct address_space *mapping = file->f_mapping;
                spin_lock(&mapping->i_mmap_lock);
                __remove_shared_vm_struct(vma, file, mapping);
                spin_unlock(&mapping->i_mmap_lock);
        }
+}
+
+/*
+ * Close a vm structure and free it, returning the next.
+ */
+static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+{
+       struct vm_area_struct *next = vma->vm_next;
+
+       might_sleep();
        if (vma->vm_ops && vma->vm_ops->close)
                vma->vm_ops->close(vma);
-       if (file)
-               fput(file);
-       anon_vma_unlink(vma);
+       if (vma->vm_file)
+               fput(vma->vm_file);
        mpol_free(vma_policy(vma));
        kmem_cache_free(vm_area_cachep, vma);
+       return next;
 }
 
 asmlinkage unsigned long sys_brk(unsigned long brk)
@@ -605,7 +611,7 @@
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
 
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
                        struct file *file, unsigned long vm_flags)
@@ -832,7 +838,7 @@
 }
 
 #ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
+void vm_stat_account(struct mm_struct *mm, unsigned long flags,
                                                struct file *file, long pages)
 {
        const unsigned long stack_flags
@@ -1110,7 +1116,7 @@
        }
 out:   
        mm->total_vm += len >> PAGE_SHIFT;
-       __vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+       vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
                mm->locked_vm += len >> PAGE_SHIFT;
                make_pages_present(addr, addr + len);
@@ -1475,15 +1481,19 @@
        mm->total_vm += grow;
        if (vma->vm_flags & VM_LOCKED)
                mm->locked_vm += grow;
-       __vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+       vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
 }
 
-#ifdef CONFIG_STACK_GROWSUP
-/*
- * vma is the first one with address > vma->vm_end.  Have to extend vma.
- */
-int expand_stack(struct vm_area_struct * vma, unsigned long address)
+#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
+/*
+ * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+ * vma is the last one with address > vma->vm_end.  Have to extend vma.
+ */
+#ifndef CONFIG_IA64
+static inline
+#endif
+int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        int error;
 
@@ -1520,6 +1530,13 @@
        }
        anon_vma_unlock(vma);
        return error;
+}
+#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
+
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+       return expand_upwards(vma, address);
 }
 
 struct vm_area_struct *
@@ -1603,36 +1620,24 @@
 }
 #endif
 
-/* Normal function to fix up a mapping
- * This function is the default for when an area has no specific
- * function.  This may be used as part of a more specific routine.
+/*
+ * Ok - we have the memory areas we should free on the vma list,
+ * so release them, and do the vma updates.
  *
- * By the time this function is called, the area struct has been
- * removed from the process mapping list.
- */
-static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
-{
-       size_t len = area->vm_end - area->vm_start;
-
-       area->vm_mm->total_vm -= len >> PAGE_SHIFT;
-       if (area->vm_flags & VM_LOCKED)
-               area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
-       vm_stat_unaccount(area);
-       remove_vm_struct(area);
-}
-
-/*
- * Update the VMA and inode share lists.
- *
- * Ok - we have the memory areas we should free on the 'free' list,
- * so release them, and do the vma updates.
- */
-static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
-{
+ * Called with the mm semaphore held.
+ */
+static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+       /* Update high watermark before we lower total_vm */
+       update_hiwater_vm(mm);
        do {
-               struct vm_area_struct *next = vma->vm_next;
-               unmap_vma(mm, vma);
-               vma = next;
+               long nrpages = vma_pages(vma);
+
+               mm->total_vm -= nrpages;
+               if (vma->vm_flags & VM_LOCKED)
+                       mm->locked_vm -= nrpages;
+               vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
+               vma = remove_vma(vma);
        } while (vma);
        validate_mm(mm);
 }
@@ -1651,14 +1656,13 @@
        unsigned long nr_accounted = 0;
 
        lru_add_drain();
-       spin_lock(&mm->page_table_lock);
        tlb = tlb_gather_mmu(mm, 0);
-       unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
+       update_hiwater_rss(mm);
+       unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
        free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                                 next? next->vm_start: 0);
        tlb_finish_mmu(tlb, start, end);
-       spin_unlock(&mm->page_table_lock);
 }
 
 /*
@@ -1799,7 +1803,7 @@
        unmap_region(mm, vma, prev, start, end);
 
        /* Fix up all other VM information */
-       unmap_vma_list(mm, vma);
+       remove_vma_list(mm, vma);
 
        return 0;
 }
@@ -1821,7 +1825,7 @@
 
 static inline void verify_mm_writelocked(struct mm_struct *mm)
 {
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_VM
        if (unlikely(down_read_trylock(&mm->mmap_sem))) {
                WARN_ON(1);
                up_read(&mm->mmap_sem);
@@ -1937,34 +1941,21 @@
 #endif
 
        lru_add_drain();
-
-       spin_lock(&mm->page_table_lock);
-
        flush_cache_mm(mm);
        tlb = tlb_gather_mmu(mm, 1);
+       /* Don't update_hiwater_rss(mm) here, do_exit already did */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
+       end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
        free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
        tlb_finish_mmu(tlb, 0, end);
 
-       mm->mmap = mm->mmap_cache = NULL;
-       mm->mm_rb = RB_ROOT;
-       set_mm_counter(mm, rss, 0);
-       mm->total_vm = 0;
-       mm->locked_vm = 0;
-
-       spin_unlock(&mm->page_table_lock);
-
-       /*
-        * Walk the list again, actually closing and freeing it
-        * without holding any MM locks.
-        */
-       while (vma) {
-               struct vm_area_struct *next = vma->vm_next;
-               remove_vm_struct(vma);
-               vma = next;
-       }
+       /*
+        * Walk the list again, actually closing and freeing it,
+        * with preemption enabled, without holding any MM locks.
+        */
+       while (vma)
+               vma = remove_vma(vma);
 
        BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
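
The mmap.c hunks above fold the old unmap_vma()/unmap_vma_list() pair into remove_vma_list(), which now records the total_vm high watermark before any per-VMA counters are dropped. Below is a minimal userspace sketch of that accounting order, not kernel code: the mm/vma structs, the fixed 4K page size and the inlined hiwater update are simplified stand-ins for the real types and update_hiwater_vm().

#include <stdio.h>

#define VM_LOCKED 0x2000UL

struct vma {
	unsigned long start, end, flags;	/* page-aligned byte addresses */
	struct vma *next;
};

struct mm {
	unsigned long total_vm, locked_vm, hiwater_vm;	/* in pages */
};

static unsigned long vma_pages(const struct vma *v)
{
	return (v->end - v->start) >> 12;	/* assume 4K pages */
}

static void remove_vma_list(struct mm *mm, struct vma *vma)
{
	/* Update high watermark before we lower total_vm */
	if (mm->total_vm > mm->hiwater_vm)
		mm->hiwater_vm = mm->total_vm;

	for (; vma; vma = vma->next) {
		unsigned long nrpages = vma_pages(vma);

		mm->total_vm -= nrpages;
		if (vma->flags & VM_LOCKED)
			mm->locked_vm -= nrpages;
	}
}

int main(void)
{
	struct vma b = { 0x20000, 0x24000, VM_LOCKED, NULL };	/* 4 locked pages */
	struct vma a = { 0x10000, 0x18000, 0, &b };		/* 8 pages */
	struct mm mm = { .total_vm = 12, .locked_vm = 4, .hiwater_vm = 0 };

	remove_vma_list(&mm, &a);
	printf("total_vm=%lu locked_vm=%lu hiwater_vm=%lu\n",
	       mm.total_vm, mm.locked_vm, mm.hiwater_vm);	/* 0 0 12 */
	return 0;
}
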
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c      Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c      Wed Feb  1 18:00:19 2006
@@ -33,6 +33,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
 
@@ -59,11 +60,13 @@
  *     NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
  *     HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
  *     HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
- */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+ *
+ * TBD: should special case ZONE_DMA32 machines here - in those we normally
+ * don't need any ZONE_NORMAL reservation
+ */
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
 
 EXPORT_SYMBOL(totalram_pages);
-EXPORT_SYMBOL(nr_swap_pages);
 
 /*
  * Used by page_zone() to look up the address of the struct zone whose
@@ -72,27 +75,50 @@
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" 
};
 int min_free_kbytes = 1024;
 
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
-/*
- * Temporary debugging check for pages not lying within a given zone.
- */
-static int bad_range(struct zone *zone, struct page *page)
-{
-       if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
-               return 1;
-       if (page_to_pfn(page) < zone->zone_start_pfn)
-               return 1;
+static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
+{
+       int ret = 0;
+       unsigned seq;
+       unsigned long pfn = page_to_pfn(page);
+
+       do {
+               seq = zone_span_seqbegin(zone);
+               if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
+                       ret = 1;
+               else if (pfn < zone->zone_start_pfn)
+                       ret = 1;
+       } while (zone_span_seqretry(zone, seq));
+
+       return ret;
+}
+
+static int page_is_consistent(struct zone *zone, struct page *page)
+{
 #ifdef CONFIG_HOLES_IN_ZONE
        if (!pfn_valid(page_to_pfn(page)))
-               return 1;
+               return 0;
 #endif
        if (zone != page_zone(page))
+               return 0;
+
+       return 1;
+}
+/*
+ * Temporary debugging check for pages not lying within a given zone.
+ */
+static int bad_range(struct zone *zone, struct page *page)
+{
+       if (page_outside_zone_boundaries(zone, page))
                return 1;
+       if (!page_is_consistent(zone, page))
+               return 1;
+
        return 0;
 }
 
@@ -101,7 +127,7 @@
        printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
                function, current->comm, page);
        printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-               (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+               (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
                page->mapping, page_mapcount(page), page_count(page));
        printk(KERN_EMERG "Backtrace:\n");
        dump_stack();
@@ -114,17 +140,13 @@
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback);
+                       1 << PG_writeback );
        set_page_count(page, 0);
        reset_page_mapcount(page);
        page->mapping = NULL;
        add_taint(TAINT_BAD_PAGE);
 }
 
-#ifndef CONFIG_HUGETLB_PAGE
-#define prep_compound_page(page, order) do { } while (0)
-#define destroy_compound_page(page, order) do { } while (0)
-#else
 /*
  * Higher-order pages are called "compound pages".  They are structured thusly:
  *
@@ -153,7 +175,7 @@
                struct page *p = page + i;
 
                SetPageCompound(p);
-               p->private = (unsigned long)page;
+               set_page_private(p, (unsigned long)page);
        }
 }
 
@@ -173,12 +195,11 @@
 
                if (!PageCompound(p))
                        bad_page(__FUNCTION__, page);
-               if (p->private != (unsigned long)page)
+               if (page_private(p) != (unsigned long)page)
                        bad_page(__FUNCTION__, page);
                ClearPageCompound(p);
        }
 }
-#endif         /* CONFIG_HUGETLB_PAGE */
 
 /*
  * function for dealing with page's order in buddy system.
@@ -186,18 +207,18 @@
  * So, we don't need atomic page->flags operations here.
  */
 static inline unsigned long page_order(struct page *page) {
-       return page->private;
+       return page_private(page);
 }
 
 static inline void set_page_order(struct page *page, int order) {
-       page->private = order;
+       set_page_private(page, order);
        __SetPagePrivate(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
        __ClearPagePrivate(page);
-       page->private = 0;
+       set_page_private(page, 0);
 }
 
 /*
@@ -237,14 +258,13 @@
  * (a) the buddy is free &&
  * (b) the buddy is on the buddy system &&
  * (c) a page and its buddy have the same order.
- * for recording page's order, we use page->private and PG_private.
+ * for recording page's order, we use page_private(page) and PG_private.
  *
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
        if (PagePrivate(page)           &&
            (page_order(page) == order) &&
-           !PageReserved(page)         &&
             page_count(page) == 0)
                return 1;
        return 0;
@@ -264,7 +284,7 @@
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
  * free pages of length of (1 << order) and marked with PG_Private.Page's
- * order is recorded in page->private field.
+ * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were   
  * free, the remainder of the region must be split into blocks.   
@@ -314,7 +334,7 @@
        zone->free_area[order].nr_free++;
 }
 
-static inline void free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(const char *function, struct page *page)
 {
        if (    page_mapcount(page) ||
                page->mapping != NULL ||
@@ -327,10 +347,17 @@
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback )))
+                       1 << PG_writeback |
+                       1 << PG_reserved )))
                bad_page(function, page);
        if (PageDirty(page))
                __ClearPageDirty(page);
+       /*
+        * For now, we report if PG_reserved was found set, but do not
+        * clear it, and do not free the page.  But we shall soon need
+        * to do more, for when the ZERO_PAGE count wraps negative.
+        */
+       return PageReserved(page);
 }
 
 /*
@@ -370,11 +397,10 @@
 {
        LIST_HEAD(list);
        int i;
+       int reserved = 0;
 
        if (arch_free_page(page, order))
                return;
-
-       mod_page_state(pgfree, 1 << order);
 
 #ifndef CONFIG_MMU
        if (order > 0)
@@ -383,8 +409,12 @@
 #endif
 
        for (i = 0 ; i < (1 << order) ; ++i)
-               free_pages_check(__FUNCTION__, page + i);
+               reserved += free_pages_check(__FUNCTION__, page + i);
+       if (reserved)
+               return;
+
        list_add(&page->lru, &list);
+       mod_page_state(pgfree, 1 << order);
        kernel_map_pages(page, 1<<order, 0);
        free_pages_bulk(page_zone(page), 1, &list, order);
 }
@@ -442,7 +472,7 @@
 /*
  * This page is about to be returned from the page allocator
  */
-static void prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order)
 {
        if (    page_mapcount(page) ||
                page->mapping != NULL ||
@@ -456,15 +486,24 @@
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback )))
+                       1 << PG_writeback |
+                       1 << PG_reserved )))
                bad_page(__FUNCTION__, page);
+
+       /*
+        * For now, we report if PG_reserved was found set, but do not
+        * clear it, and do not allocate the page: as a safety net.
+        */
+       if (PageReserved(page))
+               return 1;
 
        page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
                        1 << PG_referenced | 1 << PG_arch_1 |
                        1 << PG_checked | 1 << PG_mappedtodisk);
-       page->private = 0;
+       set_page_private(page, 0);
        set_page_refs(page, order);
        kernel_map_pages(page, 1 << order, 1);
+       return 0;
 }
 
 /* 
@@ -648,11 +687,14 @@
        if (arch_free_page(page, 0))
                return;
 
-       kernel_map_pages(page, 1, 0);
-       inc_page_state(pgfree);
        if (PageAnon(page))
                page->mapping = NULL;
-       free_pages_check(__FUNCTION__, page);
+       if (free_pages_check(__FUNCTION__, page))
+               return;
+
+       inc_page_state(pgfree);
+       kernel_map_pages(page, 1, 0);
+
        pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
        local_irq_save(flags);
        list_add(&page->lru, &pcp->list);
@@ -691,12 +733,14 @@
 buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 {
        unsigned long flags;
-       struct page *page = NULL;
+       struct page *page;
        int cold = !!(gfp_flags & __GFP_COLD);
 
+again:
        if (order == 0) {
                struct per_cpu_pages *pcp;
 
+               page = NULL;
                pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
                local_irq_save(flags);
                if (pcp->count <= pcp->low)
@@ -709,9 +753,7 @@
                }
                local_irq_restore(flags);
                put_cpu();
-       }
-
-       if (page == NULL) {
+       } else {
                spin_lock_irqsave(&zone->lock, flags);
                page = __rmqueue(zone, order);
                spin_unlock_irqrestore(&zone->lock, flags);
@@ -720,7 +762,8 @@
        if (page != NULL) {
                BUG_ON(bad_range(zone, page));
                mod_page_state_zone(zone, pgalloc, 1 << order);
-               prep_new_page(page, order);
+               if (prep_new_page(page, order))
+                       goto again;
 
                if (gfp_flags & __GFP_ZERO)
                        prep_zero_page(page, order, gfp_flags);
@@ -731,20 +774,28 @@
        return page;
 }
 
+#define ALLOC_NO_WATERMARKS    0x01 /* don't check watermarks at all */
+#define ALLOC_WMARK_MIN                0x02 /* use pages_min watermark */
+#define ALLOC_WMARK_LOW                0x04 /* use pages_low watermark */
+#define ALLOC_WMARK_HIGH       0x08 /* use pages_high watermark */
+#define ALLOC_HARDER           0x10 /* try to alloc harder */
+#define ALLOC_HIGH             0x20 /* __GFP_HIGH set */
+#define ALLOC_CPUSET           0x40 /* check for correct cpuset */
+
 /*
  * Return 1 if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-                     int classzone_idx, int can_try_harder, int gfp_high)
+                     int classzone_idx, int alloc_flags)
 {
        /* free_pages my go negative - that's OK */
        long min = mark, free_pages = z->free_pages - (1 << order) + 1;
        int o;
 
-       if (gfp_high)
+       if (alloc_flags & ALLOC_HIGH)
                min -= min / 2;
-       if (can_try_harder)
+       if (alloc_flags & ALLOC_HARDER)
                min -= min / 4;
 
        if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -762,123 +813,127 @@
        return 1;
 }
 
-static inline int
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
-{
-       if (!z->reclaim_pages)
-               return 0;
-       if (gfp_mask & __GFP_NORECLAIM)
-               return 0;
-       return 1;
-}
-
-/*
- * This is the 'heart' of the zoned buddy allocator.
- */
-struct page * fastcall
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-               struct zonelist *zonelist)
-{
-       const int wait = gfp_mask & __GFP_WAIT;
-       struct zone **zones, *z;
-       struct page *page;
-       struct reclaim_state reclaim_state;
-       struct task_struct *p = current;
-       int i;
-       int classzone_idx;
-       int do_retry;
-       int can_try_harder;
-       int did_some_progress;
-
-       might_sleep_if(wait);
-
-       /*
-        * The caller may dip into page reserves a bit more if the caller
-        * cannot run direct reclaim, or is the caller has realtime scheduling
-        * policy
-        */
-       can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
-
-       zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
-
-       if (unlikely(zones[0] == NULL)) {
-               /* Should this ever happen?? */
-               return NULL;
-       }
-
-       classzone_idx = zone_idx(zones[0]);
-
-restart:
+/*
+ * get_page_from_freeliest goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, int alloc_flags)
+{
+       struct zone **z = zonelist->zones;
+       struct page *page = NULL;
+       int classzone_idx = zone_idx(*z);
+
        /*
         * Go through the zonelist once, looking for a zone with enough free.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-       for (i = 0; (z = zones[i]) != NULL; i++) {
-               int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
-               if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
+       do {
+               if ((alloc_flags & ALLOC_CPUSET) &&
+                               !cpuset_zone_allowed(*z, gfp_mask))
                        continue;
 
-               /*
-                * If the zone is to attempt early page reclaim then this loop
-                * will try to reclaim pages and check the watermark a second
-                * time before giving up and falling back to the next zone.
-                */
-zone_reclaim_retry:
-               if (!zone_watermark_ok(z, order, z->pages_low,
-                                      classzone_idx, 0, 0)) {
-                       if (!do_reclaim)
+               if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+                       unsigned long mark;
+                       if (alloc_flags & ALLOC_WMARK_MIN)
+                               mark = (*z)->pages_min;
+                       else if (alloc_flags & ALLOC_WMARK_LOW)
+                               mark = (*z)->pages_low;
+                       else
+                               mark = (*z)->pages_high;
+                       if (!zone_watermark_ok(*z, order, mark,
+                                   classzone_idx, alloc_flags))
                                continue;
-                       else {
-                               zone_reclaim(z, gfp_mask, order);
-                               /* Only try reclaim once */
-                               do_reclaim = 0;
-                               goto zone_reclaim_retry;
-                       }
                }
 
-               page = buffered_rmqueue(z, order, gfp_mask);
-               if (page)
-                       goto got_pg;
-       }
-
-       for (i = 0; (z = zones[i]) != NULL; i++)
-               wakeup_kswapd(z, order);
+               page = buffered_rmqueue(*z, order, gfp_mask);
+               if (page) {
+                       zone_statistics(zonelist, *z);
+                       break;
+               }
+       } while (*(++z) != NULL);
+       return page;
+}
+
+/*
+ * This is the 'heart' of the zoned buddy allocator.
+ */
+struct page * fastcall
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist)
+{
+       const gfp_t wait = gfp_mask & __GFP_WAIT;
+       struct zone **z;
+       struct page *page;
+       struct reclaim_state reclaim_state;
+       struct task_struct *p = current;
+       int do_retry;
+       int alloc_flags;
+       int did_some_progress;
+
+       might_sleep_if(wait);
+
+restart:
+       z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+
+       if (unlikely(*z == NULL)) {
+               /* Should this ever happen?? */
+               return NULL;
+       }
+
+       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+                               zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+       if (page)
+               goto got_pg;
+
+       do {
+               wakeup_kswapd(*z, order);
+       } while (*(++z));
+
+       /*
+        * OK, we're below the kswapd watermark and have kicked background
+        * reclaim. Now things get more complex, so set up alloc_flags according
+        * to how we want to proceed.
+        *
+        * The caller may dip into page reserves a bit more if the caller
+        * cannot run direct reclaim, or if the caller has realtime scheduling
+        * policy.
+        */
+       alloc_flags = ALLOC_WMARK_MIN;
+       if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+               alloc_flags |= ALLOC_HARDER;
+       if (gfp_mask & __GFP_HIGH)
+               alloc_flags |= ALLOC_HIGH;
+       if (wait)
+               alloc_flags |= ALLOC_CPUSET;
 
        /*
         * Go through the zonelist again. Let __GFP_HIGH and allocations
-        * coming from realtime tasks to go deeper into reserves
+        * coming from realtime tasks go deeper into reserves.
         *
         * This is the last chance, in general, before the goto nopage.
         * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-       for (i = 0; (z = zones[i]) != NULL; i++) {
-               if (!zone_watermark_ok(z, order, z->pages_min,
-                                      classzone_idx, can_try_harder,
-                                      gfp_mask & __GFP_HIGH))
-                       continue;
-
-               if (wait && !cpuset_zone_allowed(z, gfp_mask))
-                       continue;
-
-               page = buffered_rmqueue(z, order, gfp_mask);
-               if (page)
-                       goto got_pg;
-       }
+       page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+       if (page)
+               goto got_pg;
 
        /* This allocation should allow future memory freeing. */
 
        if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
                        && !in_interrupt()) {
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+nofail_alloc:
                        /* go through the zonelist yet again, ignoring mins */
-                       for (i = 0; (z = zones[i]) != NULL; i++) {
-                               if (!cpuset_zone_allowed(z, gfp_mask))
-                                       continue;
-                               page = buffered_rmqueue(z, order, gfp_mask);
-                               if (page)
-                                       goto got_pg;
+                       page = get_page_from_freelist(gfp_mask, order,
+                               zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+                       if (page)
+                               goto got_pg;
+                       if (gfp_mask & __GFP_NOFAIL) {
+                               blk_congestion_wait(WRITE, HZ/50);
+                               goto nofail_alloc;
                        }
                }
                goto nopage;
@@ -896,7 +951,7 @@
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
 
-       did_some_progress = try_to_free_pages(zones, gfp_mask);
+       did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
 
        p->reclaim_state = NULL;
        p->flags &= ~PF_MEMALLOC;
@@ -904,19 +959,10 @@
        cond_resched();
 
        if (likely(did_some_progress)) {
-               for (i = 0; (z = zones[i]) != NULL; i++) {
-                       if (!zone_watermark_ok(z, order, z->pages_min,
-                                              classzone_idx, can_try_harder,
-                                              gfp_mask & __GFP_HIGH))
-                               continue;
-
-                       if (!cpuset_zone_allowed(z, gfp_mask))
-                               continue;
-
-                       page = buffered_rmqueue(z, order, gfp_mask);
-                       if (page)
-                               goto got_pg;
-               }
+               page = get_page_from_freelist(gfp_mask, order,
+                                               zonelist, alloc_flags);
+               if (page)
+                       goto got_pg;
        } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                /*
                 * Go through the zonelist yet one more time, keep
@@ -924,18 +970,10 @@
                 * a parallel oom killing, we must fail if we're still
                 * under heavy pressure.
                 */
-               for (i = 0; (z = zones[i]) != NULL; i++) {
-                       if (!zone_watermark_ok(z, order, z->pages_high,
-                                              classzone_idx, 0, 0))
-                               continue;
-
-                       if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-                               continue;
-
-                       page = buffered_rmqueue(z, order, gfp_mask);
-                       if (page)
-                               goto got_pg;
-               }
+               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+                               zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+               if (page)
+                       goto got_pg;
 
                out_of_memory(gfp_mask, order);
                goto restart;
@@ -968,9 +1006,7 @@
                dump_stack();
                show_mem();
        }
-       return NULL;
 got_pg:
-       zone_statistics(zonelist, z);
        return page;
 }
 
@@ -998,7 +1034,7 @@
         * get_zeroed_page() returns a 32-bit address, which cannot represent
         * a highmem page
         */
-       BUG_ON(gfp_mask & __GFP_HIGHMEM);
+       BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
 
        page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
        if (page)
@@ -1018,7 +1054,7 @@
 
 fastcall void __free_pages(struct page *page, unsigned int order)
 {
-       if (!PageReserved(page) && put_page_testzero(page)) {
+       if (put_page_testzero(page)) {
                if (order == 0)
                        free_hot_page(page);
                else
@@ -1091,7 +1127,7 @@
  */
 unsigned int nr_free_buffer_pages(void)
 {
-       return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK);
+       return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
 
 /*
@@ -1099,7 +1135,7 @@
  */
 unsigned int nr_free_pagecache_pages(void)
 {
-       return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+       return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1307,11 +1343,8 @@
                } else
                        printk("\n");
 
-               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               for_each_online_cpu(cpu) {
                        struct per_cpu_pageset *pageset;
-
-                       if (!cpu_possible(cpu))
-                               continue;
 
                        pageset = zone_pcp(zone, cpu);
 
@@ -1421,6 +1454,10 @@
                zone = pgdat->node_zones + ZONE_NORMAL;
                if (zone->present_pages)
                        zonelist->zones[j++] = zone;
+       case ZONE_DMA32:
+               zone = pgdat->node_zones + ZONE_DMA32;
+               if (zone->present_pages)
+                       zonelist->zones[j++] = zone;
        case ZONE_DMA:
                zone = pgdat->node_zones + ZONE_DMA;
                if (zone->present_pages)
@@ -1428,6 +1465,18 @@
        }
 
        return j;
+}
+
+static inline int highest_zone(int zone_bits)
+{
+       int res = ZONE_NORMAL;
+       if (zone_bits & (__force int)__GFP_HIGHMEM)
+               res = ZONE_HIGHMEM;
+       if (zone_bits & (__force int)__GFP_DMA32)
+               res = ZONE_DMA32;
+       if (zone_bits & (__force int)__GFP_DMA)
+               res = ZONE_DMA;
+       return res;
 }
 
 #ifdef CONFIG_NUMA
@@ -1526,11 +1575,7 @@
                        zonelist = pgdat->node_zonelists + i;
                        for (j = 0; zonelist->zones[j] != NULL; j++);
 
-                       k = ZONE_NORMAL;
-                       if (i & __GFP_HIGHMEM)
-                               k = ZONE_HIGHMEM;
-                       if (i & __GFP_DMA)
-                               k = ZONE_DMA;
+                       k = highest_zone(i);
 
                        j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
                        zonelist->zones[j] = NULL;
@@ -1551,12 +1596,7 @@
                zonelist = pgdat->node_zonelists + i;
 
                j = 0;
-               k = ZONE_NORMAL;
-               if (i & __GFP_HIGHMEM)
-                       k = ZONE_HIGHMEM;
-               if (i & __GFP_DMA)
-                       k = ZONE_DMA;
-
+               k = highest_zone(i);
                j = build_zonelists_node(pgdat, zonelist, j, k);
                /*
                 * Now we build the zonelist so that it contains the zones
@@ -1661,7 +1701,7 @@
  * up by free_all_bootmem() once the early boot process is
  * done. Non-atomic initialization, single-pass.
  */
-void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                unsigned long start_pfn)
 {
        struct page *page;
@@ -1675,7 +1715,7 @@
                        continue;
                page = pfn_to_page(pfn);
                set_page_links(page, zone, nid, pfn);
-               set_page_count(page, 0);
+               set_page_count(page, 1);
                reset_page_mapcount(page);
                SetPageReserved(page);
                INIT_LIST_HEAD(&page->lru);
@@ -1722,14 +1762,13 @@
 
        /*
         * The per-cpu-pages pools are set to around 1000th of the
-        * size of the zone.  But no more than 1/4 of a meg - there's
-        * no point in going beyond the size of L2 cache.
+        * size of the zone.  But no more than 1/2 of a meg.
         *
         * OK, so we don't know how big the cache is.  So guess.
         */
        batch = zone->present_pages / 1024;
-       if (batch * PAGE_SIZE > 256 * 1024)
-               batch = (256 * 1024) / PAGE_SIZE;
+       if (batch * PAGE_SIZE > 512 * 1024)
+               batch = (512 * 1024) / PAGE_SIZE;
        batch /= 4;             /* We effectively *= 4 below */
        if (batch < 1)
                batch = 1;
@@ -1744,7 +1783,8 @@
         * of pages of one half of the possible page colors
         * and the other with pages of the other colors.
         */
-       batch = (1 << fls(batch + batch/2)) - 1;
+       batch = (1 << (fls(batch + batch/2)-1)) - 1;
+
        return batch;
 }
 
@@ -1756,7 +1796,7 @@
 
        pcp = &p->pcp[0];               /* hot */
        pcp->count = 0;
-       pcp->low = 2 * batch;
+       pcp->low = 0;
        pcp->high = 6 * batch;
        pcp->batch = max(1UL, 1 * batch);
        INIT_LIST_HEAD(&pcp->list);
@@ -1765,7 +1805,7 @@
        pcp->count = 0;
        pcp->low = 0;
        pcp->high = 2 * batch;
-       pcp->batch = max(1UL, 1 * batch);
+       pcp->batch = max(1UL, batch/2);
        INIT_LIST_HEAD(&pcp->list);
 }
 
@@ -1845,11 +1885,10 @@
                        if (process_zones(cpu))
                                ret = NOTIFY_BAD;
                        break;
-#ifdef CONFIG_HOTPLUG_CPU
+               case CPU_UP_CANCELED:
                case CPU_DEAD:
                        free_zone_pagesets(cpu);
                        break;
-#endif
                default:
                        break;
        }
@@ -1859,7 +1898,7 @@
 static struct notifier_block pageset_notifier =
        { &pageset_cpuup_callback, NULL, 0 };
 
-void __init setup_per_cpu_pageset()
+void __init setup_per_cpu_pageset(void)
 {
        int err;
 
@@ -1874,6 +1913,60 @@
 
 #endif
 
+static __devinit
+void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+{
+       int i;
+       struct pglist_data *pgdat = zone->zone_pgdat;
+
+       /*
+        * The per-page waitqueue mechanism uses hashed waitqueues
+        * per zone.
+        */
+       zone->wait_table_size = wait_table_size(zone_size_pages);
+       zone->wait_table_bits = wait_table_bits(zone->wait_table_size);
+       zone->wait_table = (wait_queue_head_t *)
+               alloc_bootmem_node(pgdat, zone->wait_table_size
+                                       * sizeof(wait_queue_head_t));
+
+       for(i = 0; i < zone->wait_table_size; ++i)
+               init_waitqueue_head(zone->wait_table + i);
+}
+
+static __devinit void zone_pcp_init(struct zone *zone)
+{
+       int cpu;
+       unsigned long batch = zone_batchsize(zone);
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+#ifdef CONFIG_NUMA
+               /* Early boot. Slab allocator not functional yet */
+               zone->pageset[cpu] = &boot_pageset[cpu];
+               setup_pageset(&boot_pageset[cpu],0);
+#else
+               setup_pageset(zone_pcp(zone,cpu), batch);
+#endif
+       }
+       printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
+               zone->name, zone->present_pages, batch);
+}
+
+static __devinit void init_currently_empty_zone(struct zone *zone,
+               unsigned long zone_start_pfn, unsigned long size)
+{
+       struct pglist_data *pgdat = zone->zone_pgdat;
+
+       zone_wait_table_init(zone, size);
+       pgdat->nr_zones = zone_idx(zone) + 1;
+
+       zone->zone_mem_map = pfn_to_page(zone_start_pfn);
+       zone->zone_start_pfn = zone_start_pfn;
+
+       memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
+
+       zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -1883,10 +1976,11 @@
 static void __init free_area_init_core(struct pglist_data *pgdat,
                unsigned long *zones_size, unsigned long *zholes_size)
 {
-       unsigned long i, j;
-       int cpu, nid = pgdat->node_id;
+       unsigned long j;
+       int nid = pgdat->node_id;
        unsigned long zone_start_pfn = pgdat->node_start_pfn;
 
+       pgdat_resize_init(pgdat);
        pgdat->nr_zones = 0;
        init_waitqueue_head(&pgdat->kswapd_wait);
        pgdat->kswapd_max_order = 0;
@@ -1894,13 +1988,12 @@
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize;
-               unsigned long batch;
 
                realsize = size = zones_size[j];
                if (zholes_size)
                        realsize -= zholes_size[j];
 
-               if (j == ZONE_DMA || j == ZONE_NORMAL)
+               if (j < ZONE_HIGHMEM)
                        nr_kernel_pages += realsize;
                nr_all_pages += realsize;
 
@@ -1909,24 +2002,13 @@
                zone->name = zone_names[j];
                spin_lock_init(&zone->lock);
                spin_lock_init(&zone->lru_lock);
+               zone_seqlock_init(zone);
                zone->zone_pgdat = pgdat;
                zone->free_pages = 0;
 
                zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
 
-               batch = zone_batchsize(zone);
-
-               for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-                       /* Early boot. Slab allocator not functional yet */
-                       zone->pageset[cpu] = &boot_pageset[cpu];
-                       setup_pageset(&boot_pageset[cpu],0);
-#else
-                       setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-               }
-               printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-                               zone_names[j], realsize, batch);
+               zone_pcp_init(zone);
                INIT_LIST_HEAD(&zone->active_list);
                INIT_LIST_HEAD(&zone->inactive_list);
                zone->nr_scan_active = 0;
@@ -1937,32 +2019,9 @@
                if (!size)
                        continue;
 
-               /*
-                * The per-page waitqueue mechanism uses hashed waitqueues
-                * per zone.
-                */
-               zone->wait_table_size = wait_table_size(size);
-               zone->wait_table_bits =
-                       wait_table_bits(zone->wait_table_size);
-               zone->wait_table = (wait_queue_head_t *)
-                       alloc_bootmem_node(pgdat, zone->wait_table_size
-                                               * sizeof(wait_queue_head_t));
-
-               for(i = 0; i < zone->wait_table_size; ++i)
-                       init_waitqueue_head(zone->wait_table + i);
-
-               pgdat->nr_zones = j+1;
-
-               zone->zone_mem_map = pfn_to_page(zone_start_pfn);
-               zone->zone_start_pfn = zone_start_pfn;
-
-               memmap_init(size, nid, j, zone_start_pfn);
-
                zonetable_add(zone, nid, j, zone_start_pfn, size);
-
+               init_currently_empty_zone(zone, zone_start_pfn, size);
                zone_start_pfn += size;
-
-               zone_init_free_lists(pgdat, zone, zone->spanned_pages);
        }
 }
 
@@ -2362,7 +2421,7 @@
  *     that the pages_{min,low,high} values for each zone are set correctly 
  *     with respect to min_free_kbytes.
  */
-static void setup_per_zone_pages_min(void)
+void setup_per_zone_pages_min(void)
 {
        unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
        unsigned long lowmem_pages = 0;
@@ -2376,13 +2435,18 @@
        }
 
        for_each_zone(zone) {
+               unsigned long tmp;
                spin_lock_irqsave(&zone->lru_lock, flags);
+               tmp = (pages_min * zone->present_pages) / lowmem_pages;
                if (is_highmem(zone)) {
                        /*
-                        * Often, highmem doesn't need to reserve any pages.
-                        * But the pages_min/low/high values are also used for
-                        * batching up page reclaim activity so we need a
-                        * decent value here.
+                        * __GFP_HIGH and PF_MEMALLOC allocations usually don't
+                        * need highmem pages, so cap pages_min to a small
+                        * value here.
+                        *
+                        * The (pages_high-pages_low) and (pages_low-pages_min)
+                        * deltas controls asynch page reclaim, and so should
+                        * not be capped for highmem.
                         */
                        int min_pages;
 
@@ -2393,19 +2457,15 @@
                                min_pages = 128;
                        zone->pages_min = min_pages;
                } else {
-                       /* if it's a lowmem zone, reserve a number of pages
+                       /*
+                        * If it's a lowmem zone, reserve a number of pages
                         * proportionate to the zone's size.
                         */
-                       zone->pages_min = (pages_min * zone->present_pages) /
-                                          lowmem_pages;
+                       zone->pages_min = tmp;
                }
 
-               /*
-                * When interpreting these watermarks, just keep in mind that:
-                * zone->pages_min == (zone->pages_min * 4) / 4;
-                */
-               zone->pages_low   = (zone->pages_min * 5) / 4;
-               zone->pages_high  = (zone->pages_min * 6) / 4;
+               zone->pages_low   = zone->pages_min + tmp / 4;
+               zone->pages_high  = zone->pages_min + tmp / 2;
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
 }
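
The page_alloc.c rework above replaces the open-coded zonelist scans with get_page_from_freelist() driven by ALLOC_* flags: the caller picks one watermark (min/low/high) and the ALLOC_HIGH/ALLOC_HARDER bits shrink the effective reserve for __GFP_HIGH, realtime or atomic callers. A minimal userspace sketch of just that flag handling follows; the zone values are illustrative and the per-order free-list walk of the real zone_watermark_ok() is omitted.

#include <stdio.h>

#define ALLOC_WMARK_MIN		0x02
#define ALLOC_WMARK_LOW		0x04
#define ALLOC_WMARK_HIGH	0x08
#define ALLOC_HARDER		0x10
#define ALLOC_HIGH		0x20

struct zone_sketch {
	long free_pages;
	long pages_min, pages_low, pages_high;
	long lowmem_reserve;
};

static int watermark_ok(const struct zone_sketch *z, int order, int alloc_flags)
{
	long free_pages = z->free_pages - (1L << order) + 1;
	long min;

	if (alloc_flags & ALLOC_WMARK_MIN)
		min = z->pages_min;
	else if (alloc_flags & ALLOC_WMARK_LOW)
		min = z->pages_low;
	else
		min = z->pages_high;

	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;		/* __GFP_HIGH: dip deeper into reserves */
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;		/* rt task or !__GFP_WAIT: dip deeper still */

	return free_pages > min + z->lowmem_reserve;
}

int main(void)
{
	struct zone_sketch z = { .free_pages = 90, .pages_min = 100,
				 .pages_low = 125, .pages_high = 150,
				 .lowmem_reserve = 0 };

	printf("order-0, WMARK_MIN:             %d\n",
	       watermark_ok(&z, 0, ALLOC_WMARK_MIN));		/* 0: below min */
	printf("order-0, WMARK_MIN|HIGH|HARDER: %d\n",
	       watermark_ok(&z, 0, ALLOC_WMARK_MIN | ALLOC_HIGH | ALLOC_HARDER));	/* 1 */
	return 0;
}
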
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/net/core/dev.c
--- a/linux-2.6-xen-sparse/net/core/dev.c       Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/net/core/dev.c       Wed Feb  1 18:00:19 2006
@@ -1114,6 +1114,19 @@
        return ret;
 }
 
+/* Take action when hardware reception checksum errors are detected. */
+#ifdef CONFIG_BUG
+void netdev_rx_csum_fault(struct net_device *dev)
+{
+       if (net_ratelimit()) {
+               printk(KERN_ERR "%s: hw csum failure.\n", 
+                       dev ? dev->name : "<unknown>");
+               dump_stack();
+       }
+}
+EXPORT_SYMBOL(netdev_rx_csum_fault);
+#endif
+
 #ifdef CONFIG_HIGHMEM
 /* Actually, we should eliminate this check as soon as we know, that:
  * 1. IOMMU is present and allows to map all the memory.
@@ -2767,6 +2780,20 @@
                       dev->name);
                dev->features &= ~NETIF_F_TSO;
        }
+       if (dev->features & NETIF_F_UFO) {
+               if (!(dev->features & NETIF_F_HW_CSUM)) {
+                       printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+                                       "NETIF_F_HW_CSUM feature.\n",
+                                                       dev->name);
+                       dev->features &= ~NETIF_F_UFO;
+               }
+               if (!(dev->features & NETIF_F_SG)) {
+                       printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+                                       "NETIF_F_SG feature.\n",
+                                       dev->name);
+                       dev->features &= ~NETIF_F_UFO;
+               }
+       }
 
        /*
         *      nil rebuild_header routine,
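
The register_netdevice() hunk above drops NETIF_F_UFO when the prerequisite hardware-checksum or scatter-gather features are missing. A minimal userspace sketch of that dependency check; the numeric flag values are placeholders chosen for the sketch, not copied from netdevice.h.

#include <stdio.h>

#define NETIF_F_SG		0x01
#define NETIF_F_HW_CSUM		0x08
#define NETIF_F_UFO		0x2000

static unsigned long fixup_features(unsigned long features)
{
	if (features & NETIF_F_UFO) {
		if (!(features & NETIF_F_HW_CSUM)) {
			fprintf(stderr, "dropping NETIF_F_UFO: no NETIF_F_HW_CSUM\n");
			features &= ~NETIF_F_UFO;
		}
		if (!(features & NETIF_F_SG)) {
			fprintf(stderr, "dropping NETIF_F_UFO: no NETIF_F_SG\n");
			features &= ~NETIF_F_UFO;
		}
	}
	return features;
}

int main(void)
{
	unsigned long f = NETIF_F_UFO | NETIF_F_SG;	/* HW_CSUM missing */

	printf("features before: %#lx, after: %#lx\n", f, fixup_features(f));
	return 0;
}
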
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/net/core/skbuff.c
--- a/linux-2.6-xen-sparse/net/core/skbuff.c    Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c    Wed Feb  1 18:00:19 2006
@@ -122,6 +122,8 @@
  *     __alloc_skb     -       allocate a network buffer
  *     @size: size to allocate
  *     @gfp_mask: allocation mask
+ *     @fclone: allocate from fclone cache instead of head cache
+ *             and allocate a cloned (child) skb
  *
  *     Allocate a new &sk_buff. The returned buffer has no headroom and a
  *     tail room of size bytes. The object has a reference count of one.
@@ -175,6 +177,8 @@
        skb_shinfo(skb)->tso_size = 0;
        skb_shinfo(skb)->tso_segs = 0;
        skb_shinfo(skb)->frag_list = NULL;
+       skb_shinfo(skb)->ufo_size = 0;
+       skb_shinfo(skb)->ip6_frag_id = 0;
 out:
        return skb;
 nodata:
@@ -247,6 +251,8 @@
        skb_shinfo(skb)->tso_size = 0;
        skb_shinfo(skb)->tso_segs = 0;
        skb_shinfo(skb)->frag_list = NULL;
+       skb_shinfo(skb)->ufo_size = 0;
+       skb_shinfo(skb)->ip6_frag_id = 0;
 out:
        return skb;
 nodata:
@@ -354,6 +360,9 @@
        }
 #ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(skb->nf_bridge);
 #endif
@@ -436,8 +445,16 @@
        C(nfct);
        nf_conntrack_get(skb->nfct);
        C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        C(ipvs_property);
+#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        C(nf_bridge);
@@ -496,6 +513,10 @@
        new->nfct       = old->nfct;
        nf_conntrack_get(old->nfct);
        new->nfctinfo   = old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       new->nfct_reasm = old->nfct_reasm;
+       nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        new->ipvs_property = old->ipvs_property;
 #endif
@@ -1718,6 +1739,78 @@
        skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
 
        return textsearch_find(config, state);
+}
+
+/**
+ * skb_append_datato_frags: - append the user data to a skb
+ * @sk: sock  structure
+ * @skb: skb structure to be appened with user data.
+ * @getfrag: call back function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure append the user data in the fragment part
+ * of the skb if any page alloc fails user this procedure returns  -ENOMEM
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+                       int (*getfrag)(void *from, char *to, int offset,
+                                       int len, int odd, struct sk_buff *skb),
+                       void *from, int length)
+{
+       int frg_cnt = 0;
+       skb_frag_t *frag = NULL;
+       struct page *page = NULL;
+       int copy, left;
+       int offset = 0;
+       int ret;
+
+       do {
+               /* Return error if we don't have space for new frag */
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               if (frg_cnt >= MAX_SKB_FRAGS)
+                       return -EFAULT;
+
+               /* allocate a new page for next frag */
+               page = alloc_pages(sk->sk_allocation, 0);
+
+               /* If alloc_page fails just return failure and caller will
+                * free previous allocated pages by doing kfree_skb()
+                */
+               if (page == NULL)
+                       return -ENOMEM;
+
+               /* initialize the next frag */
+               sk->sk_sndmsg_page = page;
+               sk->sk_sndmsg_off = 0;
+               skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+               skb->truesize += PAGE_SIZE;
+               atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+               /* get the new initialized frag */
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+               /* copy the user data to page */
+               left = PAGE_SIZE - frag->page_offset;
+               copy = (length > left)? left : length;
+
+               ret = getfrag(from, (page_address(frag->page) +
+                           frag->page_offset + frag->size),
+                           offset, copy, 0, skb);
+               if (ret < 0)
+                       return -EFAULT;
+
+               /* copy was successful so update the size parameters */
+               sk->sk_sndmsg_off += copy;
+               frag->size += copy;
+               skb->len += copy;
+               skb->data_len += copy;
+               offset += copy;
+               length -= copy;
+
+       } while (length > 0);
+
+       return 0;
 }
 
 void __init skb_init(void)
@@ -1771,3 +1864,4 @@
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_append_datato_frags);
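
skb_append_datato_frags() above fills an skb's fragment array page by page through a getfrag callback, returning -ENOMEM when a page allocation fails and -EFAULT when it runs out of fragment slots or the copy fails. A minimal userspace sketch of the chunking loop follows; malloc and memcpy stand in for page allocation and the user-copy callback, and the socket/skb bookkeeping is left out.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE	4096
#define MAX_FRAGS	18	/* mirrors MAX_SKB_FRAGS for 4K pages */

struct frag { char *page; int size; };

static int getfrag(void *from, char *to, int offset, int len)
{
	memcpy(to, (char *)from + offset, len);	/* kernel would copy from user space */
	return 0;
}

static int append_to_frags(struct frag *frags, void *from, int length)
{
	int frg_cnt = 0, offset = 0;

	while (length > 0) {
		int copy = length > PAGE_SIZE ? PAGE_SIZE : length;

		if (frg_cnt >= MAX_FRAGS)
			return -1;		/* -EFAULT in the kernel */

		frags[frg_cnt].page = malloc(PAGE_SIZE);
		if (!frags[frg_cnt].page)
			return -1;		/* -ENOMEM in the kernel */

		if (getfrag(from, frags[frg_cnt].page, offset, copy) < 0)
			return -1;

		frags[frg_cnt].size = copy;
		offset += copy;
		length -= copy;
		frg_cnt++;
	}
	return frg_cnt;
}

int main(void)
{
	struct frag frags[MAX_FRAGS];
	char data[10000];

	memset(data, 'x', sizeof(data));
	printf("fragments used: %d\n",
	       append_to_frags(frags, data, (int)sizeof(data)));	/* 3 */
	return 0;
}
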
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/power/Makefile
--- /dev/null   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/arch/i386/power/Makefile     Wed Feb  1 18:00:19 2006
@@ -0,0 +1,4 @@
+obj-$(CONFIG_PM_LEGACY)                += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += cpu.o
+obj-$(CONFIG_ACPI_SLEEP)       += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/lib/Makefile
--- /dev/null   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/lib/Makefile Wed Feb  1 18:00:19 2006
@@ -0,0 +1,60 @@
+#
+# Makefile for some libs needed in the kernel.
+#
+
+lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
+        bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
+        idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+        sha1.o
+
+lib-y  += kobject.o kref.o kobject_uevent.o klist.o
+
+obj-y += sort.o parser.o halfmd4.o
+
+ifeq ($(CONFIG_DEBUG_KOBJECT),y)
+CFLAGS_kobject.o += -DDEBUG
+CFLAGS_kobject_uevent.o += -DDEBUG
+endif
+
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
+lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
+lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+
+ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
+  lib-y += dec_and_lock.o
+endif
+
+obj-$(CONFIG_CRC_CCITT)        += crc-ccitt.o
+obj-$(CONFIG_CRC16)    += crc16.o
+obj-$(CONFIG_CRC32)    += crc32.o
+obj-$(CONFIG_LIBCRC32C)        += libcrc32c.o
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
+
+obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
+obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
+obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
+
+obj-$(CONFIG_TEXTSEARCH) += textsearch.o
+obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
+obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
+obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
+
+ifneq ($(CONFIG_XEN),y)
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+endif
+
+hostprogs-y    := gen_crc32table
+clean-files    := crc32table.h
+
+$(obj)/crc32.o: $(obj)/crc32table.h
+
+quiet_cmd_crc32 = GEN     $@
+      cmd_crc32 = $< > $@
+
+$(obj)/crc32table.h: $(obj)/gen_crc32table
+       $(call cmd,crc32)
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/mm/Kconfig
--- /dev/null   Wed Feb  1 17:06:16 2006
+++ b/linux-2.6-xen-sparse/mm/Kconfig   Wed Feb  1 18:00:19 2006
@@ -0,0 +1,137 @@
+config SELECT_MEMORY_MODEL
+       def_bool y
+       depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
+
+choice
+       prompt "Memory model"
+       depends on SELECT_MEMORY_MODEL
+       default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
+       default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
+       default FLATMEM_MANUAL
+
+config FLATMEM_MANUAL
+       bool "Flat Memory"
+       depends on !ARCH_DISCONTIGMEM_ENABLE || ARCH_FLATMEM_ENABLE
+       help
+         This option allows you to change some of the ways that
+         Linux manages its memory internally.  Most users will
+         only have one option here: FLATMEM.  This is normal
+         and a correct option.
+
+         Some users of more advanced features like NUMA and
+         memory hotplug may have different options here.
+         DISCONTIGMEM is an more mature, better tested system,
+         but is incompatible with memory hotplug and may suffer
+         decreased performance over SPARSEMEM.  If unsure between
+         "Sparse Memory" and "Discontiguous Memory", choose
+         "Discontiguous Memory".
+
+         If unsure, choose this option (Flat Memory) over any other.
+
+config DISCONTIGMEM_MANUAL
+       bool "Discontiguous Memory"
+       depends on ARCH_DISCONTIGMEM_ENABLE
+       help
+         This option provides enhanced support for discontiguous
+         memory systems, over FLATMEM.  These systems have holes
+         in their physical address spaces, and this option provides
+         more efficient handling of these holes.  However, the vast
+         majority of hardware has quite flat address spaces, and
+         can have degraded performance from extra overhead that
+         this option imposes.
+
+         Many NUMA configurations will have this as the only option.
+
+         If unsure, choose "Flat Memory" over this option.
+
+config SPARSEMEM_MANUAL
+       bool "Sparse Memory"
+       depends on ARCH_SPARSEMEM_ENABLE
+       help
+         This will be the only option for some systems, including
+         memory hotplug systems.  This is normal.
+
+         For many other systems, this will be an alternative to
+         "Discontiguous Memory".  This option provides some potential
+         performance benefits, along with decreased code complexity,
+         but it is newer, and more experimental.
+
+         If unsure, choose "Discontiguous Memory" or "Flat Memory"
+         over this option.
+
+endchoice
+
+config DISCONTIGMEM
+       def_bool y
+       depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
+
+config SPARSEMEM
+       def_bool y
+       depends on SPARSEMEM_MANUAL
+
+config FLATMEM
+       def_bool y
+       depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
+
+config FLAT_NODE_MEM_MAP
+       def_bool y
+       depends on !SPARSEMEM
+
+#
+# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
+# to represent different areas of memory.  This variable allows
+# those dependencies to exist individually.
+#
+config NEED_MULTIPLE_NODES
+       def_bool y
+       depends on DISCONTIGMEM || NUMA
+
+config HAVE_MEMORY_PRESENT
+       def_bool y
+       depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
+
+#
+# SPARSEMEM_EXTREME (which is the default) does some bootmem
+# allocations when memory_present() is called.  If this can not
+# be done on your architecture, select this option.  However,
+# statically allocating the mem_section[] array can potentially
+# consume vast quantities of .bss, so be careful.
+#
+# This option will also potentially produce smaller runtime code
+# with gcc 3.4 and later.
+#
+config SPARSEMEM_STATIC
+       def_bool n
+
+#
+# Architectecture platforms which require a two level mem_section in SPARSEMEM
+# must select this option. This is usually for architecture platforms with
+# an extremely sparse physical address space.
+#
+config SPARSEMEM_EXTREME
+       def_bool y
+       depends on SPARSEMEM && !SPARSEMEM_STATIC
+
+# eventually, we can have this option just 'select SPARSEMEM'
+config MEMORY_HOTPLUG
+       bool "Allow for memory hot-add"
+       depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+
+comment "Memory hotplug is currently incompatible with Software Suspend"
+       depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
+
+# Heavily threaded applications may benefit from splitting the mm-wide
+# page_table_lock, so that faults on different parts of the user address
+# space can be handled with less contention: split it at this NR_CPUS.
+# Default to 4 for wider testing, though 8 might be more appropriate.
+# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
+# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+# XEN uses the mapping field on pagetable pages to store a pointer to
+# the destructor.
+#
+config SPLIT_PTLOCK_CPUS
+       int
+       default "4096" if ARM && !CPU_CACHE_VIPT
+       default "4096" if PARISC && !PA20
+       default "4096" if XEN
+       default "4"
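
The SPLIT_PTLOCK_CPUS comment above motivates splitting the mm-wide page_table_lock per page-table page once the CPU count makes contention likely, while the Xen default of 4096 effectively keeps the lock unsplit (the comment notes Xen already uses the mapping field of pagetable pages). A minimal userspace sketch of that threshold policy follows; NR_CPUS, the lock layout and the way the threshold is consumed are illustrative assumptions, not the kernel's actual macros.

#include <stdio.h>

#define NR_CPUS			8
#define SPLIT_PTLOCK_CPUS	4	/* threshold picked by the Kconfig above */
#define NPTPAGES		4	/* pretend page-table pages */

struct lock { int id; };

static struct lock mm_lock = { 0 };				/* one mm-wide lock */
static struct lock pt_lock[NPTPAGES] = { {1}, {2}, {3}, {4} };	/* one per pt page */

static struct lock *ptlock(int ptpage)
{
	if (NR_CPUS < SPLIT_PTLOCK_CPUS)
		return &mm_lock;	/* coarse: every fault takes the same lock */
	return &pt_lock[ptpage];	/* split: faults on different pt pages don't contend */
}

int main(void)
{
	printf("faults on pt pages 0 and 1 take %s lock(s)\n",
	       ptlock(0) == ptlock(1) ? "the same" : "different");
	return 0;
}
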
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/arch/i386/kernel/apm.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/apm.c       Wed Feb  1 17:06:16 2006
+++ /dev/null   Wed Feb  1 18:00:19 2006
@@ -1,2420 +0,0 @@
-/* -*- linux-c -*-
- * APM BIOS driver for Linux
- * Copyright 1994-2001 Stephen Rothwell (sfr@xxxxxxxxxxxxxxxx)
- *
- * Initial development of this driver was funded by NEC Australia P/L
- *     and NEC Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * October 1995, Rik Faith (faith@xxxxxxxxxx):
- *    Minor enhancements and updates (to the patch set) for 1.3.x
- *    Documentation
- * January 1996, Rik Faith (faith@xxxxxxxxxx):
- *    Make /proc/apm easy to format (bump driver version)
- * March 1996, Rik Faith (faith@xxxxxxxxxx):
- *    Prohibit APM BIOS calls unless apm_enabled.
- *    (Thanks to Ulrich Windl <Ulrich.Windl@xxxxxxxxxxxxxxxxxxxx>)
- * April 1996, Stephen Rothwell (sfr@xxxxxxxxxxxxxxxx)
- *    Version 1.0 and 1.1
- * May 1996, Version 1.2
- * Feb 1998, Version 1.3
- * Feb 1998, Version 1.4
- * Aug 1998, Version 1.5
- * Sep 1998, Version 1.6
- * Nov 1998, Version 1.7
- * Jan 1999, Version 1.8
- * Jan 1999, Version 1.9
- * Oct 1999, Version 1.10
- * Nov 1999, Version 1.11
- * Jan 2000, Version 1.12
- * Feb 2000, Version 1.13
- * Nov 2000, Version 1.14
- * Oct 2001, Version 1.15
- * Jan 2002, Version 1.16
- * Oct 2002, Version 1.16ac
- *
- * History:
- *    0.6b: first version in official kernel, Linux 1.3.46
- *    0.7: changed /proc/apm format, Linux 1.3.58
- *    0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59
- *    0.9: only call bios if bios is present, Linux 1.3.72
- *    1.0: use fixed device number, consolidate /proc/apm into this file,
- *         Linux 1.3.85
- *    1.1: support user-space standby and suspend, power off after system
- *         halted, Linux 1.3.98
- *    1.2: When resetting RTC after resume, take care so that the time
- *         is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth
- *         <jtoth@xxxxxxxxxxxxx>); improve interaction between
- *         screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
- *    1.2a:Simple change to stop mysterious bug reports with SMP also added
- *        levels to the printk calls. APM is not defined for SMP machines.
- *         The new replacment for it is, but Linux doesn't yet support this.
- *         Alan Cox Linux 2.1.55
- *    1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
- *    1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
- *         Dean Gaudet <dgaudet@xxxxxxxxxx>.
- *         C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx> Linux 2.1.87
- *    1.5: Fix segment register reloading (in case of bad segments saved
- *         across BIOS call).
- *         Stephen Rothwell
- *    1.6: Cope with complier/assembler differences.
- *         Only try to turn off the first display device.
- *         Fix OOPS at power off with no APM BIOS by Jan Echternach
- *                   <echter@xxxxxxxxxxxxxxxxxxxxxxxxx>
- *         Stephen Rothwell
- *    1.7: Modify driver's cached copy of the disabled/disengaged flags
- *         to reflect current state of APM BIOS.
- *         Chris Rankin <rankinc@xxxxxxxxxxxxx>
- *         Reset interrupt 0 timer to 100Hz after suspend
- *         Chad Miller <cmiller@xxxxxxxxxxxxx>
- *         Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE
- *         Richard Gooch <rgooch@xxxxxxxxxxxxx>
- *         Allow boot time disabling of APM
- *         Make boot messages far less verbose by default
- *         Make asm safer
- *         Stephen Rothwell
- *    1.8: Add CONFIG_APM_RTC_IS_GMT
- *         Richard Gooch <rgooch@xxxxxxxxxxxxx>
- *         change APM_NOINTS to CONFIG_APM_ALLOW_INTS
- *         remove dependency on CONFIG_PROC_FS
- *         Stephen Rothwell
- *    1.9: Fix small typo.  <laslo@xxxxxxxxxxxxxx>
- *         Try to cope with BIOS's that need to have all display
- *         devices blanked and not just the first one.
- *         Ross Paterson <ross@xxxxxxxxxxxxxx>
- *         Fix segment limit setting; it has always been wrong as
- *         the segments needed to have byte granularity.
- *         Mark a few things __init.
- *         Add hack to allow power off of SMP systems by popular request.
- *         Use CONFIG_SMP instead of __SMP__
- *         Ignore BOUNCES for three seconds.
- *         Stephen Rothwell
- *   1.10: Fix for Thinkpad return code.
- *         Merge 2.2 and 2.3 drivers.
- *         Remove APM dependencies in arch/i386/kernel/process.c
- *         Remove APM dependencies in drivers/char/sysrq.c
- *         Reset time across standby.
- *         Allow more initialisation on SMP.
- *         Remove CONFIG_APM_POWER_OFF and make it boot time
- *         configurable (default on).
- *         Make debug only a boot time parameter (remove APM_DEBUG).
- *         Try to blank all devices on any error.
- *   1.11: Remove APM dependencies in drivers/char/console.c
- *         Check nr_running to detect if we are idle (from
- *         Borislav Deianov <borislav@xxxxxxxxxxxxxxxxxxxx>)
- *         Fix for bioses that don't zero the top part of the
- *         entrypoint offset (Mario Sitta <sitta@xxxxxxxxxxxx>)
- *         (reported by Panos Katsaloulis <teras@xxxxxxxxxxx>).
- *         Real mode power off patch (Walter Hofmann
- *         <Walter.Hofmann@xxxxxxxxxxxxxxxxxxxxxxxxxxx>).
- *   1.12: Remove CONFIG_SMP as the compiler will optimize
- *         the code away anyway (smp_num_cpus == 1 in UP)
- *         noted by Artur Skawina <skawina@xxxxxxxxxxxxx>.
- *         Make power off under SMP work again.
- *         Fix thinko with initial engaging of BIOS.
- *         Make sure power off only happens on CPU 0
- *         (Paul "Rusty" Russell <rusty@xxxxxxxxxxxxxxx>).
- *         Do error notification to user mode if BIOS calls fail.
- *         Move entrypoint offset fix to ...boot/setup.S
- *         where it belongs (Cosmos <gis88564@xxxxxxxxxxxxxxx>).
- *         Remove smp-power-off. SMP users must now specify
- *         "apm=power-off" on the kernel command line. Suggested
- *         by Jim Avera <jima@xxxxxxx>, modified by Alan Cox
- *         <alan@xxxxxxxxxxxxxxxxxxx>.
- *         Register the /proc/apm entry even on SMP so that
- *         scripts that check for it before doing power off
- *         work (Jim Avera <jima@xxxxxxx>).
- *   1.13: Changes for new pm_ interfaces (Andy Henroid
- *         <andy_henroid@xxxxxxxxx>).
- *         Modularize the code.
- *         Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS
- *         is now the way life works).
- *         Fix thinko in suspend() (wrong return).
- *         Notify drivers on critical suspend.
- *         Make kapmd absorb more idle time (Pavel Machek <pavel@xxxxxxx>
- *         modified by sfr).
- *         Disable interrupts while we are suspended (Andy Henroid
- *         <andy_henroid@xxxxxxxxx> fixed by sfr).
- *         Make power off work on SMP again (Tony Hoyle
- *         <tmh@xxxxxxxxxxxxxxxxx> and <zlatko@xxxxxxxx>) modified by sfr.
- *         Remove CONFIG_APM_SUSPEND_BOUNCE.  The bounce ignore
- *         interval is now configurable.
- *   1.14: Make connection version persist across module unload/load.
- *         Enable and engage power management earlier.
- *         Disengage power management on module unload.
- *         Changed to use the sysrq-register hack for registering the
- *         power off function called by magic sysrq based upon discussions
- *         in irc://irc.openprojects.net/#kernelnewbies
- *         (Crutcher Dunnavant <crutcher+kernel@xxxxxxxxxxxxxx>).
- *         Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable.
- *         (Arjan van de Ven <arjanv@xxxxxxxxxx>) modified by sfr.
- *         Work around byte swap bug in one of the Vaio's BIOS's
- *         (Marc Boucher <marc@xxxxxxx>).
- *         Exposed the disable flag to dmi so that we can handle known
- *         broken APM (Alan Cox <alan@xxxxxxxxxx>).
- *   1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
- *         calling it - instead idle. (Alan Cox <alan@xxxxxxxxxx>)
- *         If an APM idle fails log it and idle sensibly
- *   1.15: Don't queue events to clients who open the device O_WRONLY.
- *         Don't expect replies from clients who open the device O_RDONLY.
- *         (Idea from Thomas Hood)
- *         Minor waitqueue cleanups. (John Fremlin <chief@xxxxxxxxxxx>)
- *   1.16: Fix idle calling. (Andreas Steinmetz <ast@xxxxxxxx> et al.)
- *         Notify listeners of standby or suspend events before notifying
- *         drivers. Return EBUSY to ioctl() if suspend is rejected.
- *         (Russell King <rmk@xxxxxxxxxxxxxxxx> and Thomas Hood)
- *         Ignore first resume after we generate our own resume event
- *         after a suspend (Thomas Hood)
- *         Daemonize now gets rid of our controlling terminal (sfr).
- *         CONFIG_APM_CPU_IDLE now just affects the default value of
- *         idle_threshold (sfr).
- *         Change name of kernel apm daemon (as it no longer idles) (sfr).
- *   1.16ac: Fix up SMP support somewhat. You can now force SMP on and we
- *        make _all_ APM calls on the CPU#0. Fix unsafe sign bug.
- *        TODO: determine if it's "boot CPU" or "CPU0" we want to lock to.
- *
- * APM 1.1 Reference:
- *
- *   Intel Corporation, Microsoft Corporation. Advanced Power Management
- *   (APM) BIOS Interface Specification, Revision 1.1, September 1993.
- *   Intel Order Number 241704-001.  Microsoft Part Number 781-110-X01.
- *
- * [This document is available free from Intel by calling 800.628.8686 (fax
- * 916.356.6100) or 800.548.4725; or via anonymous ftp from
- * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc.  It is also
- * available from Microsoft by calling 206.882.8080.]
- *
- * APM 1.2 Reference:
- *   Intel Corporation, Microsoft Corporation. Advanced Power Management
- *   (APM) BIOS Interface Specification, Revision 1.2, February 1996.
- *
- * [This document is available from Microsoft at:
- *    http://www.microsoft.com/hwdev/busbios/amp_12.htm]
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/poll.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/timer.h>
-#include <linux/fcntl.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/proc_fs.h>
-#include <linux/miscdevice.h>
-#include <linux/apm_bios.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/sched.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/dmi.h>
-#include <linux/suspend.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-extern unsigned long get_cmos_time(void);
-extern void machine_real_restart(unsigned char *, int);
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-extern int (*console_blank_hook)(int);
-#endif
-
-/*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define        APM_MINOR_DEV   134
-
-/*
- * See Documentation/Config.help for the configuration options.
- *
- * Various options can be changed at boot time as follows:
- * (We allow underscores for compatibility with the modules code)
- *     apm=on/off                      enable/disable APM
- *         [no-]allow[-_]ints          allow interrupts during BIOS calls
- *         [no-]broken[-_]psr          BIOS has a broken GetPowerStatus call
- *         [no-]realmode[-_]power[-_]off       switch to real mode before
- *                                             powering off
- *         [no-]debug                  log some debugging messages
- *         [no-]power[-_]off           power off on shutdown
- *         [no-]smp                    Use apm even on an SMP box
- *         bounce[-_]interval=<n>      number of ticks to ignore suspend
- *                                     bounces
- *          idle[-_]threshold=<n>       System idle percentage above which to
- *                                      make APM BIOS idle calls. Set it to
- *                                      100 to disable.
- *          idle[-_]period=<n>          Period (in 1/100s of a second) over
- *                                      which the idle percentage is
- *                                      calculated.
- */
-
-/* KNOWN PROBLEM MACHINES:
- *
- * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant
- *                         [Confirmed by TI representative]
- * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification
- *                    [Confirmed by BIOS disassembly]
- *                    [This may work now ...]
- * P: Toshiba 1950S: battery life information only gets updated after resume
- * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking
- *     broken in BIOS [Reported by Garst R. Reese <reese@xxxxxxx>]
- * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP
- *     Neale Banks <neale@xxxxxxxxxxxxxxxx> December 2000
- *
- * Legend: U = unusable with APM patches
- *         P = partially usable with APM patches
- */
-
-/*
- * Define as 1 to make the driver always call the APM BIOS busy
- * routine even if the clock was not reported as slowed by the
- * idle routine.  Otherwise, define as 0.
- */
-#define ALWAYS_CALL_BUSY   1
-
-/*
- * Define to make the APM BIOS calls zero all data segment registers (so
- * that an incorrect BIOS implementation will cause a kernel panic if it
- * tries to write to arbitrary memory).
- */
-#define APM_ZERO_SEGS
-
-#include "apm.h"
-
-/*
- * Define to make all _set_limit calls use 64k limits.  The APM 1.1 BIOS is
- * supposed to provide limit information that it recognizes.  Many machines
- * do this correctly, but many others do not restrict themselves to their
- * claimed limit.  When this happens, they will cause a segmentation
- * violation in the kernel at boot time.  Most BIOS's, however, will
- * respect a 64k limit, so we use that.  If you want to be pedantic and
- * hold your BIOS to its claims, then undefine this.
- */
-#define APM_RELAX_SEGMENTS
-
-/*
- * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
- * This patched by Chad Miller <cmiller@xxxxxxxxxxxxx>, original code by
- * David Chen <chen@xxxxxxxxxxxxxx>
- */
-#undef INIT_TIMER_AFTER_SUSPEND
-
-#ifdef INIT_TIMER_AFTER_SUSPEND
-#include <linux/timex.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#endif
-
-/*
- * Need to poll the APM BIOS every second
- */
-#define APM_CHECK_TIMEOUT      (HZ)
-
-/*
- * Ignore suspend events for this amount of time after a resume
- */
-#define DEFAULT_BOUNCE_INTERVAL                (3 * HZ)
-
-/*
- * Maximum number of events stored
- */
-#define APM_MAX_EVENTS         20
-
-/*
- * The per-file APM data
- */
-struct apm_user {
-       int             magic;
-       struct apm_user *       next;
-       unsigned int    suser: 1;
-       unsigned int    writer: 1;
-       unsigned int    reader: 1;
-       unsigned int    suspend_wait: 1;
-       int             suspend_result;
-       int             suspends_pending;
-       int             standbys_pending;
-       int             suspends_read;
-       int             standbys_read;
-       int             event_head;
-       int             event_tail;
-       apm_event_t     events[APM_MAX_EVENTS];
-};
-
-/*
- * The magic number in apm_user
- */
-#define APM_BIOS_MAGIC         0x4101
-
-/*
- * idle percentage above which bios idle calls are done
- */
-#ifdef CONFIG_APM_CPU_IDLE
-#define DEFAULT_IDLE_THRESHOLD 95
-#else
-#define DEFAULT_IDLE_THRESHOLD 100
-#endif
-#define DEFAULT_IDLE_PERIOD    (100 / 3)
-
-/*
- * Local variables
- */
-static struct {
-       unsigned long   offset;
-       unsigned short  segment;
-}                              apm_bios_entry;
-static int                     clock_slowed;
-static int                     idle_threshold = DEFAULT_IDLE_THRESHOLD;
-static int                     idle_period = DEFAULT_IDLE_PERIOD;
-static int                     set_pm_idle;
-static int                     suspends_pending;
-static int                     standbys_pending;
-static int                     ignore_sys_suspend;
-static int                     ignore_normal_resume;
-static int                     bounce_interval = DEFAULT_BOUNCE_INTERVAL;
-
-#ifdef CONFIG_APM_RTC_IS_GMT
-#      define  clock_cmos_diff 0
-#      define  got_clock_diff  1
-#else
-static long                    clock_cmos_diff;
-static int                     got_clock_diff;
-#endif
-static int                     debug;
-static int                     smp;
-static int                     apm_disabled = -1;
-#ifdef CONFIG_SMP
-static int                     power_off;
-#else
-static int                     power_off = 1;
-#endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int                     realmode_power_off = 1;
-#else
-static int                     realmode_power_off;
-#endif
-static int                     exit_kapmd;
-static int                     kapmd_running;
-#ifdef CONFIG_APM_ALLOW_INTS
-static int                     allow_ints = 1;
-#else
-static int                     allow_ints;
-#endif
-static int                     broken_psr;
-
-static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
-static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
-static struct apm_user *       user_list;
-static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct      bad_bios_desc = { 0, 0x00409200 };
-
-static char                    driver_version[] = "1.16ac";    /* no spaces */
-
-/*
- *     APM event names taken from the APM 1.2 specification. These are
- *     the message codes that the BIOS uses to tell us about events
- */
-static char *  apm_event_name[] = {
-       "system standby",
-       "system suspend",
-       "normal resume",
-       "critical resume",
-       "low battery",
-       "power status change",
-       "update time",
-       "critical suspend",
-       "user standby",
-       "user suspend",
-       "system standby resume",
-       "capabilities change"
-};
-#define NR_APM_EVENT_NAME      \
-               (sizeof(apm_event_name) / sizeof(apm_event_name[0]))
-
-typedef struct lookup_t {
-       int     key;
-       char *  msg;
-} lookup_t;
-
-/*
- *     The BIOS returns a set of standard error codes in AX when the
- *     carry flag is set.
- */
- 
-static const lookup_t error_table[] = {
-/* N/A { APM_SUCCESS,          "Operation succeeded" }, */
-       { APM_DISABLED,         "Power management disabled" },
-       { APM_CONNECTED,        "Real mode interface already connected" },
-       { APM_NOT_CONNECTED,    "Interface not connected" },
-       { APM_16_CONNECTED,     "16 bit interface already connected" },
-/* N/A { APM_16_UNSUPPORTED,   "16 bit interface not supported" }, */
-       { APM_32_CONNECTED,     "32 bit interface already connected" },
-       { APM_32_UNSUPPORTED,   "32 bit interface not supported" },
-       { APM_BAD_DEVICE,       "Unrecognized device ID" },
-       { APM_BAD_PARAM,        "Parameter out of range" },
-       { APM_NOT_ENGAGED,      "Interface not engaged" },
-       { APM_BAD_FUNCTION,     "Function not supported" },
-       { APM_RESUME_DISABLED,  "Resume timer disabled" },
-       { APM_BAD_STATE,        "Unable to enter requested state" },
-/* N/A { APM_NO_EVENTS,        "No events pending" }, */
-       { APM_NO_ERROR,         "BIOS did not set a return code" },
-       { APM_NOT_PRESENT,      "No APM present" }
-};
-#define ERROR_COUNT    (sizeof(error_table)/sizeof(lookup_t))
-
-/**
- *     apm_error       -       display an APM error
- *     @str: information string
- *     @err: APM BIOS return code
- *
- *     Write a meaningful log entry to the kernel log in the event of
- *     an APM error.
- */
- 
-static void apm_error(char *str, int err)
-{
-       int     i;
-
-       for (i = 0; i < ERROR_COUNT; i++)
-               if (error_table[i].key == err) break;
-       if (i < ERROR_COUNT)
-               printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
-       else
-               printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
-                       str, err);
-}
-
-/*
- * Lock APM functionality to physical CPU 0
- */
- 
-#ifdef CONFIG_SMP
-
-static cpumask_t apm_save_cpus(void)
-{
-       cpumask_t x = current->cpus_allowed;
-       /* Some bioses don't like being called from CPU != 0 */
-       set_cpus_allowed(current, cpumask_of_cpu(0));
-       BUG_ON(smp_processor_id() != 0);
-       return x;
-}
-
-static inline void apm_restore_cpus(cpumask_t mask)
-{
-       set_cpus_allowed(current, mask);
-}
-
-#else
-
-/*
- *     No CPU lockdown needed on a uniprocessor
- */
- 
-#define apm_save_cpus()                (current->cpus_allowed)
-#define apm_restore_cpus(x)    (void)(x)
-
-#endif
-
-/*
- * These are the actual BIOS calls.  Depending on APM_ZERO_SEGS and
- * apm_info.allow_ints, we are being really paranoid here!  Not only
- * are interrupts disabled, but all the segment registers (except SS)
- * are saved and zeroed; this means that if the BIOS tries to reference
- * any data without explicitly loading the segment registers, the kernel
- * will fault immediately rather than have some unforeseen circumstances
- * for the rest of the kernel.  And it will be very obvious!  :-) Doing
- * this depends on CS referring to the same physical memory as DS so that
- * DS can be zeroed before the call. Unfortunately, we can't do anything
- * about the stack segment/pointer.  Also, we tell the compiler that
- * everything could change.
- *
- * Also, we KNOW that for the non error case of apm_bios_call, there
- * is no useful data returned in the low order 8 bits of eax.
- */
-#define APM_DO_CLI     \
-       if (apm_info.allow_ints) \
-               local_irq_enable(); \
-       else \
-               local_irq_disable();
-
-#ifdef APM_ZERO_SEGS
-#      define APM_DECL_SEGS \
-               unsigned int saved_fs; unsigned int saved_gs;
-#      define APM_DO_SAVE_SEGS \
-               savesegment(fs, saved_fs); savesegment(gs, saved_gs)
-#      define APM_DO_RESTORE_SEGS \
-               loadsegment(fs, saved_fs); loadsegment(gs, saved_gs)
-#else
-#      define APM_DECL_SEGS
-#      define APM_DO_SAVE_SEGS
-#      define APM_DO_RESTORE_SEGS
-#endif
-
-/**
- *     apm_bios_call   -       Make an APM BIOS 32bit call
- *     @func: APM function to execute
- *     @ebx_in: EBX register for call entry
- *     @ecx_in: ECX register for call entry
- *     @eax: EAX register return
- *     @ebx: EBX register return
- *     @ecx: ECX register return
- *     @edx: EDX register return
- *     @esi: ESI register return
- *
- *     Make an APM call using the 32bit protected mode interface. The
- *     caller is responsible for knowing if APM BIOS is configured and
- *     enabled. This call can disable interrupts for a long period of
- *     time on some laptops.  The return value is in AH and the carry
- *     flag is loaded into AL.  If there is an error, then the error
- *     code is returned in AH (bits 8-15 of eax) and this function
- *     returns non-zero.
- */
- 
-static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
-       u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
-{
-       APM_DECL_SEGS
-       unsigned long           flags;
-       cpumask_t               cpus;
-       int                     cpu;
-       struct desc_struct      save_desc_40;
-
-       cpus = apm_save_cpus();
-       
-       cpu = get_cpu();
-       save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-       get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
-
-       local_save_flags(flags);
-       APM_DO_CLI;
-       APM_DO_SAVE_SEGS;
-       apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
-       APM_DO_RESTORE_SEGS;
-       local_irq_restore(flags);
-       get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
-       put_cpu();
-       apm_restore_cpus(cpus);
-       
-       return *eax & 0xff;
-}
-
-/**
- *     apm_bios_call_simple    -       make a simple APM BIOS 32bit call
- *     @func: APM function to invoke
- *     @ebx_in: EBX register value for BIOS call
- *     @ecx_in: ECX register value for BIOS call
- *     @eax: EAX register on return from the BIOS call
- *
- *     Make a BIOS call that only returns one value, or just a status.
- *     If there is an error, then the error code is returned in AH
- *     (bits 8-15 of eax) and this function returns non-zero. This is
- *     used for simpler BIOS operations. This call may hold interrupts
- *     off for a long time on some laptops.
- */
-
-static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
-{
-       u8                      error;
-       APM_DECL_SEGS
-       unsigned long           flags;
-       cpumask_t               cpus;
-       int                     cpu;
-       struct desc_struct      save_desc_40;
-
-
-       cpus = apm_save_cpus();
-       
-       cpu = get_cpu();
-       save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-       get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
-
-       local_save_flags(flags);
-       APM_DO_CLI;
-       APM_DO_SAVE_SEGS;
-       error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
-       APM_DO_RESTORE_SEGS;
-       local_irq_restore(flags);
-       get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
-       put_cpu();
-       apm_restore_cpus(cpus);
-       return error;
-}
-
-/**
- *     apm_driver_version      -       APM driver version
- *     @val:   loaded with the APM version on return
- *
- *     Retrieve the APM version supported by the BIOS. This is only
- *     supported for APM 1.1 or higher. An error indicates APM 1.0 is
- *     probably present.
- *
- *     On entry val should point to a value indicating the APM driver
- *     version with the high byte being the major and the low byte the
- *     minor number both in BCD
- *
- *     On return it will hold the BIOS revision supported in the
- *     same format.
- */
-
-static int apm_driver_version(u_short *val)
-{
-       u32     eax;
-
-       if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
-               return (eax >> 8) & 0xff;
-       *val = eax;
-       return APM_SUCCESS;
-}
-
-/**
- *     apm_get_event   -       get an APM event from the BIOS
- *     @event: pointer to the event
- *     @info: point to the event information
- *
- *     The APM BIOS provides polled information for event
- *     reporting. The BIOS expects to be polled at least every second
- *     when events are pending. When a message is found the caller should
- *     poll until no more messages are present.  However, this causes
- *     problems on some laptops where a suspend event notification is
- *     not cleared until it is acknowledged.
- *
- *     Additional information is returned in the info pointer, provided
- *     that APM 1.2 is in use. If no messages are pending the value 0x80
- *     is returned (No power management events pending).
- */
- 
-static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
-{
-       u32     eax;
-       u32     ebx;
-       u32     ecx;
-       u32     dummy;
-
-       if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
-                       &dummy, &dummy))
-               return (eax >> 8) & 0xff;
-       *event = ebx;
-       if (apm_info.connection_version < 0x0102)
-               *info = ~0; /* indicate info not valid */
-       else
-               *info = ecx;
-       return APM_SUCCESS;
-}
-
-/**
- *     set_power_state -       set the power management state
- *     @what: which items to transition
- *     @state: state to transition to
- *
- *     Request an APM change of state for one or more system devices. The
- *     processor state must be transitioned last of all. what holds the
- *     class of device in the upper byte and the device number (0xFF for
- *     all) for the object to be transitioned.
- *
- *     The state holds the state to transition to, which may in fact
- *     be an acceptance of a BIOS requested state change.
- */
- 
-static int set_power_state(u_short what, u_short state)
-{
-       u32     eax;
-
-       if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
-               return (eax >> 8) & 0xff;
-       return APM_SUCCESS;
-}
-
-/**
- *     set_system_power_state - set system wide power state
- *     @state: which state to enter
- *
- *     Transition the entire system into a new APM power state.
- */
- 
-static int set_system_power_state(u_short state)
-{
-       return set_power_state(APM_DEVICE_ALL, state);
-}
-
-/**
- *     apm_do_idle     -       perform power saving
- *
- *     This function notifies the BIOS that the processor is (in the view
- *     of the OS) idle. It returns -1 in the event that the BIOS refuses
- *     to handle the idle request. On a success the function returns 1
- *     if the BIOS did clock slowing or 0 otherwise.
- */
- 
-static int apm_do_idle(void)
-{
-       u32     eax;
-
-       if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
-               static unsigned long t;
-
-               /* This always fails on some SMP boards running UP kernels.
-                * Only report the failure the first 5 times.
-                */
-               if (++t < 5)
-               {
-                       printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
-                                       (eax >> 8) & 0xff);
-                       t = jiffies;
-               }
-               return -1;
-       }
-       clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0;
-       return clock_slowed;
-}
-
-/**
- *     apm_do_busy     -       inform the BIOS the CPU is busy
- *
- *     Request that the BIOS brings the CPU back to full performance. 
- */
- 
-static void apm_do_busy(void)
-{
-       u32     dummy;
-
-       if (clock_slowed || ALWAYS_CALL_BUSY) {
-               (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
-               clock_slowed = 0;
-       }
-}
-
-/*
- * If no process has really been interested in
- * the CPU for some time, we want to call BIOS
- * power management - we probably want
- * to conserve power.
- */
-#define IDLE_CALC_LIMIT   (HZ * 100)
-#define IDLE_LEAKY_MAX    16
-
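As a concrete illustration of the thresholds used by apm_cpu_idle() below:
with the default idle_period of 100/3 = 33 ticks, if the task running the
idle loop accumulated, say, 32 ticks of system time since the last check,
the computed idle percentage is (32 * 100) / 33 = 96 (integer division),
which is above the default idle_threshold of 95 (the CONFIG_APM_CPU_IDLE
default), so APM BIOS idle calls are enabled for the next interval.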
-static void (*original_pm_idle)(void);
-
-extern void default_idle(void);
-
-/**
- * apm_cpu_idle                -       cpu idling for APM capable Linux
- *
- * This is the idling function the kernel executes when APM is available. It 
- * tries to do BIOS powermanagement based on the average system idle time.
- * Furthermore it calls the system default idle routine.
- */
-
-static void apm_cpu_idle(void)
-{
-       static int use_apm_idle; /* = 0 */
-       static unsigned int last_jiffies; /* = 0 */
-       static unsigned int last_stime; /* = 0 */
-
-       int apm_idle_done = 0;
-       unsigned int jiffies_since_last_check = jiffies - last_jiffies;
-       unsigned int bucket;
-
-recalc:
-       if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
-               use_apm_idle = 0;
-               last_jiffies = jiffies;
-               last_stime = current->stime;
-       } else if (jiffies_since_last_check > idle_period) {
-               unsigned int idle_percentage;
-
-               idle_percentage = current->stime - last_stime;
-               idle_percentage *= 100;
-               idle_percentage /= jiffies_since_last_check;
-               use_apm_idle = (idle_percentage > idle_threshold);
-               if (apm_info.forbid_idle)
-                       use_apm_idle = 0;
-               last_jiffies = jiffies;
-               last_stime = current->stime;
-       }
-
-       bucket = IDLE_LEAKY_MAX;
-
-       while (!need_resched()) {
-               if (use_apm_idle) {
-                       unsigned int t;
-
-                       t = jiffies;
-                       switch (apm_do_idle()) {
-                       case 0: apm_idle_done = 1;
-                               if (t != jiffies) {
-                                       if (bucket) {
-                                               bucket = IDLE_LEAKY_MAX;
-                                               continue;
-                                       }
-                               } else if (bucket) {
-                                       bucket--;
-                                       continue;
-                               }
-                               break;
-                       case 1: apm_idle_done = 1;
-                               break;
-                       default: /* BIOS refused */
-                               break;
-                       }
-               }
-               if (original_pm_idle)
-                       original_pm_idle();
-               else
-                       default_idle();
-               jiffies_since_last_check = jiffies - last_jiffies;
-               if (jiffies_since_last_check > idle_period)
-                       goto recalc;
-       }
-
-       if (apm_idle_done)
-               apm_do_busy();
-}
-
-/**
- *     apm_power_off   -       ask the BIOS to power off
- *
- *     Handle the power off sequence. This is the one piece of code we
- *     will execute even on SMP machines. In order to deal with BIOS
- *     bugs we support real mode APM BIOS power off calls. We also make
- *     the SMP call on CPU0 as some systems will only honour this call
- *     on their first cpu.
- */
- 
-static void apm_power_off(void)
-{
-       unsigned char   po_bios_call[] = {
-               0xb8, 0x00, 0x10,       /* movw  $0x1000,ax  */
-               0x8e, 0xd0,             /* movw  ax,ss       */
-               0xbc, 0x00, 0xf0,       /* movw  $0xf000,sp  */
-               0xb8, 0x07, 0x53,       /* movw  $0x5307,ax  */
-               0xbb, 0x01, 0x00,       /* movw  $0x0001,bx  */
-               0xb9, 0x03, 0x00,       /* movw  $0x0003,cx  */
-               0xcd, 0x15              /* int   $0x15       */
-       };
-
-       /* Some bioses don't like being called from CPU != 0 */
-       if (apm_info.realmode_power_off)
-       {
-               (void)apm_save_cpus();
-               machine_real_restart(po_bios_call, sizeof(po_bios_call));
-       }
-       else
-               (void) set_system_power_state(APM_STATE_OFF);
-}
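For reference, the real-mode stub above issues the APM "Set Power State"
BIOS service: int 0x15 with ax = 0x5307 (Set Power State), bx = 0x0001
(all APM-managed devices) and cx = 0x0003 (off), i.e. the same request
that set_system_power_state(APM_STATE_OFF) makes through the protected
mode interface.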
-
-#ifdef CONFIG_APM_DO_ENABLE
-
-/**
- *     apm_enable_power_management - enable BIOS APM power management
- *     @enable: enable yes/no
- *
- *     Enable or disable the APM BIOS power services. 
- */
- 
-static int apm_enable_power_management(int enable)
-{
-       u32     eax;
-
-       if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
-               return APM_NOT_ENGAGED;
-       if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
-                       enable, &eax))
-               return (eax >> 8) & 0xff;
-       if (enable)
-               apm_info.bios.flags &= ~APM_BIOS_DISABLED;
-       else
-               apm_info.bios.flags |= APM_BIOS_DISABLED;
-       return APM_SUCCESS;
-}
-#endif
-
-/**
- *     apm_get_power_status    -       get current power state
- *     @status: returned status
- *     @bat: battery info
- *     @life: estimated life
- *
- *     Obtain the current power status from the APM BIOS. We return a
- *     status which gives the rough battery status, and current power
- *     source. The bat value returned gives an estimate as a percentage
- *     of life and a status value for the battery. The estimated life,
- *     if reported, is a lifetime in seconds/minutes at current power
- *     consumption.
- */
- 
-static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
-{
-       u32     eax;
-       u32     ebx;
-       u32     ecx;
-       u32     edx;
-       u32     dummy;
-
-       if (apm_info.get_power_status_broken)
-               return APM_32_UNSUPPORTED;
-       if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
-                       &eax, &ebx, &ecx, &edx, &dummy))
-               return (eax >> 8) & 0xff;
-       *status = ebx;
-       *bat = ecx;
-       if (apm_info.get_power_status_swabinminutes) {
-               *life = swab16((u16)edx);
-               *life |= 0x8000;
-       } else
-               *life = edx;
-       return APM_SUCCESS;
-}
-
-#if 0
-static int apm_get_battery_status(u_short which, u_short *status,
-                                 u_short *bat, u_short *life, u_short *nbat)
-{
-       u32     eax;
-       u32     ebx;
-       u32     ecx;
-       u32     edx;
-       u32     esi;
-
-       if (apm_info.connection_version < 0x0102) {
-               /* pretend we only have one battery. */
-               if (which != 1)
-                       return APM_BAD_DEVICE;
-               *nbat = 1;
-               return apm_get_power_status(status, bat, life);
-       }
-
-       if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
-                       &ebx, &ecx, &edx, &esi))
-               return (eax >> 8) & 0xff;
-       *status = ebx;
-       *bat = ecx;
-       *life = edx;
-       *nbat = esi;
-       return APM_SUCCESS;
-}
-#endif
-
-/**
- *     apm_engage_power_management     -       enable PM on a device
- *     @device: identity of device
- *     @enable: on/off
- *
- *     Activate or deactivate power management on either a specific device
- *     or the entire system (%APM_DEVICE_ALL).
- */
- 
-static int apm_engage_power_management(u_short device, int enable)
-{
-       u32     eax;
-
-       if ((enable == 0) && (device == APM_DEVICE_ALL)
-           && (apm_info.bios.flags & APM_BIOS_DISABLED))
-               return APM_DISABLED;
-       if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
-               return (eax >> 8) & 0xff;
-       if (device == APM_DEVICE_ALL) {
-               if (enable)
-                       apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
-               else
-                       apm_info.bios.flags |= APM_BIOS_DISENGAGED;
-       }
-       return APM_SUCCESS;
-}
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-
-/**
- *     apm_console_blank       -       blank the display
- *     @blank: on/off
- *
- *     Attempt to blank the console, firstly by blanking just video device
- *     zero, and if that fails (some BIOSes don't support it) then it blanks
- *     all video devices. Typically the BIOS will do laptop backlight and
- *     monitor powerdown for us.
- */
- 
-static int apm_console_blank(int blank)
-{
-       int     error;
-       u_short state;
-
-       state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
-       /* Blank the first display device */
-       error = set_power_state(0x100, state);
-       if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
-               /* try to blank them all instead */
-               error = set_power_state(0x1ff, state);
-               if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
-                       /* try to blank device one instead */
-                       error = set_power_state(0x101, state);
-       }
-       if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
-               return 1;
-       if (error == APM_NOT_ENGAGED) {
-               static int tried;
-               int eng_error;
-               if (tried++ == 0) {
-                       eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1);
-                       if (eng_error) {
-                               apm_error("set display", error);
-                               apm_error("engage interface", eng_error);
-                               return 0;
-                       } else
-                               return apm_console_blank(blank);
-               }
-       }
-       apm_error("set display", error);
-       return 0;
-}
-#endif
-
-static int queue_empty(struct apm_user *as)
-{
-       return as->event_head == as->event_tail;
-}
-
-static apm_event_t get_queued_event(struct apm_user *as)
-{
-       as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
-       return as->events[as->event_tail];
-}
-
-static void queue_event(apm_event_t event, struct apm_user *sender)
-{
-       struct apm_user *       as;
-
-       spin_lock(&user_list_lock);
-       if (user_list == NULL)
-               goto out;
-       for (as = user_list; as != NULL; as = as->next) {
-               if ((as == sender) || (!as->reader))
-                       continue;
-               as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
-               if (as->event_head == as->event_tail) {
-                       static int notified;
-
-                       if (notified++ == 0)
-                           printk(KERN_ERR "apm: an event queue overflowed\n");
-                       as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
-               }
-               as->events[as->event_head] = event;
-               if ((!as->suser) || (!as->writer))
-                       continue;
-               switch (event) {
-               case APM_SYS_SUSPEND:
-               case APM_USER_SUSPEND:
-                       as->suspends_pending++;
-                       suspends_pending++;
-                       break;
-
-               case APM_SYS_STANDBY:
-               case APM_USER_STANDBY:
-                       as->standbys_pending++;
-                       standbys_pending++;
-                       break;
-               }
-       }
-       wake_up_interruptible(&apm_waitqueue);
-out:
-       spin_unlock(&user_list_lock);
-}
-
-static void set_time(void)
-{
-       if (got_clock_diff) {   /* Must know time zone in order to set clock */
-               xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
-               xtime.tv_nsec = 0; 
-       } 
-}
-
-static void get_time_diff(void)
-{
-#ifndef CONFIG_APM_RTC_IS_GMT
-       /*
-        * Estimate time zone so that set_time can update the clock
-        */
-       clock_cmos_diff = -get_cmos_time();
-       clock_cmos_diff += get_seconds();
-       got_clock_diff = 1;
-#endif
-}
-
-static void reinit_timer(void)
-{
-#ifdef INIT_TIMER_AFTER_SUSPEND
-       unsigned long flags;
-
-       spin_lock_irqsave(&i8253_lock, flags);
-       /* set the clock to 100 Hz */
-       outb_p(0x34, PIT_MODE);         /* binary, mode 2, LSB/MSB, ch 0 */
-       udelay(10);
-       outb_p(LATCH & 0xff, PIT_CH0);  /* LSB */
-       udelay(10);
-       outb(LATCH >> 8, PIT_CH0);      /* MSB */
-       udelay(10);
-       spin_unlock_irqrestore(&i8253_lock, flags);
-#endif
-}
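The LATCH value programmed above is the usual PIT divisor, conventionally
defined as ((CLOCK_TICK_RATE + HZ/2) / HZ); with the i386 PIT input clock
of 1193182 Hz and HZ = 100 that works out to 11932, which is what yields
the 100 Hz tick mentioned in the comment.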
-
-static int suspend(int vetoable)
-{
-       int             err;
-       struct apm_user *as;
-
-       if (pm_send_all(PM_SUSPEND, (void *)3)) {
-               /* Vetoed */
-               if (vetoable) {
-                       if (apm_info.connection_version > 0x100)
-                               set_system_power_state(APM_STATE_REJECT);
-                       err = -EBUSY;
-                       ignore_sys_suspend = 0;
-                       printk(KERN_WARNING "apm: suspend was vetoed.\n");
-                       goto out;
-               }
-               printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
-       }
-
-       device_suspend(PMSG_SUSPEND);
-       local_irq_disable();
-       device_power_down(PMSG_SUSPEND);
-
-       /* serialize with the timer interrupt */
-       write_seqlock(&xtime_lock);
-
-       /* protect against access to timer chip registers */
-       spin_lock(&i8253_lock);
-
-       get_time_diff();
-       /*
-        * Irq spinlock must be dropped around set_system_power_state.
-        * We'll undo any timer changes due to interrupts below.
-        */
-       spin_unlock(&i8253_lock);
-       write_sequnlock(&xtime_lock);
-       local_irq_enable();
-
-       save_processor_state();
-       err = set_system_power_state(APM_STATE_SUSPEND);
-       ignore_normal_resume = 1;
-       restore_processor_state();
-
-       local_irq_disable();
-       write_seqlock(&xtime_lock);
-       spin_lock(&i8253_lock);
-       reinit_timer();
-       set_time();
-
-       spin_unlock(&i8253_lock);
-       write_sequnlock(&xtime_lock);
-
-       if (err == APM_NO_ERROR)
-               err = APM_SUCCESS;
-       if (err != APM_SUCCESS)
-               apm_error("suspend", err);
-       err = (err == APM_SUCCESS) ? 0 : -EIO;
-       device_power_up();
-       local_irq_enable();
-       device_resume();
-       pm_send_all(PM_RESUME, (void *)0);
-       queue_event(APM_NORMAL_RESUME, NULL);
- out:
-       spin_lock(&user_list_lock);
-       for (as = user_list; as != NULL; as = as->next) {
-               as->suspend_wait = 0;
-               as->suspend_result = err;
-       }
-       spin_unlock(&user_list_lock);
-       wake_up_interruptible(&apm_suspend_waitqueue);
-       return err;
-}
-
-static void standby(void)
-{
-       int     err;
-
-       local_irq_disable();
-       device_power_down(PMSG_SUSPEND);
-       /* serialize with the timer interrupt */
-       write_seqlock(&xtime_lock);
-       /* If needed, notify drivers here */
-       get_time_diff();
-       write_sequnlock(&xtime_lock);
-       local_irq_enable();
-
-       err = set_system_power_state(APM_STATE_STANDBY);
-       if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
-               apm_error("standby", err);
-
-       local_irq_disable();
-       device_power_up();
-       local_irq_enable();
-}
-
-static apm_event_t get_event(void)
-{
-       int             error;
-       apm_event_t     event;
-       apm_eventinfo_t info;
-
-       static int notified;
-
-       /* we don't use the eventinfo */
-       error = apm_get_event(&event, &info);
-       if (error == APM_SUCCESS)
-               return event;
-
-       if ((error != APM_NO_EVENTS) && (notified++ == 0))
-               apm_error("get_event", error);
-
-       return 0;
-}
-
-static void check_events(void)
-{
-       apm_event_t             event;
-       static unsigned long    last_resume;
-       static int              ignore_bounce;
-
-       while ((event = get_event()) != 0) {
-               if (debug) {
-                       if (event <= NR_APM_EVENT_NAME)
-                               printk(KERN_DEBUG "apm: received %s notify\n",
-                                      apm_event_name[event - 1]);
-                       else
-                               printk(KERN_DEBUG "apm: received unknown "
-                                      "event 0x%02x\n", event);
-               }
-               if (ignore_bounce
-                   && ((jiffies - last_resume) > bounce_interval))
-                       ignore_bounce = 0;
-
-               switch (event) {
-               case APM_SYS_STANDBY:
-               case APM_USER_STANDBY:
-                       queue_event(event, NULL);
-                       if (standbys_pending <= 0)
-                               standby();
-                       break;
-
-               case APM_USER_SUSPEND:
-#ifdef CONFIG_APM_IGNORE_USER_SUSPEND
-                       if (apm_info.connection_version > 0x100)
-                               set_system_power_state(APM_STATE_REJECT);
-                       break;
-#endif
-               case APM_SYS_SUSPEND:
-                       if (ignore_bounce) {
-                               if (apm_info.connection_version > 0x100)
-                                       set_system_power_state(APM_STATE_REJECT);
-                               break;
-                       }
-                       /*
-                        * If we are already processing a SUSPEND,
-                        * then further SUSPEND events from the BIOS
-                        * will be ignored.  We also return here to
-                        * cope with the fact that the Thinkpads keep
-                        * sending a SUSPEND event until something else
-                        * happens!
-                        */
-                       if (ignore_sys_suspend)
-                               return;
-                       ignore_sys_suspend = 1;
-                       queue_event(event, NULL);
-                       if (suspends_pending <= 0)
-                               (void) suspend(1);
-                       break;
-
-               case APM_NORMAL_RESUME:
-               case APM_CRITICAL_RESUME:
-               case APM_STANDBY_RESUME:
-                       ignore_sys_suspend = 0;
-                       last_resume = jiffies;
-                       ignore_bounce = 1;
-                       if ((event != APM_NORMAL_RESUME)
-                           || (ignore_normal_resume == 0)) {
-                               write_seqlock_irq(&xtime_lock);
-                               set_time();
-                               write_sequnlock_irq(&xtime_lock);
-                               device_resume();
-                               pm_send_all(PM_RESUME, (void *)0);
-                               queue_event(event, NULL);
-                       }
-                       ignore_normal_resume = 0;
-                       break;
-
-               case APM_CAPABILITY_CHANGE:
-               case APM_LOW_BATTERY:
-               case APM_POWER_STATUS_CHANGE:
-                       queue_event(event, NULL);
-                       /* If needed, notify drivers here */
-                       break;
-
-               case APM_UPDATE_TIME:
-                       write_seqlock_irq(&xtime_lock);
-                       set_time();
-                       write_sequnlock_irq(&xtime_lock);
-                       break;
-
-               case APM_CRITICAL_SUSPEND:
-                       /*
-                        * We are not allowed to reject a critical suspend.
-                        */
-                       (void) suspend(0);
-                       break;
-               }
-       }
-}
-
-static void apm_event_handler(void)
-{
-       static int      pending_count = 4;
-       int             err;
-
-       if ((standbys_pending > 0) || (suspends_pending > 0)) {
-               if ((apm_info.connection_version > 0x100) &&
-                               (pending_count-- <= 0)) {
-                       pending_count = 4;
-                       if (debug)
-                               printk(KERN_DEBUG "apm: setting state busy\n");
-                       err = set_system_power_state(APM_STATE_BUSY);
-                       if (err)
-                               apm_error("busy", err);
-               }
-       } else
-               pending_count = 4;
-       check_events();
-}
-
-/*
- * This is the APM thread main loop.
- */
-
-static void apm_mainloop(void)
-{
-       DECLARE_WAITQUEUE(wait, current);
-
-       add_wait_queue(&apm_waitqueue, &wait);
-       set_current_state(TASK_INTERRUPTIBLE);
-       for (;;) {
-               schedule_timeout(APM_CHECK_TIMEOUT);
-               if (exit_kapmd)
-                       break;
-               /*
-                * Ok, check all events, check for idle (and mark us sleeping
-                * so as not to count towards the load average)..
-                */
-               set_current_state(TASK_INTERRUPTIBLE);
-               apm_event_handler();
-       }
-       remove_wait_queue(&apm_waitqueue, &wait);
-}
-
-static int check_apm_user(struct apm_user *as, const char *func)
-{
-       if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
-               printk(KERN_ERR "apm: %s passed bad filp\n", func);
-               return 1;
-       }
-       return 0;
-}
-
-static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
-{
-       struct apm_user *       as;
-       int                     i;
-       apm_event_t             event;
-
-       as = fp->private_data;
-       if (check_apm_user(as, "read"))
-               return -EIO;
-       if ((int)count < sizeof(apm_event_t))
-               return -EINVAL;
-       if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK))
-               return -EAGAIN;
-       wait_event_interruptible(apm_waitqueue, !queue_empty(as));
-       i = count;
-       while ((i >= sizeof(event)) && !queue_empty(as)) {
-               event = get_queued_event(as);
-               if (copy_to_user(buf, &event, sizeof(event))) {
-                       if (i < count)
-                               break;
-                       return -EFAULT;
-               }
-               switch (event) {
-               case APM_SYS_SUSPEND:
-               case APM_USER_SUSPEND:
-                       as->suspends_read++;
-                       break;
-
-               case APM_SYS_STANDBY:
-               case APM_USER_STANDBY:
-                       as->standbys_read++;
-                       break;
-               }
-               buf += sizeof(event);
-               i -= sizeof(event);
-       }
-       if (i < count)
-               return count - i;
-       if (signal_pending(current))
-               return -ERESTARTSYS;
-       return 0;
-}
-
-static unsigned int do_poll(struct file *fp, poll_table * wait)
-{
-       struct apm_user * as;
-
-       as = fp->private_data;
-       if (check_apm_user(as, "poll"))
-               return 0;
-       poll_wait(fp, &apm_waitqueue, wait);
-       if (!queue_empty(as))
-               return POLLIN | POLLRDNORM;
-       return 0;
-}
-
-static int do_ioctl(struct inode * inode, struct file *filp,
-                   u_int cmd, u_long arg)
-{
-       struct apm_user *       as;
-
-       as = filp->private_data;
-       if (check_apm_user(as, "ioctl"))
-               return -EIO;
-       if ((!as->suser) || (!as->writer))
-               return -EPERM;
-       switch (cmd) {
-       case APM_IOC_STANDBY:
-               if (as->standbys_read > 0) {
-                       as->standbys_read--;
-                       as->standbys_pending--;
-                       standbys_pending--;
-               } else
-                       queue_event(APM_USER_STANDBY, as);
-               if (standbys_pending <= 0)
-                       standby();
-               break;
-       case APM_IOC_SUSPEND:
-               if (as->suspends_read > 0) {
-                       as->suspends_read--;
-                       as->suspends_pending--;
-                       suspends_pending--;
-               } else
-                       queue_event(APM_USER_SUSPEND, as);
-               if (suspends_pending <= 0) {
-                       return suspend(1);
-               } else {
-                       as->suspend_wait = 1;
-                       wait_event_interruptible(apm_suspend_waitqueue,
-                                       as->suspend_wait == 0);
-                       return as->suspend_result;
-               }
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int do_release(struct inode * inode, struct file * filp)
-{
-       struct apm_user *       as;
-
-       as = filp->private_data;
-       if (check_apm_user(as, "release"))
-               return 0;
-       filp->private_data = NULL;
-       if (as->standbys_pending > 0) {
-               standbys_pending -= as->standbys_pending;
-               if (standbys_pending <= 0)
-                       standby();
-       }
-       if (as->suspends_pending > 0) {
-               suspends_pending -= as->suspends_pending;
-               if (suspends_pending <= 0)
-                       (void) suspend(1);
-       }
-       spin_lock(&user_list_lock);
-       if (user_list == as)
-               user_list = as->next;
-       else {
-               struct apm_user *       as1;
-
-               for (as1 = user_list;
-                    (as1 != NULL) && (as1->next != as);
-                    as1 = as1->next)
-                       ;
-               if (as1 == NULL)
-                       printk(KERN_ERR "apm: filp not in user list\n");
-               else
-                       as1->next = as->next;
-       }
-       spin_unlock(&user_list_lock);
-       kfree(as);
-       return 0;
-}
-
-static int do_open(struct inode * inode, struct file * filp)
-{
-       struct apm_user *       as;
-
-       as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
-       if (as == NULL) {
-               printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
-                      sizeof(*as));
-               return -ENOMEM;
-       }
-       as->magic = APM_BIOS_MAGIC;
-       as->event_tail = as->event_head = 0;
-       as->suspends_pending = as->standbys_pending = 0;
-       as->suspends_read = as->standbys_read = 0;
-       /*
-        * XXX - this is a tiny bit broken, when we consider BSD
-         * process accounting. If the device is opened by root, we
-        * instantly flag that we used superuser privs. Who knows,
-        * we might close the device immediately without doing a
-        * privileged operation -- cevans
-        */
-       as->suser = capable(CAP_SYS_ADMIN);
-       as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
-       as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
-       spin_lock(&user_list_lock);
-       as->next = user_list;
-       user_list = as;
-       spin_unlock(&user_list_lock);
-       filp->private_data = as;
-       return 0;
-}
-
-static int apm_get_info(char *buf, char **start, off_t fpos, int length)
-{
-       char *          p;
-       unsigned short  bx;
-       unsigned short  cx;
-       unsigned short  dx;
-       int             error;
-       unsigned short  ac_line_status = 0xff;
-       unsigned short  battery_status = 0xff;
-       unsigned short  battery_flag   = 0xff;
-       int             percentage     = -1;
-       int             time_units     = -1;
-       char            *units         = "?";
-
-       p = buf;
-
-       if ((num_online_cpus() == 1) &&
-           !(error = apm_get_power_status(&bx, &cx, &dx))) {
-               ac_line_status = (bx >> 8) & 0xff;
-               battery_status = bx & 0xff;
-               if ((cx & 0xff) != 0xff)
-                       percentage = cx & 0xff;
-
-               if (apm_info.connection_version > 0x100) {
-                       battery_flag = (cx >> 8) & 0xff;
-                       if (dx != 0xffff) {
-                               units = (dx & 0x8000) ? "min" : "sec";
-                               time_units = dx & 0x7fff;
-                       }
-               }
-       }
-       /* Arguments, with symbols from linux/apm_bios.h.  Information is
-          from the Get Power Status (0x0a) call unless otherwise noted.
-
-          0) Linux driver version (this will change if format changes)
-          1) APM BIOS Version.  Usually 1.0, 1.1 or 1.2.
-          2) APM flags from APM Installation Check (0x00):
-             bit 0: APM_16_BIT_SUPPORT
-             bit 1: APM_32_BIT_SUPPORT
-             bit 2: APM_IDLE_SLOWS_CLOCK
-             bit 3: APM_BIOS_DISABLED
-             bit 4: APM_BIOS_DISENGAGED
-          3) AC line status
-             0x00: Off-line
-             0x01: On-line
-             0x02: On backup power (BIOS >= 1.1 only)
-             0xff: Unknown
-          4) Battery status
-             0x00: High
-             0x01: Low
-             0x02: Critical
-             0x03: Charging
-             0x04: Selected battery not present (BIOS >= 1.2 only)
-             0xff: Unknown
-          5) Battery flag
-             bit 0: High
-             bit 1: Low
-             bit 2: Critical
-             bit 3: Charging
-             bit 7: No system battery
-             0xff: Unknown
-          6) Remaining battery life (percentage of charge):
-             0-100: valid
-             -1: Unknown
-          7) Remaining battery life (time units):
-             Number of remaining minutes or seconds
-             -1: Unknown
-          8) min = minutes; sec = seconds */
-
-       p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
-                    driver_version,
-                    (apm_info.bios.version >> 8) & 0xff,
-                    apm_info.bios.version & 0xff,
-                    apm_info.bios.flags,
-                    ac_line_status,
-                    battery_status,
-                    battery_flag,
-                    percentage,
-                    time_units,
-                    units);
-
-       return p - buf;
-}
-
-static int apm(void *unused)
-{
-       unsigned short  bx;
-       unsigned short  cx;
-       unsigned short  dx;
-       int             error;
-       char *          power_stat;
-       char *          bat_stat;
-
-       kapmd_running = 1;
-
-       daemonize("kapmd");
-
-       current->flags |= PF_NOFREEZE;
-
-#ifdef CONFIG_SMP
-       /* 2002/08/01 - WT
-        * This is to avoid random crashes at boot time during initialization
-        * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
-        * Some bioses don't like being called from CPU != 0.
-        * Method suggested by Ingo Molnar.
-        */
-       set_cpus_allowed(current, cpumask_of_cpu(0));
-       BUG_ON(smp_processor_id() != 0);
-#endif
-
-       if (apm_info.connection_version == 0) {
-               apm_info.connection_version = apm_info.bios.version;
-               if (apm_info.connection_version > 0x100) {
-                       /*
-                        * We only support BIOSs up to version 1.2
-                        */
-                       if (apm_info.connection_version > 0x0102)
-                               apm_info.connection_version = 0x0102;
-                       error = apm_driver_version(&apm_info.connection_version);
-                       if (error != APM_SUCCESS) {
-                               apm_error("driver version", error);
-                               /* Fall back to an APM 1.0 connection. */
-                               apm_info.connection_version = 0x100;
-                       }
-               }
-       }
-
-       if (debug)
-               printk(KERN_INFO "apm: Connection version %d.%d\n",
-                       (apm_info.connection_version >> 8) & 0xff,
-                       apm_info.connection_version & 0xff);
-
-#ifdef CONFIG_APM_DO_ENABLE
-       if (apm_info.bios.flags & APM_BIOS_DISABLED) {
-               /*
-                * This call causes my NEC UltraLite Versa 33/C to hang if it
-                * is booted with PM disabled but not in the docking station.
-                * Unfortunate ...
-                */
-               error = apm_enable_power_management(1);
-               if (error) {
-                       apm_error("enable power management", error);
-                       return -1;
-               }
-       }
-#endif
-
-       if ((apm_info.bios.flags & APM_BIOS_DISENGAGED)
-           && (apm_info.connection_version > 0x0100)) {
-               error = apm_engage_power_management(APM_DEVICE_ALL, 1);
-               if (error) {
-                       apm_error("engage power management", error);
-                       return -1;
-               }
-       }
-
-       if (debug && (num_online_cpus() == 1 || smp )) {
-               error = apm_get_power_status(&bx, &cx, &dx);
-               if (error)
-                       printk(KERN_INFO "apm: power status not available\n");
-               else {
-                       switch ((bx >> 8) & 0xff) {
-                       case 0: power_stat = "off line"; break;
-                       case 1: power_stat = "on line"; break;
-                       case 2: power_stat = "on backup power"; break;
-                       default: power_stat = "unknown"; break;
-                       }
-                       switch (bx & 0xff) {
-                       case 0: bat_stat = "high"; break;
-                       case 1: bat_stat = "low"; break;
-                       case 2: bat_stat = "critical"; break;
-                       case 3: bat_stat = "charging"; break;
-                       default: bat_stat = "unknown"; break;
-                       }
-                       printk(KERN_INFO
-                              "apm: AC %s, battery status %s, battery life ",
-                              power_stat, bat_stat);
-                       if ((cx & 0xff) == 0xff)
-                               printk("unknown\n");
-                       else
-                               printk("%d%%\n", cx & 0xff);
-                       if (apm_info.connection_version > 0x100) {
-                               printk(KERN_INFO
-                                      "apm: battery flag 0x%02x, battery life ",
-                                      (cx >> 8) & 0xff);
-                               if (dx == 0xffff)
-                                       printk("unknown\n");
-                               else
-                                       printk("%d %s\n", dx & 0x7fff,
-                                               (dx & 0x8000) ?
-                                               "minutes" : "seconds");
-                       }
-               }
-       }
-
-       /* Install our power off handler.. */
-       if (power_off)
-               pm_power_off = apm_power_off;
-
-       if (num_online_cpus() == 1 || smp) {
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-               console_blank_hook = apm_console_blank;
-#endif
-               apm_mainloop();
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-               console_blank_hook = NULL;
-#endif
-       }
-       kapmd_running = 0;
-
-       return 0;
-}
-
-#ifndef MODULE
-static int __init apm_setup(char *str)
-{
-       int     invert;
-
-       while ((str != NULL) && (*str != '\0')) {
-               if (strncmp(str, "off", 3) == 0)
-                       apm_disabled = 1;
-               if (strncmp(str, "on", 2) == 0)
-                       apm_disabled = 0;
-               if ((strncmp(str, "bounce-interval=", 16) == 0) ||
-                   (strncmp(str, "bounce_interval=", 16) == 0))
-                       bounce_interval = simple_strtol(str + 16, NULL, 0);
-               if ((strncmp(str, "idle-threshold=", 15) == 0) ||
-                   (strncmp(str, "idle_threshold=", 15) == 0))
-                       idle_threshold = simple_strtol(str + 15, NULL, 0);
-               if ((strncmp(str, "idle-period=", 12) == 0) ||
-                   (strncmp(str, "idle_period=", 12) == 0))
-                       idle_period = simple_strtol(str + 12, NULL, 0);
-               invert = (strncmp(str, "no-", 3) == 0) ||
-                       (strncmp(str, "no_", 3) == 0);
-               if (invert)
-                       str += 3;
-               if (strncmp(str, "debug", 5) == 0)
-                       debug = !invert;
-               if ((strncmp(str, "power-off", 9) == 0) ||
-                   (strncmp(str, "power_off", 9) == 0))
-                       power_off = !invert;
-               if (strncmp(str, "smp", 3) == 0)
-               {
-                       smp = !invert;
-                       idle_threshold = 100;
-               }
-               if ((strncmp(str, "allow-ints", 10) == 0) ||
-                   (strncmp(str, "allow_ints", 10) == 0))
-                       apm_info.allow_ints = !invert;
-               if ((strncmp(str, "broken-psr", 10) == 0) ||
-                   (strncmp(str, "broken_psr", 10) == 0))
-                       apm_info.get_power_status_broken = !invert;
-               if ((strncmp(str, "realmode-power-off", 18) == 0) ||
-                   (strncmp(str, "realmode_power_off", 18) == 0))
-                       apm_info.realmode_power_off = !invert;
-               str = strchr(str, ',');
-               if (str != NULL)
-                       str += strspn(str, ", \t");
-       }
-       return 1;
-}
-
-__setup("apm=", apm_setup);
-#endif
-
-static struct file_operations apm_bios_fops = {
-       .owner          = THIS_MODULE,
-       .read           = do_read,
-       .poll           = do_poll,
-       .ioctl          = do_ioctl,
-       .open           = do_open,
-       .release        = do_release,
-};
-
-static struct miscdevice apm_device = {
-       APM_MINOR_DEV,
-       "apm_bios",
-       &apm_bios_fops
-};
-
-
-/* Simple "print if true" callback */
-static int __init print_if_true(struct dmi_system_id *d)
-{
-       printk("%s\n", d->ident);
-       return 0;
-}
-
-/*
- * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was
- * disabled before the suspend. Linux used to get terribly confused by that.
- */
-static int __init broken_ps2_resume(struct dmi_system_id *d)
-{
-       printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
-       return 0;
-}
-
-/* Some bioses have a broken protected mode poweroff and need to use realmode */
-static int __init set_realmode_power_off(struct dmi_system_id *d)
-{
-       if (apm_info.realmode_power_off == 0) {
-               apm_info.realmode_power_off = 1;
-               printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
-       }
-       return 0;
-}
-
-/* Some laptops require interrupts to be enabled during APM calls */
-static int __init set_apm_ints(struct dmi_system_id *d)
-{
-       if (apm_info.allow_ints == 0) {
-               apm_info.allow_ints = 1;
-               printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
-       }
-       return 0;
-}
-
-/* Some APM bioses corrupt memory or just plain do not work */
-static int __init apm_is_horked(struct dmi_system_id *d)
-{
-       if (apm_info.disabled == 0) {
-               apm_info.disabled = 1;
-               printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
-       }
-       return 0;
-}
-
-static int __init apm_is_horked_d850md(struct dmi_system_id *d)
-{
-       if (apm_info.disabled == 0) {
-               apm_info.disabled = 1;
-               printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
-               printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
-               printk(KERN_INFO "download from support.intel.com \n");
-       }
-       return 0;
-}
-
-/* Some APM bioses hang on APM idle calls */
-static int __init apm_likes_to_melt(struct dmi_system_id *d)
-{
-       if (apm_info.forbid_idle == 0) {
-               apm_info.forbid_idle = 1;
-               printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
-       }
-       return 0;
-}
-
-/*
- *  Check for clue free BIOS implementations who use
- *  the following QA technique
- *
- *      [ Write BIOS Code ]<------
- *               |                ^
- *      < Does it Compile >----N--
- *               |Y               ^
- *     < Does it Boot Win98 >-N--
- *               |Y
- *           [Ship It]
- *
- *     Phoenix A04  08/24/2000 is known bad (Dell Inspiron 5000e)
- *     Phoenix A07  09/29/2000 is known good (Dell Inspiron 5000)
- */
-static int __init broken_apm_power(struct dmi_system_id *d)
-{
-       apm_info.get_power_status_broken = 1;
-       printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
-       return 0;
-}
-
-/*
- * This bios swaps the APM minute reporting bytes over (Many sony laptops
- * have this problem).
- */
-static int __init swab_apm_power_in_minutes(struct dmi_system_id *d)
-{
-       apm_info.get_power_status_swabinminutes = 1;
-       printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
-       return 0;
-}
-
-static struct dmi_system_id __initdata apm_dmi_table[] = {
-       {
-               print_if_true,
-               KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), },
-       },
-       {       /* Handle problems with APM on the C600 */
-               broken_ps2_resume, "Dell Latitude C600",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), },
-       },
-       {       /* Allow interrupts during suspend on Dell Latitude laptops*/
-               set_apm_ints, "Dell Latitude",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell Inspiron 2500",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
-       },
-       {       /* Allow interrupts during suspend on Dell Inspiron laptops*/
-               set_apm_ints, "Dell Inspiron", {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), },
-       },
-       {       /* Handle problems with APM on Inspiron 5000e */
-               broken_apm_power, "Dell Inspiron 5000e",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "A04"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), },
-       },
-       {       /* Handle problems with APM on Inspiron 2500 */
-               broken_apm_power, "Dell Inspiron 2500",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "A12"),
-                       DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell Dimension 4100",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
-       },
-       {       /* Allow interrupts during suspend on Compaq Laptops*/
-               set_apm_ints, "Compaq 12XL125",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
-                       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
-       },
-       {       /* Allow interrupts during APM or the clock goes slow */
-               set_apm_ints, "ASUSTeK",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), },
-       },
-       {       /* APM blows on shutdown */
-               apm_is_horked, "ABIT KX7-333[R]",
-               {       DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"),
-                       DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Trigem Delhi3",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Fujitsu-Siemens",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked_d850md, "Intel D850MD",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Intel D810EMO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell XPS-Z",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "A11"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Sharp PC-PJ/AX",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell Inspiron 2500",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
-       },
-       {       /* APM idle hangs */
-               apm_likes_to_melt, "Jabil AMD",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), },
-       },
-       {       /* APM idle hangs */
-               apm_likes_to_melt, "AMI Bios",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0206H"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-N505VX */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"),
-                       DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-XG29 */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"),
-                       DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z600NE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z600NE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z505LS */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"),
-                       DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z505LS */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-F104K */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), },
-       },
-
-       {       /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-C1VE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-C1VE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), },
-       },
-       {       /* broken PM poweroff bios */
-               set_realmode_power_off, "Award Software v4.60 PGMA",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"),
-                       DMI_MATCH(DMI_BIOS_DATE, "134526184"), },
-       },
-
-       /* Generic per vendor APM settings  */
-
-       {       /* Allow interrupts during suspend on IBM laptops */
-               set_apm_ints, "IBM",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "IBM"), },
-       },
-
-       { }
-};
-
-/*
- * Just start the APM thread. We do NOT want to do APM BIOS
- * calls from anything but the APM thread, if for no other reason
- * than the fact that we don't trust the APM BIOS. This way,
- * most common APM BIOS problems that lead to protection errors
- * etc will have at least some level of being contained...
- *
- * In short, if something bad happens, at least we have a choice
- * of just killing the apm thread..
- */
-static int __init apm_init(void)
-{
-       struct proc_dir_entry *apm_proc;
-       int ret;
-       int i;
-
-       dmi_check_system(apm_dmi_table);
-
-       if (apm_info.bios.version == 0) {
-               printk(KERN_INFO "apm: BIOS not found.\n");
-               return -ENODEV;
-       }
-       printk(KERN_INFO
-               "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
-               ((apm_info.bios.version >> 8) & 0xff),
-               (apm_info.bios.version & 0xff),
-               apm_info.bios.flags,
-               driver_version);
-       if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
-               printk(KERN_INFO "apm: no 32 bit BIOS support\n");
-               return -ENODEV;
-       }
-
-       if (allow_ints)
-               apm_info.allow_ints = 1;
-       if (broken_psr)
-               apm_info.get_power_status_broken = 1;
-       if (realmode_power_off)
-               apm_info.realmode_power_off = 1;
-       /* User can override, but default is to trust DMI */
-       if (apm_disabled != -1)
-               apm_info.disabled = apm_disabled;
-
-       /*
-        * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1
-        * but is reportedly a 1.0 BIOS.
-        */
-       if (apm_info.bios.version == 0x001)
-               apm_info.bios.version = 0x100;
-
-       /* BIOS < 1.2 doesn't set cseg_16_len */
-       if (apm_info.bios.version < 0x102)
-               apm_info.bios.cseg_16_len = 0; /* 64k */
-
-       if (debug) {
-               printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
-                       apm_info.bios.cseg, apm_info.bios.offset,
-                       apm_info.bios.cseg_16, apm_info.bios.dseg);
-               if (apm_info.bios.version > 0x100)
-                       printk(" cseg len %x, dseg len %x",
-                               apm_info.bios.cseg_len,
-                               apm_info.bios.dseg_len);
-               if (apm_info.bios.version > 0x101)
-                       printk(" cseg16 len %x", apm_info.bios.cseg_16_len);
-               printk("\n");
-       }
-
-       if (apm_info.disabled) {
-               printk(KERN_NOTICE "apm: disabled on user request.\n");
-               return -ENODEV;
-       }
-       if ((num_online_cpus() > 1) && !power_off && !smp) {
-               printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
-               apm_info.disabled = 1;
-               return -ENODEV;
-       }
-       if (PM_IS_ACTIVE()) {
-               printk(KERN_NOTICE "apm: overridden by ACPI.\n");
-               apm_info.disabled = 1;
-               return -ENODEV;
-       }
-       pm_active = 1;
-
-       /*
-        * Set up a segment that references the real mode segment 0x40
-        * that extends up to the end of page zero (that we have reserved).
-        * This is for buggy BIOS's that refer to (real mode) segment 0x40
-        * even though they are called in protected mode.
-        */
-       set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
-       _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
-
-       apm_bios_entry.offset = apm_info.bios.offset;
-       apm_bios_entry.segment = APM_CS;
-
-       for (i = 0; i < NR_CPUS; i++) {
-               set_base(get_cpu_gdt_table(i)[APM_CS >> 3],
-                        __va((unsigned long)apm_info.bios.cseg << 4));
-               set_base(get_cpu_gdt_table(i)[APM_CS_16 >> 3],
-                        __va((unsigned long)apm_info.bios.cseg_16 << 4));
-               set_base(get_cpu_gdt_table(i)[APM_DS >> 3],
-                        __va((unsigned long)apm_info.bios.dseg << 4));
-#ifndef APM_RELAX_SEGMENTS
-               if (apm_info.bios.version == 0x100) {
-#endif
-                       /* For ASUS motherboard, Award BIOS rev 110 (and others?) */
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3], 64 * 1024 - 1);
-                       /* For some unknown machine. */
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS_16 >> 3], 64 * 1024 - 1);
-                       /* For the DEC Hinote Ultra CT475 (and others?) */
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_DS >> 3], 64 * 1024 - 1);
-#ifndef APM_RELAX_SEGMENTS
-               } else {
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3],
-                               (apm_info.bios.cseg_len - 1) & 0xffff);
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS_16 >> 3],
-                               (apm_info.bios.cseg_16_len - 1) & 0xffff);
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_DS >> 3],
-                               (apm_info.bios.dseg_len - 1) & 0xffff);
-                     /* workaround for broken BIOSes */
-                       if (apm_info.bios.cseg_len <= apm_info.bios.offset)
-                               _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3], 64 * 1024 -1);
-                       if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */
-                               /* for the BIOS that assumes granularity = 1 */
-                               get_cpu_gdt_table(i)[APM_DS >> 3].b |= 0x800000;
-                               printk(KERN_NOTICE "apm: we set the granularity of dseg.\n");
-                       }
-               }
-#endif
-       }
-
-       apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
-       if (apm_proc)
-               apm_proc->owner = THIS_MODULE;
-
-       ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
-       if (ret < 0) {
-               printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
-               return -ENOMEM;
-       }
-
-       if (num_online_cpus() > 1 && !smp ) {
-               printk(KERN_NOTICE
-                  "apm: disabled - APM is not SMP safe (power off active).\n");
-               return 0;
-       }
-
-       misc_register(&apm_device);
-
-       if (HZ != 100)
-               idle_period = (idle_period * HZ) / 100;
-       if (idle_threshold < 100) {
-               original_pm_idle = pm_idle;
-               pm_idle  = apm_cpu_idle;
-               set_pm_idle = 1;
-       }
-
-       return 0;
-}
-
-static void __exit apm_exit(void)
-{
-       int     error;
-
-       if (set_pm_idle) {
-               pm_idle = original_pm_idle;
-               /*
-                * We are about to unload the current idle thread pm callback
-                * (pm_idle), Wait for all processors to update cached/local
-                * copies of pm_idle before proceeding.
-                */
-               cpu_idle_wait();
-       }
-       if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
-           && (apm_info.connection_version > 0x0100)) {
-               error = apm_engage_power_management(APM_DEVICE_ALL, 0);
-               if (error)
-                       apm_error("disengage power management", error);
-       }
-       misc_deregister(&apm_device);
-       remove_proc_entry("apm", NULL);
-       if (power_off)
-               pm_power_off = NULL;
-       exit_kapmd = 1;
-       while (kapmd_running)
-               schedule();
-       pm_active = 0;
-}
-
-module_init(apm_init);
-module_exit(apm_exit);
-
-MODULE_AUTHOR("Stephen Rothwell");
-MODULE_DESCRIPTION("Advanced Power Management");
-MODULE_LICENSE("GPL");
-module_param(debug, bool, 0644);
-MODULE_PARM_DESC(debug, "Enable debug mode");
-module_param(power_off, bool, 0444);
-MODULE_PARM_DESC(power_off, "Enable power off");
-module_param(bounce_interval, int, 0444);
-MODULE_PARM_DESC(bounce_interval,
-               "Set the number of ticks to ignore suspend bounces");
-module_param(allow_ints, bool, 0444);
-MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls");
-module_param(broken_psr, bool, 0444);
-MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call");
-module_param(realmode_power_off, bool, 0444);
-MODULE_PARM_DESC(realmode_power_off,
-               "Switch to real mode before powering off");
-module_param(idle_threshold, int, 0444);
-MODULE_PARM_DESC(idle_threshold,
-       "System idle percentage above which to make APM BIOS idle calls");
-module_param(idle_period, int, 0444);
-MODULE_PARM_DESC(idle_period,
-       "Period (in sec/100) over which to caculate the idle percentage");
-module_param(smp, bool, 0444);
-MODULE_PARM_DESC(smp,
-       "Set this to enable APM use on an SMP platform. Use with caution on older systems");
-MODULE_ALIAS_MISCDEV(APM_MINOR_DEV);
diff -r d609de73b9fa -r 5a63f675107c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/io_ports.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/io_ports.h       Wed Feb  1 17:06:16 2006
+++ /dev/null   Wed Feb  1 18:00:19 2006
@@ -1,30 +0,0 @@
-/*
- *  arch/i386/mach-generic/io_ports.h
- *
- *  Machine specific IO port address definition for generic.
- *  Written by Osamu Tomita <tomita@xxxxxxxxxxx>
- */
-#ifndef _MACH_IO_PORTS_H
-#define _MACH_IO_PORTS_H
-
-/* i8253A PIT registers */
-#define PIT_MODE               0x43
-#define PIT_CH0                        0x40
-#define PIT_CH2                        0x42
-
-/* i8259A PIC registers */
-#define PIC_MASTER_CMD         0x20
-#define PIC_MASTER_IMR         0x21
-#define PIC_MASTER_ISR         PIC_MASTER_CMD
-#define PIC_MASTER_POLL                PIC_MASTER_ISR
-#define PIC_MASTER_OCW3                PIC_MASTER_ISR
-#define PIC_SLAVE_CMD          0xa0
-#define PIC_SLAVE_IMR          0xa1
-
-/* i8259A PIC related value */
-#define PIC_CASCADE_IR         2
-#define MASTER_ICW4_DEFAULT    0x01
-#define SLAVE_ICW4_DEFAULT     0x01
-#define PIC_ICW4_AEOI          2
-
-#endif /* !_MACH_IO_PORTS_H */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
