[Xen-changelog] upgrade the sparse repository to 2.6.12

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] upgrade the sparse repository to 2.6.12
From: Xen patchbot -2.0-testing <patchbot-2.0-testing@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 10 Aug 2005 08:58:21 -0400
Delivery-date: Wed, 10 Aug 2005 12:59:23 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User vh249@xxxxxxxxxxxxxxxxxxxxxxxx
# Node ID de310533c48375f3008a0484c3a770b807aee5c3
# Parent  fa660d79f69573459949c847743f6341f466c7d0
upgrade the sparse repository to 2.6.12

Signed-off-by: Vincent Hanquez <vincent@xxxxxxxxxxxxx>

diff -r fa660d79f695 -r de310533c483 buildconfigs/mk.linux-2.6-xen0
--- a/buildconfigs/mk.linux-2.6-xen0    Tue Aug  9 15:17:45 2005
+++ b/buildconfigs/mk.linux-2.6-xen0    Tue Aug  9 23:57:17 2005
@@ -2,7 +2,7 @@
 OS           = linux
 
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.11
+LINUX_VER    = 2.6.12
 
 EXTRAVERSION = xen0
 
diff -r fa660d79f695 -r de310533c483 buildconfigs/mk.linux-2.6-xenU
--- a/buildconfigs/mk.linux-2.6-xenU    Tue Aug  9 15:17:45 2005
+++ b/buildconfigs/mk.linux-2.6-xenU    Tue Aug  9 23:57:17 2005
@@ -2,7 +2,7 @@
 OS           = linux
 
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.11
+LINUX_VER    = 2.6.12
 
 EXTRAVERSION = xenU
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig     Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig     Tue Aug  9 23:57:17 2005
@@ -150,3 +150,5 @@
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "arch/xen/Kconfig.debug"
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig      Tue Aug  9 23:57:17 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-xen0
-# Tue May  3 13:22:55 2005
+# Linux kernel version: 2.6.12-xen0
+# Sat Jul  9 09:19:47 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -31,6 +31,7 @@
 CONFIG_BROKEN=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
@@ -42,7 +43,6 @@
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
@@ -50,15 +50,18 @@
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
 CONFIG_CC_ALIGN_LOOPS=0
 CONFIG_CC_ALIGN_JUMPS=0
 # CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
 
 #
 # Loadable module support
@@ -97,6 +100,7 @@
 # CONFIG_MWINCHIPC6 is not set
 # CONFIG_MWINCHIP2 is not set
 # CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_X86_GENERIC is not set
@@ -117,6 +121,7 @@
 # CONFIG_SMP is not set
 CONFIG_PREEMPT=y
 CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_MICROCODE=y
 CONFIG_X86_CPUID=y
 
@@ -137,6 +142,8 @@
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_LEGACY_PROC=y
 # CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
 CONFIG_ISA=y
 # CONFIG_EISA is not set
 # CONFIG_MCA is not set
@@ -148,11 +155,6 @@
 # CONFIG_PCCARD is not set
 
 #
-# PC-card bridges
-#
-CONFIG_PCMCIA_PROBE=y
-
-#
 # PCI Hotplug Support
 #
 # CONFIG_HOTPLUG_PCI is not set
@@ -160,12 +162,14 @@
 #
 # Kernel hacking
 #
+# CONFIG_PRINTK_TIME is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_EARLY_PRINTK=y
 # CONFIG_DEBUG_STACKOVERFLOW is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_SLAB is not set
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_INFO is not set
@@ -176,6 +180,7 @@
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_PC=y
+CONFIG_SECCOMP=y
 
 #
 # Executable file formats
@@ -332,7 +337,7 @@
 #
 # SCSI Transport Attributes
 #
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_FC_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 
@@ -409,6 +414,7 @@
 # CONFIG_SCSI_QLA2300 is not set
 # CONFIG_SCSI_QLA2322 is not set
 # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
 # CONFIG_SCSI_SEAGATE is not set
 # CONFIG_SCSI_SYM53C416 is not set
 # CONFIG_SCSI_DC395x is not set
@@ -442,6 +448,7 @@
 CONFIG_DM_SNAPSHOT=y
 CONFIG_DM_MIRROR=y
 # CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
 
 #
 # Fusion MPT device support
@@ -470,7 +477,6 @@
 #
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
@@ -650,7 +656,6 @@
 # CONFIG_DGRS is not set
 # CONFIG_EEPRO100 is not set
 CONFIG_E100=y
-# CONFIG_E100_NAPI is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
 CONFIG_NE2K_PCI=y
@@ -683,6 +688,7 @@
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
 
 #
 # Ethernet (10000 Mbit)
@@ -738,19 +744,6 @@
 # CONFIG_INPUT_TSDEV is not set
 # CONFIG_INPUT_EVDEV is not set
 # CONFIG_INPUT_EVBUG is not set
-
-#
-# Input I/O drivers
-#
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
 
 #
 # Input Device Drivers
@@ -773,6 +766,18 @@
 # CONFIG_INPUT_MISC is not set
 
 #
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
 # Character devices
 #
 CONFIG_VT=y
@@ -788,6 +793,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
@@ -820,7 +826,6 @@
 CONFIG_AGP_AMD=m
 CONFIG_AGP_AMD64=m
 CONFIG_AGP_INTEL=m
-CONFIG_AGP_INTEL_MCH=m
 CONFIG_AGP_NVIDIA=m
 CONFIG_AGP_SIS=m
 CONFIG_AGP_SWORKS=m
@@ -841,6 +846,11 @@
 # CONFIG_HANGCHECK_TIMER is not set
 
 #
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
 # I2C support
 #
 # CONFIG_I2C is not set
@@ -886,6 +896,8 @@
 #
 # USB support
 #
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB=y
 # CONFIG_USB_DEBUG is not set
 
@@ -896,14 +908,14 @@
 # CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
 
 #
 # USB Host Controller Drivers
 #
 # CONFIG_USB_EHCI_HCD is not set
 CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
 # CONFIG_USB_SL811_HCD is not set
 
@@ -940,7 +952,6 @@
 #
 # CONFIG_USB_MDC800 is not set
 # CONFIG_USB_MICROTEK is not set
-# CONFIG_USB_HPUSBSCSI is not set
 
 #
 # USB Multimedia devices
@@ -959,6 +970,7 @@
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
 
 #
 # USB port drivers
@@ -1174,6 +1186,7 @@
 # CONFIG_CRYPTO_SHA256 is not set
 # CONFIG_CRYPTO_SHA512 is not set
 # CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
 CONFIG_CRYPTO_DES=m
 # CONFIG_CRYPTO_BLOWFISH is not set
 # CONFIG_CRYPTO_TWOFISH is not set
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig      Tue Aug  9 23:57:17 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-xenU
-# Wed Apr 13 23:18:37 2005
+# Linux kernel version: 2.6.12-xenU
+# Sun Jul 10 17:32:04 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -28,6 +28,7 @@
 CONFIG_CLEAN_COMPILE=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
@@ -39,23 +40,26 @@
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
 CONFIG_CC_ALIGN_LOOPS=0
 CONFIG_CC_ALIGN_JUMPS=0
 # CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
 
 #
 # Loadable module support
@@ -94,6 +98,7 @@
 # CONFIG_MWINCHIPC6 is not set
 # CONFIG_MWINCHIP2 is not set
 # CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_X86_GENERIC is not set
@@ -114,6 +119,7 @@
 # CONFIG_SMP is not set
 CONFIG_PREEMPT=y
 CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_X86_CPUID=y
 
 #
@@ -144,6 +150,8 @@
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_PC=y
+CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
 
 #
 # Executable file formats
@@ -239,7 +247,6 @@
 #
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
@@ -506,6 +513,7 @@
 # CONFIG_CRYPTO_SHA256 is not set
 # CONFIG_CRYPTO_SHA512 is not set
 # CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
 # CONFIG_CRYPTO_DES is not set
 # CONFIG_CRYPTO_BLOWFISH is not set
 # CONFIG_CRYPTO_TWOFISH is not set
@@ -534,3 +542,27 @@
 # CONFIG_CRC32 is not set
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Tue Aug  9 23:57:17 2005
@@ -65,6 +65,7 @@
          - "Winchip-C6" for original IDT Winchip.
          - "Winchip-2" for IDT Winchip 2.
          - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+         - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
          - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
          - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
 
@@ -191,6 +192,11 @@
          and alignment reqirements.  Also enable out of order memory
          stores for this CPU, which can increase performance of some
          operations.
+
+config MGEODEGX1
+       bool "GeodeGX1"
+       help
+         Select this for a Geode GX1 (Cyrix MediaGX) chip.
 
 config MCYRIXIII
        bool "CyrixIII/VIA-C3"
@@ -240,7 +246,7 @@
        int
        default "7" if MPENTIUM4 || X86_GENERIC
        default "4" if X86_ELAN || M486 || M386
-       default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
+       default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
        default "6" if MK7 || MK8 || MPENTIUMM
 
 config RWSEM_GENERIC_SPINLOCK
@@ -259,7 +265,7 @@
 
 config X86_PPRO_FENCE
        bool
-       depends on M686 || M586MMX || M586TSC || M586 || M486 || M386
+       depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
        default y
 
 config X86_F00F_BUG
@@ -289,7 +295,7 @@
 
 config X86_ALIGNMENT_16
        bool
-       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
+       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
        default y
 
 config X86_GOOD_APIC
@@ -415,7 +421,7 @@
 
 #config X86_TSC
 #       bool
-#      depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
+#      depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
 #       default y
 
 #config X86_MCE
@@ -455,6 +461,24 @@
 #         Enabling this feature will cause a message to be printed when the P4
 #         enters thermal throttling.
 
+config X86_REBOOTFIXUPS
+       bool "Enable X86 board specific fixups for reboot"
+       depends on X86
+       default n
+       ---help---
+         This enables chipset and/or board specific fixups to be done
+         in order to get reboot to work correctly. This is only needed on
+         some combinations of hardware and BIOS. The symptom, for which
+         this config is intended, is when reboot ends with a stalled/hung
+         system.
+
+         Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1.
+         combination.
+
+         Say Y if you want to enable the fixup. Currently, it's safe to
+         enable this option even if you don't need it.
+         Say N otherwise.
+
 config MICROCODE
        tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
         depends on XEN_PRIVILEGED_GUEST
@@ -578,6 +602,16 @@
 config HAVE_ARCH_BOOTMEM_NODE
        bool
        depends on NUMA
+       default y
+
+config HAVE_MEMORY_PRESENT
+       bool
+       depends on DISCONTIGMEM
+       default y
+
+config NEED_NODE_MEMMAP_SIZE
+       bool
+       depends on DISCONTIGMEM
        default y
 
 #config HIGHPTE
@@ -673,13 +707,18 @@
 
 config X86_LOCAL_APIC
        bool
-       depends on (X86_VISWS || SMP) && !X86_VOYAGER
+       depends on XEN_PRIVILEGED_GUEST && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER))
        default y
 
 config X86_IO_APIC
        bool
-       depends on SMP && !(X86_VISWS || X86_VOYAGER)
-       default y
+       depends on XEN_PRIVILEGED_GUEST && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)))
+       default y
+
+config X86_VISWS_APIC
+       bool
+       depends on X86_VISWS
+       default y
 
 config PCI
        bool "PCI support" if !X86_VISWS
@@ -748,6 +787,10 @@
 
 source "drivers/pci/Kconfig"
 
+config ISA_DMA_API
+       bool
+       default y
+
 config ISA
        bool "ISA support"
        depends on !(X86_VOYAGER || X86_VISWS)
@@ -777,17 +820,13 @@
 source "drivers/eisa/Kconfig"
 
 config MCA
-       bool "MCA support"
-       depends on !(X86_VISWS || X86_VOYAGER)
+       bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+       default y if X86_VOYAGER
        help
          MicroChannel Architecture is found in some IBM PS/2 machines and
          laptops.  It is a bus system similar to PCI or ISA. See
          <file:Documentation/mca.txt> (and especially the web page given
          there) before attempting to build an MCA bus kernel.
-
-config MCA
-       depends on X86_VOYAGER
-       default y if X86_VOYAGER
 
 source "drivers/mca/Kconfig"
 
@@ -971,4 +1010,21 @@
        depends on X86 && !EMBEDDED
        default y
 
+config SECCOMP
+       bool "Enable seccomp to safely compute untrusted bytecode"
+       depends on PROC_FS
+       default y
+       help
+         This kernel feature is useful for number crunching applications
+         that may need to compute untrusted bytecode during their
+         execution. By using pipes or other transports made available to
+         the process as file descriptors supporting the read/write
+         syscalls, it's possible to isolate those applications in
+         their own address space using seccomp. Once seccomp is
+         enabled via /proc/<pid>/seccomp, it cannot be disabled
+         and the task is only allowed to execute a few safe syscalls
+         defined by each seccomp mode.
+
+         If unsure, say Y. Only embedded should say N here.
+
 endmenu
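
[Editorial example] The CONFIG_SECCOMP help text above describes the 2.6.12-era mode-1 interface: a task turns seccomp on through /proc/<pid>/seccomp, after which only a few safe syscalls (read, write, exit, sigreturn) remain available. A minimal user-space sketch of that flow, based only on the help text; the program is illustrative and not part of this patch:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            /* Enable mode-1 seccomp via the /proc interface described above. */
            int fd = open("/proc/self/seccomp", O_WRONLY);
            if (fd < 0)
                    return 1;
            write(fd, "1", 1);   /* one-way switch: cannot be disabled */
            /* Only read/write/_exit/sigreturn are allowed from here on;
             * even close(fd) would now kill the task. */
            write(1, "sandboxed\n", 10);
            _exit(0);
    }
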
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/Makefile       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Makefile       Tue Aug  9 23:57:17 2005
@@ -14,6 +14,8 @@
 # 19990713  Artur Skawina <skawina@xxxxxxxxxxxxx>
 #           Added '-march' and '-mpreferred-stack-boundary' support
 #
+# 20050320  Kianusch Sayah Karadji <kianusch@xxxxxxxxxxx>
+#           Added support for GEODE CPU
 
 XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
@@ -55,6 +57,9 @@
 
 # AMD Elan support
 cflags-$(CONFIG_X86_ELAN)      += -march=i486
+
+# Geode GX1 support
+cflags-$(CONFIG_MGEODEGX1)             += $(call cc-option,-march=pentium-mmx,-march=i486)
 
 # -mregparm=3 works ok on gcc-3.0 and later
 #
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Tue Aug  9 23:57:17 2005
@@ -32,6 +32,7 @@
 c-obj-$(CONFIG_X86_MPPARSE)    += mpparse.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
 c-obj-$(CONFIG_X86_IO_APIC)    += io_apic.o
+c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
 c-obj-$(CONFIG_X86_NUMAQ)      += numaq.o
 c-obj-$(CONFIG_X86_SUMMIT_NUMA)        += summit.o
 c-obj-$(CONFIG_MODULES)                += module.o
@@ -51,11 +52,11 @@
 # Note: kbuild does not track this dependency due to usage of .incbin
 $(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
 targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
-targets += vsyscall.lds
+targets += vsyscall-note.o vsyscall.lds
 
 # The DSO images are built using a special linker script.
 quiet_cmd_syscall = SYSCALL $@
-      cmd_syscall = $(CC) -nostdlib -m32 $(SYSCFLAGS_$(@F)) \
+      cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
                          -Wl,-T,$(filter-out FORCE,$^) -o $@
 
 export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
@@ -65,7 +66,8 @@
 SYSCFLAGS_vsyscall-int80.so    = $(vsyscall-flags)
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
+                     $(obj)/vsyscall-%.o FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -76,17 +78,20 @@
 $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
 
 SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
+                       $(obj)/vsyscall-sysenter.o FORCE
        $(call if_changed,syscall)
 
 c-link := init_task.o
-s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o
+s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
        @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
 
 $(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
 
+$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
+
 obj-y  += $(c-obj-y) $(s-obj-y)
 
 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Tue Aug  9 23:57:17 2005
@@ -22,6 +22,9 @@
 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
 static int cachesize_override __initdata = -1;
 static int disable_x86_fxsr __initdata = 0;
 static int disable_x86_serial_nr __initdata = 1;
@@ -202,7 +205,7 @@
 
 
 /* Probe for the CPUID instruction */
-int __init have_cpuid_p(void)
+static int __init have_cpuid_p(void)
 {
        return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -210,7 +213,7 @@
 /* Do minimum CPU detection early.
    Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
    The others are not touched to avoid unwanted side effects. */
-void __init early_cpu_detect(void)
+static void __init early_cpu_detect(void)
 {
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
@@ -243,6 +246,10 @@
        }
 
        early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+       phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
 }
 
 void __init generic_identify(struct cpuinfo_x86 * c)
@@ -431,25 +438,15 @@
        mcheck_init(c);
 #endif
 }
-/*
- *     Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
- */
- 
-void __init dodgy_tsc(void)
-{
-       if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) ||
-           ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC   ))
-               cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data);
-}
 
 #ifdef CONFIG_X86_HT
 void __init detect_ht(struct cpuinfo_x86 *c)
 {
        u32     eax, ebx, ecx, edx;
-       int     index_lsb, index_msb, tmp;
+       int     index_msb, tmp;
        int     cpu = smp_processor_id();
 
-       if (!cpu_has(c, X86_FEATURE_HT))
+       if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
 
        cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -458,7 +455,6 @@
        if (smp_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
        } else if (smp_num_siblings > 1 ) {
-               index_lsb = 0;
                index_msb = 31;
 
                if (smp_num_siblings > NR_CPUS) {
@@ -467,21 +463,34 @@
                        return;
                }
                tmp = smp_num_siblings;
-               while ((tmp & 1) == 0) {
-                       tmp >>=1 ;
-                       index_lsb++;
-               }
-               tmp = smp_num_siblings;
                while ((tmp & 0x80000000 ) == 0) {
                        tmp <<=1 ;
                        index_msb--;
                }
-               if (index_lsb != index_msb )
+               if (smp_num_siblings & (smp_num_siblings - 1))
                        index_msb++;
                phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
+
+               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+
+               tmp = smp_num_siblings;
+               index_msb = 31;
+               while ((tmp & 0x80000000) == 0) {
+                       tmp <<=1 ;
+                       index_msb--;
+               }
+
+               if (smp_num_siblings & (smp_num_siblings - 1))
+                       index_msb++;
+
+               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+               if (c->x86_num_cores > 1)
+                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                              cpu_core_id[cpu]);
        }
 }
 #endif
@@ -528,7 +537,6 @@
 extern int rise_init_cpu(void);
 extern int nexgen_init_cpu(void);
 extern int umc_init_cpu(void);
-void early_cpu_detect(void);
 
 void __init early_cpu_init(void)
 {
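
[Editorial example] The detect_ht() rework above replaces the old lsb/msb pair with a single rounded log2: shift until the top bit is set to find floor(log2(siblings)), then bump the result by one when the sibling count is not a power of two. A standalone sketch of the same arithmetic (hypothetical helper name, not kernel code):

    #include <stdio.h>

    /* Same rounding as the detect_ht() rework above: the result is
     * ceil(log2(siblings)), the number of APIC-ID bits to shift by. */
    static int index_msb(unsigned int siblings)
    {
            unsigned int tmp = siblings;
            int msb = 31;

            while ((tmp & 0x80000000u) == 0) {
                    tmp <<= 1;
                    msb--;
            }
            if (siblings & (siblings - 1))   /* not a power of two */
                    msb++;
            return msb;
    }

    int main(void)
    {
            /* 1 sibling -> 0 bits, 2 -> 1 bit, 3 -> rounded up to 2 bits. */
            printf("%d %d %d\n", index_msb(1), index_msb(2), index_msb(3));
            return 0;
    }
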
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Tue Aug  9 23:57:17 2005
@@ -31,7 +31,7 @@
 unsigned int num_var_ranges;
 unsigned int *usage_table;
 
-void __init set_num_var_ranges(void)
+static void __init set_num_var_ranges(void)
 {
        dom0_op_t op;
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Tue Aug  9 23:57:17 2005
@@ -595,8 +595,6 @@
        xorl %edx,%edx                  # error code 0
        movl %esp,%eax                  # pt_regs pointer
        call do_debug
-       testl %eax,%eax
-       jnz restore_all
        jmp ret_from_exception
 
 #if 0 /* XEN */
@@ -651,8 +649,6 @@
        xorl %edx,%edx          # zero error code
        movl %esp,%eax          # pt_regs pointer
        call do_int3
-       testl %eax,%eax
-       jnz restore_all
        jmp ret_from_exception
 
 ENTRY(overflow)
@@ -736,296 +732,6 @@
        pushl $do_fixup_4gb_segment
        jmp error_code
 
-.data
-ENTRY(sys_call_table)
-       .long sys_restart_syscall       /* 0 - old "setup()" system call, used for restarting */
-       .long sys_exit
-       .long sys_fork
-       .long sys_read
-       .long sys_write
-       .long sys_open          /* 5 */
-       .long sys_close
-       .long sys_waitpid
-       .long sys_creat
-       .long sys_link
-       .long sys_unlink        /* 10 */
-       .long sys_execve
-       .long sys_chdir
-       .long sys_time
-       .long sys_mknod
-       .long sys_chmod         /* 15 */
-       .long sys_lchown16
-       .long sys_ni_syscall    /* old break syscall holder */
-       .long sys_stat
-       .long sys_lseek
-       .long sys_getpid        /* 20 */
-       .long sys_mount
-       .long sys_oldumount
-       .long sys_setuid16
-       .long sys_getuid16
-       .long sys_stime         /* 25 */
-       .long sys_ptrace
-       .long sys_alarm
-       .long sys_fstat
-       .long sys_pause
-       .long sys_utime         /* 30 */
-       .long sys_ni_syscall    /* old stty syscall holder */
-       .long sys_ni_syscall    /* old gtty syscall holder */
-       .long sys_access
-       .long sys_nice
-       .long sys_ni_syscall    /* 35 - old ftime syscall holder */
-       .long sys_sync
-       .long sys_kill
-       .long sys_rename
-       .long sys_mkdir
-       .long sys_rmdir         /* 40 */
-       .long sys_dup
-       .long sys_pipe
-       .long sys_times
-       .long sys_ni_syscall    /* old prof syscall holder */
-       .long sys_brk           /* 45 */
-       .long sys_setgid16
-       .long sys_getgid16
-       .long sys_signal
-       .long sys_geteuid16
-       .long sys_getegid16     /* 50 */
-       .long sys_acct
-       .long sys_umount        /* recycled never used phys() */
-       .long sys_ni_syscall    /* old lock syscall holder */
-       .long sys_ioctl
-       .long sys_fcntl         /* 55 */
-       .long sys_ni_syscall    /* old mpx syscall holder */
-       .long sys_setpgid
-       .long sys_ni_syscall    /* old ulimit syscall holder */
-       .long sys_olduname
-       .long sys_umask         /* 60 */
-       .long sys_chroot
-       .long sys_ustat
-       .long sys_dup2
-       .long sys_getppid
-       .long sys_getpgrp       /* 65 */
-       .long sys_setsid
-       .long sys_sigaction
-       .long sys_sgetmask
-       .long sys_ssetmask
-       .long sys_setreuid16    /* 70 */
-       .long sys_setregid16
-       .long sys_sigsuspend
-       .long sys_sigpending
-       .long sys_sethostname
-       .long sys_setrlimit     /* 75 */
-       .long sys_old_getrlimit
-       .long sys_getrusage
-       .long sys_gettimeofday
-       .long sys_settimeofday
-       .long sys_getgroups16   /* 80 */
-       .long sys_setgroups16
-       .long old_select
-       .long sys_symlink
-       .long sys_lstat
-       .long sys_readlink      /* 85 */
-       .long sys_uselib
-       .long sys_swapon
-       .long sys_reboot
-       .long old_readdir
-       .long old_mmap          /* 90 */
-       .long sys_munmap
-       .long sys_truncate
-       .long sys_ftruncate
-       .long sys_fchmod
-       .long sys_fchown16      /* 95 */
-       .long sys_getpriority
-       .long sys_setpriority
-       .long sys_ni_syscall    /* old profil syscall holder */
-       .long sys_statfs
-       .long sys_fstatfs       /* 100 */
-       .long sys_ioperm
-       .long sys_socketcall
-       .long sys_syslog
-       .long sys_setitimer
-       .long sys_getitimer     /* 105 */
-       .long sys_newstat
-       .long sys_newlstat
-       .long sys_newfstat
-       .long sys_uname
-       .long sys_iopl          /* 110 */
-       .long sys_vhangup
-       .long sys_ni_syscall    /* old "idle" system call */
-       .long sys_vm86old
-       .long sys_wait4
-       .long sys_swapoff       /* 115 */
-       .long sys_sysinfo
-       .long sys_ipc
-       .long sys_fsync
-       .long sys_sigreturn
-       .long sys_clone         /* 120 */
-       .long sys_setdomainname
-       .long sys_newuname
-       .long sys_modify_ldt
-       .long sys_adjtimex
-       .long sys_mprotect      /* 125 */
-       .long sys_sigprocmask
-       .long sys_ni_syscall    /* old "create_module" */ 
-       .long sys_init_module
-       .long sys_delete_module
-       .long sys_ni_syscall    /* 130: old "get_kernel_syms" */
-       .long sys_quotactl
-       .long sys_getpgid
-       .long sys_fchdir
-       .long sys_bdflush
-       .long sys_sysfs         /* 135 */
-       .long sys_personality
-       .long sys_ni_syscall    /* reserved for afs_syscall */
-       .long sys_setfsuid16
-       .long sys_setfsgid16
-       .long sys_llseek        /* 140 */
-       .long sys_getdents
-       .long sys_select
-       .long sys_flock
-       .long sys_msync
-       .long sys_readv         /* 145 */
-       .long sys_writev
-       .long sys_getsid
-       .long sys_fdatasync
-       .long sys_sysctl
-       .long sys_mlock         /* 150 */
-       .long sys_munlock
-       .long sys_mlockall
-       .long sys_munlockall
-       .long sys_sched_setparam
-       .long sys_sched_getparam   /* 155 */
-       .long sys_sched_setscheduler
-       .long sys_sched_getscheduler
-       .long sys_sched_yield
-       .long sys_sched_get_priority_max
-       .long sys_sched_get_priority_min  /* 160 */
-       .long sys_sched_rr_get_interval
-       .long sys_nanosleep
-       .long sys_mremap
-       .long sys_setresuid16
-       .long sys_getresuid16   /* 165 */
-       .long sys_vm86
-       .long sys_ni_syscall    /* Old sys_query_module */
-       .long sys_poll
-       .long sys_nfsservctl
-       .long sys_setresgid16   /* 170 */
-       .long sys_getresgid16
-       .long sys_prctl
-       .long sys_rt_sigreturn
-       .long sys_rt_sigaction
-       .long sys_rt_sigprocmask        /* 175 */
-       .long sys_rt_sigpending
-       .long sys_rt_sigtimedwait
-       .long sys_rt_sigqueueinfo
-       .long sys_rt_sigsuspend
-       .long sys_pread64       /* 180 */
-       .long sys_pwrite64
-       .long sys_chown16
-       .long sys_getcwd
-       .long sys_capget
-       .long sys_capset        /* 185 */
-       .long sys_sigaltstack
-       .long sys_sendfile
-       .long sys_ni_syscall    /* reserved for streams1 */
-       .long sys_ni_syscall    /* reserved for streams2 */
-       .long sys_vfork         /* 190 */
-       .long sys_getrlimit
-       .long sys_mmap2
-       .long sys_truncate64
-       .long sys_ftruncate64
-       .long sys_stat64        /* 195 */
-       .long sys_lstat64
-       .long sys_fstat64
-       .long sys_lchown
-       .long sys_getuid
-       .long sys_getgid        /* 200 */
-       .long sys_geteuid
-       .long sys_getegid
-       .long sys_setreuid
-       .long sys_setregid
-       .long sys_getgroups     /* 205 */
-       .long sys_setgroups
-       .long sys_fchown
-       .long sys_setresuid
-       .long sys_getresuid
-       .long sys_setresgid     /* 210 */
-       .long sys_getresgid
-       .long sys_chown
-       .long sys_setuid
-       .long sys_setgid
-       .long sys_setfsuid      /* 215 */
-       .long sys_setfsgid
-       .long sys_pivot_root
-       .long sys_mincore
-       .long sys_madvise
-       .long sys_getdents64    /* 220 */
-       .long sys_fcntl64
-       .long sys_ni_syscall    /* reserved for TUX */
-       .long sys_ni_syscall
-       .long sys_gettid
-       .long sys_readahead     /* 225 */
-       .long sys_setxattr
-       .long sys_lsetxattr
-       .long sys_fsetxattr
-       .long sys_getxattr
-       .long sys_lgetxattr     /* 230 */
-       .long sys_fgetxattr
-       .long sys_listxattr
-       .long sys_llistxattr
-       .long sys_flistxattr
-       .long sys_removexattr   /* 235 */
-       .long sys_lremovexattr
-       .long sys_fremovexattr
-       .long sys_tkill
-       .long sys_sendfile64
-       .long sys_futex         /* 240 */
-       .long sys_sched_setaffinity
-       .long sys_sched_getaffinity
-       .long sys_set_thread_area
-       .long sys_get_thread_area
-       .long sys_io_setup      /* 245 */
-       .long sys_io_destroy
-       .long sys_io_getevents
-       .long sys_io_submit
-       .long sys_io_cancel
-       .long sys_fadvise64     /* 250 */
-       .long sys_ni_syscall
-       .long sys_exit_group
-       .long sys_lookup_dcookie
-       .long sys_epoll_create
-       .long sys_epoll_ctl     /* 255 */
-       .long sys_epoll_wait
-       .long sys_remap_file_pages
-       .long sys_set_tid_address
-       .long sys_timer_create
-       .long sys_timer_settime         /* 260 */
-       .long sys_timer_gettime
-       .long sys_timer_getoverrun
-       .long sys_timer_delete
-       .long sys_clock_settime
-       .long sys_clock_gettime         /* 265 */
-       .long sys_clock_getres
-       .long sys_clock_nanosleep
-       .long sys_statfs64
-       .long sys_fstatfs64     
-       .long sys_tgkill        /* 270 */
-       .long sys_utimes
-       .long sys_fadvise64_64
-       .long sys_ni_syscall    /* sys_vserver */
-       .long sys_mbind
-       .long sys_get_mempolicy
-       .long sys_set_mempolicy
-       .long sys_mq_open
-       .long sys_mq_unlink
-       .long sys_mq_timedsend
-       .long sys_mq_timedreceive       /* 280 */
-       .long sys_mq_notify
-       .long sys_mq_getsetattr
-       .long sys_ni_syscall            /* reserved for kexec */
-       .long sys_waitid
-       .long sys_ni_syscall            /* 285 */ /* available */
-       .long sys_add_key
-       .long sys_request_key
-       .long sys_keyctl
+#include "syscall_table.S"
 
 syscall_table_size=(.-sys_call_table)
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Tue Aug  9 23:57:17 2005
@@ -99,6 +99,11 @@
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
@@ -114,7 +119,6 @@
 EXPORT_SYMBOL(dma_free_coherent);
 
 #ifdef CONFIG_PCI
-EXPORT_SYMBOL(pcibios_penalize_isa_irq);
 EXPORT_SYMBOL(pci_mem_start);
 #endif
 
@@ -146,7 +150,6 @@
 
 /* TLB flushing */
 EXPORT_SYMBOL(flush_tlb_page);
-EXPORT_SYMBOL_GPL(flush_tlb_all);
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
@@ -168,10 +171,6 @@
 EXPORT_SYMBOL_GPL(set_nmi_callback);
 EXPORT_SYMBOL_GPL(unset_nmi_callback);
 
-#undef memcmp
-extern int memcmp(const void *,const void *,__kernel_size_t);
-EXPORT_SYMBOL(memcmp);
-
 EXPORT_SYMBOL(register_die_notifier);
 #ifdef CONFIG_HAVE_DEC_LOCK
 EXPORT_SYMBOL(_atomic_dec_and_lock);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Tue Aug  9 23:57:17 2005
@@ -86,7 +86,7 @@
                           dma_addr_t *dma_handle)
 #else
 void *dma_alloc_coherent(struct device *dev, size_t size,
-                          dma_addr_t *dma_handle, int gfp)
+                          dma_addr_t *dma_handle, unsigned int __nocast gfp)
 #endif
 {
        void *ret;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Tue Aug  9 23:57:17 2005
@@ -36,6 +36,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -57,7 +58,7 @@
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
-int hlt_counter;
+static int hlt_counter;
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
@@ -74,7 +75,7 @@
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
-static cpumask_t cpu_idle_map;
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 void disable_hlt(void)
 {
@@ -120,11 +121,11 @@
        while (1) {
                while (!need_resched()) {
 
-                       if (cpu_isset(cpu, cpu_idle_map))
-                               cpu_clear(cpu, cpu_idle_map);
+                       if (__get_cpu_var(cpu_idle_state))
+                               __get_cpu_var(cpu_idle_state) = 0;
                        rmb();
 
-                       irq_stat[cpu].idle_timestamp = jiffies;
+                       __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        xen_idle();
                }
                schedule();
@@ -133,16 +134,28 @@
 
 void cpu_idle_wait(void)
 {
-       int cpu;
+       unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;
 
-       for_each_online_cpu(cpu)
-               cpu_set(cpu, cpu_idle_map);
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+
+       cpus_clear(map);
+       for_each_online_cpu(cpu) {
+               per_cpu(cpu_idle_state, cpu) = 1;
+               cpu_set(cpu, map);
+       }
+
+       __get_cpu_var(cpu_idle_state) = 0;
 
        wmb();
        do {
                ssleep(1);
-               cpus_and(map, cpu_idle_map, cpu_online_map);
+               for_each_online_cpu(cpu) {
+                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                               cpu_clear(cpu, map);
+               }
+               cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -286,6 +299,17 @@
        unsigned long eflags;
 
        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+       /*
+        * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+        * This is necessary to guarantee that the entire "struct pt_regs"
+        * is accessable even if the CPU haven't stored the SS/ESP registers
+        * on the stack (interrupt gate does not save these registers
+        * when switching to the same priv ring).
+        * Therefore beware: accessing the xss/esp fields of the
+        * "struct pt_regs" is possible, but they may contain the
+        * completely wrong values.
+        */
+       childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
        *childregs = *regs;
        childregs->eax = 0;
        childregs->esp = esp;
@@ -439,12 +463,6 @@
         */
        tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
-               HYPERVISOR_set_debugreg((register),     \
-                       (thread->debugreg[register]))
 
 /*
  *     switch_to(x,yn) should switch tasks from x to y.
@@ -777,3 +795,9 @@
        return 0;
 }
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+       if (randomize_va_space)
+               sp -= get_random_int() % 8192;
+       return sp & ~0xf;
+}
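
[Editorial example] The new arch_align_stack() above jitters the initial user stack by up to 8 KB when randomize_va_space is set, then rounds down to a 16-byte boundary. The same arithmetic as a user-space sketch (rand() stands in for get_random_int(); names are illustrative, not kernel API):

    #include <stdio.h>
    #include <stdlib.h>

    /* User-space rendering of the arch_align_stack() arithmetic above:
     * subtract a random offset below 8192, then 16-byte align downwards. */
    static unsigned long align_stack(unsigned long sp, int randomize)
    {
            if (randomize)
                    sp -= rand() % 8192;
            return sp & ~0xfUL;
    }

    int main(void)
    {
            unsigned long sp = 0xc0000000UL;
            printf("%#lx\n", align_stack(sp, 1));  /* within 8 KB below sp */
            return 0;
    }
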
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug  9 23:57:17 2005
@@ -40,6 +40,7 @@
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/edd.h>
+#include <linux/nodemask.h>
 #include <linux/kernel.h>
 #include <linux/notifier.h>
 #include <video/edid.h>
@@ -80,7 +81,6 @@
 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
 
 unsigned long mmu_cr4_features;
-EXPORT_SYMBOL_GPL(mmu_cr4_features);
 
 #ifdef CONFIG_ACPI_INTERPRETER
        int acpi_disabled = 0;
@@ -122,8 +122,6 @@
 struct edid_info edid_info;
 struct ist_info ist_info;
 struct e820map e820;
-
-unsigned char aux_device_present;
 
 extern void early_cpu_init(void);
 extern void dmi_scan_machine(void);
@@ -454,10 +452,10 @@
        struct e820entry *pbios; /* pointer to original bios entry */
        unsigned long long addr; /* address for this change point */
 };
-struct change_member change_point_list[2*E820MAX] __initdata;
-struct change_member *change_point[2*E820MAX] __initdata;
-struct e820entry *overlap_list[E820MAX] __initdata;
-struct e820entry new_bios[E820MAX] __initdata;
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
 
 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
 {
@@ -995,8 +993,6 @@
        return max_low_pfn;
 }
 
-#ifndef CONFIG_DISCONTIGMEM
-
 /*
  * Free all available memory for boot time allocation.  Used
  * as a callback function by efi_memory_walk()
@@ -1070,15 +1066,16 @@
                reserve_bootmem(addr, PAGE_SIZE);       
 }
 
+#ifndef CONFIG_DISCONTIGMEM
+void __init setup_bootmem_allocator(void);
 static unsigned long __init setup_memory(void)
 {
-       unsigned long bootmap_size, start_pfn, max_low_pfn;
 
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
+       min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
 
        find_max_pfn();
 
@@ -1094,10 +1091,43 @@
 #endif
        printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
                        pages_to_mb(max_low_pfn));
+
+       setup_bootmem_allocator();
+
+       return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+       unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+       unsigned int max_dma, low;
+
+       max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+       low = max_low_pfn;
+
+       if (low < max_dma)
+               zones_size[ZONE_DMA] = low;
+       else {
+               zones_size[ZONE_DMA] = max_dma;
+               zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+               zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+#endif
+       }
+       free_area_init(zones_size);
+}
+#else
+extern unsigned long setup_memory(void);
+extern void zone_sizes_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+void __init setup_bootmem_allocator(void)
+{
+       unsigned long bootmap_size;
        /*
         * Initialize the boot-time allocator (with low memory only):
         */
-       bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+       bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
 
        register_bootmem_low_pages(max_low_pfn);
 
@@ -1107,7 +1137,7 @@
         * the (very unlikely) case of us accidentally initializing the
         * bootmem allocator with an invalid RAM area.
         */
-       reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+       reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
                         bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
 
        /* reserve EBDA region, it's a 4K region */
@@ -1160,12 +1190,25 @@
 #endif
 
        phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
-
-       return max_low_pfn;
-}
-#else
-extern unsigned long setup_memory(void);
-#endif /* !CONFIG_DISCONTIGMEM */
+}
+
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem.  node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+void __init remapped_pgdat_init(void)
+{
+       int nid;
+
+       for_each_online_node(nid) {
+               if (nid != 0)
+                       memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+       }
+}
 
 /*
  * Request address space for all standard RAM and ROM resources
@@ -1440,7 +1483,6 @@
                machine_submodel_id = SYS_DESC_TABLE.table[1];
                BIOS_revision = SYS_DESC_TABLE.table[2];
        }
-       aux_device_present = AUX_DEVICE_INFO;
        bootloader_type = LOADER_TYPE;
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
@@ -1500,6 +1542,8 @@
        smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
 #endif
        paging_init();
+       remapped_pgdat_init();
+       zone_sizes_init();
 
        /* Make sure we have a correctly sized P->M table. */
        if (max_pfn != xen_start_info.nr_pages) {
@@ -1564,11 +1608,13 @@
        if (efi_enabled)
                efi_map_memmap();
 
+#ifdef CONFIG_ACPI_BOOT
        /*
         * Parse the ACPI tables for possible boot-time SMP configuration.
         */
        acpi_boot_table_init();
        acpi_boot_init();
+#endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
        if (smp_found_config)
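
[Editorial example] zone_sizes_init() in the setup.c hunk above splits low memory at the ISA DMA boundary. A worked example of that split, assuming the conventional i386 values of a 16 MB MAX_DMA_ADDRESS and 4 KB pages (assumed constants, not taken from this patch):

    #include <stdio.h>

    int main(void)
    {
            /* Assumed i386 constants: 16 MB ISA DMA limit, 4 KB pages. */
            unsigned long max_dma = (16UL << 20) >> 12;  /* 4096 pages */
            unsigned long low = 512UL << (20 - 12);      /* 512 MB of lowmem */
            unsigned long zone_dma, zone_normal;

            if (low < max_dma) {          /* tiny box: everything is DMA zone */
                    zone_dma = low;
                    zone_normal = 0;
            } else {                      /* usual case: split at 16 MB */
                    zone_dma = max_dma;
                    zone_normal = low - max_dma;
            }
            printf("ZONE_DMA=%lu pages, ZONE_NORMAL=%lu pages\n",
                   zone_dma, zone_normal);
            return 0;
    }
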
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c        Tue Aug  9 23:57:17 2005
@@ -93,7 +93,7 @@
 
        if (act) {
                old_sigset_t mask;
-               if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+               if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
                    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
                    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
                        return -EFAULT;
@@ -105,7 +105,7 @@
        ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
 
        if (!ret && oact) {
-               if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+               if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
                    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
                    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
                        return -EFAULT;
@@ -187,7 +187,7 @@
                struct _fpstate __user * buf;
                err |= __get_user(buf, &sc->fpstate);
                if (buf) {
-                       if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+                       if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
                                goto badframe;
                        err |= restore_i387(buf);
                } else {
@@ -213,7 +213,7 @@
        sigset_t set;
        int eax;
 
-       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+       if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
        if (__get_user(set.sig[0], &frame->sc.oldmask)
            || (_NSIG_WORDS > 1
@@ -243,7 +243,7 @@
        sigset_t set;
        int eax;
 
-       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+       if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
@@ -557,6 +557,16 @@
                }
        }
 
+       /*
+        * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
+        * that register information in the sigcontext is correct.
+        */
+       if (unlikely(regs->eflags & TF_MASK)
+           && likely(current->ptrace & PT_DTRACE)) {
+               current->ptrace &= ~PT_DTRACE;
+               regs->eflags &= ~TF_MASK;
+       }
+
        /* Set up the stack frame */
        if (ka->sa.sa_flags & SA_SIGINFO)
                setup_rt_frame(sig, ka, info, oldset, regs);
@@ -608,8 +618,7 @@
                 * inside the kernel.
                 */
                if (unlikely(current->thread.debugreg[7])) {
-                       HYPERVISOR_set_debugreg(7,
-                                               current->thread.debugreg[7]);
+                       loaddebug(&current->thread, 7);
                }
 
                /* Whee!  Actually deliver the signal.  */
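
[Editorial example] The signal.c hunks above are part of the kernel-wide move from verify_area() to access_ok(): verify_area() returned 0 when the user range was valid, while access_ok() returns non-zero, so every converted call site gains a leading negation. A self-contained sketch of the polarity flip, with stubbed helpers that are illustrative only:

    #include <stdio.h>

    /* Stub semantics of the two helpers, for illustration only. */
    static int verify_area(int ok) { return ok ? 0 : -14; } /* 0 == valid  */
    static int access_ok(int ok)   { return ok; }           /* !0 == valid */

    int main(void)
    {
            int ok = 1;
            /* Old call site: error when verify_area() != 0. */
            if (verify_area(ok))
                    puts("old: -EFAULT");
            /* Converted call site: error when !access_ok(). */
            if (!access_ok(ok))
                    puts("new: -EFAULT");
            puts("both checks pass for a valid range");
            return 0;
    }
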
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Tue Aug  9 23:57:17 2005
@@ -176,6 +176,35 @@
  ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
 
 /*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with.  It is required for NMI access to the
+ * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+       unsigned char val;
+       lock_cmos_prefix(addr);
+       outb_p(addr, RTC_PORT(0));
+       val = inb_p(RTC_PORT(1));
+       lock_cmos_suffix(addr);
+       return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+       lock_cmos_prefix(addr);
+       outb_p(addr, RTC_PORT(0));
+       outb_p(val, RTC_PORT(1));
+       lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+/*
  * This version of gettimeofday has microsecond resolution
  * and better than microsecond precision on fast x86 machines with TSC.
  */
@@ -335,15 +364,22 @@
 {
        int retval;
 
+       WARN_ON(irqs_disabled());
+
        /* gets recalled with irq locally disabled */
-       spin_lock(&rtc_lock);
+       spin_lock_irq(&rtc_lock);
        if (efi_enabled)
                retval = efi_set_rtc_mmss(nowtime);
        else
                retval = mach_set_rtc_mmss(nowtime);
-       spin_unlock(&rtc_lock);
+       spin_unlock_irq(&rtc_lock);
 
        return retval;
+}
+#else
+static int set_rtc_mmss(unsigned long nowtime)
+{
+       return 0;
 }
 #endif
 
@@ -476,29 +512,6 @@
 
                last_update_to_xen = xtime.tv_sec;
        }
-
-       /*
-        * If we have an externally synchronized Linux clock, then update
-        * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-        * called as close as possible to 500 ms before the new second starts.
-        */
-       if ((time_status & STA_UNSYNC) == 0 &&
-           xtime.tv_sec > last_rtc_update + 660 &&
-           (xtime.tv_nsec / 1000)
-                       >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
-           (xtime.tv_nsec / 1000)
-                       <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
-               /* horrible...FIXME */
-               if (efi_enabled) {
-                       if (efi_set_rtc_mmss(xtime.tv_sec) == 0)
-                               last_rtc_update = xtime.tv_sec;
-                       else
-                               last_rtc_update = xtime.tv_sec - 600;
-               } else if (set_rtc_mmss(xtime.tv_sec) == 0)
-                       last_rtc_update = xtime.tv_sec;
-               else
-                       last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
-       }
 #endif
 }
 
@@ -538,10 +551,59 @@
 
        return retval;
 }
+static void sync_cmos_clock(unsigned long dummy);
+
+static struct timer_list sync_cmos_timer =
+                                      TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
+
+static void sync_cmos_clock(unsigned long dummy)
+{
+       struct timeval now, next;
+       int fail = 1;
+
+       /*
+        * If we have an externally synchronized Linux clock, then update
+        * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+        * called as close as possible to 500 ms before the new second starts.
+        * This code is run on a timer.  If the clock is set, that timer
+        * may not expire at the correct time.  Thus, we adjust...
+        */
+       if ((time_status & STA_UNSYNC) != 0)
+               /*
+                * Not synced, exit, do not restart a timer (if one is
+                * running, let it run out).
+                */
+               return;
+
+       do_gettimeofday(&now);
+       if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+           now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
+               fail = set_rtc_mmss(now.tv_sec);
+
+       next.tv_usec = USEC_AFTER - now.tv_usec;
+       if (next.tv_usec <= 0)
+               next.tv_usec += USEC_PER_SEC;
+
+       if (!fail)
+               next.tv_sec = 659;
+       else
+               next.tv_sec = 0;
+
+       if (next.tv_usec >= USEC_PER_SEC) {
+               next.tv_sec++;
+               next.tv_usec -= USEC_PER_SEC;
+       }
+       mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
+}
+
+void notify_arch_cmos_timer(void)
+{
+       mod_timer(&sync_cmos_timer, jiffies + 1);
+}
 
 static long clock_cmos_diff, sleep_start;
 
-static int timer_suspend(struct sys_device *dev, u32 state)
+static int timer_suspend(struct sys_device *dev, pm_message_t state)
 {
        /*
         * Estimate time zone so that set_time can update the clock
@@ -599,14 +661,14 @@
 #ifdef CONFIG_HPET_TIMER
 extern void (*late_time_init)(void);
 /* Duplicate of time_init() below, with hpet_enable part added */
-void __init hpet_time_init(void)
+static void __init hpet_time_init(void)
 {
        xtime.tv_sec = get_cmos_time();
        xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
        set_normalized_timespec(&wall_to_monotonic,
                -xtime.tv_sec, -xtime.tv_nsec);
 
-       if (hpet_enable() >= 0) {
+       if ((hpet_enable() >= 0) && hpet_use_timer) {
                printk("Using HPET for base-timer\n");
        }
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Tue Aug  9 23:57:17 2005
@@ -342,8 +342,7 @@
 
        if (panic_on_oops) {
                printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(5 * HZ);
+               ssleep(5);
                panic("Fatal exception");
        }
        do_exit(SIGSEGV);
@@ -450,6 +449,7 @@
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
 #ifdef CONFIG_X86_MCE
 DO_ERROR(18, SIGBUS, "machine check", machine_check)
 #endif
@@ -635,16 +635,15 @@
 }
 
 #ifdef CONFIG_KPROBES
-fastcall int do_int3(struct pt_regs *regs, long error_code)
+fastcall void do_int3(struct pt_regs *regs, long error_code)
 {
        if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
                        == NOTIFY_STOP)
-               return 1;
+               return;
        /* This is an interrupt gate, because kprobes wants interrupts
        disabled.  Normal trap handlers don't. */
        restore_interrupts(regs);
        do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
-       return 0;
 }
 #endif
 
@@ -701,8 +700,6 @@
        /*
         * Single-stepping through TF: make sure we ignore any events in
         * kernel space (but re-enable TF when returning to user mode).
-        * And if the event was due to a debugger (PT_DTRACE), clear the
-        * TF flag so that register information is correct.
         */
        if (condition & DR_STEP) {
                /*
@@ -712,11 +709,6 @@
                 */
                if ((regs->xcs & 2) == 0)
                        goto clear_TF_reenable;
-
-               if (likely(tsk->ptrace & PT_DTRACE)) {
-                       tsk->ptrace &= ~PT_DTRACE;
-                       regs->eflags &= ~TF_MASK;
-               }
        }
 
        /* Ok, finally something we can handle */
@@ -806,7 +798,7 @@
        math_error((void __user *)regs->eip);
 }
 
-void simd_math_error(void __user *eip)
+static void simd_math_error(void __user *eip)
 {
        struct task_struct * task;
        siginfo_t info;
@@ -876,6 +868,51 @@
                current->thread.error_code = error_code;
                force_sig(SIGSEGV, current);
        }
+}
+
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+       unsigned long *switch16_ptr, *switch32_ptr;
+       struct pt_regs *regs;
+       unsigned long stack_top, stack_bot;
+       unsigned short iret_frame16_off;
+       int cpu = smp_processor_id();
+       /* reserve the space on 32bit stack for the magic switch16 pointer */
+       memmove(stk, stk + 8, sizeof(struct pt_regs));
+       switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+       regs = (struct pt_regs *)stk;
+       /* now the switch32 on 16bit stack */
+       stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+       stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+       switch32_ptr = (unsigned long *)(stack_top - 8);
+       iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+       /* copy iret frame on 16bit stack */
+       memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+       /* fill in the switch pointers */
+       switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+       switch16_ptr[1] = __ESPFIX_SS;
+       switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+               8 - CPU_16BIT_STACK_SIZE;
+       switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+       unsigned long *switch32_ptr;
+       unsigned char *stack16, *stack32;
+       unsigned long stack_top, stack_bot;
+       int len;
+       int cpu = smp_processor_id();
+       stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+       stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+       switch32_ptr = (unsigned long *)(stack_top - 8);
+       /* copy the data from 16bit stack to 32bit stack */
+       len = CPU_16BIT_STACK_SIZE - 8 - sp;
+       stack16 = (unsigned char *)(stack_bot + sp);
+       stack32 = (unsigned char *)
+               (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+       memcpy(stack32, stack16, len);
+       return stack32;
 }
 
 /*
@@ -978,3 +1015,10 @@
         */
        cpu_init();
 }
+
+static int __init kstack_setup(char *s)
+{
+       kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+       return 0;
+}
+__setup("kstack=", kstack_setup);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c   Tue Aug  9 23:57:17 2005
@@ -77,7 +77,7 @@
         * force other mappings to Oops if they'll try to access
         * this pte without first remap it
         */
-       pte_clear(kmap_pte-idx);
+       pte_clear(&init_mm, vaddr, kmap_pte-idx);
        __flush_tlb_one(vaddr);
 #endif
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Tue Aug  9 23:57:17 2005
@@ -249,13 +249,10 @@
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
 
-EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
-
 #define kmap_get_fixmap_pte(vaddr)                                     \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
 
-void __init kmap_init(void)
+static void __init kmap_init(void)
 {
        unsigned long kmap_vstart;
 
@@ -266,7 +263,7 @@
        kmap_prot = PAGE_KERNEL;
 }
 
-void __init permanent_kmaps_init(pgd_t *pgd_base)
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -298,7 +295,7 @@
 }
 
 #ifndef CONFIG_DISCONTIGMEM
-void __init set_highmem_pages_init(int bad_ppro) 
+static void __init set_highmem_pages_init(int bad_ppro)
 {
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
@@ -426,38 +423,6 @@
        flush_tlb_all();
 }
 
-#ifndef CONFIG_DISCONTIGMEM
-void __init zone_sizes_init(void)
-{
-       unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-       unsigned int /*max_dma,*/ high, low;
-       
-       /*
-        * XEN: Our notion of "DMA memory" is fake when running over Xen.
-        * We simply put all RAM in the DMA zone so that those drivers which
-        * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
-        * Those drivers that *do* require lowmem are screwed anyway when
-        * running over Xen!
-        */
-       /*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
-       low = max_low_pfn;
-       high = highend_pfn;
-       
-       /*if (low < max_dma)*/
-               zones_size[ZONE_DMA] = low;
-       /*else*/ {
-               /*zones_size[ZONE_DMA] = max_dma;*/
-               /*zones_size[ZONE_NORMAL] = low - max_dma;*/
-#ifdef CONFIG_HIGHMEM
-               zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-       }
-       free_area_init(zones_size);     
-}
-#else
-extern void zone_sizes_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
-
 static int disable_nx __initdata = 0;
 u64 __supported_pte_mask = ~_PAGE_NX;
 
@@ -560,7 +525,6 @@
        __flush_tlb_all();
 
        kmap_init();
-       zone_sizes_init();
 
        /* Switch to the real shared_info page, and clear the dummy page. */
        flush_page_update_queue();
@@ -586,7 +550,7 @@
  * but fortunately the switch to using exceptions got rid of all that.
  */
 
-void __init test_wp_bit(void)
+static void __init test_wp_bit(void)
 {
        printk("Checking if this processor honours the WP bit even in 
supervisor mode... ");
 
@@ -605,20 +569,17 @@
        }
 }
 
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+       num_physpages = highend_pfn;
+#else
+       num_physpages = max_low_pfn;
+#endif
 #ifndef CONFIG_DISCONTIGMEM
-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
-       max_mapnr = num_physpages = highend_pfn;
-#else
-       max_mapnr = num_physpages = max_low_pfn;
-#endif
-}
-#define __free_all_bootmem() free_all_bootmem()
-#else
-#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
-extern void set_max_mapnr_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
+       max_mapnr = num_physpages;
+#endif
+}
 
 static struct kcore_list kcore_mem, kcore_vmalloc; 
 
@@ -650,16 +611,16 @@
        set_max_mapnr_init();
 
 #ifdef CONFIG_HIGHMEM
-       high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
+       high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
        printk("vmalloc area: %lx-%lx, maxmem %lx\n",
               VMALLOC_START,VMALLOC_END,MAXMEM);
        BUG_ON(VMALLOC_START > VMALLOC_END);
        
        /* this will put all low memory onto the freelists */
-       totalram_pages += __free_all_bootmem();
+       totalram_pages += free_all_bootmem();
        /* XEN: init and count low-mem pages outside initial allocation. */
        for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
                ClearPageReserved(&mem_map[pfn]);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Tue Aug  9 23:57:17 2005
@@ -368,7 +368,7 @@
        if (PTRS_PER_PMD > 1)
                for (i = 0; i < USER_PTRS_PER_PGD; ++i)
                        kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-       /* in the non-PAE case, clear_page_range() clears user pgd entries */
+       /* in the non-PAE case, free_pgtables() clears user pgd entries */
        kmem_cache_free(pgd_cache, pgd);
 }
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c      Tue Aug  9 23:57:17 2005
@@ -114,11 +114,11 @@
        if (pin != 0) {
                if (dev->irq != 0)
                        printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n",
-                           dev->irq, dev->slot_name);
+                           dev->irq, pci_name(dev));
                else
                        printk(KERN_WARNING "PCI: No IRQ known for interrupt "
                            "pin %c of device %s.\n", 'A' + pin - 1,
-                           dev->slot_name);
+                           pci_name(dev));
        }
 
        return 0;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/Makefile
--- a/linux-2.6-xen-sparse/drivers/Makefile     Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/Makefile     Tue Aug  9 23:57:17 2005
@@ -48,8 +48,8 @@
 obj-$(CONFIG_TC)               += tc/
 obj-$(CONFIG_USB)              += usb/
 obj-$(CONFIG_USB_GADGET)       += usb/gadget/
+obj-$(CONFIG_GAMEPORT)         += input/gameport/
 obj-$(CONFIG_INPUT)            += input/
-obj-$(CONFIG_GAMEPORT)         += input/gameport/
 obj-$(CONFIG_I2O)              += message/
 obj-$(CONFIG_I2C)              += i2c/
 obj-$(CONFIG_W1)               += w1/
@@ -62,5 +62,6 @@
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_MMC)              += mmc/
 obj-$(CONFIG_INFINIBAND)       += infiniband/
+obj-$(CONFIG_BLK_DEV_SGIIOC4)  += sn/
 obj-y                          += firmware/
 obj-$(CONFIG_CRYPTO)           += crypto/
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c   Tue Aug  9 23:57:17 2005
@@ -23,6 +23,7 @@
 #include <linux/devfs_fs_kernel.h>
 #include <linux/ptrace.h>
 #include <linux/device.h>
+#include <linux/backing-dev.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -76,14 +77,6 @@
         * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
         */
        return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-#elif defined(CONFIG_PPC64)
-       /* On PPC64, we always do non-cacheable access to the IO hole and
-        * cacheable elsewhere. Cache paradox can checkstop the CPU and
-        * the high_memory heuristic below is wrong on machines with memory
-        * above the IO hole... Ah, and of course, XFree86 doesn't pass
-        * O_SYNC when mapping us to tap IO space. Surprised ?
-        */
-       return !page_is_ram(addr >> PAGE_SHIFT);
 #else
        /*
         * Accessing memory above the top the kernel knows about or through a file pointer
@@ -111,38 +104,6 @@
 }
 #endif
 
-static ssize_t do_write_mem(void *p, unsigned long realp,
-                           const char __user * buf, size_t count, loff_t *ppos)
-{
-       ssize_t written;
-       unsigned long copied;
-
-       written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-       /* we don't have page 0 mapped on sparc and m68k.. */
-       if (realp < PAGE_SIZE) {
-               unsigned long sz = PAGE_SIZE-realp;
-               if (sz > count) sz = count; 
-               /* Hmm. Do something? */
-               buf+=sz;
-               p+=sz;
-               count-=sz;
-               written+=sz;
-       }
-#endif
-       copied = copy_from_user(p, buf, count);
-       if (copied) {
-               ssize_t ret = written + (count - copied);
-
-               if (ret)
-                       return ret;
-               return -EFAULT;
-       }
-       written += count;
-       *ppos += written;
-       return written;
-}
-
 #ifndef ARCH_HAS_DEV_MEM
 /*
  * This funcion reads the *physical* memory. The f_pos points directly to the 
@@ -152,15 +113,16 @@
                        size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
-       ssize_t read;
+       ssize_t read, sz;
+       char *ptr;
 
        if (!valid_phys_addr_range(p, &count))
                return -EFAULT;
        read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
        /* we don't have page 0 mapped on sparc and m68k.. */
        if (p < PAGE_SIZE) {
-               unsigned long sz = PAGE_SIZE-p;
+               sz = PAGE_SIZE - p;
                if (sz > count) 
                        sz = count; 
                if (sz > 0) {
@@ -173,9 +135,33 @@
                }
        }
 #endif
-       if (copy_to_user(buf, __va(p), count))
-               return -EFAULT;
-       read += count;
+
+       while (count > 0) {
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-p & (PAGE_SIZE - 1))
+                       sz = -p & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_mem_ptr(p);
+
+               if (copy_to_user(buf, ptr, sz))
+                       return -EFAULT;
+               buf += sz;
+               p += sz;
+               count -= sz;
+               read += sz;
+       }
+
        *ppos += read;
        return read;
 }
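
The copy loop above leans on "-p & (PAGE_SIZE - 1)" being the distance
from p up to the next page boundary (0 when p is already aligned, in
which case a full page is taken). A self-contained sketch of that
chunking arithmetic, with PAGE_SIZE assumed to be 4096 for the demo:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
        unsigned long p = 0x1234;       /* arbitrary unaligned start */
        unsigned long count = 10000;    /* bytes left to copy */

        while (count > 0) {
                unsigned long sz = -p & (PAGE_SIZE - 1);
                if (sz == 0)
                        sz = PAGE_SIZE;
                if (sz > count)         /* min_t(unsigned long, ...) */
                        sz = count;
                printf("copy %5lu bytes at 0x%lx\n", sz, p);
                p += sz;
                count -= sz;
        }
        return 0;
}
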
@@ -184,16 +170,76 @@
                         size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
+       ssize_t written, sz;
+       unsigned long copied;
+       void *ptr;
 
        if (!valid_phys_addr_range(p, &count))
                return -EFAULT;
-       return do_write_mem(__va(p), p, buf, count, ppos);
+
+       written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (p < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE - p;
+               if (sz > count)
+                       sz = count;
+               /* Hmm. Do something? */
+               buf += sz;
+               p += sz;
+               count -= sz;
+               written += sz;
+       }
+#endif
+
+       while (count > 0) {
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-p & (PAGE_SIZE - 1))
+                       sz = -p & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_mem_ptr(p);
+
+               copied = copy_from_user(ptr, buf, sz);
+               if (copied) {
+                       ssize_t ret;
+
+                       ret = written + (sz - copied);
+                       if (ret)
+                               return ret;
+                       return -EFAULT;
+               }
+               buf += sz;
+               p += sz;
+               count -= sz;
+               written += sz;
+       }
+
+       *ppos += written;
+       return written;
 }
 #endif
 
 static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
 {
-#ifdef pgprot_noncached
+#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+       vma->vm_page_prot = phys_mem_access_prot(file, offset,
+                                                vma->vm_end - vma->vm_start,
+                                                vma->vm_page_prot);
+#elif defined(pgprot_noncached)
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
        int uncached;
 
@@ -212,6 +258,25 @@
        return 0;
 }
 
+#if 0
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+        unsigned long long val;
+       /*
+        * RED-PEN: on some architectures there is more mapped memory
+        * than available in mem_map which pfn_valid checks
+        * for. Perhaps should add a new macro here.
+        *
+        * RED-PEN: vmalloc is not supported right now.
+        */
+       if (!pfn_valid(vma->vm_pgoff))
+               return -EIO;
+       val = (u64)vma->vm_pgoff << PAGE_SHIFT;
+       vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
+       return mmap_mem(file, vma);
+}
+#endif
+
 extern long vread(char *buf, char *addr, unsigned long count);
 extern long vwrite(char *buf, char *addr, unsigned long count);
 
@@ -222,33 +287,55 @@
                         size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
-       ssize_t read = 0;
-       ssize_t virtr = 0;
+       ssize_t low_count, read, sz;
        char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-               
+
+       read = 0;
        if (p < (unsigned long) high_memory) {
-               read = count;
+               low_count = count;
                if (count > (unsigned long) high_memory - p)
-                       read = (unsigned long) high_memory - p;
-
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+                       low_count = (unsigned long) high_memory - p;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
                /* we don't have page 0 mapped on sparc and m68k.. */
-               if (p < PAGE_SIZE && read > 0) {
+               if (p < PAGE_SIZE && low_count > 0) {
                        size_t tmp = PAGE_SIZE - p;
-                       if (tmp > read) tmp = read;
+                       if (tmp > low_count) tmp = low_count;
                        if (clear_user(buf, tmp))
                                return -EFAULT;
                        buf += tmp;
                        p += tmp;
-                       read -= tmp;
+                       read += tmp;
+                       low_count -= tmp;
                        count -= tmp;
                }
 #endif
-               if (copy_to_user(buf, (char *)p, read))
-                       return -EFAULT;
-               p += read;
-               buf += read;
-               count -= read;
+               while (low_count > 0) {
+                       /*
+                        * Handle first page in case it's not aligned
+                        */
+                       if (-p & (PAGE_SIZE - 1))
+                               sz = -p & (PAGE_SIZE - 1);
+                       else
+                               sz = PAGE_SIZE;
+
+                       sz = min_t(unsigned long, sz, low_count);
+
+                       /*
+                        * On ia64 if a page has been mapped somewhere as
+                        * uncached, then it must also be accessed uncached
+                        * by the kernel or data corruption may occur
+                        */
+                       kbuf = xlate_dev_kmem_ptr((char *)p);
+
+                       if (copy_to_user(buf, kbuf, sz))
+                               return -EFAULT;
+                       buf += sz;
+                       p += sz;
+                       read += sz;
+                       low_count -= sz;
+                       count -= sz;
+               }
        }
 
        if (count > 0) {
@@ -269,14 +356,78 @@
                        }
                        count -= len;
                        buf += len;
-                       virtr += len;
+                       read += len;
                        p += len;
                }
                free_page((unsigned long)kbuf);
        }
        *ppos = p;
-       return virtr + read;
-}
+       return read;
+}
+
+
+static inline ssize_t
+do_write_kmem(void *p, unsigned long realp, const char __user * buf,
+             size_t count, loff_t *ppos)
+{
+       ssize_t written, sz;
+       unsigned long copied;
+
+       written = 0;
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (realp < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE - realp;
+               if (sz > count)
+                       sz = count;
+               /* Hmm. Do something? */
+               buf += sz;
+               p += sz;
+               realp += sz;
+               count -= sz;
+               written += sz;
+       }
+#endif
+
+       while (count > 0) {
+               char *ptr;
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-realp & (PAGE_SIZE - 1))
+                       sz = -realp & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_kmem_ptr(p);
+
+               copied = copy_from_user(ptr, buf, sz);
+               if (copied) {
+                       ssize_t ret;
+
+                       ret = written + (sz - copied);
+                       if (ret)
+                               return ret;
+                       return -EFAULT;
+               }
+               buf += sz;
+               p += sz;
+               realp += sz;
+               count -= sz;
+               written += sz;
+       }
+
+       *ppos += written;
+       return written;
+}
+
 
 /*
  * This function writes to the *virtual* memory as seen by the kernel.
@@ -296,7 +447,7 @@
                if (count > (unsigned long) high_memory - p)
                        wrote = (unsigned long) high_memory - p;
 
-               written = do_write_mem((void*)p, p, buf, wrote, ppos);
+               written = do_write_kmem((void*)p, p, buf, wrote, ppos);
                if (written != wrote)
                        return written;
                wrote = written;
@@ -344,7 +495,7 @@
        unsigned long i = *ppos;
        char __user *tmp = buf;
 
-       if (verify_area(VERIFY_WRITE,buf,count))
+       if (!access_ok(VERIFY_WRITE, buf, count))
                return -EFAULT; 
        while (count-- > 0 && i < 65536) {
                if (__put_user(inb(i),tmp) < 0) 
@@ -362,7 +513,7 @@
        unsigned long i = *ppos;
        const char __user * tmp = buf;
 
-       if (verify_area(VERIFY_READ,buf,count))
+       if (!access_ok(VERIFY_READ,buf,count))
                return -EFAULT;
        while (count-- > 0 && i < 65536) {
                char c;
@@ -568,7 +719,6 @@
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
-#define mmap_mem       mmap_kmem
 #define zero_lseek     null_lseek
 #define full_lseek      null_lseek
 #define write_zero     write_null
@@ -581,7 +731,7 @@
        .llseek         = memory_lseek,
        .read           = read_mem,
        .write          = write_mem,
-       .mmap           = mmap_mem,
+       .mmap           = mmap_kmem,
        .open           = open_mem,
 };
 #else
@@ -616,6 +766,10 @@
        .read           = read_zero,
        .write          = write_zero,
        .mmap           = mmap_zero,
+};
+
+static struct backing_dev_info zero_bdi = {
+       .capabilities   = BDI_CAP_MAP_COPY,
 };
 
 static struct file_operations full_fops = {
@@ -664,6 +818,7 @@
                        break;
 #endif
                case 5:
+                       filp->f_mapping->backing_dev_info = &zero_bdi;
                        filp->f_op = &zero_fops;
                        break;
                case 7:
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/char/tty_io.c
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c        Tue Aug  9 23:57:17 2005
@@ -187,7 +187,7 @@
 
 EXPORT_SYMBOL(tty_name);
 
-inline int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
                              const char *routine)
 {
 #ifdef TTY_PARANOIA_CHECK
@@ -1791,7 +1791,6 @@
        }
 #ifdef CONFIG_VT
        if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
-               extern int fg_console;
                extern struct tty_driver *console_driver;
                driver = console_driver;
                index = fg_console;
@@ -2018,11 +2017,10 @@
                return 0;
 #ifdef CONFIG_VT
        if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
-               unsigned int currcons = tty->index;
                int rc;
 
                acquire_console_sem();
-               rc = vc_resize(currcons, tmp_ws.ws_col, tmp_ws.ws_row);
+               rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
                release_console_sem();
                if (rc)
                        return -ENXIO;
@@ -2634,6 +2632,7 @@
        tty->magic = TTY_MAGIC;
        tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
        tty->pgrp = -1;
+       tty->overrun_time = jiffies;
        tty->flip.char_buf_ptr = tty->flip.char_buf;
        tty->flip.flag_buf_ptr = tty->flip.flag_buf;
        INIT_WORK(&tty->flip.work, flush_to_ldisc, tty);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-generic/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h        Tue Aug  9 23:57:17 2005
@@ -16,7 +16,7 @@
 #ifndef __HAVE_ARCH_SET_PTE_ATOMIC
 #define ptep_establish(__vma, __address, __ptep, __entry)              \
 do {                                                                   \
-       set_pte(__ptep, __entry);                                       \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);       \
        flush_tlb_page(__vma, __address);                               \
 } while (0)
 #else /* __HAVE_ARCH_SET_PTE_ATOMIC */
@@ -37,7 +37,7 @@
  */
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 do {                                                                     \
-       set_pte(__ptep, __entry);                                         \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);        \
        flush_tlb_page(__vma, __address);                                 \
 } while (0)
 #endif
@@ -53,20 +53,24 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_young(pte))
-               return 0;
-       set_pte(ptep, pte_mkold(pte));
-       return 1;
-}
+#define ptep_test_and_clear_young(__vma, __address, __ptep)            \
+({                                                                     \
+       pte_t __pte = *(__ptep);                                        \
+       int r = 1;                                                      \
+       if (!pte_young(__pte))                                          \
+               r = 0;                                                  \
+       else                                                            \
+               set_pte_at((__vma)->vm_mm, (__address),                 \
+                          (__ptep), pte_mkold(__pte));                 \
+       r;                                                              \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)               \
 ({                                                                     \
-       int __young = ptep_test_and_clear_young(__ptep);                \
+       int __young;                                                    \
+       __young = ptep_test_and_clear_young(__vma, __address, __ptep);  \
        if (__young)                                                    \
                flush_tlb_page(__vma, __address);                       \
        __young;                                                        \
@@ -74,20 +78,24 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_dirty(pte))
-               return 0;
-       set_pte(ptep, pte_mkclean(pte));
-       return 1;
-}
+#define ptep_test_and_clear_dirty(__vma, __address, __ptep)            \
+({                                                                     \
+       pte_t __pte = *__ptep;                                          \
+       int r = 1;                                                      \
+       if (!pte_dirty(__pte))                                          \
+               r = 0;                                                  \
+       else                                                            \
+               set_pte_at((__vma)->vm_mm, (__address), (__ptep),       \
+                          pte_mkclean(__pte));                         \
+       r;                                                              \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
 #define ptep_clear_flush_dirty(__vma, __address, __ptep)               \
 ({                                                                     \
-       int __dirty = ptep_test_and_clear_dirty(__ptep);                \
+       int __dirty;                                                    \
+       __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);  \
        if (__dirty)                                                    \
                flush_tlb_page(__vma, __address);                       \
        __dirty;                                                        \
@@ -95,36 +103,29 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       pte_clear(ptep);
-       return pte;
-}
+#define ptep_get_and_clear(__mm, __address, __ptep)                    \
+({                                                                     \
+       pte_t __pte = *(__ptep);                                        \
+       pte_clear((__mm), (__address), (__ptep));                       \
+       __pte;                                                          \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define ptep_clear_flush(__vma, __address, __ptep)                     \
 ({                                                                     \
-       pte_t __pte = ptep_get_and_clear(__ptep);                       \
+       pte_t __pte;                                                    \
+       __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);  \
        flush_tlb_page(__vma, __address);                               \
        __pte;                                                          \
 })
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
        pte_t old_pte = *ptep;
-       set_pte(ptep, pte_wrprotect(old_pte));
-}
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_MKDIRTY
-static inline void ptep_mkdirty(pte_t *ptep)
-{
-       pte_t old_pte = *ptep;
-       set_pte(ptep, pte_mkdirty(old_pte));
+       set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
 }
 #endif
 
@@ -144,4 +145,77 @@
 #define pgd_offset_gate(mm, addr)      pgd_offset(mm, addr)
 #endif
 
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte)      do { } while (0)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier.  Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+       if (pgd_none(*pgd))
+               return 1;
+       if (unlikely(pgd_bad(*pgd))) {
+               pgd_clear_bad(pgd);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+       if (pud_none(*pud))
+               return 1;
+       if (unlikely(pud_bad(*pud))) {
+               pud_clear_bad(pud);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+       if (pmd_none(*pmd))
+               return 1;
+       if (unlikely(pmd_bad(*pmd))) {
+               pmd_clear_bad(pmd);
+               return 1;
+       }
+       return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
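
The new p?d_addr_end() helpers compare "__boundary - 1 < (end) - 1"
rather than the raw values because, as the comment above notes, a
boundary rounded up past the top of the address space wraps to 0. A
small demonstration of the wrap case, forced onto 32-bit arithmetic
(the 4 MB PGDIR_SIZE is an assumption for the demo):

#include <stdio.h>
#include <stdint.h>

#define PGDIR_SIZE ((uint32_t)1 << 22)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

static uint32_t pgd_addr_end(uint32_t addr, uint32_t end)
{
        uint32_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
        /* without the -1 bias, a wrapped boundary of 0 would win */
        return (uint32_t)(boundary - 1) < (end - 1) ? boundary : end;
}

int main(void)
{
        /* addr in the last pgd: the rounded-up boundary wraps to 0,
         * yet the biased compare still picks end */
        printf("next = 0x%x\n", pgd_addr_end(0xffc01000u, 0xffffffffu));
        return 0;
}
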
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h      Tue Aug  9 23:57:17 2005
@@ -3,6 +3,8 @@
 
 #include <asm/ldt.h>
 #include <asm/segment.h>
+
+#define CPU_16BIT_STACK_SIZE 1024
 
 #ifndef __ASSEMBLY__
 
@@ -12,6 +14,8 @@
 #include <asm/mmu.h>
 
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+
+DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 
 struct Xgt_desc_struct {
        unsigned short size;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h       Tue Aug  9 23:57:17 2005
@@ -11,7 +11,7 @@
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-                          dma_addr_t *dma_handle, int flag);
+                          dma_addr_t *dma_handle, unsigned int __nocast flag);
 
 void dma_free_coherent(struct device *dev, size_t size,
                         void *vaddr, dma_addr_t dma_handle);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h   Tue Aug  9 23:57:17 2005
@@ -32,8 +32,6 @@
 extern pte_t *kmap_pte;
 extern pgprot_t kmap_prot;
 extern pte_t *pkmap_page_table;
-
-extern void kmap_init(void);
 
 /*
  * Right now we initialize only a single pte table. It can be extended
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h        Tue Aug  9 23:57:17 2005
@@ -49,6 +49,17 @@
 
 #include <linux/vmalloc.h>
 #include <asm/fixmap.h>
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)   __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p)  p
 
 /**
  *     virt_to_phys    -       map virtual addresses to physical
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Tue Aug  9 23:57:17 2005
@@ -64,7 +64,7 @@
 }
 
 #define deactivate_mm(tsk, mm) \
-       asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+       asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
 
 #define activate_mm(prev, next) do {           \
        switch_mm((prev),(next),NULL);          \
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h   Tue Aug  9 23:57:17 2005
@@ -2,7 +2,6 @@
 #define _I386_PGALLOC_H
 
 #include <linux/config.h>
-#include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <linux/threads.h>
 #include <linux/mm.h>          /* for struct page */
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Tue Aug  9 23:57:17 2005
@@ -16,6 +16,7 @@
 #define set_pte_batched(pteptr, pteval) \
        queue_l1_entry_update(pteptr, (pteval).pte_low)
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
 
@@ -28,14 +29,8 @@
  * each domain will have separate page tables, with their own versions of
  * accessed & dirty state.
  */
-static inline pte_t ptep_get_and_clear(pte_t *xp)
-{
-       pte_t pte = *xp;
-       if (pte.pte_low)
-               set_pte(xp, __pte_ma(0));
-       return pte;
-}
 
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
 #define pte_same(a, b)         ((a).pte_low == (b).pte_low)
 /*
  * We detect special mappings in one of two ways:
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Tue Aug  9 23:57:17 2005
@@ -64,7 +64,7 @@
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
 
 #define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_PGD_NR      0
+#define FIRST_USER_ADDRESS     0
 
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
@@ -200,15 +200,15 @@
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
- * done without a 'verify_area(VERIFY_WRITE,..)'
- */
-#undef TEST_VERIFY_AREA
+ * done without a 'access_ok(VERIFY_WRITE,..)'
+ */
+#undef TEST_ACCESS_OK
 
 /* The boot page tables (all created as a single array) */
 extern unsigned long pg0[];
 
 #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp)  do { set_pte(xp, __pte(0)); } while (0)
+#define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 #define pmd_none(x)    (!pmd_val(x))
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
@@ -252,7 +252,7 @@
 # include <asm/pgtable-2level.h>
 #endif
 
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
        int ret = pte_dirty(pte);
@@ -261,7 +261,7 @@
        return ret;
 }
 
-static inline int ptep_test_and_clear_young(pte_t *ptep)
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
        int ret = pte_young(pte);
@@ -270,18 +270,11 @@
        return ret;
 }
 
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
        if (pte_write(pte))
                set_pte(ptep, pte_wrprotect(pte));
-}
-
-static inline void ptep_mkdirty(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_dirty(pte))
-               xen_l1_entry_update(ptep, pte_mkdirty(pte).pte_low);
 }
 
 /*
@@ -495,11 +488,14 @@
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
 direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
 
+#define MK_IOSPACE_PFN(space, pfn)     (pfn)
+#define GET_IOSPACE(pfn)               0
+#define GET_PFN(pfn)                   (pfn)
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Tue Aug  9 23:57:17 2005
@@ -99,12 +99,12 @@
 #endif
 
 extern int phys_proc_id[NR_CPUS];
+extern int cpu_core_id[NR_CPUS];
 extern char ignore_fpu_irq;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void dodgy_tsc(void);
 
 #ifdef CONFIG_X86_HT
 extern void detect_ht(struct cpuinfo_x86 *c);
@@ -138,7 +138,7 @@
  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
  * resulting in stale register contents being returned.
  */
-static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
 {
        __asm__("cpuid"
                : "=a" (*eax),
@@ -146,6 +146,18 @@
                  "=c" (*ecx),
                  "=d" (*edx)
                : "0" (op), "c"(0));
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+               int *edx)
+{
+       __asm__("cpuid"
+               : "=a" (*eax),
+                 "=b" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (op), "c" (count));
 }
 
 /*
@@ -501,6 +513,13 @@
        regs->esp = new_esp;                                    \
 } while (0)
 
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+       HYPERVISOR_set_debugreg((register),     \
+                       ((thread)->debugreg[register]))
+
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
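
The cpuid_count() helper above differs from cpuid() only in preloading
a caller-chosen value into ECX, a value that leaves such as CPUID 4
(deterministic cache parameters on Intel CPUs) treat as a sub-leaf
index. A userspace sketch of how it might be used; x86 only, and the
EAX field layout shown is an assumption of this example, not something
the patch defines:

#include <stdio.h>

static inline void cpuid_count(unsigned int op, unsigned int count,
                               unsigned int *eax, unsigned int *ebx,
                               unsigned int *ecx, unsigned int *edx)
{
        __asm__("cpuid"
                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                : "0" (op), "c" (count));
}

int main(void)
{
        unsigned int eax, ebx, ecx, edx, i;

        for (i = 0; i < 8; i++) {
                cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
                if ((eax & 0x1f) == 0) /* cache type 0: no more caches */
                        break;
                printf("cache %u: type %u, level %u\n",
                       i, eax & 0x1f, (eax >> 5) & 7);
        }
        return 0;
}
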
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h   Tue Aug  9 23:57:17 2005
@@ -38,7 +38,7 @@
  *  24 - APM BIOS support
  *  25 - APM BIOS support 
  *
- *  26 - unused
+ *  26 - ESPFIX small SS
  *  27 - unused
  *  28 - unused
  *  29 - unused
@@ -71,6 +71,9 @@
 #define GDT_ENTRY_PNPBIOS_BASE         (GDT_ENTRY_KERNEL_BASE + 6)
 #define GDT_ENTRY_APMBIOS_BASE         (GDT_ENTRY_KERNEL_BASE + 11)
 
+#define GDT_ENTRY_ESPFIX_SS            (GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS      31
 
 /*
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h     Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h     Tue Aug  9 23:57:17 2005
@@ -16,7 +16,7 @@
 #define MAXMEM_PFN     PFN_DOWN(MAXMEM)
 #define MAX_NONPAE_PFN (1 << 20)
 
-#define PARAM_SIZE 2048
+#define PARAM_SIZE 4096
 #define COMMAND_LINE_SIZE 256
 
 #define OLD_CL_MAGIC_ADDR      0x90020
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Tue Aug  9 23:57:17 2005
@@ -84,7 +84,7 @@
 #define loadsegment(seg,value)                 \
        asm volatile("\n"                       \
                "1:\t"                          \
-               "movl %0,%%" #seg "\n"          \
+               "mov %0,%%" #seg "\n"           \
                "2:\n"                          \
                ".section .fixup,\"ax\"\n"      \
                "3:\t"                          \
@@ -96,13 +96,13 @@
                ".align 4\n\t"                  \
                ".long 1b,3b\n"                 \
                ".previous"                     \
-               : :"m" (*(unsigned int *)&(value)))
+               : :"m" (value))
 
 /*
  * Save a segment register away
  */
 #define savesegment(seg, value) \
-       asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
+       asm volatile("mov %%" #seg ",%0":"=m" (value))
 
 /*
  * Clear and set 'TS' bit respectively
@@ -519,4 +519,6 @@
 extern int es7000_plat;
 void cpu_idle_wait(void);
 
+extern unsigned long arch_align_stack(unsigned long sp);
+
 #endif
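
The loadsegment()/savesegment() change above replaces "movl" plus an
(int *) cast with a plain "mov" on the variable itself: moves to and
from segment registers only transfer 16 bits, so letting the assembler
size the memory operand avoids a 32-bit store into what may be a 16-bit
variable. A minimal x86-only userspace sketch of the savesegment()
idea:

#include <stdio.h>

int main(void)
{
        unsigned short ss;

        /* store %ss into a 16-bit variable, as savesegment() now can */
        __asm__ volatile("mov %%ss, %0" : "=m" (ss));
        printf("ss = 0x%04x\n", ss);
        return 0;
}
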
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/linux/gfp.h
--- a/linux-2.6-xen-sparse/include/linux/gfp.h  Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h  Tue Aug  9 23:57:17 2005
@@ -26,26 +26,28 @@
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  */
-#define __GFP_WAIT     0x10    /* Can wait and reschedule? */
-#define __GFP_HIGH     0x20    /* Should access emergency pools? */
-#define __GFP_IO       0x40    /* Can start physical IO? */
-#define __GFP_FS       0x80    /* Can call down to low-level FS? */
-#define __GFP_COLD     0x100   /* Cache-cold page required */
-#define __GFP_NOWARN   0x200   /* Suppress page allocation failure warning */
-#define __GFP_REPEAT   0x400   /* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL   0x800   /* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY  0x1000  /* Do not retry.  Might fail */
-#define __GFP_NO_GROW  0x2000  /* Slab internal usage */
-#define __GFP_COMP     0x4000  /* Add compound page metadata */
-#define __GFP_ZERO     0x8000  /* Return zeroed page on success */
+#define __GFP_WAIT     0x10u   /* Can wait and reschedule? */
+#define __GFP_HIGH     0x20u   /* Should access emergency pools? */
+#define __GFP_IO       0x40u   /* Can start physical IO? */
+#define __GFP_FS       0x80u   /* Can call down to low-level FS? */
+#define __GFP_COLD     0x100u  /* Cache-cold page required */
+#define __GFP_NOWARN   0x200u  /* Suppress page allocation failure warning */
+#define __GFP_REPEAT   0x400u  /* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL   0x800u  /* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY  0x1000u /* Do not retry.  Might fail */
+#define __GFP_NO_GROW  0x2000u /* Slab internal usage */
+#define __GFP_COMP     0x4000u /* Add compound page metadata */
+#define __GFP_ZERO     0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
 
-#define __GFP_BITS_SHIFT 16    /* Room for 16 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 20    /* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
                        __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
-                       __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+                       __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+                       __GFP_NOMEMALLOC)
 
 #define GFP_ATOMIC     (__GFP_HIGH)
 #define GFP_NOIO       (__GFP_WAIT)
@@ -86,7 +88,7 @@
 extern struct page *
 FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
 
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
+static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask,
                                                unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
@@ -97,17 +99,17 @@
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order);
 
 static inline struct page *
-alloc_pages(unsigned int gfp_mask, unsigned int order)
+alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
                return NULL;
 
        return alloc_pages_current(gfp_mask, order);
 }
-extern struct page *alloc_page_vma(unsigned gfp_mask,
+extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
                        struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
@@ -116,8 +118,8 @@
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
-extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
 
 #define __get_free_page(gfp_mask) \
                __get_free_pages((gfp_mask),0)
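
The gfp.h hunk above makes every flag value unsigned and widens
__GFP_BITS_SHIFT from 16 to 20 because the new __GFP_NOMEMALLOC bit,
0x10000u, no longer fits below the old shift. A trivial standalone
check using the values from the hunk:

#include <stdio.h>

#define __GFP_NOMEMALLOC  0x10000u
#define OLD_GFP_BITS_MASK ((1u << 16) - 1)      /* old shift of 16 */
#define NEW_GFP_BITS_MASK ((1u << 20) - 1)      /* new shift of 20 */

int main(void)
{
        printf("fits old mask: %s\n",
               (__GFP_NOMEMALLOC & ~OLD_GFP_BITS_MASK) ? "no" : "yes");
        printf("fits new mask: %s\n",
               (__GFP_NOMEMALLOC & ~NEW_GFP_BITS_MASK) ? "no" : "yes");
        return 0;
}
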
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/highmem.c
--- a/linux-2.6-xen-sparse/mm/highmem.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/highmem.c Tue Aug  9 23:57:17 2005
@@ -30,9 +30,9 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
-static void *page_pool_alloc(int gfp_mask, void *data)
-{
-       int gfp = gfp_mask | (int) (long) data;
+static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
+{
+       unsigned int gfp = gfp_mask | (unsigned int) (long) data;
 
        return alloc_page(gfp);
 }
@@ -90,7 +90,8 @@
                 * So no dangers, even with speculative execution.
                 */
                page = pte_page(pkmap_page_table[i]);
-               pte_clear(&pkmap_page_table[i]);
+               pte_clear(&init_mm, (unsigned long)page_address(page),
+                         &pkmap_page_table[i]);
 
                set_page_address(page, NULL);
        }
@@ -138,7 +139,8 @@
                }
        }
        vaddr = PKMAP_ADDR(last_pkmap_nr);
-       set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
+       set_pte_at(&init_mm, vaddr,
+                  &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
 
        pkmap_count[last_pkmap_nr] = 1;
        set_page_address(page, (void *)vaddr);
@@ -332,6 +334,7 @@
                        continue;
 
                mempool_free(bvec->bv_page, pool);      
+               dec_page_state(nr_bounce);
        }
 
        bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -412,6 +415,7 @@
                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
                to->bv_len = from->bv_len;
                to->bv_offset = from->bv_offset;
+               inc_page_state(nr_bounce);
 
                if (rw == WRITE) {
                        char *vto, *vfrom;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/memory.c  Tue Aug  9 23:57:17 2005
@@ -46,7 +46,6 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
-#include <linux/acct.h>
 #include <linux/module.h>
 #include <linux/init.h>
 
@@ -84,116 +83,205 @@
 EXPORT_SYMBOL(vmalloc_earlyreserve);
 
 /*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none.  Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+       pgd_ERROR(*pgd);
+       pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+       pud_ERROR(*pud);
+       pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+       pmd_ERROR(*pmd);
+       pmd_clear(pmd);
+}
+
+/*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
-static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
-{
-       struct page *page;
-
-       if (pmd_none(*pmd))
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+{
+       struct page *page = pmd_page(*pmd);
+       pmd_clear(pmd);
+       pte_free_tlb(tlb, page);
+       dec_page_state(nr_page_table_pages);
+       tlb->mm->nr_ptes--;
+}
+
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
+{
+       pmd_t *pmd;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               free_pte_range(tlb, pmd);
+       } while (pmd++, addr = next, addr != end);
+
+       start &= PUD_MASK;
+       if (start < floor)
                return;
-       if (unlikely(pmd_bad(*pmd))) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
+       if (ceiling) {
+               ceiling &= PUD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
                return;
-       }
-       if (!((start | end) & ~PMD_MASK)) {
-               /* Only clear full, aligned ranges */
-               page = pmd_page(*pmd);
-               pmd_clear(pmd);
-               dec_page_state(nr_page_table_pages);
-               tlb->mm->nr_ptes--;
-               pte_free_tlb(tlb, page);
-       }
-}
-
-static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
-{
-       unsigned long addr = start, next;
-       pmd_t *pmd, *__pmd;
-
-       if (pud_none(*pud))
+
+       pmd = pmd_offset(pud, start);
+       pud_clear(pud);
+       pmd_free_tlb(tlb, pmd);
+}
+
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
+{
+       pud_t *pud;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       pud = pud_offset(pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+       } while (pud++, addr = next, addr != end);
+
+       start &= PGDIR_MASK;
+       if (start < floor)
                return;
-       if (unlikely(pud_bad(*pud))) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
+       if (ceiling) {
+               ceiling &= PGDIR_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
                return;
-       }
-
-       pmd = __pmd = pmd_offset(pud, start);
+
+       pud = pud_offset(pgd, start);
+       pgd_clear(pgd);
+       pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void free_pgd_range(struct mmu_gather **tlb,
+                       unsigned long addr, unsigned long end,
+                       unsigned long floor, unsigned long ceiling)
+{
+       pgd_t *pgd;
+       unsigned long next;
+       unsigned long start;
+
+       /*
+        * The next few lines have given us lots of grief...
+        *
+        * Why are we testing PMD* at this top level?  Because often
+        * there will be no work to do at all, and we'd prefer not to
+        * go all the way down to the bottom just to discover that.
+        *
+        * Why all these "- 1"s?  Because 0 represents both the bottom
+        * of the address space and the top of it (using -1 for the
+        * top wouldn't help much: the masks would do the wrong thing).
+        * The rule is that addr 0 and floor 0 refer to the bottom of
+        * the address space, but end 0 and ceiling 0 refer to the top
+        * Comparisons need to use "end - 1" and "ceiling - 1" (though
+        * that end 0 case should be mythical).
+        *
+        * Wherever addr is brought up or ceiling brought down, we must
+        * be careful to reject "the opposite 0" before it confuses the
+        * subsequent tests.  But what about where end is brought down
+        * by PMD_SIZE below? no, end can't go down to 0 there.
+        *
+        * Whereas we round start (addr) and ceiling down, by different
+        * masks at different levels, in order to test whether a table
+        * now has no other vmas using it, so can be freed, we don't
+        * bother to round floor or end up - the tests don't need that.
+        */
+
+       addr &= PMD_MASK;
+       if (addr < floor) {
+               addr += PMD_SIZE;
+               if (!addr)
+                       return;
+       }
+       if (ceiling) {
+               ceiling &= PMD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               end -= PMD_SIZE;
+       if (addr > end - 1)
+               return;
+
+       start = addr;
+       pgd = pgd_offset((*tlb)->mm, addr);
        do {
-               next = (addr + PMD_SIZE) & PMD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pmd_range(tlb, pmd, addr, next);
-               pmd++;
-               addr = next;
-       } while (addr && (addr < end));
-
-       if (!((start | end) & ~PUD_MASK)) {
-               /* Only clear full, aligned ranges */
-               pud_clear(pud);
-               pmd_free_tlb(tlb, __pmd);
-       }
-}
-
-
-static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
-{
-       unsigned long addr = start, next;
-       pud_t *pud, *__pud;
-
-       if (pgd_none(*pgd))
-               return;
-       if (unlikely(pgd_bad(*pgd))) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               return;
-       }
-
-       pud = __pud = pud_offset(pgd, start);
-       do {
-               next = (addr + PUD_SIZE) & PUD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pud_range(tlb, pud, addr, next);
-               pud++;
-               addr = next;
-       } while (addr && (addr < end));
-
-       if (!((start | end) & ~PGDIR_MASK)) {
-               /* Only clear full, aligned ranges */
-               pgd_clear(pgd);
-               pud_free_tlb(tlb, __pud);
-       }
-}
-
-/*
- * This function clears user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
-       unsigned long addr = start, next;
-       pgd_t * pgd = pgd_offset(tlb->mm, start);
-       unsigned long i;
-
-       for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
-               next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pgd_range(tlb, pgd, addr, next);
-               pgd++;
-               addr = next;
-       }
-}
-
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+       } while (pgd++, addr = next, addr != end);
+
+       if (!tlb_is_full_mm(*tlb))
+               flush_tlb_pgtables((*tlb)->mm, start, end);
+}
+
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+               unsigned long floor, unsigned long ceiling)
+{
+       while (vma) {
+               struct vm_area_struct *next = vma->vm_next;
+               unsigned long addr = vma->vm_start;
+
+               if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+                       hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               } else {
+                       /*
+                        * Optimization: gather nearby vmas into one call down
+                        */
+                       while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+                         && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+                                                       HPAGE_SIZE)) {
+                               vma = next;
+                               next = vma->vm_next;
+                       }
+                       free_pgd_range(tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               }
+               vma = next;
+       }
+}
+
+pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
+                               unsigned long address)
 {
        if (!pmd_present(*pmd)) {
                struct page *new;
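
The long comment in free_pgd_range() above turns on one subtlety: 0 means the bottom of the address space when it is addr or floor, but the top when it is end or ceiling, and subtracting 1 before comparing makes that "top" 0 wrap to ULONG_MAX so a single unsigned comparison handles both readings. A self-contained user-space model of the test (the constants are invented for the demo):

#include <stdio.h>

/* Demo of the "- 1" trick used by free_pmd_range()/free_pgd_range():
 * unsigned wraparound turns the "top of address space" 0 into the
 * largest possible value, so one compare covers both meanings. */
static int past_ceiling(unsigned long end, unsigned long ceiling)
{
        return end - 1 > ceiling - 1;   /* wraparound intended */
}

int main(void)
{
        /* ceiling == 0 means "top": no finite end is past it */
        printf("%d\n", past_ceiling(0x100000UL, 0));            /* 0 */
        /* end == 0 means "top": it is past any finite ceiling */
        printf("%d\n", past_ceiling(0, 0x100000UL));            /* 1 */
        /* ordinary case */
        printf("%d\n", past_ceiling(0x300000UL, 0x200000UL));   /* 1 */
        return 0;
}
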
@@ -254,20 +342,7 @@
  */
 
 static inline void
-copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
-{
-       if (pte_file(pte))
-               return;
-       swap_duplicate(pte_to_swp_entry(pte));
-       if (list_empty(&dst_mm->mmlist)) {
-               spin_lock(&mmlist_lock);
-               list_add(&dst_mm->mmlist, &src_mm->mmlist);
-               spin_unlock(&mmlist_lock);
-       }
-}
-
-static inline void
-copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
                unsigned long addr)
 {
@@ -275,12 +350,21 @@
        struct page *page;
        unsigned long pfn;
 
-       /* pte contains position in swap, so copy. */
-       if (!pte_present(pte)) {
-               copy_swap_pte(dst_mm, src_mm, pte);
-               set_pte(dst_pte, pte);
+       /* pte contains position in swap or file, so copy. */
+       if (unlikely(!pte_present(pte))) {
+               if (!pte_file(pte)) {
+                       swap_duplicate(pte_to_swp_entry(pte));
+                       /* make sure dst_mm is on swapoff's mmlist. */
+                       if (unlikely(list_empty(&dst_mm->mmlist))) {
+                               spin_lock(&mmlist_lock);
+                               list_add(&dst_mm->mmlist, &src_mm->mmlist);
+                               spin_unlock(&mmlist_lock);
+                       }
+               }
+               set_pte_at(dst_mm, addr, dst_pte, pte);
                return;
        }
+
        pfn = pte_pfn(pte);
        /* the pte points outside of valid memory, the
         * mapping is assumed to be good, meaningful
@@ -292,7 +376,7 @@
                page = pfn_to_page(pfn);
 
        if (!page || PageReserved(page)) {
-               set_pte(dst_pte, pte);
+               set_pte_at(dst_mm, addr, dst_pte, pte);
                return;
        }
 
@@ -301,7 +385,7 @@
         * in the parent and the child
         */
        if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
-               ptep_set_wrprotect(src_pte);
+               ptep_set_wrprotect(src_mm, addr, src_pte);
                pte = *src_pte;
        }
 
@@ -313,172 +397,137 @@
                pte = pte_mkclean(pte);
        pte = pte_mkold(pte);
        get_page(page);
-       dst_mm->rss++;
+       inc_mm_counter(dst_mm, rss);
        if (PageAnon(page))
-               dst_mm->anon_rss++;
-       set_pte(dst_pte, pte);
+               inc_mm_counter(dst_mm, anon_rss);
+       set_pte_at(dst_mm, addr, dst_pte, pte);
        page_dup_rmap(page);
 }
 
-static int copy_pte_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
 {
        pte_t *src_pte, *dst_pte;
-       pte_t *s, *d;
        unsigned long vm_flags = vma->vm_flags;
-
-       d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+       int progress;
+
+again:
+       dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
        if (!dst_pte)
                return -ENOMEM;
-
+       src_pte = pte_offset_map_nested(src_pmd, addr);
+
+       progress = 0;
        spin_lock(&src_mm->page_table_lock);
-       s = src_pte = pte_offset_map_nested(src_pmd, addr);
-       for (; addr < end; addr += PAGE_SIZE, s++, d++) {
-               if (pte_none(*s))
+       do {
+               /*
+                * We are holding two locks at this point - either of them
+                * could generate latencies in another task on another CPU.
+                */
+               if (progress >= 32 && (need_resched() ||
+                   need_lockbreak(&src_mm->page_table_lock) ||
+                   need_lockbreak(&dst_mm->page_table_lock)))
+                       break;
+               if (pte_none(*src_pte)) {
+                       progress++;
                        continue;
-               copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
-       }
-       pte_unmap_nested(src_pte);
-       pte_unmap(dst_pte);
+               }
+               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+               progress += 8;
+       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
        spin_unlock(&src_mm->page_table_lock);
+
+       pte_unmap_nested(src_pte - 1);
+       pte_unmap(dst_pte - 1);
        cond_resched_lock(&dst_mm->page_table_lock);
+       if (addr != end)
+               goto again;
        return 0;
 }
 
-static int copy_pmd_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
 {
        pmd_t *src_pmd, *dst_pmd;
-       int err = 0;
        unsigned long next;
 
-       src_pmd = pmd_offset(src_pud, addr);
        dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
        if (!dst_pmd)
                return -ENOMEM;
-
-       for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
-               next = (addr + PMD_SIZE) & PMD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pmd_none(*src_pmd))
+       src_pmd = pmd_offset(src_pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(src_pmd))
                        continue;
-               if (pmd_bad(*src_pmd)) {
-                       pmd_ERROR(*src_pmd);
-                       pmd_clear(src_pmd);
-                       continue;
-               }
-               err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-       }
-       return err;
-}
-
-static int copy_pud_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+               if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+       return 0;
+}
+
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
 {
        pud_t *src_pud, *dst_pud;
-       int err = 0;
        unsigned long next;
 
-       src_pud = pud_offset(src_pgd, addr);
        dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
        if (!dst_pud)
                return -ENOMEM;
-
-       for (; addr < end; addr = next, src_pud++, dst_pud++) {
-               next = (addr + PUD_SIZE) & PUD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pud_none(*src_pud))
+       src_pud = pud_offset(src_pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(src_pud))
                        continue;
-               if (pud_bad(*src_pud)) {
-                       pud_ERROR(*src_pud);
-                       pud_clear(src_pud);
+               if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pud++, src_pud++, addr = next, addr != end);
+       return 0;
+}
+
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+               struct vm_area_struct *vma)
+{
+       pgd_t *src_pgd, *dst_pgd;
+       unsigned long next;
+       unsigned long addr = vma->vm_start;
+       unsigned long end = vma->vm_end;
+
+       if (is_vm_hugetlb_page(vma))
+               return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+
+       dst_pgd = pgd_offset(dst_mm, addr);
+       src_pgd = pgd_offset(src_mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(src_pgd))
                        continue;
-               }
-               err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-       }
-       return err;
-}
-
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-               struct vm_area_struct *vma)
-{
-       pgd_t *src_pgd, *dst_pgd;
-       unsigned long addr, start, end, next;
-       int err = 0;
-
-       if (is_vm_hugetlb_page(vma))
-               return copy_hugetlb_page_range(dst, src, vma);
-
-       start = vma->vm_start;
-       src_pgd = pgd_offset(src, start);
-       dst_pgd = pgd_offset(dst, start);
-
-       end = vma->vm_end;
-       addr = start;
-       while (addr && (addr < end-1)) {
-               next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pgd_none(*src_pgd))
-                       goto next_pgd;
-               if (pgd_bad(*src_pgd)) {
-                       pgd_ERROR(*src_pgd);
-                       pgd_clear(src_pgd);
-                       goto next_pgd;
-               }
-               err = copy_pud_range(dst, src, dst_pgd, src_pgd,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-
-next_pgd:
-               src_pgd++;
-               dst_pgd++;
-               addr = next;
-       }
-
-       return err;
-}
-
-static void zap_pte_range(struct mmu_gather *tlb,
-               pmd_t *pmd, unsigned long address,
-               unsigned long size, struct zap_details *details)
-{
-       unsigned long offset;
-       pte_t *ptep;
-
-       if (pmd_none(*pmd))
-               return;
-       if (unlikely(pmd_bad(*pmd))) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
-               return;
-       }
-       ptep = pte_offset_map(pmd, address);
-       offset = address & ~PMD_MASK;
-       if (offset + size > PMD_SIZE)
-               size = PMD_SIZE - offset;
-       size &= PAGE_MASK;
-       if (details && !details->check_mapping && !details->nonlinear_vma)
-               details = NULL;
-       for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
-               pte_t pte = *ptep;
-               if (pte_none(pte))
+               if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+       return 0;
+}
+
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pte_t *pte;
+
+       pte = pte_offset_map(pmd, addr);
+       do {
+               pte_t ptent = *pte;
+               if (pte_none(ptent))
                        continue;
-               if (pte_present(pte)) {
+               if (pte_present(ptent)) {
                        struct page *page = NULL;
-                       unsigned long pfn = pte_pfn(pte);
+                       unsigned long pfn = pte_pfn(ptent);
                        if (pfn_valid(pfn)) {
                                page = pfn_to_page(pfn);
                                if (PageReserved(page))
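
Every walker rewritten in this hunk -- copy_pud_range(), copy_pmd_range(), copy_pte_range() above, and the zap_*_range() family below -- now shares one shape: compute the sub-range boundary with p?d_addr_end(), skip empty or bad entries, descend, then advance via a comma-expression do/while (copy_pte_range() additionally counts "progress" so it can drop both page_table_locks every so often). A user-space model of that loop shape, using an invented 2 MB granularity in place of the real PMD macros:

#include <stdio.h>

#define FAKE_PMD_SIZE   (1UL << 21)
#define FAKE_PMD_MASK   (~(FAKE_PMD_SIZE - 1))

/* mirrors the kernel's pmd_addr_end(addr, end), including the "- 1"
 * guard that keeps an end of 0 ("top of address space") working */
static unsigned long fake_pmd_addr_end(unsigned long addr, unsigned long end)
{
        unsigned long boundary = (addr + FAKE_PMD_SIZE) & FAKE_PMD_MASK;

        return boundary - 1 < end - 1 ? boundary : end;
}

int main(void)
{
        unsigned long addr = 0x1ff000UL, end = 0x601000UL, next;

        do {
                next = fake_pmd_addr_end(addr, end);
                printf("descend into [%#lx, %#lx)\n", addr, next);
        } while (addr = next, addr != end);
        return 0;
}
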
@@ -502,19 +551,20 @@
                                     page->index > details->last_index))
                                        continue;
                        }
-                       pte = ptep_get_and_clear(ptep);
-                       tlb_remove_tlb_entry(tlb, ptep, address+offset);
+                       ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+                       tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
                        if (unlikely(details) && details->nonlinear_vma
                            && linear_page_index(details->nonlinear_vma,
-                                       address+offset) != page->index)
-                               set_pte(ptep, pgoff_to_pte(page->index));
-                       if (pte_dirty(pte))
+                                               addr) != page->index)
+                               set_pte_at(tlb->mm, addr, pte,
+                                          pgoff_to_pte(page->index));
+                       if (pte_dirty(ptent))
                                set_page_dirty(page);
                        if (PageAnon(page))
-                               tlb->mm->anon_rss--;
-                       else if (pte_young(pte))
+                               dec_mm_counter(tlb->mm, anon_rss);
+                       else if (pte_young(ptent))
                                mark_page_accessed(page);
                        tlb->freed++;
                        page_remove_rmap(page);
@@ -527,78 +577,64 @@
                 */
                if (unlikely(details))
                        continue;
-               if (!pte_file(pte))
-                       free_swap_and_cache(pte_to_swp_entry(pte));
-               pte_clear(ptep);
-       }
-       pte_unmap(ptep-1);
-}
-
-static void zap_pmd_range(struct mmu_gather *tlb,
-               pud_t *pud, unsigned long address,
-               unsigned long size, struct zap_details *details)
-{
-       pmd_t * pmd;
-       unsigned long end;
-
-       if (pud_none(*pud))
-               return;
-       if (unlikely(pud_bad(*pud))) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
-               return;
-       }
-       pmd = pmd_offset(pud, address);
-       end = address + size;
-       if (end > ((address + PUD_SIZE) & PUD_MASK))
-               end = ((address + PUD_SIZE) & PUD_MASK);
+               if (!pte_file(ptent))
+                       free_swap_and_cache(pte_to_swp_entry(ptent));
+               pte_clear(tlb->mm, addr, pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+}
+
+static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       pmd = pmd_offset(pud, addr);
        do {
-               zap_pte_range(tlb, pmd, address, end - address, details);
-               address = (address + PMD_SIZE) & PMD_MASK; 
-               pmd++;
-       } while (address && (address < end));
-}
-
-static void zap_pud_range(struct mmu_gather *tlb,
-               pgd_t * pgd, unsigned long address,
-               unsigned long end, struct zap_details *details)
-{
-       pud_t * pud;
-
-       if (pgd_none(*pgd))
-               return;
-       if (unlikely(pgd_bad(*pgd))) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               return;
-       }
-       pud = pud_offset(pgd, address);
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               zap_pte_range(tlb, pmd, addr, next, details);
+       } while (pmd++, addr = next, addr != end);
+}
+
+static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       pud = pud_offset(pgd, addr);
        do {
-               zap_pmd_range(tlb, pud, address, end - address, details);
-               address = (address + PUD_SIZE) & PUD_MASK; 
-               pud++;
-       } while (address && (address < end));
-}
-
-static void unmap_page_range(struct mmu_gather *tlb,
-               struct vm_area_struct *vma, unsigned long address,
-               unsigned long end, struct zap_details *details)
-{
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               zap_pmd_range(tlb, pud, addr, next, details);
+       } while (pud++, addr = next, addr != end);
+}
+
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pgd_t *pgd;
        unsigned long next;
-       pgd_t *pgd;
-       int i;
-
-       BUG_ON(address >= end);
-       pgd = pgd_offset(vma->vm_mm, address);
+
+       if (details && !details->check_mapping && !details->nonlinear_vma)
+               details = NULL;
+
+       BUG_ON(addr >= end);
        tlb_start_vma(tlb, vma);
-       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next <= address || next > end)
-                       next = end;
-               zap_pud_range(tlb, pgd, address, next, details);
-               address = next;
-               pgd++;
-       }
+       pgd = pgd_offset(vma->vm_mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               zap_pud_range(tlb, pgd, addr, next, details);
+       } while (pgd++, addr = next, addr != end);
        tlb_end_vma(tlb, vma);
 }
 
@@ -619,7 +655,7 @@
  * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
  * @details: details of nonlinear truncation or shared cache invalidation
  *
- * Returns the number of vma's which were covered by the unmapping.
+ * Returns the end address of the unmapping (restart addr if interrupted).
  *
  * Unmap all pages in the vma list.  Called under page_table_lock.
  *
@@ -636,7 +672,7 @@
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *details)
@@ -644,12 +680,11 @@
        unsigned long zap_bytes = ZAP_BLOCK_SIZE;
        unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
        int tlb_start_valid = 0;
-       int ret = 0;
+       unsigned long start = start_addr;
        spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
        int fullmm = tlb_is_full_mm(*tlbp);
 
        for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
-               unsigned long start;
                unsigned long end;
 
                start = max(vma->vm_start, start_addr);
@@ -662,7 +697,6 @@
                if (vma->vm_flags & VM_ACCOUNT)
                        *nr_accounted += (end - start) >> PAGE_SHIFT;
 
-               ret++;
                while (start != end) {
                        unsigned long block;
 
@@ -693,7 +727,6 @@
                                if (i_mmap_lock) {
                                        /* must reset count of rss freed */
                                        *tlbp = tlb_gather_mmu(mm, fullmm);
-                                       details->break_addr = start;
                                        goto out;
                                }
                                spin_unlock(&mm->page_table_lock);
@@ -707,7 +740,7 @@
                }
        }
 out:
-       return ret;
+       return start;   /* which is now the end (or restart) address */
 }
 
 /**
@@ -717,7 +750,7 @@
  * @size: number of bytes to zap
  * @details: details of nonlinear truncation or shared cache invalidation
  */
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *details)
 {
        struct mm_struct *mm = vma->vm_mm;
@@ -727,16 +760,16 @@
 
        if (is_vm_hugetlb_page(vma)) {
                zap_hugepage_range(vma, address, size);
-               return;
+               return end;
        }
 
        lru_add_drain();
        spin_lock(&mm->page_table_lock);
        tlb = tlb_gather_mmu(mm, 0);
-       unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
+       end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
        tlb_finish_mmu(tlb, address, end);
-       acct_update_integrals();
        spin_unlock(&mm->page_table_lock);
+       return end;
 }
 
 /*
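unmap_vmas() and zap_page_range() now return the address they actually reached rather than a vma count, so a caller that was forced to break out early (rescheduling pressure or i_mmap_lock contention) can resume exactly where it stopped. A toy model of that resume loop; the three-pages-per-call limit is invented for the demo:

#include <stdio.h>

/* stand-in that "zaps" at most 3 pages per call, like a walker that
 * breaks out early under lock contention */
static unsigned long toy_zap(unsigned long addr, unsigned long end)
{
        unsigned long done = addr + 3 * 0x1000UL;

        return done < end ? done : end;
}

int main(void)
{
        unsigned long addr = 0x0UL, end = 0x8000UL;

        while (addr < end) {
                unsigned long restart = toy_zap(addr, end);

                printf("zapped [%#lx, %#lx)\n", addr, restart);
                addr = restart; /* resume here after backing off */
        }
        return 0;
}
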
@@ -987,111 +1020,78 @@
 
 EXPORT_SYMBOL(get_user_pages);
 
-static void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
+{
+       pte_t *pte;
+
+       pte = pte_alloc_map(mm, pmd, addr);
+       if (!pte)
+               return -ENOMEM;
        do {
-               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
                BUG_ON(!pte_none(*pte));
-               set_pte(pte, zero_pte);
-               address += PAGE_SIZE;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
-               unsigned long address, unsigned long size, pgprot_t prot)
-{
-       unsigned long base, end;
-
-       base = address & PUD_MASK;
-       address &= ~PUD_MASK;
-       end = address + size;
-       if (end > PUD_SIZE)
-               end = PUD_SIZE;
+               set_pte_at(mm, addr, pte, zero_pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+       return 0;
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, base + address);
-               if (!pte)
+               next = pmd_addr_end(addr, end);
+               if (zeromap_pte_range(mm, pmd, addr, next, prot))
                        return -ENOMEM;
-               zeromap_pte_range(pte, base + address, end - address, prot);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+       } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
-static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
-                                   unsigned long address,
-                                    unsigned long size, pgprot_t prot)
-{
-       unsigned long base, end;
-       int error = 0;
-
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
        do {
-               pmd_t * pmd = pmd_alloc(mm, pud, base + address);
-               error = -ENOMEM;
-               if (!pmd)
+               next = pud_addr_end(addr, end);
+               if (zeromap_pmd_range(mm, pud, addr, next, prot))
+                       return -ENOMEM;
+       } while (pud++, addr = next, addr != end);
+       return 0;
+}
+
+int zeromap_page_range(struct vm_area_struct *vma,
+                       unsigned long addr, unsigned long size, pgprot_t prot)
+{
+       pgd_t *pgd;
+       unsigned long next;
+       unsigned long end = addr + size;
+       struct mm_struct *mm = vma->vm_mm;
+       int err;
+
+       BUG_ON(addr >= end);
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
+       spin_lock(&mm->page_table_lock);
+       do {
+               next = pgd_addr_end(addr, end);
+               err = zeromap_pud_range(mm, pgd, addr, next, prot);
+               if (err)
                        break;
-               error = zeromap_pmd_range(mm, pmd, base + address,
-                                         end - address, prot);
-               if (error)
-                       break;
-               address = (address + PUD_SIZE) & PUD_MASK;
-               pud++;
-       } while (address && (address < end));
-       return 0;
-}
-
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
-                                       unsigned long size, pgprot_t prot)
-{
-       int i;
-       int error = 0;
-       pgd_t * pgd;
-       unsigned long beg = address;
-       unsigned long end = address + size;
-       unsigned long next;
-       struct mm_struct *mm = vma->vm_mm;
-
-       pgd = pgd_offset(mm, address);
-       flush_cache_range(vma, beg, end);
-       BUG_ON(address >= end);
-       BUG_ON(end > vma->vm_end);
-
-       spin_lock(&mm->page_table_lock);
-       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
-               pud_t *pud = pud_alloc(mm, pgd, address);
-               error = -ENOMEM;
-               if (!pud)
-                       break;
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next <= beg || next > end)
-                       next = end;
-               error = zeromap_pud_range(mm, pud, address,
-                                               next - address, prot);
-               if (error)
-                       break;
-               address = next;
-               pgd++;
-       }
-       /*
-        * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
-        */
-       flush_tlb_range(vma, beg, end);
+       } while (pgd++, addr = next, addr != end);
        spin_unlock(&mm->page_table_lock);
-       return error;
+       return err;
 }
 
 /*
@@ -1099,95 +1099,74 @@
 * mappings are removed. Any references to nonexistent pages result
  * in null mappings (currently treated as "copy-on-access")
  */
-static inline void
-remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
-               unsigned long pfn, pgprot_t prot)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       pte_t *pte;
+
+       pte = pte_alloc_map(mm, pmd, addr);
+       if (!pte)
+               return -ENOMEM;
        do {
                BUG_ON(!pte_none(*pte));
                if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-                       set_pte(pte, pfn_pte(pfn, prot));
-               address += PAGE_SIZE;
+                       set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
                pfn++;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int
-remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
-               unsigned long size, unsigned long pfn, pgprot_t prot)
-{
-       unsigned long base, end;
-
-       base = address & PUD_MASK;
-       address &= ~PUD_MASK;
-       end = address + size;
-       if (end > PUD_SIZE)
-               end = PUD_SIZE;
-       pfn -= (address >> PAGE_SHIFT);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+       return 0;
+}
+
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, base + address);
-               if (!pte)
+               next = pmd_addr_end(addr, end);
+               if (remap_pte_range(mm, pmd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
                        return -ENOMEM;
-               remap_pte_range(pte, base + address, end - address,
-                               (address >> PAGE_SHIFT) + pfn, prot);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+       } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
-static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
-                                 unsigned long address, unsigned long size,
-                                 unsigned long pfn, pgprot_t prot)
-{
-       unsigned long base, end;
-       int error;
-
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
-       pfn -= address >> PAGE_SHIFT;
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
        do {
-               pmd_t *pmd = pmd_alloc(mm, pud, base+address);
-               error = -ENOMEM;
-               if (!pmd)
-                       break;
-               error = remap_pmd_range(mm, pmd, base + address, end - address,
-                               (address >> PAGE_SHIFT) + pfn, prot);
-               if (error)
-                       break;
-               address = (address + PUD_SIZE) & PUD_MASK;
-               pud++;
-       } while (address && (address < end));
-       return error;
+               next = pud_addr_end(addr, end);
+               if (remap_pmd_range(mm, pud, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
+                       return -ENOMEM;
+       } while (pud++, addr = next, addr != end);
+       return 0;
 }
 
 /*  Note: this is only safe if the mm semaphore is held when called. */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                    unsigned long pfn, unsigned long size, pgprot_t prot)
 {
-       int error = 0;
        pgd_t *pgd;
-       unsigned long beg = from;
-       unsigned long end = from + size;
        unsigned long next;
+       unsigned long end = addr + size;
        struct mm_struct *mm = vma->vm_mm;
-       int i;
-
-       pfn -= from >> PAGE_SHIFT;
-       pgd = pgd_offset(mm, from);
-       flush_cache_range(vma, beg, end);
-       BUG_ON(from >= end);
+       int err;
 
        /*
         * Physically remapped pages are special. Tell the
@@ -1199,31 +1178,21 @@
         */
        vma->vm_flags |= VM_IO | VM_RESERVED;
 
+       BUG_ON(addr >= end);
+       pfn -= addr >> PAGE_SHIFT;
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
        spin_lock(&mm->page_table_lock);
-       for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
-               pud_t *pud = pud_alloc(mm, pgd, from);
-               error = -ENOMEM;
-               if (!pud)
+       do {
+               next = pgd_addr_end(addr, end);
+               err = remap_pud_range(mm, pgd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
                        break;
-               next = (from + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= from)
-                       next = end;
-               error = remap_pud_range(mm, pud, from, end - from,
-                                       pfn + (from >> PAGE_SHIFT), prot);
-               if (error)
-                       break;
-               from = next;
-               pgd++;
-       }
-       /*
-        * Why flush? remap_pte_range has a BUG_ON for !pte_none()
-        */
-       flush_tlb_range(vma, beg, end);
+       } while (pgd++, addr = next, addr != end);
        spin_unlock(&mm->page_table_lock);
-
-       return error;
-}
-
+       return err;
+}
 EXPORT_SYMBOL(remap_pfn_range);
 
 /*
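All the remap_*_range() levels above pass "pfn + (addr >> PAGE_SHIFT)" downward after remap_pfn_range() has biased pfn once with "pfn -= addr >> PAGE_SHIFT"; the bias makes the pfn argument self-correcting no matter which sub-range a level is handed. A user-space check of that invariant (a 4K page size is assumed for the demo):

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long start = 0x40000000UL;     /* vaddr of the mapping */
        unsigned long pfn = 0x80000UL;          /* first frame to map */
        unsigned long biased = pfn - (start >> PAGE_SHIFT);
        unsigned long addr;

        for (addr = start; addr != start + 4 * PAGE_SIZE; addr += PAGE_SIZE) {
                unsigned long mapped = biased + (addr >> PAGE_SHIFT);

                printf("vaddr %#lx -> pfn %#lx\n", addr, mapped);
                /* each page lands on the right frame regardless of level */
                assert(mapped == pfn + (addr - start) / PAGE_SIZE);
        }
        return 0;
}
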
@@ -1247,11 +1216,11 @@
 {
        pte_t entry;
 
-       flush_cache_page(vma, address);
        entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
                              vma);
        ptep_establish(vma, address, page_table, entry);
        update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
 }
 
 /*
@@ -1299,11 +1268,12 @@
                int reuse = can_share_swap_page(old_page);
                unlock_page(old_page);
                if (reuse) {
-                       flush_cache_page(vma, address);
+                       flush_cache_page(vma, address, pfn);
                        entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
                                              vma);
                        ptep_set_access_flags(vma, address, page_table, entry, 1);
                        update_mmu_cache(vma, address, entry);
+                       lazy_mmu_prot_update(entry);
                        pte_unmap(page_table);
                        spin_unlock(&mm->page_table_lock);
                        return VM_FAULT_MINOR;
@@ -1337,13 +1307,12 @@
        page_table = pte_offset_map(pmd, address);
        if (likely(pte_same(*page_table, pte))) {
                if (PageAnon(old_page))
-                       mm->anon_rss--;
-               if (PageReserved(old_page)) {
-                       ++mm->rss;
-                       acct_update_integrals();
-                       update_mem_hiwater();
-               } else
+                       dec_mm_counter(mm, anon_rss);
+               if (PageReserved(old_page))
+                       inc_mm_counter(mm, rss);
+               else
                        page_remove_rmap(old_page);
+               flush_cache_page(vma, address, pfn);
                break_cow(vma, new_page, address, page_table);
                lru_cache_add_active(new_page);
                page_add_anon_rmap(new_page, vma, address);
@@ -1387,7 +1356,7 @@
  * i_mmap_lock.
  *
  * In order to make forward progress despite repeatedly restarting some
- * large vma, note the break_addr set by unmap_vmas when it breaks out:
+ * large vma, note the restart_addr from unmap_vmas when it breaks out:
  * and restart from that address when we reach that vma again.  It might
  * have been split or merged, shrunk or extended, but never shifted: so
  * restart_addr remains valid so long as it remains in the vma's range.
@@ -1425,8 +1394,8 @@
                }
        }
 
-       details->break_addr = end_addr;
-       zap_page_range(vma, start_addr, end_addr - start_addr, details);
+       restart_addr = zap_page_range(vma, start_addr,
+                                       end_addr - start_addr, details);
 
        /*
         * We cannot rely on the break test in unmap_vmas:
@@ -1437,14 +1406,14 @@
        need_break = need_resched() ||
                        need_lockbreak(details->i_mmap_lock);
 
-       if (details->break_addr >= end_addr) {
+       if (restart_addr >= end_addr) {
                /* We have now completed this vma: mark it so */
                vma->vm_truncate_count = details->truncate_count;
                if (!need_break)
                        return 0;
        } else {
                /* Note restart_addr in vma's truncate_count field */
-               vma->vm_truncate_count = details->break_addr;
+               vma->vm_truncate_count = restart_addr;
                if (!need_break)
                        goto again;
        }
@@ -1732,12 +1701,13 @@
        spin_lock(&mm->page_table_lock);
        page_table = pte_offset_map(pmd, address);
        if (unlikely(!pte_same(*page_table, orig_pte))) {
-               pte_unmap(page_table);
-               spin_unlock(&mm->page_table_lock);
-               unlock_page(page);
-               page_cache_release(page);
                ret = VM_FAULT_MINOR;
-               goto out;
+               goto out_nomap;
+       }
+
+       if (unlikely(!PageUptodate(page))) {
+               ret = VM_FAULT_SIGBUS;
+               goto out_nomap;
        }
 
        /* The page isn't present yet, go ahead with the fault. */
@@ -1746,10 +1716,7 @@
        if (vm_swap_full())
                remove_exclusive_swap_page(page);
 
-       mm->rss++;
-       acct_update_integrals();
-       update_mem_hiwater();
-
+       inc_mm_counter(mm, rss);
        pte = mk_pte(page, vma->vm_page_prot);
        if (write_access && can_share_swap_page(page)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1758,7 +1725,7 @@
        unlock_page(page);
 
        flush_icache_page(vma, page);
-       set_pte(page_table, pte);
+       set_pte_at(mm, address, page_table, pte);
        page_add_anon_rmap(page, vma, address);
 
        if (write_access) {
@@ -1770,10 +1737,17 @@
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, address, pte);
+       lazy_mmu_prot_update(pte);
        pte_unmap(page_table);
        spin_unlock(&mm->page_table_lock);
 out:
        return ret;
+out_nomap:
+       pte_unmap(page_table);
+       spin_unlock(&mm->page_table_lock);
+       unlock_page(page);
+       page_cache_release(page);
+       goto out;
 }
 
 /*
@@ -1813,9 +1787,7 @@
                        spin_unlock(&mm->page_table_lock);
                        goto out;
                }
-               mm->rss++;
-               acct_update_integrals();
-               update_mem_hiwater();
+               inc_mm_counter(mm, rss);
                entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
                                                         vma->vm_page_prot)),
                                      vma);
@@ -1824,11 +1796,12 @@
                page_add_anon_rmap(page, vma, addr);
        }
 
-       ptep_establish_new(vma, addr, page_table, entry);
+       set_pte_at(mm, addr, page_table, entry);
        pte_unmap(page_table);
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, addr, entry);
+       lazy_mmu_prot_update(entry);
        spin_unlock(&mm->page_table_lock);
 out:
        return VM_FAULT_MINOR;
@@ -1931,15 +1904,13 @@
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
                if (!PageReserved(new_page))
-                       ++mm->rss;
-               acct_update_integrals();
-               update_mem_hiwater();
+                       inc_mm_counter(mm, rss);
 
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-               ptep_establish_new(vma, address, page_table, entry);
+               set_pte_at(mm, address, page_table, entry);
                if (anon) {
                        lru_cache_add_active(new_page);
                        page_add_anon_rmap(new_page, vma, address);
@@ -1956,6 +1927,7 @@
 
        /* no need to invalidate: a not-present page shouldn't be cached */
        update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
        spin_unlock(&mm->page_table_lock);
 out:
        return ret;
@@ -1983,7 +1955,7 @@
         */
        if (!vma->vm_ops || !vma->vm_ops->populate || 
                        (write_access && !(vma->vm_flags & VM_SHARED))) {
-               pte_clear(pte);
+               pte_clear(mm, address, pte);
                return do_no_page(mm, vma, address, write_access, pte, pmd);
        }
 
@@ -2050,6 +2022,7 @@
        entry = pte_mkyoung(entry);
        ptep_set_access_flags(vma, address, pte, entry, write_access);
        update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
        pte_unmap(pte);
        spin_unlock(&mm->page_table_lock);
        return VM_FAULT_MINOR;
@@ -2099,15 +2072,12 @@
        return VM_FAULT_OOM;
 }
 
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
  *
  * We've already handled the fast-path in-line, and we own the
  * page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
  */
 pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
@@ -2131,15 +2101,14 @@
  out:
        return pud_offset(pgd, address);
 }
-
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
 /*
  * Allocate page middle directory.
  *
  * We've already handled the fast-path in-line, and we own the
  * page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
  */
 pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
@@ -2155,38 +2124,24 @@
         * Because we dropped the lock, we should re-check the
         * entry, as somebody else could have populated it..
         */
+#ifndef __ARCH_HAS_4LEVEL_HACK
        if (pud_present(*pud)) {
                pmd_free(new);
                goto out;
        }
        pud_populate(mm, pud, new);
- out:
-       return pmd_offset(pud, address);
-}
 #else
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
-       pmd_t *new;
-
-       spin_unlock(&mm->page_table_lock);
-       new = pmd_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
-       if (!new)
-               return NULL;
-
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
        if (pgd_present(*pud)) {
                pmd_free(new);
                goto out;
        }
        pgd_populate(mm, pud, new);
-out:
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+
+ out:
        return pmd_offset(pud, address);
 }
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
 
 int make_pages_present(unsigned long addr, unsigned long end)
 {
@@ -2253,13 +2208,13 @@
  * update_mem_hiwater
  *     - update per process rss and vm high water data
  */
-void update_mem_hiwater(void)
-{
-       struct task_struct *tsk = current;
-
+void update_mem_hiwater(struct task_struct *tsk)
+{
        if (tsk->mm) {
-               if (tsk->mm->hiwater_rss < tsk->mm->rss)
-                       tsk->mm->hiwater_rss = tsk->mm->rss;
+               unsigned long rss = get_mm_counter(tsk->mm, rss);
+
+               if (tsk->mm->hiwater_rss < rss)
+                       tsk->mm->hiwater_rss = rss;
                if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
                        tsk->mm->hiwater_vm = tsk->mm->total_vm;
        }
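
A recurring theme of the memory.c hunks above is the replacement of direct mm->rss and mm->anon_rss arithmetic with inc_mm_counter(), dec_mm_counter() and get_mm_counter(). In 2.6.12 these are thin token-pasting macros over renamed mm_struct fields; the following is a user-space analogue of that idea, not the verbatim include/linux/sched.h definitions:

#include <stdio.h>

/* user-space stand-in for the counter fields on mm_struct */
struct mm_counters {
        unsigned long _rss, _anon_rss;
};

#define get_mm_counter(mm, member) ((mm)->_##member)
#define inc_mm_counter(mm, member) ((mm)->_##member++)
#define dec_mm_counter(mm, member) ((mm)->_##member--)

int main(void)
{
        struct mm_counters mm = { 0, 0 };

        inc_mm_counter(&mm, rss);
        inc_mm_counter(&mm, anon_rss);
        dec_mm_counter(&mm, anon_rss);
        printf("rss=%lu anon_rss=%lu\n",
               get_mm_counter(&mm, rss), get_mm_counter(&mm, anon_rss));
        return 0;
}
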
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c      Tue Aug  9 23:57:17 2005
@@ -31,19 +31,26 @@
 #include <linux/topology.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
+#include <linux/cpuset.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
 
-/* MCD - HACK: Find somewhere to initialize this EARLY, or make this initializer cleaner */
+/*
+ * MCD - HACK: Find somewhere to initialize this EARLY, or make this
+ * initializer cleaner
+ */
 nodemask_t node_online_map = { { [0] = 1UL } };
+EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map = NODE_MASK_ALL;
+EXPORT_SYMBOL(node_possible_map);
 struct pglist_data *pgdat_list;
 unsigned long totalram_pages;
 unsigned long totalhigh_pages;
 long nr_swap_pages;
+
 /*
  * results with 256, 32 in the lowmem_reserve sysctl:
  *     1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
@@ -188,6 +195,37 @@
 {
        __ClearPagePrivate(page);
        page->private = 0;
+}
+
+/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined O(n+1) page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ *     B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (buddy2) is #8 its order
+ * 1 buddy is #10:
+ *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ *     P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline struct page *
+__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
+{
+       unsigned long buddy_idx = page_idx ^ (1 << order);
+
+       return page + (buddy_idx - page_idx);
+}
+
+static inline unsigned long
+__find_combined_index(unsigned long page_idx, unsigned int order)
+{
+       return (page_idx & ~(1 << order));
 }
 
 /*
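The identities in the comment above are the entire buddy computation: XOR with (1 << order) locates the order-O twin, and AND with ~(1 << order) yields the combined order-(O+1) index. A quick user-space check of the worked example (page #8 at order 1 pairs with #10, and both combine to #8):

#include <stdio.h>

/* B2 = B1 ^ (1 << O): the order-O twin of a buddy index */
static unsigned long buddy_idx(unsigned long idx, unsigned int order)
{
        return idx ^ (1UL << order);
}

/* P = B & ~(1 << O): the combined order-(O+1) index */
static unsigned long combined_idx(unsigned long idx, unsigned int order)
{
        return idx & ~(1UL << order);
}

int main(void)
{
        printf("buddy of 8 at order 1:  %lu\n", buddy_idx(8, 1));     /* 10 */
        printf("buddy of 10 at order 1: %lu\n", buddy_idx(10, 1));    /* 8 */
        printf("combined index:         %lu\n", combined_idx(8, 1));  /* 8 */
        return 0;
}
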
@@ -233,50 +271,49 @@
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page, struct page *base,
+static inline void __free_pages_bulk (struct page *page,
                struct zone *zone, unsigned int order)
 {
        unsigned long page_idx;
-       struct page *coalesced;
        int order_size = 1 << order;
 
        if (unlikely(order))
                destroy_compound_page(page, order);
 
-       page_idx = page - base;
+       page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
        BUG_ON(page_idx & (order_size - 1));
        BUG_ON(bad_range(zone, page));
 
        zone->free_pages += order_size;
        while (order < MAX_ORDER-1) {
+               unsigned long combined_idx;
                struct free_area *area;
                struct page *buddy;
-               int buddy_idx;
-
-               buddy_idx = (page_idx ^ (1 << order));
-               buddy = base + buddy_idx;
+
+               combined_idx = __find_combined_index(page_idx, order);
+               buddy = __page_find_buddy(page, page_idx, order);
+
                if (bad_range(zone, buddy))
                        break;
                if (!page_is_buddy(buddy, order))
-                       break;
-               /* Move the buddy up one level. */
+                       break;          /* Move the buddy up one level. */
                list_del(&buddy->lru);
                area = zone->free_area + order;
                area->nr_free--;
                rmv_page_order(buddy);
-               page_idx &= buddy_idx;
+               page = page + (combined_idx - page_idx);
+               page_idx = combined_idx;
                order++;
        }
-       coalesced = base + page_idx;
-       set_page_order(coalesced, order);
-       list_add(&coalesced->lru, &zone->free_area[order].free_list);
+       set_page_order(page, order);
+       list_add(&page->lru, &zone->free_area[order].free_list);
        zone->free_area[order].nr_free++;
 }
 
 static inline void free_pages_check(const char *function, struct page *page)
 {
-       if (    page_mapped(page) ||
+       if (    page_mapcount(page) ||
                page->mapping != NULL ||
                page_count(page) != 0 ||
                (page->flags & (
@@ -309,10 +346,9 @@
                struct list_head *list, unsigned int order)
 {
        unsigned long flags;
-       struct page *base, *page = NULL;
+       struct page *page = NULL;
        int ret = 0;
 
-       base = zone->zone_mem_map;
        spin_lock_irqsave(&zone->lock, flags);
        zone->all_unreclaimable = 0;
        zone->pages_scanned = 0;
@@ -320,7 +356,7 @@
                page = list_entry(list->prev, struct page, lru);
                /* have to delete it as __free_pages_bulk list manipulates */
                list_del(&page->lru);
-               __free_pages_bulk(page, base, zone, order);
+               __free_pages_bulk(page, zone, order);
                ret++;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
@@ -405,7 +441,7 @@
  */
 static void prep_new_page(struct page *page, int order)
 {
-       if (page->mapping || page_mapped(page) ||
+       if (page->mapping || page_mapcount(page) ||
            (page->flags & (
                        1 << PG_private |
                        1 << PG_locked  |
@@ -601,7 +637,7 @@
        free_hot_cold_page(page, 1);
 }
 
-static inline void prep_zero_page(struct page *page, int order, int gfp_flags)
+static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
 {
        int i;
 
@@ -616,7 +652,7 @@
  * or two.
  */
 static struct page *
-buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
 {
        unsigned long flags;
        struct page *page = NULL;
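
The __nocast being added to gfp_flags in prep_zero_page and buffered_rmqueue here, and to the allocator entry points below, is a sparse annotation only; plain gcc never sees it. Roughly the include/linux/compiler.h arrangement of this era, quoted from memory and so an approximation:

    #ifdef __CHECKER__
    # define __nocast       __attribute__((nocast)) /* sparse warns on implicit casts */
    #else
    # define __nocast                               /* expands to nothing for gcc */
    #endif
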
@@ -694,7 +730,7 @@
  * This is the 'heart' of the zoned buddy allocator.
  */
 struct page * fastcall
-__alloc_pages(unsigned int gfp_mask, unsigned int order,
+__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
                struct zonelist *zonelist)
 {
        const int wait = gfp_mask & __GFP_WAIT;
@@ -734,6 +770,9 @@
                                       classzone_idx, 0, 0))
                        continue;
 
+               if (!cpuset_zone_allowed(z))
+                       continue;
+
                page = buffered_rmqueue(z, order, gfp_mask);
                if (page)
                        goto got_pg;
@@ -745,6 +784,9 @@
        /*
         * Go through the zonelist again. Let __GFP_HIGH and allocations
         * coming from realtime tasks to go deeper into reserves
+        *
+        * This is the last chance, in general, before the goto nopage.
+        * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         */
        for (i = 0; (z = zones[i]) != NULL; i++) {
                if (!zone_watermark_ok(z, order, z->pages_min,
@@ -752,18 +794,27 @@
                                       gfp_mask & __GFP_HIGH))
                        continue;
 
+               if (wait && !cpuset_zone_allowed(z))
+                       continue;
+
                page = buffered_rmqueue(z, order, gfp_mask);
                if (page)
                        goto got_pg;
        }
 
        /* This allocation should allow future memory freeing. */
-       if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
-               /* go through the zonelist yet again, ignoring mins */
-               for (i = 0; (z = zones[i]) != NULL; i++) {
-                       page = buffered_rmqueue(z, order, gfp_mask);
-                       if (page)
-                               goto got_pg;
+
+       if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+                       && !in_interrupt()) {
+               if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+                       /* go through the zonelist yet again, ignoring mins */
+                       for (i = 0; (z = zones[i]) != NULL; i++) {
+                               if (!cpuset_zone_allowed(z))
+                                       continue;
+                               page = buffered_rmqueue(z, order, gfp_mask);
+                               if (page)
+                                       goto got_pg;
+                       }
                }
                goto nopage;
        }
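
Each allocation pass in __alloc_pages now filters zones through cpuset_zone_allowed(), with the one exception the new comment spells out: an atomic (!wait) caller in this second pass skips the cpuset test rather than failing outright. A condensed sketch of the per-pass scan (not a drop-in function: it leans on the kernel's own cpuset_zone_allowed() and buffered_rmqueue(), and omits the watermark check):

    static struct page *scan_zones(struct zone **zones, unsigned int order,
                                   unsigned int __nocast gfp_mask,
                                   int honor_cpuset)
    {
            struct page *page;
            struct zone *z;
            int i;

            for (i = 0; (z = zones[i]) != NULL; i++) {
                    /* honor_cpuset is false only for the GFP_ATOMIC case */
                    if (honor_cpuset && !cpuset_zone_allowed(z))
                            continue;
                    page = buffered_rmqueue(z, order, gfp_mask);
                    if (page)
                            return page;
            }
            return NULL;    /* caller falls through to reclaim or the next pass */
    }
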
@@ -798,6 +849,9 @@
                        if (!zone_watermark_ok(z, order, z->pages_min,
                                               classzone_idx, can_try_harder,
                                               gfp_mask & __GFP_HIGH))
+                               continue;
+
+                       if (!cpuset_zone_allowed(z))
                                continue;
 
                        page = buffered_rmqueue(z, order, gfp_mask);
@@ -816,6 +870,9 @@
                                               classzone_idx, 0, 0))
                                continue;
 
+                       if (!cpuset_zone_allowed(z))
+                               continue;
+
                        page = buffered_rmqueue(z, order, gfp_mask);
                        if (page)
                                goto got_pg;
@@ -862,7 +919,7 @@
 /*
  * Common helper functions.
  */
-fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
+fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)
 {
        struct page * page;
        page = alloc_pages(gfp_mask, order);
@@ -873,7 +930,7 @@
 
 EXPORT_SYMBOL(__get_free_pages);
 
-fastcall unsigned long get_zeroed_page(unsigned int gfp_mask)
+fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask)
 {
        struct page * page;
 
@@ -1302,8 +1359,7 @@
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __initdata node_load[MAX_NUMNODES];
 /**
- * find_next_best_node - find the next node that should appear in a given
- *    node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
  * @used_node_mask: nodemask_t of already used nodes
  *
@@ -1372,7 +1428,6 @@
        /* initialize zonelists */
        for (i = 0; i < GFP_ZONETYPES; i++) {
                zonelist = pgdat->node_zonelists + i;
-               memset(zonelist, 0, sizeof(*zonelist));
                zonelist->zones[0] = NULL;
        }
 
@@ -1419,7 +1474,6 @@
                struct zonelist *zonelist;
 
                zonelist = pgdat->node_zonelists + i;
-               memset(zonelist, 0, sizeof(*zonelist));
 
                j = 0;
                k = ZONE_NORMAL;
@@ -1461,6 +1515,7 @@
        for_each_online_node(i)
                build_zonelists(NODE_DATA(i));
        printk("Built %i zonelists\n", num_online_nodes());
+       cpuset_init_current_mems_allowed();
 }
 
 /*
@@ -1622,6 +1677,18 @@
                batch /= 4;             /* We effectively *= 4 below */
                if (batch < 1)
                        batch = 1;
+
+               /*
+                * Clamp the batch to a 2^n - 1 value. Having a power
+                * of 2 value was found to be more likely to have
+                * suboptimal cache aliasing properties in some cases.
+                *
+                * For example if 2 tasks are alternately allocating
+                * batches of pages, one task can end up with a lot
+                * of pages of one half of the possible page colors
+                * and the other with pages of the other colors.
+                */
+               batch = (1 << fls(batch + batch/2)) - 1;
 
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        struct per_cpu_pages *pcp;
@@ -1681,14 +1748,25 @@
        }
 }
 
-void __init node_alloc_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
        unsigned long size;
 
-       size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
-       pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+       /* Skip empty nodes */
+       if (!pgdat->node_spanned_pages)
+               return;
+
+       /* ia64 gets its own node_mem_map, before this, without bootmem */
+       if (!pgdat->node_mem_map) {
+               size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+               pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+       }
 #ifndef CONFIG_DISCONTIGMEM
-       mem_map = contig_page_data.node_mem_map;
+       /*
+        * With no DISCONTIG, the global mem_map is just set as node 0's
+        */
+       if (pgdat == NODE_DATA(0))
+               mem_map = NODE_DATA(0)->node_mem_map;
 #endif
 }
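
With this rewrite the helper is a no-op for empty nodes and for architectures (ia64) that provide node_mem_map themselves; otherwise it bootmem-allocates one struct page per spanned page, plus one spare. A rough feel for the cost, noting that sizeof(struct page) depends on architecture and config, so the 32 bytes below is an illustrative assumption:

    #include <stdio.h>

    int main(void)
    {
            unsigned long spanned = 262144; /* 1 GiB node of 4 KiB pages */
            unsigned long sz_page = 32;     /* assumed sizeof(struct page) */
            unsigned long bytes = (spanned + 1) * sz_page;

            printf("node_mem_map: %lu bytes (~%lu MiB)\n", bytes, bytes >> 20);
            return 0;
    }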
 
@@ -1700,8 +1778,7 @@
        pgdat->node_start_pfn = node_start_pfn;
        calculate_zone_totalpages(pgdat, zones_size, zholes_size);
 
-       if (!pfn_to_page(node_start_pfn))
-               node_alloc_mem_map(pgdat);
+       alloc_node_mem_map(pgdat);
 
        free_area_init_core(pgdat, zones_size, zholes_size);
 }
@@ -1823,6 +1900,7 @@
        "allocstall",
 
        "pgrotated",
+       "nr_bounce",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
@@ -1926,15 +2004,20 @@
 
        for_each_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
-                       struct zone * zone = pgdat->node_zones + j;
+                       struct zone *zone = pgdat->node_zones + j;
                        unsigned long present_pages = zone->present_pages;
 
                        zone->lowmem_reserve[j] = 0;
 
                        for (idx = j-1; idx >= 0; idx--) {
-                               struct zone * lower_zone = pgdat->node_zones + idx;
-
-                               lower_zone->lowmem_reserve[j] = present_pages / sysctl_lowmem_reserve_ratio[idx];
+                               struct zone *lower_zone;
+
+                               if (sysctl_lowmem_reserve_ratio[idx] < 1)
+                                       sysctl_lowmem_reserve_ratio[idx] = 1;
+
+                               lower_zone = pgdat->node_zones + idx;
+                               lower_zone->lowmem_reserve[j] = present_pages /
+                                       sysctl_lowmem_reserve_ratio[idx];
                                present_pages += lower_zone->present_pages;
                        }
                }
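
Behaviourally this hunk adds one thing: each sysctl_lowmem_reserve_ratio entry is clamped to at least 1 before the divide, so writing 0 through sysctl can no longer trigger a divide-by-zero. The accumulation itself is unchanged and easy to replay in user space (the zone sizes are made up; 256/256/32 are the usual 2.6-era default ratios, stated here as an assumption):

    #include <stdio.h>

    #define NZONES 3

    int main(void)
    {
            /* zones low -> high: DMA, Normal, HighMem; sizes in pages */
            unsigned long present[NZONES] = { 4096, 225280, 32768 };
            int ratio[NZONES] = { 256, 256, 32 };
            unsigned long reserve[NZONES][NZONES] = { { 0 } };
            unsigned long pages;
            int j, idx;

            for (j = 0; j < NZONES; j++) {
                    pages = present[j];
                    for (idx = j - 1; idx >= 0; idx--) {
                            if (ratio[idx] < 1)
                                    ratio[idx] = 1;         /* the new clamp */
                            reserve[idx][j] = pages / ratio[idx];
                            pages += present[idx];
                    }
            }

            for (idx = 0; idx < NZONES; idx++)
                    for (j = idx + 1; j < NZONES; j++)
                            printf("zone %d holds back %lu pages from zone-%d allocations\n",
                                   idx, reserve[idx][j], j);
            return 0;
    }
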
@@ -2041,7 +2124,7 @@
  *     changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 
-               struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, file, buffer, length, ppos);
        setup_per_zone_pages_min();
@@ -2058,7 +2141,7 @@
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-                struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec_minmax(table, write, file, buffer, length, ppos);
        setup_per_zone_lowmem_reserve();
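
Both handlers follow the same pattern: let proc_dointvec*() parse the write, then immediately recompute the derived per-zone values, so a tuning change takes effect without a reboot. From user space the knob is an ordinary proc file; a minimal illustration (needs root, and 4096 is an arbitrary example value):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/vm/min_free_kbytes", "w");

            if (!f) {
                    perror("min_free_kbytes");      /* typically requires root */
                    return 1;
            }
            fprintf(f, "%d\n", 4096);       /* handler reruns setup_per_zone_pages_min() */
            fclose(f);
            return 0;
    }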

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
