# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1200582338 0
# Node ID b17dfd182f7c4bda5417e39f5d1c9ee01273ec12
# Parent c9b32b389e62948b20447bd7e0d4b59dd7ecb887
Update to Linux 2.6.18.8.
Signed-off-by: S.Caglar Onur <caglar@xxxxxxxxxxxxx>
---
patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch | 27
patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch | 61
patches/linux-2.6.18/blktap-aio-16_03_06.patch | 271 ----
patches/linux-2.6.18/fix-ide-cd-pio-mode.patch | 13
patches/linux-2.6.18/fixaddr-top.patch | 64 -
patches/linux-2.6.18/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch | 274 ----
patches/linux-2.6.18/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch | 301 ----
patches/linux-2.6.18/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch | 23
patches/linux-2.6.18/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch | 28
patches/linux-2.6.18/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch | 18
patches/linux-2.6.18/i386-mach-io-check-nmi.patch | 35
patches/linux-2.6.18/linux-2.6.18-xen-375-748cd890ea7f | 214 ---
patches/linux-2.6.18/linux-2.6.18-xen-376-353802ec1caf | 34
patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch | 85 -
patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch | 78 -
patches/linux-2.6.18/net-csum.patch | 57
patches/linux-2.6.18/net-gso-5-rcv-mss.patch | 10
patches/linux-2.6.18/net-gso-6-linear-segmentation.patch | 21
patches/linux-2.6.18/pmd-shared.patch | 100 -
patches/linux-2.6.18/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26
patches/linux-2.6.18/series | 26
patches/linux-2.6.18/softlockup-no-idle-hz.patch | 52
patches/linux-2.6.18/x86-elfnote-as-preprocessor-macro.patch | 41
patches/linux-2.6.18/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 138 --
patches/linux-2.6.18/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 72 -
patches/linux-2.6.18/xen-hotplug.patch | 10
patches/linux-2.6.18/xenoprof-generic.patch | 615 ----------
buildconfigs/mk.linux-2.6-xen | 2
linux-2.6-xen-sparse/arch/i386/Kconfig.cpu | 3
linux-2.6-xen-sparse/drivers/char/mem.c | 12
linux-2.6-xen-sparse/mm/memory.c | 41
linux-2.6-xen-sparse/mm/page_alloc.c | 25
linux-2.6-xen-sparse/net/core/dev.c | 16
linux-2.6-xen-sparse/net/core/skbuff.c | 1
patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch | 28
patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch | 51
patches/linux-2.6.18.8/blktap-aio-16_03_06.patch | 161 ++
patches/linux-2.6.18.8/fix-ide-cd-pio-mode.patch | 13
patches/linux-2.6.18.8/fixaddr-top.patch | 50
patches/linux-2.6.18.8/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch | 178 ++
patches/linux-2.6.18.8/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch | 136 ++
patches/linux-2.6.18.8/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch | 21
patches/linux-2.6.18.8/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch | 27
patches/linux-2.6.18.8/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch | 17
patches/linux-2.6.18.8/i386-mach-io-check-nmi.patch | 30
patches/linux-2.6.18.8/linux-2.6.18-xen-375-748cd890ea7f | 233 +++
patches/linux-2.6.18.8/linux-2.6.18-xen-376-353802ec1caf | 34
patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch | 96 +
patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch | 96 +
patches/linux-2.6.18.8/net-csum.patch | 40
patches/linux-2.6.18.8/net-gso-5-rcv-mss.patch | 10
patches/linux-2.6.18.8/pmd-shared.patch | 57
patches/linux-2.6.18.8/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26
patches/linux-2.6.18.8/series | 25
patches/linux-2.6.18.8/softlockup-no-idle-hz.patch | 32
patches/linux-2.6.18.8/x86-elfnote-as-preprocessor-macro.patch | 25
patches/linux-2.6.18.8/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 39
patches/linux-2.6.18.8/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 63 +
patches/linux-2.6.18.8/xen-hotplug.patch | 10
patches/linux-2.6.18.8/xenoprof-generic.patch | 294 ++++
60 files changed, 1865 insertions(+), 2721 deletions(-)
diff -r c9b32b389e62 -r b17dfd182f7c buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Thu Jan 17 14:35:38 2008 +0000
+++ b/buildconfigs/mk.linux-2.6-xen Thu Jan 17 15:05:38 2008 +0000
@@ -1,5 +1,5 @@ LINUX_SERIES = 2.6
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.18
+LINUX_VER = 2.6.18.8
EXTRAVERSION ?= xen
diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Thu Jan 17 14:35:38 2008 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Thu Jan 17 15:05:38 2008 +0000
@@ -7,6 +7,7 @@ choice
config M386
bool "386"
+ depends on !UML
---help---
This is the processor type of your CPU. This information is used for
optimizing purposes. In order to compile a kernel that can run on
@@ -301,7 +302,7 @@ config X86_USE_PPRO_CHECKSUM
config X86_USE_3DNOW
bool
- depends on MCYRIXIII || MK7 || MGEODE_LX
+ depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
default y
config X86_OOSTORE
diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c Thu Jan 17 14:35:38 2008 +0000
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c Thu Jan 17 15:05:38 2008 +0000
@@ -618,7 +618,8 @@ static inline size_t read_zero_pagealign
count = size;
zap_page_range(vma, addr, count, NULL);
- zeromap_page_range(vma, addr, count, PAGE_COPY);
+ if (zeromap_page_range(vma, addr, count, PAGE_COPY))
+ break;
size -= count;
buf += count;
@@ -685,11 +686,14 @@ out:
static int mmap_zero(struct file * file, struct vm_area_struct * vma)
{
+ int err;
+
if (vma->vm_flags & VM_SHARED)
return shmem_zero_setup(vma);
- if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
- return -EAGAIN;
- return 0;
+ err = zeromap_page_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+ BUG_ON(err == -EEXIST);
+ return err;
}
#else /* CONFIG_MMU */
static ssize_t read_zero(struct file * file, char * buf,
diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Thu Jan 17 14:35:38 2008 +0000
+++ b/linux-2.6-xen-sparse/mm/memory.c Thu Jan 17 15:05:38 2008 +0000
@@ -1131,21 +1131,27 @@ static int zeromap_pte_range(struct mm_s
{
pte_t *pte;
spinlock_t *ptl;
+ int err = 0;
pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
- return -ENOMEM;
+ return -EAGAIN;
do {
struct page *page = ZERO_PAGE(addr);
pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+
+ if (unlikely(!pte_none(*pte))) {
+ err = -EEXIST;
+ pte++;
+ break;
+ }
page_cache_get(page);
page_add_file_rmap(page);
inc_mm_counter(mm, file_rss);
- BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, zero_pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(pte - 1, ptl);
- return 0;
+ return err;
}
static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1153,16 +1159,18 @@ static inline int zeromap_pmd_range(stru
{
pmd_t *pmd;
unsigned long next;
+ int err;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
- return -ENOMEM;
+ return -EAGAIN;
do {
next = pmd_addr_end(addr, end);
- if (zeromap_pte_range(mm, pmd, addr, next, prot))
- return -ENOMEM;
+ err = zeromap_pte_range(mm, pmd, addr, next, prot);
+ if (err)
+ break;
} while (pmd++, addr = next, addr != end);
- return 0;
+ return err;
}
static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
@@ -1170,16 +1178,18 @@ static inline int zeromap_pud_range(stru
{
pud_t *pud;
unsigned long next;
+ int err;
pud = pud_alloc(mm, pgd, addr);
if (!pud)
- return -ENOMEM;
+ return -EAGAIN;
do {
next = pud_addr_end(addr, end);
- if (zeromap_pmd_range(mm, pud, addr, next, prot))
- return -ENOMEM;
+ err = zeromap_pmd_range(mm, pud, addr, next, prot);
+ if (err)
+ break;
} while (pud++, addr = next, addr != end);
- return 0;
+ return err;
}
int zeromap_page_range(struct vm_area_struct *vma,
@@ -1674,7 +1684,14 @@ gotten:
entry = mk_pte(new_page, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
lazy_mmu_prot_update(entry);
- ptep_establish(vma, address, page_table, entry);
+ /*
+ * Clear the pte entry and flush it first, before updating the
+ * pte with the new entry. This will avoid a race condition
+ * seen in the presence of one thread doing SMC and another
+ * thread doing COW.
+ */
+ ptep_clear_flush(vma, address, page_table);
+ set_pte_at(mm, address, page_table, entry);
update_mmu_cache(vma, address, entry);
lru_cache_add_active(new_page);
page_add_new_anon_rmap(new_page, vma, address);
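
For illustration only (not part of the changeset): the reworked zeromap_*_range() helpers above stop at the first failure and hand the specific code (-EAGAIN for a failed table allocation, -EEXIST for an already-populated entry) back up unchanged instead of folding everything into -ENOMEM. A minimal userspace sketch of that convention, with hypothetical names and a toy table standing in for the page tables:

#include <errno.h>
#include <stdio.h>

#define SLOTS 8

/* Populate one slot; refuse if it is already populated (like a non-none pte). */
static int fill_slot(int *slot)
{
	if (*slot)
		return -EEXIST;
	*slot = 1;
	return 0;
}

/* Walk a range and hand the first error back unchanged, the way the
 * reworked zeromap_pmd_range()/zeromap_pud_range() now do. */
static int fill_range(int *table, int start, int end)
{
	int err = 0;

	for (int i = start; i < end && !err; i++)
		err = fill_slot(&table[i]);
	return err;
}

int main(void)
{
	int table[SLOTS] = { [3] = 1 };	/* slot 3 is already populated */

	printf("fill_range() = %d (expected %d)\n",
	       fill_range(table, 0, SLOTS), -EEXIST);
	return 0;
}
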
diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c Thu Jan 17 14:35:38 2008 +0000
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c Thu Jan 17 15:05:38 2008 +0000
@@ -1687,6 +1687,8 @@ void __meminit memmap_init_zone(unsigned
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
if (!early_pfn_valid(pfn))
continue;
+ if (!early_pfn_in_nid(pfn, nid))
+ continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
init_page_count(page);
@@ -1859,8 +1861,10 @@ static inline void free_zone_pagesets(in
for_each_zone(zone) {
struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+ /* Free per_cpu_pageset if it is slab allocated */
+ if (pset != &boot_pageset[cpu])
+ kfree(pset);
zone_pcp(zone, cpu) = NULL;
- kfree(pset);
}
}
@@ -2022,6 +2026,7 @@ static void __meminit free_area_init_cor
#ifdef CONFIG_NUMA
zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
/ 100;
+ zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
@@ -2030,7 +2035,7 @@ static void __meminit free_area_init_cor
zone->zone_pgdat = pgdat;
zone->free_pages = 0;
- zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
+ zone->prev_priority = DEF_PRIORITY;
zone_pcp_init(zone);
INIT_LIST_HEAD(&zone->active_list);
@@ -2332,6 +2337,22 @@ int sysctl_min_unmapped_ratio_sysctl_han
sysctl_min_unmapped_ratio) / 100;
return 0;
}
+
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+ struct zone *zone;
+ int rc;
+
+ rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+ for_each_zone(zone)
+ zone->min_slab_pages = (zone->present_pages *
+ sysctl_min_slab_ratio) / 100;
+ return 0;
+}
#endif
/*
diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/net/core/dev.c
--- a/linux-2.6-xen-sparse/net/core/dev.c Thu Jan 17 14:35:38 2008 +0000
+++ b/linux-2.6-xen-sparse/net/core/dev.c Thu Jan 17 15:05:38 2008 +0000
@@ -1528,14 +1528,16 @@ gso:
if (q->enqueue) {
/* Grab device queue */
spin_lock(&dev->queue_lock);
-
- rc = q->enqueue(skb, q);
-
- qdisc_run(dev);
-
+ q = dev->qdisc;
+ if (q->enqueue) {
+ rc = q->enqueue(skb, q);
+ qdisc_run(dev);
+ spin_unlock(&dev->queue_lock);
+
+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+ goto out;
+ }
spin_unlock(&dev->queue_lock);
- rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
- goto out;
}
/* The device has no queue. Common case for software devices:
diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/net/core/skbuff.c
--- a/linux-2.6-xen-sparse/net/core/skbuff.c Thu Jan 17 14:35:38 2008 +0000
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c Thu Jan 17 15:05:38 2008 +0000
@@ -643,6 +643,7 @@ struct sk_buff *pskb_copy(struct sk_buff
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
+ n->truesize += skb->data_len;
n->data_len = skb->data_len;
n->len = skb->len;
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,30 @@
+From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
+
+In file included from arch/i386/kernel/setup.c:46:
+include/linux/crash_dump.h:19:36: warning: extra tokens at end of #ifndef directive
+
+Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
+Cc: Andi Kleen <ak@xxxxxxx>
+Cc: Horms <horms@xxxxxxxxxxxx>
+Cc: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
+Cc: Magnus Damm <magnus.damm@xxxxxxxxx>
+Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
+Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
+---
+
+ include/linux/crash_dump.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix include/linux/crash_dump.h
+--- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix
++++ a/include/linux/crash_dump.h
+@@ -16,7 +16,7 @@ extern struct proc_dir_entry *proc_vmcor
+
+ /* Architecture code defines this if there are other possible ELF
+ * machine types, e.g. on bi-arch capable hardware. */
+-#ifndef vmcore_elf_check_arch_cross(x)
++#ifndef vmcore_elf_check_arch_cross
+ #define vmcore_elf_check_arch_cross(x) 0
+ #endif
+
+_
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,66 @@
+From: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
+
+The specific case I am encountering is kdump under Xen with a 64 bit
+hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to
+the hypervisor but the dump kernel is 32 bit for maximum compatibility.
+
+It's possibly less likely to be useful in a purely native scenario but I
+see no reason to disallow it.
+
+Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
+Acked-by: Vivek Goyal <vgoyal@xxxxxxxxxx>
+Cc: Horms <horms@xxxxxxxxxxxx>
+Cc: Magnus Damm <magnus.damm@xxxxxxxxx>
+Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
+Cc: Andi Kleen <ak@xxxxxxx>
+Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
+---
+
+ fs/proc/vmcore.c | 2 +-
+ include/asm-i386/kexec.h | 3 +++
+ include/linux/crash_dump.h | 8 ++++++++
+ 3 files changed, 12 insertions(+), 1 deletion(-)
+
+diff -puN fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps fs/proc/vmcore.c
+--- a/fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps
++++ a/fs/proc/vmcore.c
+@@ -514,7 +514,7 @@ static int __init parse_crash_elf64_head
+ /* Do some basic Verification. */
+ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
+ (ehdr.e_type != ET_CORE) ||
+- !elf_check_arch(&ehdr) ||
++ !vmcore_elf_check_arch(&ehdr) ||
+ ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
+ ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
+ ehdr.e_version != EV_CURRENT ||
+diff -puN include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/asm-i386/kexec.h
+--- a/include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps
++++ a/include/asm-i386/kexec.h
+@@ -47,6 +47,9 @@
+ /* The native architecture */
+ #define KEXEC_ARCH KEXEC_ARCH_386
+
++/* We can also handle crash dumps from 64 bit kernel. */
++#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
++
+ #define MAX_NOTE_BYTES 1024
+
+ /* CPU does not save ss and esp on stack if execution is already
+diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/linux/crash_dump.h
+--- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps
++++ a/include/linux/crash_dump.h
+@@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned
+ extern const struct file_operations proc_vmcore_operations;
+ extern struct proc_dir_entry *proc_vmcore;
+
++/* Architecture code defines this if there are other possible ELF
++ * machine types, e.g. on bi-arch capable hardware. */
++#ifndef vmcore_elf_check_arch_cross(x)
++#define vmcore_elf_check_arch_cross(x) 0
++#endif
++
++#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
++
+ #endif /* CONFIG_CRASH_DUMP */
+ #endif /* LINUX_CRASHDUMP_H */
+_
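
For illustration only (not part of the changeset), a standalone sketch of the check this patch introduces: with the cross-arch hook defined, a 64-bit dump header passes vmcore_elf_check_arch() on i386 even though the native check alone would reject it. The elf_check_arch() stub below is assumed for the example; the kernel uses the architecture's real test.

#include <elf.h>
#include <stdio.h>

/* Stand-ins mirroring the macros added by the patch. */
#define elf_check_arch(x)              ((x)->e_machine == EM_386)
#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
#define vmcore_elf_check_arch(x) \
	(elf_check_arch(x) || vmcore_elf_check_arch_cross(x))

int main(void)
{
	Elf64_Ehdr ehdr = { .e_machine = EM_X86_64 };	/* 64-bit vmcore header */

	/* Prints "native: 0, vmcore: 1" - accepted only via the cross-arch hook. */
	printf("native: %d, vmcore: %d\n",
	       elf_check_arch(&ehdr), vmcore_elf_check_arch(&ehdr));
	return 0;
}
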
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/blktap-aio-16_03_06.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/blktap-aio-16_03_06.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,294 @@
+diff -pruN ../orig-linux-2.6.18/fs/aio.c ./fs/aio.c
+--- ../orig-linux-2.6.18/fs/aio.c 2006-09-20 04:42:06.000000000 +0100
++++ ./fs/aio.c 2007-01-12 16:04:15.000000000 +0000
+@@ -34,6 +34,11 @@
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+
++#ifdef CONFIG_EPOLL
++#include <linux/poll.h>
++#include <linux/eventpoll.h>
++#endif
++
+ #if DEBUG > 1
+ #define dprintk printk
+ #else
+@@ -1015,6 +1020,10 @@ put_rq:
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+
++#ifdef CONFIG_EPOLL
++ if (ctx->file && waitqueue_active(&ctx->poll_wait))
++ wake_up(&ctx->poll_wait);
++#endif
+ if (ret)
+ put_ioctx(ctx);
+
+@@ -1024,6 +1033,8 @@ put_rq:
+ /* aio_read_evt
+ * Pull an event off of the ioctx's event ring. Returns the number of
+ * events fetched (0 or 1 ;-)
++ * If ent parameter is 0, just returns the number of events that would
++ * be fetched.
+ * FIXME: make this use cmpxchg.
+ * TODO: make the ringbuffer user mmap()able (requires FIXME).
+ */
+@@ -1046,13 +1057,18 @@ static int aio_read_evt(struct kioctx *i
+
+ head = ring->head % info->nr;
+ if (head != ring->tail) {
+- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+- *ent = *evp;
+- head = (head + 1) % info->nr;
+- smp_mb(); /* finish reading the event before updatng the head */
+- ring->head = head;
+- ret = 1;
+- put_aio_ring_event(evp, KM_USER1);
++ if (ent) { /* event requested */
++ struct io_event *evp =
++ aio_ring_event(info, head, KM_USER1);
++ *ent = *evp;
++ head = (head + 1) % info->nr;
++ /* finish reading the event before updatng the head */
++ smp_mb();
++ ring->head = head;
++ ret = 1;
++ put_aio_ring_event(evp, KM_USER1);
++ } else /* only need to know availability */
++ ret = 1;
+ }
+ spin_unlock(&info->ring_lock);
+
+@@ -1235,9 +1251,78 @@ static void io_destroy(struct kioctx *io
+
+ aio_cancel_all(ioctx);
+ wait_for_all_aios(ioctx);
++#ifdef CONFIG_EPOLL
++ /* forget the poll file, but it's up to the user to close it */
++ if (ioctx->file) {
++ ioctx->file->private_data = 0;
++ ioctx->file = 0;
++ }
++#endif
+ put_ioctx(ioctx); /* once for the lookup */
+ }
+
++#ifdef CONFIG_EPOLL
++
++static int aio_queue_fd_close(struct inode *inode, struct file *file)
++{
++ struct kioctx *ioctx = file->private_data;
++ if (ioctx) {
++ file->private_data = 0;
++ spin_lock_irq(&ioctx->ctx_lock);
++ ioctx->file = 0;
++ spin_unlock_irq(&ioctx->ctx_lock);
++ }
++ return 0;
++}
++
++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
++{ unsigned int pollflags = 0;
++ struct kioctx *ioctx = file->private_data;
++
++ if (ioctx) {
++
++ spin_lock_irq(&ioctx->ctx_lock);
++ /* Insert inside our poll wait queue */
++ poll_wait(file, &ioctx->poll_wait, wait);
++
++ /* Check our condition */
++ if (aio_read_evt(ioctx, 0))
++ pollflags = POLLIN | POLLRDNORM;
++ spin_unlock_irq(&ioctx->ctx_lock);
++ }
++
++ return pollflags;
++}
++
++static const struct file_operations aioq_fops = {
++ .release = aio_queue_fd_close,
++ .poll = aio_queue_fd_poll
++};
++
++/* make_aio_fd:
++ * Create a file descriptor that can be used to poll the event queue.
++ * Based and piggybacked on the excellent epoll code.
++ */
++
++static int make_aio_fd(struct kioctx *ioctx)
++{
++ int error, fd;
++ struct inode *inode;
++ struct file *file;
++
++ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
++ if (error)
++ return error;
++
++ /* associate the file with the IO context */
++ file->private_data = ioctx;
++ ioctx->file = file;
++ init_waitqueue_head(&ioctx->poll_wait);
++ return fd;
++}
++#endif
++
++
+ /* sys_io_setup:
+ * Create an aio_context capable of receiving at least nr_events.
+ * ctxp must not point to an aio_context that already exists, and
+@@ -1250,18 +1335,30 @@ static void io_destroy(struct kioctx *io
+ * resources are available. May fail with -EFAULT if an invalid
+ * pointer is passed for ctxp. Will fail with -ENOSYS if not
+ * implemented.
++ *
++ * To request a selectable fd, the user context has to be initialized
++ * to 1, instead of 0, and the return value is the fd.
++ * This keeps the system call compatible, since a non-zero value
++ * was not allowed so far.
+ */
+ asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
+ {
+ struct kioctx *ioctx = NULL;
+ unsigned long ctx;
+ long ret;
++ int make_fd = 0;
+
+ ret = get_user(ctx, ctxp);
+ if (unlikely(ret))
+ goto out;
+
+ ret = -EINVAL;
++#ifdef CONFIG_EPOLL
++ if (ctx == 1) {
++ make_fd = 1;
++ ctx = 0;
++ }
++#endif
+ if (unlikely(ctx || nr_events == 0)) {
+ pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
+ ctx, nr_events);
+@@ -1272,8 +1369,12 @@ asmlinkage long sys_io_setup(unsigned nr
+ ret = PTR_ERR(ioctx);
+ if (!IS_ERR(ioctx)) {
+ ret = put_user(ioctx->user_id, ctxp);
+- if (!ret)
+- return 0;
++#ifdef CONFIG_EPOLL
++ if (make_fd && ret >= 0)
++ ret = make_aio_fd(ioctx);
++#endif
++ if (ret >= 0)
++ return ret;
+
+ get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+ io_destroy(ioctx);
+diff -pruN ../orig-linux-2.6.18/fs/eventpoll.c ./fs/eventpoll.c
+--- ../orig-linux-2.6.18/fs/eventpoll.c 2006-09-20 04:42:06.000000000 +0100
++++ ./fs/eventpoll.c 2007-01-12 16:04:41.000000000 +0000
+@@ -236,8 +236,6 @@ struct ep_pqueue {
+
+ static void ep_poll_safewake_init(struct poll_safewake *psw);
+ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
+-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+- struct eventpoll *ep);
+ static int ep_alloc(struct eventpoll **pep);
+ static void ep_free(struct eventpoll *ep);
+ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+@@ -267,7 +265,7 @@ static int ep_events_transfer(struct eve
+ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
+ int maxevents, long timeout);
+ static int eventpollfs_delete_dentry(struct dentry *dentry);
+-static struct inode *ep_eventpoll_inode(void);
++static struct inode *ep_eventpoll_inode(const struct file_operations *fops);
+ static int eventpollfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data, struct vfsmount *mnt);
+@@ -517,7 +515,7 @@ asmlinkage long sys_epoll_create(int siz
+ * Creates all the items needed to setup an eventpoll file. That is,
+ * a file structure, and inode and a free file descriptor.
+ */
+- error = ep_getfd(&fd, &inode, &file, ep);
++ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops);
+ if (error)
+ goto eexit_2;
+
+@@ -702,8 +700,8 @@ eexit_1:
+ /*
+ * Creates the file descriptor to be used by the epoll interface.
+ */
+-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+- struct eventpoll *ep)
++int ep_getfd(int *efd, struct inode **einode, struct file **efile,
++ struct eventpoll *ep, const struct file_operations *fops)
+ {
+ struct qstr this;
+ char name[32];
+@@ -719,7 +717,7 @@ static int ep_getfd(int *efd, struct ino
+ goto eexit_1;
+
+ /* Allocates an inode from the eventpoll file system */
+- inode = ep_eventpoll_inode();
++ inode = ep_eventpoll_inode(fops);
+ error = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto eexit_2;
+@@ -750,7 +748,7 @@ static int ep_getfd(int *efd, struct ino
+
+ file->f_pos = 0;
+ file->f_flags = O_RDONLY;
+- file->f_op = &eventpoll_fops;
++ file->f_op = fops;
+ file->f_mode = FMODE_READ;
+ file->f_version = 0;
+ file->private_data = ep;
+@@ -1569,7 +1567,7 @@ static int eventpollfs_delete_dentry(str
+ }
+
+
+-static struct inode *ep_eventpoll_inode(void)
++static struct inode *ep_eventpoll_inode(const struct file_operations *fops)
+ {
+ int error = -ENOMEM;
+ struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
+@@ -1577,7 +1575,7 @@ static struct inode *ep_eventpoll_inode(
+ if (!inode)
+ goto eexit_1;
+
+- inode->i_fop = &eventpoll_fops;
++ inode->i_fop = fops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+diff -pruN ../orig-linux-2.6.18/include/linux/aio.h ./include/linux/aio.h
+--- ../orig-linux-2.6.18/include/linux/aio.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/linux/aio.h 2007-01-12 16:04:15.000000000 +0000
+@@ -191,6 +191,11 @@ struct kioctx {
+ struct aio_ring_info ring_info;
+
+ struct work_struct wq;
++#ifdef CONFIG_EPOLL
++ // poll integration
++ wait_queue_head_t poll_wait;
++ struct file *file;
++#endif
+ };
+
+ /* prototypes */
+diff -pruN ../orig-linux-2.6.18/include/linux/eventpoll.h ./include/linux/eventpoll.h
+--- ../orig-linux-2.6.18/include/linux/eventpoll.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/linux/eventpoll.h 2007-01-12 16:04:15.000000000 +0000
+@@ -90,6 +90,12 @@ static inline void eventpoll_release(str
+ eventpoll_release_file(file);
+ }
+
++/*
++ * called by aio code to create fd that can poll the aio event queueQ
++ */
++struct eventpoll;
++int ep_getfd(int *efd, struct inode **einode, struct file **efile,
++ struct eventpoll *ep, const struct file_operations *fops);
+ #else
+
+ static inline void eventpoll_init_file(struct file *file) {}
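
The new sys_io_setup() comment above describes the user-visible convention: seed the context value with 1 instead of 0 and, on CONFIG_EPOLL kernels carrying this patch, the call returns a pollable file descriptor. A rough userspace sketch using raw syscalls (the libaio wrappers of this era do not know about the convention, and an unpatched kernel simply returns EINVAL):

#include <linux/aio_abi.h>
#include <poll.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	aio_context_t ctx = 1;	/* 1 = "also give me a pollable fd" */
	long fd = syscall(SYS_io_setup, 128, &ctx);

	if (fd < 0) {
		perror("io_setup");	/* fails with EINVAL on kernels without this extension */
		return 1;
	}

	/* ctx now holds the real context id; fd becomes readable whenever
	 * completed events are queued on the context. */
	struct pollfd pfd = { .fd = (int)fd, .events = POLLIN };
	printf("pollable aio fd %ld, events ready: %d\n", fd, poll(&pfd, 1, 0));

	syscall(SYS_io_destroy, ctx);
	return 0;
}
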
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/fix-ide-cd-pio-mode.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/fix-ide-cd-pio-mode.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,18 @@
+diff -pruN ../orig-linux-2.6.18/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
+--- ../orig-linux-2.6.18/drivers/ide/ide-lib.c 2006-09-20 04:42:06.000000000 +0100
++++ ./drivers/ide/ide-lib.c 2007-01-12 16:07:37.000000000 +0000
+@@ -408,10 +408,10 @@ void ide_toggle_bounce(ide_drive_t *driv
+ {
+ u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
+
+- if (!PCI_DMA_BUS_IS_PHYS) {
+- addr = BLK_BOUNCE_ANY;
+- } else if (on && drive->media == ide_disk) {
+- if (HWIF(drive)->pci_dev)
++ if (on && drive->media == ide_disk) {
++ if (!PCI_DMA_BUS_IS_PHYS)
++ addr = BLK_BOUNCE_ANY;
++ else if (HWIF(drive)->pci_dev)
+ addr = HWIF(drive)->pci_dev->dma_mask;
+ }
+
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/fixaddr-top.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/fixaddr-top.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,69 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c
+--- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20 04:42:06.000000000 +0100
++++ ./arch/i386/mm/pgtable.c 2007-01-17 17:19:36.000000000 +0000
+@@ -12,6 +12,7 @@
+ #include <linux/slab.h>
+ #include <linux/pagemap.h>
+ #include <linux/spinlock.h>
++#include <linux/module.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -137,6 +138,10 @@ void set_pmd_pfn(unsigned long vaddr, un
+ __flush_tlb_one(vaddr);
+ }
+
++static int nr_fixmaps = 0;
++unsigned long __FIXADDR_TOP = 0xfffff000;
++EXPORT_SYMBOL(__FIXADDR_TOP);
++
+ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+ {
+ unsigned long address = __fix_to_virt(idx);
+@@ -146,6 +151,13 @@ void __set_fixmap (enum fixed_addresses
+ return;
+ }
+ set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
++ nr_fixmaps++;
++}
++
++void set_fixaddr_top(unsigned long top)
++{
++ BUG_ON(nr_fixmaps > 0);
++ __FIXADDR_TOP = top - PAGE_SIZE;
+ }
+
+ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+diff -pruN ../orig-linux-2.6.18/include/asm-i386/fixmap.h ./include/asm-i386/fixmap.h
+--- ../orig-linux-2.6.18/include/asm-i386/fixmap.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-i386/fixmap.h 2007-01-17 17:19:36.000000000 +0000
+@@ -19,7 +19,7 @@
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap.
+ */
+-#define __FIXADDR_TOP 0xfffff000
++extern unsigned long __FIXADDR_TOP;
+
+ #ifndef __ASSEMBLY__
+ #include <linux/kernel.h>
+@@ -94,6 +94,8 @@ enum fixed_addresses {
+ extern void __set_fixmap (enum fixed_addresses idx,
+ unsigned long phys, pgprot_t flags);
+
++extern void set_fixaddr_top(unsigned long top);
++
+ #define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL)
+ /*
+diff -pruN ../orig-linux-2.6.18/include/asm-i386/page.h ./include/asm-i386/page.h
+--- ../orig-linux-2.6.18/include/asm-i386/page.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-i386/page.h 2007-01-17 17:19:36.000000000 +0000
+@@ -122,7 +122,7 @@ extern int page_is_ram(unsigned long pag
+
+ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+ #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
+-#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
++#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
+ #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+ #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+ #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
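
For a sense of scale (illustration only, not part of the changeset): with the default __FIXADDR_TOP set by the patch and typical, assumed values for the other two constants, the new MAXMEM definition above works out to just under the old fixed 896 MB lowmem limit:

#include <stdio.h>

int main(void)
{
	unsigned long fixaddr_top     = 0xfffff000UL;	/* default set by the patch */
	unsigned long page_offset     = 0xc0000000UL;	/* assumed CONFIG_PAGE_OFFSET */
	unsigned long vmalloc_reserve = 128UL << 20;	/* assumed 128 MB reserve */

	/* New rule: lowmem ends VMALLOC_RESERVE below the movable fixmap top. */
	unsigned long maxmem = fixaddr_top - page_offset - vmalloc_reserve;

	/* Just under 896 MB here, i.e. 4 KB less than the old 4 GB-based limit. */
	printf("MAXMEM = %#lx (%lu MB)\n", maxmem, maxmem >> 20);
	return 0;
}
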
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,382 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c
+--- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100
++++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:23.000000000 +0000
+@@ -20,70 +20,13 @@
+ #include <asm/system.h>
+
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+-
+-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L2_ATTR (_PAGE_PRESENT)
+-
+-#define LEVEL0_SIZE (1UL << 12UL)
+-
+-#ifndef CONFIG_X86_PAE
+-#define LEVEL1_SIZE (1UL << 22UL)
+-static u32 pgtable_level1[1024] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index;
+- u32 *pgtable_level2;
+-
+- /* Find the current page table */
+- pgtable_level2 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = address / LEVEL1_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level2);
+-}
+-
+-#else
+-#define LEVEL1_SIZE (1UL << 21UL)
+-#define LEVEL2_SIZE (1UL << 30UL)
+-static u64 pgtable_level1[512] PAGE_ALIGNED;
+-static u64 pgtable_level2[512] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index, level3_index;
+- u64 *pgtable_level3;
+-
+- /* Find the current page table */
+- pgtable_level3 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+- level3_index = address / LEVEL2_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+- set_64bit(&pgtable_level3[level3_index],
+- __pa(pgtable_level2) | L2_ATTR);
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level3);
+-}
++static u32 kexec_pgd[1024] PAGE_ALIGNED;
++#ifdef CONFIG_X86_PAE
++static u32 kexec_pmd0[1024] PAGE_ALIGNED;
++static u32 kexec_pmd1[1024] PAGE_ALIGNED;
+ #endif
++static u32 kexec_pte0[1024] PAGE_ALIGNED;
++static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+ static void set_idt(void *newidt, __u16 limit)
+ {
+@@ -127,16 +70,6 @@ static void load_segments(void)
+ #undef __STR
+ }
+
+-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+- unsigned long indirection_page,
+- unsigned long reboot_code_buffer,
+- unsigned long start_address,
+- unsigned int has_pae) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern void relocate_new_kernel_end(void);
+-extern const unsigned int relocate_new_kernel_size;
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -169,25 +102,29 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long reboot_code_buffer;
+-
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Compute some offsets */
+- reboot_code_buffer = page_to_pfn(image->control_code_page)
+- << PAGE_SHIFT;
+- page_list = image->head;
+-
+- /* Set up an identity mapping for the reboot_code_buffer */
+- identity_map_page(reboot_code_buffer);
+-
+- /* copy it out */
+- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+- relocate_new_kernel_size);
++ control_page = page_address(image->control_code_page);
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++#ifdef CONFIG_X86_PAE
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++#endif
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -206,6 +143,6 @@ NORET_TYPE void machine_kexec(struct kim
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+- rnk = (relocate_new_kernel_t) reboot_code_buffer;
+- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start, cpu_has_pae);
+ }
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100
++++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:23.000000000 +0000
+@@ -7,16 +7,138 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
++
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 2)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
++
++ .text
++ .align PAGE_ALIGNED
++ .globl relocate_kernel
++relocate_kernel:
++ movl 8(%esp), %ebp /* list of pages */
++
++#ifdef CONFIG_X86_PAE
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_0)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_1)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#else
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#endif
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
+ relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+- movl 8(%esp), %ebp /* reboot_code_buffer */
++ movl 8(%esp), %ebp /* list of pages */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+@@ -24,11 +146,26 @@ relocate_new_kernel:
+ pushl $0
+ popfl
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%ebp), %esp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
++
++ /* switch to new set of page tables */
++ movl PTR(PA_PGD)(%ebp), %eax
++ movl %eax, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%edi), %esp
+
+- /* store the parameters back on the stack */
+- pushl %edx /* store the start address */
++ /* jump to identity mapped page */
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel), %eax
++ pushl %eax
++ ret
++
++identity_mapped:
++ /* store the start address on the stack */
++ pushl %edx
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+@@ -113,8 +250,3 @@ relocate_new_kernel:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .long relocate_new_kernel_end - relocate_new_kernel
+diff -pruN ../orig-linux-2.6.18/include/asm-i386/kexec.h ./include/asm-i386/kexec.h
+--- ../orig-linux-2.6.18/include/asm-i386/kexec.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-i386/kexec.h 2007-01-12 16:03:23.000000000 +0000
+@@ -1,6 +1,26 @@
+ #ifndef _I386_KEXEC_H
+ #define _I386_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PTE_0 4
++#define VA_PTE_0 5
++#define PA_PTE_1 6
++#define VA_PTE_1 7
++#ifdef CONFIG_X86_PAE
++#define PA_PMD_0 8
++#define VA_PMD_0 9
++#define PA_PMD_1 10
++#define VA_PMD_1 11
++#define PAGES_NR 12
++#else
++#define PAGES_NR 8
++#endif
++
++#ifndef __ASSEMBLY__
++
+ #include <asm/fixmap.h>
+ #include <asm/ptrace.h>
+ #include <asm/string.h>
+@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
+ newregs->eip = (unsigned long)current_text_addr();
+ }
+ }
++asmlinkage NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long control_page,
++ unsigned long start_address,
++ unsigned int has_pae) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
+
+ #endif /* _I386_KEXEC_H */
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,355 @@
+diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c
+--- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100
++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:03:49.000000000 +0000
+@@ -15,6 +15,15 @@
+ #include <asm/mmu_context.h>
+ #include <asm/io.h>
+
++#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
++static u64 kexec_pgd[512] PAGE_ALIGNED;
++static u64 kexec_pud0[512] PAGE_ALIGNED;
++static u64 kexec_pmd0[512] PAGE_ALIGNED;
++static u64 kexec_pte0[512] PAGE_ALIGNED;
++static u64 kexec_pud1[512] PAGE_ALIGNED;
++static u64 kexec_pmd1[512] PAGE_ALIGNED;
++static u64 kexec_pte1[512] PAGE_ALIGNED;
++
+ static void init_level2_page(pmd_t *level2p, unsigned long addr)
+ {
+ unsigned long end_addr;
+@@ -144,32 +153,19 @@ static void load_segments(void)
+ );
+ }
+
+-typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
+- unsigned long control_code_buffer,
+- unsigned long start_address,
+- unsigned long pgtable) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern const unsigned long relocate_new_kernel_size;
+-
+ int machine_kexec_prepare(struct kimage *image)
+ {
+- unsigned long start_pgtable, control_code_buffer;
++ unsigned long start_pgtable;
+ int result;
+
+ /* Calculate the offsets */
+ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+- control_code_buffer = start_pgtable + PAGE_SIZE;
+
+ /* Setup the identity mapped 64bit page table */
+ result = init_pgtable(image, start_pgtable);
+ if (result)
+ return result;
+
+- /* Place the code in the reboot code buffer */
+- memcpy(__va(control_code_buffer), relocate_new_kernel,
+- relocate_new_kernel_size);
+-
+ return 0;
+ }
+
+@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long control_code_buffer;
+- unsigned long start_pgtable;
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Calculate the offsets */
+- page_list = image->head;
+- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+- control_code_buffer = start_pgtable + PAGE_SIZE;
++ control_page = page_address(image->control_code_page) + PAGE_SIZE;
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+- /* Set the low half of the page table to my identity mapped
+- * page table for kexec. Leave the high half pointing at the
+- * kernel pages. Don't bother to flush the global pages
+- * as that will happen when I fully switch to my identity mapped
+- * page table anyway.
+- */
+- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
+- __flush_tlb();
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++ page_list[PA_PUD_0] = __pa(kexec_pud0);
++ page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PUD_1] = __pa(kexec_pud1);
++ page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
++ page_list[PA_TABLE_PAGE] =
++ (unsigned long)__pa(page_address(image->control_code_page));
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -222,7 +224,8 @@ NORET_TYPE void machine_kexec(struct kim
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
++
+ /* now call it */
+- rnk = (relocate_new_kernel_t) control_code_buffer;
+- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start);
+ }
+diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100
++++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:03:49.000000000 +0000
+@@ -7,31 +7,169 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 3)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++
++ .text
++ .align PAGE_ALIGNED
+ .code64
++ .globl relocate_kernel
++relocate_kernel:
++ /* %rdi indirection_page
++ * %rsi page_list
++ * %rdx start address
++ */
++
++ /* map the control page at its virtual address */
++
++ movq $0x0000ff8000000000, %r10 /* mask */
++ mov $(39 - 3), %cl /* bits to shift */
++ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PGD)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PUD_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PUD_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PMD_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PMD_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PTE_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PTE_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ /* identity map the control page at its physical address */
++
++ movq $0x0000ff8000000000, %r10 /* mask */
++ mov $(39 - 3), %cl /* bits to shift */
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PGD)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PUD_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PUD_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PMD_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PMD_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PTE_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PTE_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
+ relocate_new_kernel:
+- /* %rdi page_list
+- * %rsi reboot_code_buffer
++ /* %rdi indirection_page
++ * %rsi page_list
+ * %rdx start address
+- * %rcx page_table
+- * %r8 arg5
+- * %r9 arg6
+ */
+
+ /* zero out flags, and disable interrupts */
+ pushq $0
+ popfq
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%rsi), %rsp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++
++ /* get physical address of page table now too */
++ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
++
++ /* switch to new set of page tables */
++ movq PTR(PA_PGD)(%rsi), %r9
++ movq %r9, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%r8), %rsp
++
++ /* jump to identity mapped page */
++ addq $(identity_mapped - relocate_kernel), %r8
++ pushq %r8
++ ret
+
+- /* store the parameters back on the stack */
+- pushq %rdx /* store the start address */
++identity_mapped:
++ /* store the start address on the stack */
++ pushq %rdx
+
+ /* Set cr0 to a known state:
+ * 31 1 == Paging enabled
+@@ -136,8 +274,3 @@ relocate_new_kernel:
+ xorq %r15, %r15
+
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .quad relocate_new_kernel_end - relocate_new_kernel
+diff -pruN ../orig-linux-2.6.18/include/asm-x86_64/kexec.h ./include/asm-x86_64/kexec.h
+--- ../orig-linux-2.6.18/include/asm-x86_64/kexec.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-x86_64/kexec.h 2007-01-12 16:03:49.000000000 +0000
+@@ -1,6 +1,27 @@
+ #ifndef _X86_64_KEXEC_H
+ #define _X86_64_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PUD_0 4
++#define VA_PUD_0 5
++#define PA_PMD_0 6
++#define VA_PMD_0 7
++#define PA_PTE_0 8
++#define VA_PTE_0 9
++#define PA_PUD_1 10
++#define VA_PUD_1 11
++#define PA_PMD_1 12
++#define VA_PMD_1 13
++#define PA_PTE_1 14
++#define VA_PTE_1 15
++#define PA_TABLE_PAGE 16
++#define PAGES_NR 17
++
++#ifndef __ASSEMBLY__
++
+ #include <linux/string.h>
+
+ #include <asm/page.h>
+@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru
+ newregs->rip = (unsigned long)current_text_addr();
+ }
+ }
++
++NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long page_list,
++ unsigned long start_address) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
++
+ #endif /* _X86_64_KEXEC_H */
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,28 @@
+--- ./arch/ia64/kernel/smp.c.orig 2007-05-02 19:00:01.000000000 +0900
++++ ./arch/ia64/kernel/smp.c 2007-05-02 19:04:32.000000000 +0900
+@@ -328,10 +328,14 @@ int
+ smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+ {
+ struct call_data_struct data;
+- int cpus = num_online_cpus()-1;
++ int cpus;
+
+- if (!cpus)
++ spin_lock(&call_lock);
++ cpus = num_online_cpus()-1;
++ if (!cpus) {
++ spin_unlock(&call_lock);
+ return 0;
++ }
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+@@ -343,8 +347,6 @@ smp_call_function (void (*func) (void *i
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+- spin_lock(&call_lock);
+-
+ call_data = &data;
+ mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
+ send_IPI_allbutself(IPI_CALL_FUNC);
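
The fix above is an instance of a general rule: sample the online-CPU count under the same lock that is held for the rest of the cross-call, otherwise a CPU can come or go between the count and the broadcast. A toy pthread analogue (hypothetical names, userspace only, not the kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t call_lock = PTHREAD_MUTEX_INITIALIZER;
static int online_peers = 4;	/* stands in for num_online_cpus() */

static int call_on_peers(void (*fn)(void))
{
	pthread_mutex_lock(&call_lock);
	int peers = online_peers - 1;	/* sampled under the lock, as in the fix */
	if (!peers) {
		pthread_mutex_unlock(&call_lock);
		return 0;
	}
	fn();	/* "broadcast" while the count is still guaranteed valid */
	pthread_mutex_unlock(&call_lock);
	return peers;
}

static void noop(void) { }

int main(void)
{
	printf("dispatched to %d peers\n", call_on_peers(noop));
	return 0;
}
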
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,29 @@
+commit c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4
+Author: Vivek Goyal <vgoyal@xxxxxxxxxx>
+Date: Wed Nov 8 17:44:41 2006 -0800
+
+ [PATCH] i386: Force data segment to be 4K aligned
+
+ o Currently there is no specific alignment restriction in linker script
+ and in some cases it can be placed non 4K aligned addresses. This fails
+ kexec which checks that segment to be loaded is page aligned.
+
+ o I guess, it does not harm data segment to be 4K aligned.
+
+ Signed-off-by: Vivek Goyal <vgoyal@xxxxxxxxxx>
+ Signed-off-by: Andi Kleen <ak@xxxxxxx>
+ Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
+ Signed-off-by: Linus Torvalds <torvalds@xxxxxxxx>
+
+diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
+index adc1f23..c6f84a0 100644
+--- a/arch/i386/kernel/vmlinux.lds.S
++++ b/arch/i386/kernel/vmlinux.lds.S
+@@ -51,6 +51,7 @@ SECTIONS
+ __tracedata_end = .;
+
+ /* writeable */
++ . = ALIGN(4096);
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
+ *(.data)
+ CONSTRUCTORS
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,20 @@
+diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S
+--- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000
++++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:20.000000000 +0000
+@@ -17,6 +17,7 @@ PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ user PT_LOAD FLAGS(7); /* RWE */
++ data.init PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(4); /* R__ */
+ }
+ SECTIONS
+@@ -131,7 +132,7 @@ SECTIONS
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+- } :data
++ }:data.init
+
+ . = ALIGN(4096);
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/i386-mach-io-check-nmi.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/i386-mach-io-check-nmi.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,45 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c
+--- ../orig-linux-2.6.18/arch/i386/kernel/traps.c 2006-09-20 04:42:06.000000000 +0100
++++ ./arch/i386/kernel/traps.c 2007-01-12 16:07:49.000000000 +0000
+@@ -642,18 +642,11 @@ static void mem_parity_error(unsigned ch
+
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+- unsigned long i;
+-
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+- reason = (reason & 0xf) | 8;
+- outb(reason, 0x61);
+- i = 2000;
+- while (--i) udelay(1000);
+- reason &= ~8;
+- outb(reason, 0x61);
++ clear_io_check_error(reason);
+ }
+
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+diff -pruN ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h
+--- ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-i386/mach-default/mach_traps.h 2007-01-12 16:07:49.000000000 +0000
+@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig
+ outb(reason, 0x61);
+ }
+
++static inline void clear_io_check_error(unsigned char reason)
++{
++ unsigned long i;
++
++ reason = (reason & 0xf) | 8;
++ outb(reason, 0x61);
++ i = 2000;
++ while (--i) udelay(1000);
++ reason &= ~8;
++ outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+ return inb(0x61);
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/linux-2.6.18-xen-375-748cd890ea7f
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/linux-2.6.18-xen-375-748cd890ea7f Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,294 @@
+# HG changeset patch
+# User Keir Fraser <keir.fraser@xxxxxxxxxx>
+# Date 1199916724 0
+# Node ID 748cd890ea7f56752311e519e80eece9d850c01a
+# Parent fedc10fba9f1d5ec0c72dbcbca87e508222b4c48
+x86_64: Add TIF_RESTORE_SIGMASK (from upstream Linux)
+
+We need TIF_RESTORE_SIGMASK in order to support ppoll() and pselect()
+system calls. This patch originally came from Andi, and was based
+heavily on David Howells' implementation of same on i386. I fixed a
+typo which was causing do_signal() to use the wrong signal mask.
+
+Signed-off-by: David Woodhouse <dwmw2@xxxxxxxxxxxxx>
+Signed-off-by: Andi Kleen <ak@xxxxxxx>
+
+diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/ia32/ia32_signal.c
+--- a/arch/x86_64/ia32/ia32_signal.c Tue Jan 08 09:55:29 2008 +0000
++++ b/arch/x86_64/ia32/ia32_signal.c Wed Jan 09 22:12:04 2008 +0000
+@@ -113,25 +113,19 @@ int copy_siginfo_from_user32(siginfo_t *
+ }
+
+ asmlinkage long
+-sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
+- struct pt_regs *regs)
+-{
+- sigset_t saveset;
+-
++sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
++{
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+- saveset = current->blocked;
++ current->saved_sigmask = current->blocked;
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+- regs->rax = -EINTR;
+- while (1) {
+- current->state = TASK_INTERRUPTIBLE;
+- schedule();
+- if (do_signal(regs, &saveset))
+- return -EINTR;
+- }
++ current->state = TASK_INTERRUPTIBLE;
++ schedule();
++ set_thread_flag(TIF_RESTORE_SIGMASK);
++ return -ERESTARTNOHAND;
+ }
+
+ asmlinkage long
+@@ -508,11 +502,11 @@ int ia32_setup_frame(int sig, struct k_s
+ current->comm, current->pid, frame, regs->rip, frame->pretcode);
+ #endif
+
+- return 1;
++ return 0;
+
+ give_sigsegv:
+ force_sigsegv(sig, current);
+- return 0;
++ return -EFAULT;
+ }
+
+ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+@@ -595,7 +589,7 @@ int ia32_setup_rt_frame(int sig, struct
+ regs->ss = __USER32_DS;
+
+ set_fs(USER_DS);
+- regs->eflags &= ~TF_MASK;
++ regs->eflags &= ~TF_MASK;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
+@@ -604,9 +598,9 @@ int ia32_setup_rt_frame(int sig, struct
+ current->comm, current->pid, frame, regs->rip, frame->pretcode);
+ #endif
+
+- return 1;
++ return 0;
+
+ give_sigsegv:
+ force_sigsegv(sig, current);
+- return 0;
+-}
++ return -EFAULT;
++}
+diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/kernel/signal.c
+--- a/arch/x86_64/kernel/signal.c Tue Jan 08 09:55:29 2008 +0000
++++ b/arch/x86_64/kernel/signal.c Wed Jan 09 22:12:04 2008 +0000
+@@ -36,37 +36,6 @@ int ia32_setup_rt_frame(int sig, struct
+ sigset_t *set, struct pt_regs * regs);
+ int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs * regs);
+-
+-asmlinkage long
+-sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
+-{
+- sigset_t saveset, newset;
+-
+- /* XXX: Don't preclude handling different sized sigset_t's. */
+- if (sigsetsize != sizeof(sigset_t))
+- return -EINVAL;
+-
+- if (copy_from_user(&newset, unewset, sizeof(newset)))
+- return -EFAULT;
+- sigdelsetmask(&newset, ~_BLOCKABLE);
+-
+- spin_lock_irq(&current->sighand->siglock);
+- saveset = current->blocked;
+- current->blocked = newset;
+- recalc_sigpending();
+- spin_unlock_irq(&current->sighand->siglock);
+-#ifdef DEBUG_SIG
+- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
+- saveset, newset, regs, regs->rip);
+-#endif
+- regs->rax = -EINTR;
+- while (1) {
+- current->state = TASK_INTERRUPTIBLE;
+- schedule();
+- if (do_signal(regs, &saveset))
+- return -EINTR;
+- }
+-}
+
+ asmlinkage long
+ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+@@ -341,11 +310,11 @@ static int setup_rt_frame(int sig, struc
+ current->comm, current->pid, frame, regs->rip, frame->pretcode);
+ #endif
+
+- return 1;
++ return 0;
+
+ give_sigsegv:
+ force_sigsegv(sig, current);
+- return 0;
++ return -EFAULT;
+ }
+
+ /*
+@@ -408,7 +377,7 @@ handle_signal(unsigned long sig, siginfo
+ #endif
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+- if (ret) {
++ if (ret == 0) {
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+@@ -425,11 +394,12 @@ handle_signal(unsigned long sig, siginfo
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+-int do_signal(struct pt_regs *regs, sigset_t *oldset)
++static void do_signal(struct pt_regs *regs)
+ {
+ struct k_sigaction ka;
+ siginfo_t info;
+ int signr;
++ sigset_t *oldset;
+
+ /*
+ * We want the common case to go fast, which
+@@ -438,9 +408,11 @@ int do_signal(struct pt_regs *regs, sigs
+ * if so.
+ */
+ if (!user_mode(regs))
+- return 1;
+-
+- if (!oldset)
++ return;
++
++ if (test_thread_flag(TIF_RESTORE_SIGMASK))
++ oldset = &current->saved_sigmask;
++ else
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+@@ -454,30 +426,46 @@ int do_signal(struct pt_regs *regs, sigs
+ set_debugreg(current->thread.debugreg7, 7);
+
+ /* Whee! Actually deliver the signal. */
+- return handle_signal(signr, &info, &ka, oldset, regs);
++ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
++ /* a signal was successfully delivered; the saved
++ * sigmask will have been stored in the signal frame,
++ * and will be restored by sigreturn, so we can simply
++ * clear the TIF_RESTORE_SIGMASK flag */
++ clear_thread_flag(TIF_RESTORE_SIGMASK);
++ }
++ return;
+ }
+
+ /* Did we come from a system call? */
+ if ((long)regs->orig_rax >= 0) {
+ /* Restart the system call - no handlers present */
+ long res = regs->rax;
+- if (res == -ERESTARTNOHAND ||
+- res == -ERESTARTSYS ||
+- res == -ERESTARTNOINTR) {
++ switch (res) {
++ case -ERESTARTNOHAND:
++ case -ERESTARTSYS:
++ case -ERESTARTNOINTR:
+ regs->rax = regs->orig_rax;
+ regs->rip -= 2;
+- }
+- if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
++ break;
++ case -ERESTART_RESTARTBLOCK:
+ regs->rax = test_thread_flag(TIF_IA32) ?
+ __NR_ia32_restart_syscall :
+ __NR_restart_syscall;
+ regs->rip -= 2;
+- }
+- }
+- return 0;
+-}
+-
+-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
++ break;
++ }
++ }
++
++ /* if there's no signal to deliver, we just put the saved sigmask
++ back. */
++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
++ clear_thread_flag(TIF_RESTORE_SIGMASK);
++ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
++ }
++}
++
++void
++do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
+ {
+ #ifdef DEBUG_SIG
+ printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx
pending:%lx\n",
+@@ -491,8 +479,8 @@ void do_notify_resume(struct pt_regs *re
+ }
+
+ /* deal with pending signal delivery */
+- if (thread_info_flags & _TIF_SIGPENDING)
+- do_signal(regs,oldset);
++ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
++ do_signal(regs);
+ }
+
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/signal.h
+--- a/include/asm-x86_64/signal.h Tue Jan 08 09:55:29 2008 +0000
++++ b/include/asm-x86_64/signal.h Wed Jan 09 22:12:04 2008 +0000
+@@ -22,10 +22,6 @@ typedef struct {
+ typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+ } sigset_t;
+-
+-
+-struct pt_regs;
+-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+
+
+ #else
+diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/thread_info.h
+--- a/include/asm-x86_64/thread_info.h Tue Jan 08 09:55:29 2008 +0000
++++ b/include/asm-x86_64/thread_info.h Wed Jan 09 22:12:04 2008 +0000
+@@ -114,6 +114,7 @@ static inline struct thread_info *stack_
+ #define TIF_IRET 5 /* force IRET */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
++#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
+ /* 16 free */
+ #define TIF_IA32 17 /* 32bit process */
+ #define TIF_FORK 18 /* ret_from_fork */
+@@ -128,6 +129,7 @@ static inline struct thread_info *stack_
+ #define _TIF_IRET (1<<TIF_IRET)
+ #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1<<TIF_SECCOMP)
++#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+ #define _TIF_IA32 (1<<TIF_IA32)
+ #define _TIF_FORK (1<<TIF_FORK)
+ #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
+diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/unistd.h
+--- a/include/asm-x86_64/unistd.h Tue Jan 08 09:55:29 2008 +0000
++++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000
+@@ -658,6 +658,7 @@ do { \
+ #define __ARCH_WANT_SYS_SIGPENDING
+ #define __ARCH_WANT_SYS_SIGPROCMASK
+ #define __ARCH_WANT_SYS_RT_SIGACTION
++#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+ #define __ARCH_WANT_SYS_TIME
+ #define __ARCH_WANT_COMPAT_SYS_TIME
+
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/linux-2.6.18-xen-376-353802ec1caf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/linux-2.6.18-xen-376-353802ec1caf Thu Jan 17
15:05:38 2008 +0000
@@ -0,0 +1,40 @@
+# HG changeset patch
+# User Keir Fraser <keir.fraser@xxxxxxxxxx>
+# Date 1199916752 0
+# Node ID 353802ec1caf399143e48713a04cedd37a106347
+# Parent 748cd890ea7f56752311e519e80eece9d850c01a
+x86_64: Add ppoll/pselect syscalls (from upstream Linux)
+
+Needed TIF_RESTORE_SIGMASK first
+
+Signed-off-by: Andi Kleen <ak@xxxxxxx>
+
+diff -r 748cd890ea7f -r 353802ec1caf arch/x86_64/ia32/ia32entry.S
+--- a/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:04 2008 +0000
++++ b/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:32 2008 +0000
+@@ -703,8 +703,8 @@ ia32_sys_call_table:
+ .quad sys_readlinkat /* 305 */
+ .quad sys_fchmodat
+ .quad sys_faccessat
+- .quad quiet_ni_syscall /* pselect6 for now */
+- .quad quiet_ni_syscall /* ppoll for now */
++ .quad compat_sys_pselect6
++ .quad compat_sys_ppoll
+ .quad sys_unshare /* 310 */
+ .quad compat_sys_set_robust_list
+ .quad compat_sys_get_robust_list
+diff -r 748cd890ea7f -r 353802ec1caf include/asm-x86_64/unistd.h
+--- a/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000
++++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:32 2008 +0000
+@@ -600,9 +600,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat)
+ #define __NR_faccessat 269
+ __SYSCALL(__NR_faccessat, sys_faccessat)
+ #define __NR_pselect6 270
+-__SYSCALL(__NR_pselect6, sys_ni_syscall) /* for now */
++__SYSCALL(__NR_pselect6, sys_pselect6)
+ #define __NR_ppoll 271
+-__SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */
++__SYSCALL(__NR_ppoll, sys_ppoll)
+ #define __NR_unshare 272
+ __SYSCALL(__NR_unshare, sys_unshare)
+ #define __NR_set_robust_list 273
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++
b/patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,151 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c
./arch/i386/kernel/machine_kexec.c
+--- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2007-01-12
16:03:23.000000000 +0000
++++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:37.000000000 +0000
+@@ -28,48 +28,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+-static void set_idt(void *newidt, __u16 limit)
+-{
+- struct Xgt_desc_struct curidt;
+-
+- /* ia32 supports unaliged loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- load_idt(&curidt);
+-};
+-
+-
+-static void set_gdt(void *newgdt, __u16 limit)
+-{
+- struct Xgt_desc_struct curgdt;
+-
+- /* ia32 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- load_gdt(&curgdt);
+-};
+-
+-static void load_segments(void)
+-{
+-#define __STR(X) #X
+-#define STR(X) __STR(X)
+-
+- __asm__ __volatile__ (
+- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+- "\t1:\n"
+- "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
+- "\tmovl %%eax,%%ds\n"
+- "\tmovl %%eax,%%es\n"
+- "\tmovl %%eax,%%fs\n"
+- "\tmovl %%eax,%%gs\n"
+- "\tmovl %%eax,%%ss\n"
+- ::: "eax", "memory");
+-#undef STR
+-#undef __STR
+-}
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -126,23 +84,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_PTE_1] = __pa(kexec_pte1);
+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+- * with from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start, cpu_has_pae);
+ }
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S
./arch/i386/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2007-01-12
16:03:23.000000000 +0000
++++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:37.000000000
+0000
+@@ -154,14 +154,45 @@ relocate_new_kernel:
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, %cr3
+
++ /* setup idt */
++ movl %edi, %eax
++ addl $(idt_48 - relocate_kernel), %eax
++ lidtl (%eax)
++
++ /* setup gdt */
++ movl %edi, %eax
++ addl $(gdt - relocate_kernel), %eax
++ movl %edi, %esi
++ addl $((gdt_48 - relocate_kernel) + 2), %esi
++ movl %eax, (%esi)
++
++ movl %edi, %eax
++ addl $(gdt_48 - relocate_kernel), %eax
++ lgdtl (%eax)
++
++ /* setup data segment registers */
++ mov $(gdt_ds - gdt), %eax
++ mov %eax, %ds
++ mov %eax, %es
++ mov %eax, %fs
++ mov %eax, %gs
++ mov %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%edi), %esp
+
+- /* jump to identity mapped page */
+- movl %edi, %eax
+- addl $(identity_mapped - relocate_kernel), %eax
+- pushl %eax
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movl %edi, %esi
++ xorl %eax, %eax
++ pushl %eax
++ pushl %esi
++ pushl %eax
++ movl $(gdt_cs - gdt), %eax
++ pushl %eax
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel),%eax
++ pushl %eax
++ iretl
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -250,3 +281,20 @@ identity_mapped:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
++gdt_ds:
++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
++gdt_end:
++
++gdt_48:
++ .word gdt_end - gdt - 1 /* limit */
++ .long 0 /* base - filled in by code above */
++
++idt_48:
++ .word 0 /* limit */
++ .long 0 /* base */
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++
b/patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,143 @@
+diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c
./arch/x86_64/kernel/machine_kexec.c
+--- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2007-01-12
16:03:49.000000000 +0000
++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:04:02.000000000
+0000
+@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i
+ return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
+ }
+
+-static void set_idt(void *newidt, u16 limit)
+-{
+- struct desc_ptr curidt;
+-
+- /* x86-64 supports unaliged loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- __asm__ __volatile__ (
+- "lidtq %0\n"
+- : : "m" (curidt)
+- );
+-};
+-
+-
+-static void set_gdt(void *newgdt, u16 limit)
+-{
+- struct desc_ptr curgdt;
+-
+- /* x86-64 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- __asm__ __volatile__ (
+- "lgdtq %0\n"
+- : : "m" (curgdt)
+- );
+-};
+-
+-static void load_segments(void)
+-{
+- __asm__ __volatile__ (
+- "\tmovl %0,%%ds\n"
+- "\tmovl %0,%%es\n"
+- "\tmovl %0,%%ss\n"
+- "\tmovl %0,%%fs\n"
+- "\tmovl %0,%%gs\n"
+- : : "a" (__KERNEL_DS) : "memory"
+- );
+-}
+-
+ int machine_kexec_prepare(struct kimage *image)
+ {
+ unsigned long start_pgtable;
+@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_TABLE_PAGE] =
+ (unsigned long)__pa(page_address(image->control_code_page));
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+- * with from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start);
+ }
+diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S
./arch/x86_64/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2007-01-12
16:03:49.000000000 +0000
++++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:04:02.000000000
+0000
+@@ -159,13 +159,39 @@ relocate_new_kernel:
+ movq PTR(PA_PGD)(%rsi), %r9
+ movq %r9, %cr3
+
++ /* setup idt */
++ movq %r8, %rax
++ addq $(idt_80 - relocate_kernel), %rax
++ lidtq (%rax)
++
++ /* setup gdt */
++ movq %r8, %rax
++ addq $(gdt - relocate_kernel), %rax
++ movq %r8, %r9
++ addq $((gdt_80 - relocate_kernel) + 2), %r9
++ movq %rax, (%r9)
++
++ movq %r8, %rax
++ addq $(gdt_80 - relocate_kernel), %rax
++ lgdtq (%rax)
++
++ /* setup data segment registers */
++ xorl %eax, %eax
++ movl %eax, %ds
++ movl %eax, %es
++ movl %eax, %fs
++ movl %eax, %gs
++ movl %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%r8), %rsp
+
+- /* jump to identity mapped page */
+- addq $(identity_mapped - relocate_kernel), %r8
+- pushq %r8
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movq %r8, %rax
++ addq $(identity_mapped - relocate_kernel), %rax
++ pushq $(gdt_cs - gdt)
++ pushq %rax
++ lretq
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -272,5 +298,19 @@ identity_mapped:
+ xorq %r13, %r13
+ xorq %r14, %r14
+ xorq %r15, %r15
+-
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00af9a000000ffff
++gdt_end:
++
++gdt_80:
++ .word gdt_end - gdt - 1 /* limit */
++ .quad 0 /* base - filled in by code above */
++
++idt_80:
++ .word 0 /* limit */
++ .quad 0 /* base */
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/net-csum.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/net-csum.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,63 @@
+diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c
./net/ipv4/netfilter/ip_nat_proto_tcp.c
+--- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-20
04:42:06.000000000 +0100
++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2007-01-12 16:08:53.000000000
+0000
+@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb,
+ if (hdrsize < sizeof(*hdr))
+ return 1;
+
+- hdr->check = ip_nat_cheat_check(~oldip, newip,
++#ifdef CONFIG_XEN
++ if ((*pskb)->proto_csum_blank)
++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
++ else
++#endif
++ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(oldport ^ 0xFFFF,
+ newport,
+ hdr->check));
+diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c
./net/ipv4/netfilter/ip_nat_proto_udp.c
+--- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-20
04:42:06.000000000 +0100
++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2007-01-12 16:08:53.000000000
+0000
+@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb,
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+- if (hdr->check) /* 0 is a special case meaning no checksum */
+- hdr->check = ip_nat_cheat_check(~oldip, newip,
++ if (hdr->check) { /* 0 is a special case meaning no checksum */
++#ifdef CONFIG_XEN
++ if ((*pskb)->proto_csum_blank)
++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
++ else
++#endif
++ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(*portptr ^ 0xFFFF,
+ newport,
+ hdr->check));
++ }
+ *portptr = newport;
+ return 1;
+ }
+diff -pruN ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c
./net/ipv4/xfrm4_output.c
+--- ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c 2006-09-20
04:42:06.000000000 +0100
++++ ./net/ipv4/xfrm4_output.c 2007-01-12 17:38:34.000000000 +0000
+@@ -18,6 +18,8 @@
+ #include <net/xfrm.h>
+ #include <net/icmp.h>
+
++extern int skb_checksum_setup(struct sk_buff *skb);
++
+ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
+ {
+ int mtu, ret = 0;
+@@ -48,6 +50,10 @@ static int xfrm4_output_one(struct sk_bu
+ struct xfrm_state *x = dst->xfrm;
+ int err;
+
++ err = skb_checksum_setup(skb);
++ if (err)
++ goto error_nolock;
++
+ if (skb->ip_summed == CHECKSUM_HW) {
+ err = skb_checksum_help(skb, 0);
+ if (err)
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/net-gso-5-rcv-mss.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/net-gso-5-rcv-mss.patch Thu Jan 17 15:05:38
2008 +0000
@@ -0,0 +1,12 @@
+diff -pruN ../orig-linux-2.6.18/net/ipv4/tcp_input.c ./net/ipv4/tcp_input.c
+--- ../orig-linux-2.6.18/net/ipv4/tcp_input.c 2006-09-20 04:42:06.000000000
+0100
++++ ./net/ipv4/tcp_input.c 2007-01-12 18:10:16.000000000 +0000
+@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s
+ /* skb->len may jitter because of SACKs, even if peer
+ * sends good full-sized frames.
+ */
+- len = skb->len;
++ len = skb_shinfo(skb)->gso_size ?: skb->len;
+ if (len >= icsk->icsk_ack.rcv_mss) {
+ icsk->icsk_ack.rcv_mss = len;
+ } else {
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/pmd-shared.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/pmd-shared.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,111 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pageattr.c
./arch/i386/mm/pageattr.c
+--- ../orig-linux-2.6.18/arch/i386/mm/pageattr.c 2006-09-20
04:42:06.000000000 +0100
++++ ./arch/i386/mm/pageattr.c 2007-01-12 18:11:06.000000000 +0000
+@@ -84,7 +84,7 @@ static void set_pmd_pte(pte_t *kpte, uns
+ unsigned long flags;
+
+ set_pte_atomic(kpte, pte); /* change init_mm */
+- if (PTRS_PER_PMD > 1)
++ if (HAVE_SHARED_KERNEL_PMD)
+ return;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c
+--- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20
04:42:06.000000000 +0100
++++ ./arch/i386/mm/pgtable.c 2007-01-12 18:11:06.000000000 +0000
+@@ -214,9 +214,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
+ spin_lock_irqsave(&pgd_lock, flags);
+ }
+
+- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- KERNEL_PGD_PTRS);
++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD)
++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
++ swapper_pg_dir + USER_PTRS_PER_PGD,
++ KERNEL_PGD_PTRS);
+ if (PTRS_PER_PMD > 1)
+ return;
+
+@@ -248,6 +249,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
++
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
++
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
++ if (!pmd)
++ goto out_oom;
++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
++ }
++
++ spin_lock_irqsave(&pgd_lock, flags);
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
++ pgd_t *kpgd = pgd_offset_k(v);
++ pud_t *kpud = pud_offset(kpgd, v);
++ pmd_t *kpmd = pmd_offset(kpud, v);
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ memcpy(pmd, kpmd, PAGE_SIZE);
++ }
++ pgd_list_add(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ }
++
+ return pgd;
+
+ out_oom:
+@@ -262,9 +287,23 @@ void pgd_free(pgd_t *pgd)
+ int i;
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+- if (PTRS_PER_PMD > 1)
+- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
++ if (PTRS_PER_PMD > 1) {
++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ kmem_cache_free(pmd_cache, pmd);
++ }
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
++ spin_lock_irqsave(&pgd_lock, flags);
++ pgd_list_del(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
++ kmem_cache_free(pmd_cache, pmd);
++ }
++ }
++ }
+ /* in the non-PAE case, free_pgtables() clears user pgd entries */
+ kmem_cache_free(pgd_cache, pgd);
+ }
+diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h
./include/asm-i386/pgtable-2level-defs.h
+--- ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h
2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-i386/pgtable-2level-defs.h 2007-01-12 18:11:06.000000000
+0000
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
+ #define _I386_PGTABLE_2LEVEL_DEFS_H
+
++#define HAVE_SHARED_KERNEL_PMD 0
++
+ /*
+ * traditional i386 two-level paging structure:
+ */
+diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h
./include/asm-i386/pgtable-3level-defs.h
+--- ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h
2006-09-20 04:42:06.000000000 +0100
++++ ./include/asm-i386/pgtable-3level-defs.h 2007-01-12 18:11:06.000000000
+0000
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
+ #define _I386_PGTABLE_3LEVEL_DEFS_H
+
++#define HAVE_SHARED_KERNEL_PMD 1
++
+ /*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,30 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/entry.S
./arch/i386/kernel/entry.S
+--- ../orig-linux-2.6.18/arch/i386/kernel/entry.S 2006-09-20
04:42:06.000000000 +0100
++++ ./arch/i386/kernel/entry.S 2007-01-12 18:12:31.000000000 +0000
+@@ -269,7 +269,7 @@ ENTRY(sysenter_entry)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA esp, 0
+ CFI_REGISTER esp, ebp
+- movl TSS_sysenter_esp0(%esp),%esp
++ movl SYSENTER_stack_esp0(%esp),%esp
+ sysenter_past_esp:
+ /*
+ * No need to follow this irqs on/off section: the syscall
+@@ -689,7 +689,7 @@ device_not_available_emulate:
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+@@ -701,7 +701,7 @@ device_not_available_emulate:
+ cmpw $__KERNEL_CS,4(%esp); \
+ jne ok; \
+ label: \
+- movl TSS_sysenter_esp0+offset(%esp),%esp; \
++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \
+ pushfl; \
+ pushl $__KERNEL_CS; \
+ pushl $sysenter_past_esp
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/series
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/series Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,25 @@
+git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
+linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
+git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
+linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
+blktap-aio-16_03_06.patch
+fix-ide-cd-pio-mode.patch
+i386-mach-io-check-nmi.patch
+net-csum.patch
+net-gso-5-rcv-mss.patch
+pmd-shared.patch
+rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
+xen-hotplug.patch
+xenoprof-generic.patch
+x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
+x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
+git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
+x86-elfnote-as-preprocessor-macro.patch
+fixaddr-top.patch
+git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch
+softlockup-no-idle-hz.patch
+allow-i386-crash-kernels-to-handle-x86_64-dumps.patch
+allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch
+git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch
+linux-2.6.18-xen-375-748cd890ea7f
+linux-2.6.18-xen-376-353802ec1caf
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/softlockup-no-idle-hz.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/softlockup-no-idle-hz.patch Thu Jan 17
15:05:38 2008 +0000
@@ -0,0 +1,56 @@
+diff -pruN ../orig-linux-2.6.18/include/linux/sched.h ./include/linux/sched.h
+--- ../orig-linux-2.6.18/include/linux/sched.h 2006-09-20 04:42:06.000000000
+0100
++++ ./include/linux/sched.h 2007-02-07 01:10:24.000000000 +0000
+@@ -211,10 +211,15 @@ extern void update_process_times(int use
+ extern void scheduler_tick(void);
+
+ #ifdef CONFIG_DETECT_SOFTLOCKUP
++extern unsigned long softlockup_get_next_event(void);
+ extern void softlockup_tick(void);
+ extern void spawn_softlockup_task(void);
+ extern void touch_softlockup_watchdog(void);
+ #else
++static inline unsigned long softlockup_get_next_event(void)
++{
++ return MAX_JIFFY_OFFSET;
++}
+ static inline void softlockup_tick(void)
+ {
+ }
+diff -pruN ../orig-linux-2.6.18/kernel/softlockup.c ./kernel/softlockup.c
+--- ../orig-linux-2.6.18/kernel/softlockup.c 2006-09-20 04:42:06.000000000
+0100
++++ ./kernel/softlockup.c 2007-02-07 01:53:22.000000000 +0000
+@@ -40,6 +40,19 @@ void touch_softlockup_watchdog(void)
+ }
+ EXPORT_SYMBOL(touch_softlockup_watchdog);
+
++unsigned long softlockup_get_next_event(void)
++{
++ int this_cpu = smp_processor_id();
++ unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
++
++ if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
++ did_panic ||
++ !per_cpu(watchdog_task, this_cpu))
++ return MAX_JIFFY_OFFSET;
++
++ return max_t(long, 0, touch_timestamp + HZ - jiffies);
++}
++
+ /*
+ * This callback runs from the timer interrupt, and checks
+ * whether the watchdog thread has hung or not:
+diff -pruN ../orig-linux-2.6.18/kernel/timer.c ./kernel/timer.c
+--- ../orig-linux-2.6.18/kernel/timer.c 2006-09-20 04:42:06.000000000
+0100
++++ ./kernel/timer.c 2007-02-07 01:29:34.000000000 +0000
+@@ -485,7 +485,9 @@ unsigned long next_timer_interrupt(void)
+ if (hr_expires < 3)
+ return hr_expires + jiffies;
+ }
+- hr_expires += jiffies;
++ hr_expires = min_t(unsigned long,
++ softlockup_get_next_event(),
++ hr_expires) + jiffies;
+
+ base = __get_cpu_var(tvec_bases);
+ spin_lock(&base->lock);
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/x86-elfnote-as-preprocessor-macro.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/x86-elfnote-as-preprocessor-macro.patch Thu Jan
17 15:05:38 2008 +0000
@@ -0,0 +1,56 @@
+diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h
./include/linux/elfnote.h
+--- ../orig-linux-2.6.18/include/linux/elfnote.h 2007-01-12
18:19:44.000000000 +0000
++++ ./include/linux/elfnote.h 2007-01-12 18:21:02.000000000 +0000
+@@ -31,22 +31,38 @@
+ /*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+- * desc data with appropriate padding. The 'desc' argument includes
+- * the assembler pseudo op defining the type of the data: .asciz
+- * "hello, world"
++ * desc data with appropriate padding. The 'desctype' argument is the
++ * assembler pseudo op defining the type of the data e.g. .asciz while
++ * 'descdata' is the data itself e.g. "hello, world".
++ *
++ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
++ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
+ */
+-.macro ELFNOTE name type desc:vararg
+-.pushsection ".note.\name"
+- .align 4
+- .long 2f - 1f /* namesz */
+- .long 4f - 3f /* descsz */
+- .long \type
+-1:.asciz "\name"
+-2:.align 4
+-3:\desc
+-4:.align 4
++#ifdef __STDC__
++#define ELFNOTE(name, type, desctype, descdata...) \
++.pushsection .note.name ; \
++ .align 4 ; \
++ .long 2f - 1f /* namesz */ ; \
++ .long 4f - 3f /* descsz */ ; \
++ .long type ; \
++1:.asciz #name ; \
++2:.align 4 ; \
++3:desctype descdata ; \
++4:.align 4 ; \
+ .popsection
+-.endm
++#else /* !__STDC__, i.e. -traditional */
++#define ELFNOTE(name, type, desctype, descdata) \
++.pushsection .note.name ; \
++ .align 4 ; \
++ .long 2f - 1f /* namesz */ ; \
++ .long 4f - 3f /* descsz */ ; \
++ .long type ; \
++1:.asciz "name" ; \
++2:.align 4 ; \
++3:desctype descdata ; \
++4:.align 4 ; \
++.popsection
++#endif /* __STDC__ */
+ #else /* !__ASSEMBLER__ */
+ #include <linux/elf.h>
+ /*
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++
b/patches/linux-2.6.18.8/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,143 @@
+diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S
./arch/i386/kernel/vmlinux.lds.S
+--- ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S 2006-09-20
04:42:06.000000000 +0100
++++ ./arch/i386/kernel/vmlinux.lds.S 2007-01-12 18:19:44.000000000 +0000
+@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386"
+ OUTPUT_ARCH(i386)
+ ENTRY(phys_startup_32)
+ jiffies = jiffies_64;
++
++PHDRS {
++ text PT_LOAD FLAGS(5); /* R_E */
++ data PT_LOAD FLAGS(7); /* RWE */
++ note PT_NOTE FLAGS(4); /* R__ */
++}
+ SECTIONS
+ {
+ . = __KERNEL_START;
+@@ -26,7 +32,7 @@ SECTIONS
+ KPROBES_TEXT
+ *(.fixup)
+ *(.gnu.warning)
+- } = 0x9090
++ } :text = 0x9090
+
+ _etext = .; /* End of text section */
+
+@@ -48,7 +54,7 @@ SECTIONS
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
+ *(.data)
+ CONSTRUCTORS
+- }
++ } :data
+
+ . = ALIGN(4096);
+ __nosave_begin = .;
+@@ -184,4 +190,6 @@ SECTIONS
+ STABS_DEBUG
+
+ DWARF_DEBUG
++
++ NOTES
+ }
+diff -pruN ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h
./include/asm-generic/vmlinux.lds.h
+--- ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h 2006-09-20
04:42:06.000000000 +0100
++++ ./include/asm-generic/vmlinux.lds.h 2007-01-12 18:19:44.000000000
+0000
+@@ -194,3 +194,6 @@
+ .stab.index 0 : { *(.stab.index) } \
+ .stab.indexstr 0 : { *(.stab.indexstr) } \
+ .comment 0 : { *(.comment) }
++
++#define NOTES \
++ .notes : { *(.note.*) } :note
+diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h
./include/linux/elfnote.h
+--- ../orig-linux-2.6.18/include/linux/elfnote.h 1970-01-01
01:00:00.000000000 +0100
++++ ./include/linux/elfnote.h 2007-01-12 18:19:44.000000000 +0000
+@@ -0,0 +1,88 @@
++#ifndef _LINUX_ELFNOTE_H
++#define _LINUX_ELFNOTE_H
++/*
++ * Helper macros to generate ELF Note structures, which are put into a
++ * PT_NOTE segment of the final vmlinux image. These are useful for
++ * including name-value pairs of metadata into the kernel binary (or
++ * modules?) for use by external programs.
++ *
++ * Each note has three parts: a name, a type and a desc. The name is
++ * intended to distinguish the note's originator, so it would be a
++ * company, project, subsystem, etc; it must be in a suitable form for
++ * use in a section name. The type is an integer which is used to tag
++ * the data, and is considered to be within the "name" namespace (so
++ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
++ * "desc" field is the actual data. There are no constraints on the
++ * desc field's contents, though typically they're fairly small.
++ *
++ * All notes from a given NAME are put into a section named
++ * .note.NAME. When the kernel image is finally linked, all the notes
++ * are packed into a single .notes section, which is mapped into the
++ * PT_NOTE segment. Because notes for a given name are grouped into
++ * the same section, they'll all be adjacent the output file.
++ *
++ * This file defines macros for both C and assembler use. Their
++ * syntax is slightly different, but they're semantically similar.
++ *
++ * See the ELF specification for more detail about ELF notes.
++ */
++
++#ifdef __ASSEMBLER__
++/*
++ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
++ * turn out to be the same size and shape), followed by the name and
++ * desc data with appropriate padding. The 'desc' argument includes
++ * the assembler pseudo op defining the type of the data: .asciz
++ * "hello, world"
++ */
++.macro ELFNOTE name type desc:vararg
++.pushsection ".note.\name"
++ .align 4
++ .long 2f - 1f /* namesz */
++ .long 4f - 3f /* descsz */
++ .long \type
++1:.asciz "\name"
++2:.align 4
++3:\desc
++4:.align 4
++.popsection
++.endm
++#else /* !__ASSEMBLER__ */
++#include <linux/elf.h>
++/*
++ * Use an anonymous structure which matches the shape of
++ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
++ * type of name and desc depend on the macro arguments. "name" must
++ * be a literal string, and "desc" must be passed by value. You may
++ * only define one note per line, since __LINE__ is used to generate
++ * unique symbols.
++ */
++#define _ELFNOTE_PASTE(a,b) a##b
++#define _ELFNOTE(size, name, unique, type, desc) \
++ static const struct { \
++ struct elf##size##_note _nhdr; \
++ unsigned char _name[sizeof(name)] \
++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
++ typeof(desc) _desc \
++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
++ } _ELFNOTE_PASTE(_note_, unique) \
++ __attribute_used__ \
++ __attribute__((section(".note." name), \
++ aligned(sizeof(Elf##size##_Word)), \
++ unused)) = { \
++ { \
++ sizeof(name), \
++ sizeof(desc), \
++ type, \
++ }, \
++ name, \
++ desc \
++ }
++#define ELFNOTE(size, name, type, desc) \
++ _ELFNOTE(size, name, __LINE__, type, desc)
++
++#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
++#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
++#endif /* __ASSEMBLER__ */
++
++#endif /* _LINUX_ELFNOTE_H */
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++
b/patches/linux-2.6.18.8/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,84 @@
+diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S
./arch/x86_64/kernel/vmlinux.lds.S
+--- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2006-09-20
04:42:06.000000000 +0100
++++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000
+@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86
+ OUTPUT_ARCH(i386:x86-64)
+ ENTRY(phys_startup_64)
+ jiffies_64 = jiffies;
++PHDRS {
++ text PT_LOAD FLAGS(5); /* R_E */
++ data PT_LOAD FLAGS(7); /* RWE */
++ user PT_LOAD FLAGS(7); /* RWE */
++ note PT_NOTE FLAGS(4); /* R__ */
++}
+ SECTIONS
+ {
+ . = __START_KERNEL;
+@@ -31,7 +37,7 @@ SECTIONS
+ KPROBES_TEXT
+ *(.fixup)
+ *(.gnu.warning)
+- } = 0x9090
++ } :text = 0x9090
+ /* out-of-line lock text */
+ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
+
+@@ -57,17 +63,10 @@ SECTIONS
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ *(.data)
+ CONSTRUCTORS
+- }
++ } :data
+
+ _edata = .; /* End of data section */
+
+- __bss_start = .; /* BSS */
+- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+- *(.bss.page_aligned)
+- *(.bss)
+- }
+- __bss_stop = .;
+-
+ . = ALIGN(PAGE_SIZE);
+ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+@@ -89,7 +88,7 @@ SECTIONS
+ #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+
+ . = VSYSCALL_ADDR;
+- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) }
++ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
+ __vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+@@ -132,7 +131,7 @@ SECTIONS
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+- }
++ } :data
+
+ . = ALIGN(4096);
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+@@ -222,6 +221,14 @@ SECTIONS
+ . = ALIGN(4096);
+ __nosave_end = .;
+
++ __bss_start = .; /* BSS */
++ . = ALIGN(4096);
++ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
++ *(.bss.page_aligned)
++ *(.bss)
++ }
++ __bss_stop = .;
++
+ _end = . ;
+
+ /* Sections to be discarded */
+@@ -235,4 +242,6 @@ SECTIONS
+ STABS_DEBUG
+
+ DWARF_DEBUG
++
++ NOTES
+ }
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/xen-hotplug.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/xen-hotplug.patch Thu Jan 17 15:05:38 2008 +0000
@@ -0,0 +1,12 @@
+diff -pruN ../orig-linux-2.6.18/fs/proc/proc_misc.c ./fs/proc/proc_misc.c
+--- ../orig-linux-2.6.18/fs/proc/proc_misc.c 2006-09-20 04:42:06.000000000
+0100
++++ ./fs/proc/proc_misc.c 2007-01-12 18:18:36.000000000 +0000
+@@ -471,7 +471,7 @@ static int show_stat(struct seq_file *p,
+ (unsigned long long)cputime64_to_clock_t(irq),
+ (unsigned long long)cputime64_to_clock_t(softirq),
+ (unsigned long long)cputime64_to_clock_t(steal));
+- for_each_online_cpu(i) {
++ for_each_possible_cpu(i) {
+
+ /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
+ user = kstat_cpu(i).cpustat.user;
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18.8/xenoprof-generic.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.18.8/xenoprof-generic.patch Thu Jan 17 15:05:38
2008 +0000
@@ -0,0 +1,662 @@
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c
./drivers/oprofile/buffer_sync.c
+--- ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/buffer_sync.c 2007-01-12 18:19:28.000000000 +0000
+@@ -6,6 +6,10 @@
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+ * This is the core of the buffer management. Each
+ * CPU buffer is processed and entered into the
+ * global event buffer. Such processing is necessary
+@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_
+ static DEFINE_SPINLOCK(task_mortuary);
+ static void process_task_mortuary(void);
+
++static int cpu_current_domain[NR_CPUS];
+
+ /* Take ownership of the task struct and place it on the
+ * list for processing. Only after two full buffer syncs
+@@ -146,6 +151,11 @@ static void end_sync(void)
+ int sync_start(void)
+ {
+ int err;
++ int i;
++
++ for (i = 0; i < NR_CPUS; i++) {
++ cpu_current_domain[i] = COORDINATOR_DOMAIN;
++ }
+
+ start_cpu_work();
+
+@@ -275,15 +285,31 @@ static void add_cpu_switch(int i)
+ last_cookie = INVALID_COOKIE;
+ }
+
+-static void add_kernel_ctx_switch(unsigned int in_kernel)
++static void add_cpu_mode_switch(unsigned int cpu_mode)
+ {
+ add_event_entry(ESCAPE_CODE);
+- if (in_kernel)
+- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+- else
+- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
++ switch (cpu_mode) {
++ case CPU_MODE_USER:
++ add_event_entry(USER_ENTER_SWITCH_CODE);
++ break;
++ case CPU_MODE_KERNEL:
++ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
++ break;
++ case CPU_MODE_XEN:
++ add_event_entry(XEN_ENTER_SWITCH_CODE);
++ break;
++ default:
++ break;
++ }
+ }
+-
++
++static void add_domain_switch(unsigned long domain_id)
++{
++ add_event_entry(ESCAPE_CODE);
++ add_event_entry(DOMAIN_SWITCH_CODE);
++ add_event_entry(domain_id);
++}
++
+ static void
+ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+ {
+@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc
+ * for later lookup from userspace.
+ */
+ static int
+-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
+ {
+- if (in_kernel) {
++ if (cpu_mode >= CPU_MODE_KERNEL) {
+ add_sample_entry(s->eip, s->event);
+ return 1;
+ } else if (mm) {
+@@ -496,15 +522,21 @@ void sync_buffer(int cpu)
+ struct mm_struct *mm = NULL;
+ struct task_struct * new;
+ unsigned long cookie = 0;
+- int in_kernel = 1;
++ int cpu_mode = 1;
+ unsigned int i;
+ sync_buffer_state state = sb_buffer_start;
+ unsigned long available;
++ int domain_switch = 0;
+
+ mutex_lock(&buffer_mutex);
+
+ add_cpu_switch(cpu);
+
++ /* We need to assign the first samples in this CPU buffer to the
++ same domain that we were processing at the last sync_buffer */
++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
++ add_domain_switch(cpu_current_domain[cpu]);
++ }
+ /* Remember, only we can modify tail_pos */
+
+ available = get_slots(cpu_buf);
+@@ -512,16 +544,18 @@ void sync_buffer(int cpu)
+ for (i = 0; i < available; ++i) {
+ struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
+
+- if (is_code(s->eip)) {
+- if (s->event <= CPU_IS_KERNEL) {
+- /* kernel/userspace switch */
+- in_kernel = s->event;
++ if (is_code(s->eip) && !domain_switch) {
++ if (s->event <= CPU_MODE_XEN) {
++ /* xen/kernel/userspace switch */
++ cpu_mode = s->event;
+ if (state == sb_buffer_start)
+ state = sb_sample_start;
+- add_kernel_ctx_switch(s->event);
++ add_cpu_mode_switch(s->event);
+ } else if (s->event == CPU_TRACE_BEGIN) {
+ state = sb_bt_start;
+ add_trace_begin();
++ } else if (s->event == CPU_DOMAIN_SWITCH) {
++ domain_switch = 1;
+ } else {
+ struct mm_struct * oldmm = mm;
+
+@@ -535,11 +569,21 @@ void sync_buffer(int cpu)
+ add_user_ctx_switch(new, cookie);
+ }
+ } else {
+- if (state >= sb_bt_start &&
+- !add_sample(mm, s, in_kernel)) {
+- if (state == sb_bt_start) {
+- state = sb_bt_ignore;
+- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
++ if (domain_switch) {
++ cpu_current_domain[cpu] = s->eip;
++ add_domain_switch(s->eip);
++ domain_switch = 0;
++ } else {
++ if (cpu_current_domain[cpu] !=
++ COORDINATOR_DOMAIN) {
++ add_sample_entry(s->eip, s->event);
++ }
++ else if (state >= sb_bt_start &&
++ !add_sample(mm, s, cpu_mode)) {
++ if (state == sb_bt_start) {
++ state = sb_bt_ignore;
++ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
++ }
+ }
+ }
+ }
+@@ -548,6 +592,11 @@ void sync_buffer(int cpu)
+ }
+ release_mm(mm);
+
++ /* We reset domain to COORDINATOR at each CPU switch */
++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
++ add_domain_switch(COORDINATOR_DOMAIN);
++ }
++
+ mark_done(cpu);
+
+ mutex_unlock(&buffer_mutex);
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c
./drivers/oprofile/cpu_buffer.c
+--- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/cpu_buffer.c 2007-01-12 18:18:50.000000000 +0000
+@@ -6,6 +6,10 @@
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+ * Each CPU has a local buffer that stores PC value/event
+ * pairs. We also log context switches when we notice them.
+ * Eventually each CPU's buffer is processed into the global
+@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *);
+ #define DEFAULT_TIMER_EXPIRE (HZ / 10)
+ static int work_enabled;
+
++static int32_t current_domain = COORDINATOR_DOMAIN;
++
+ void free_cpu_buffers(void)
+ {
+ int i;
+@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void)
+ goto fail;
+
+ b->last_task = NULL;
+- b->last_is_kernel = -1;
++ b->last_cpu_mode = -1;
+ b->tracing = 0;
+ b->buffer_size = buffer_size;
+ b->tail_pos = 0;
+@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cp
+ * collected will populate the buffer with proper
+ * values to initialize the buffer
+ */
+- cpu_buf->last_is_kernel = -1;
++ cpu_buf->last_cpu_mode = -1;
+ cpu_buf->last_task = NULL;
+ }
+
+@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * bu
+ * because of the head/tail separation of the writer and reader
+ * of the CPU buffer.
+ *
+- * is_kernel is needed because on some architectures you cannot
++ * cpu_mode is needed because on some architectures you cannot
+ * tell if you are in kernel or user space simply by looking at
+- * pc. We tag this in the buffer by generating kernel enter/exit
+- * events whenever is_kernel changes
++ * pc. We tag this in the buffer by generating kernel/user (and xen)
++ * enter events whenever cpu_mode changes
+ */
+ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
+- int is_kernel, unsigned long event)
++ int cpu_mode, unsigned long event)
+ {
+ struct task_struct * task;
+
+@@ -180,18 +186,18 @@ static int log_sample(struct oprofile_cp
+ return 0;
+ }
+
+- is_kernel = !!is_kernel;
+-
+ task = current;
+
+ /* notice a switch from user->kernel or vice versa */
+- if (cpu_buf->last_is_kernel != is_kernel) {
+- cpu_buf->last_is_kernel = is_kernel;
+- add_code(cpu_buf, is_kernel);
++ if (cpu_buf->last_cpu_mode != cpu_mode) {
++ cpu_buf->last_cpu_mode = cpu_mode;
++ add_code(cpu_buf, cpu_mode);
+ }
+-
++
+ /* notice a task switch */
+- if (cpu_buf->last_task != task) {
++ /* if not processing other domain samples */
++ if ((cpu_buf->last_task != task) &&
++ (current_domain == COORDINATOR_DOMAIN)) {
+ cpu_buf->last_task = task;
+ add_code(cpu_buf, (unsigned long)task);
+ }
+@@ -275,6 +281,25 @@ void oprofile_add_trace(unsigned long pc
+ add_sample(cpu_buf, pc, 0);
+ }
+
++int oprofile_add_domain_switch(int32_t domain_id)
++{
++ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
++
++ /* should have space for switching into and out of domain
++ (2 slots each) plus one sample and one cpu mode switch */
++ if (((nr_available_slots(cpu_buf) < 6) &&
++ (domain_id != COORDINATOR_DOMAIN)) ||
++ (nr_available_slots(cpu_buf) < 2))
++ return 0;
++
++ add_code(cpu_buf, CPU_DOMAIN_SWITCH);
++ add_sample(cpu_buf, domain_id, 0);
++
++ current_domain = domain_id;
++
++ return 1;
++}
++
+ /*
+ * This serves to avoid cpu buffer overflow, and makes sure
+ * the task mortuary progresses
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h
./drivers/oprofile/cpu_buffer.h
+--- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/cpu_buffer.h 2007-01-12 18:18:50.000000000 +0000
+@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
+ volatile unsigned long tail_pos;
+ unsigned long buffer_size;
+ struct task_struct * last_task;
+- int last_is_kernel;
++ int last_cpu_mode;
+ int tracing;
+ struct op_sample * buffer;
+ unsigned long sample_received;
+@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu
+ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
+
+ /* transient events for the CPU buffer -> event buffer */
+-#define CPU_IS_KERNEL 1
+-#define CPU_TRACE_BEGIN 2
++#define CPU_MODE_USER 0
++#define CPU_MODE_KERNEL 1
++#define CPU_MODE_XEN 2
++#define CPU_TRACE_BEGIN 3
++#define CPU_DOMAIN_SWITCH 4
+
+ #endif /* OPROFILE_CPU_BUFFER_H */
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h
./drivers/oprofile/event_buffer.h
+--- ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/event_buffer.h 2007-01-12 18:18:50.000000000 +0000
+@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void);
+ #define CPU_SWITCH_CODE 2
+ #define COOKIE_SWITCH_CODE 3
+ #define KERNEL_ENTER_SWITCH_CODE 4
+-#define KERNEL_EXIT_SWITCH_CODE 5
++#define USER_ENTER_SWITCH_CODE 5
+ #define MODULE_LOADED_CODE 6
+ #define CTX_TGID_CODE 7
+ #define TRACE_BEGIN_CODE 8
+ #define TRACE_END_CODE 9
++#define XEN_ENTER_SWITCH_CODE 10
++#define DOMAIN_SWITCH_CODE 11
+
+ #define INVALID_COOKIE ~0UL
+ #define NO_COOKIE 0UL
+
++/* Constant used to refer to coordinator domain (Xen) */
++#define COORDINATOR_DOMAIN -1
++
+ /* add data to the event buffer */
+ void add_event_entry(unsigned long data);
+
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.c
./drivers/oprofile/oprof.c
+--- ../orig-linux-2.6.18/drivers/oprofile/oprof.c 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/oprof.c 2007-01-12 18:18:50.000000000 +0000
+@@ -5,6 +5,10 @@
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+ #include <linux/kernel.h>
+@@ -19,7 +23,7 @@
+ #include "cpu_buffer.h"
+ #include "buffer_sync.h"
+ #include "oprofile_stats.h"
+-
++
+ struct oprofile_operations oprofile_ops;
+
+ unsigned long oprofile_started;
+@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex);
+ */
+ static int timer = 0;
+
++int oprofile_set_active(int active_domains[], unsigned int adomains)
++{
++ int err;
++
++ if (!oprofile_ops.set_active)
++ return -EINVAL;
++
++ mutex_lock(&start_mutex);
++ err = oprofile_ops.set_active(active_domains, adomains);
++ mutex_unlock(&start_mutex);
++ return err;
++}
++
++int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
++{
++ int err;
++
++ if (!oprofile_ops.set_passive)
++ return -EINVAL;
++
++ mutex_lock(&start_mutex);
++ err = oprofile_ops.set_passive(passive_domains, pdomains);
++ mutex_unlock(&start_mutex);
++ return err;
++}
++
+ int oprofile_setup(void)
+ {
+ int err;
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.h
./drivers/oprofile/oprof.h
+--- ../orig-linux-2.6.18/drivers/oprofile/oprof.h 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/oprof.h 2007-01-12 18:18:50.000000000 +0000
+@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_
+ void oprofile_timer_init(struct oprofile_operations * ops);
+
+ int oprofile_set_backtrace(unsigned long depth);
++
++int oprofile_set_active(int active_domains[], unsigned int adomains);
++int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
+
+ #endif /* OPROF_H */
+diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c
./drivers/oprofile/oprofile_files.c
+--- ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c 2006-09-20
04:42:06.000000000 +0100
++++ ./drivers/oprofile/oprofile_files.c 2007-01-12 18:18:50.000000000
+0000
+@@ -5,15 +5,21 @@
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+ #include <linux/fs.h>
+ #include <linux/oprofile.h>
++#include <asm/uaccess.h>
++#include <linux/ctype.h>
+
+ #include "event_buffer.h"
+ #include "oprofile_stats.h"
+ #include "oprof.h"
+-
++
+ unsigned long fs_buffer_size = 131072;
+ unsigned long fs_cpu_buffer_size = 8192;
+ unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file *
+ static struct file_operations dump_fops = {
+ .write = dump_write,
+ };
+-
++
++#define TMPBUFSIZE 512
++
++static unsigned int adomains = 0;
++static int active_domains[MAX_OPROF_DOMAINS + 1];
++static DEFINE_MUTEX(adom_mutex);
++
++static ssize_t adomain_write(struct file * file, char const __user * buf,
++ size_t count, loff_t * offset)
++{
++ char *tmpbuf;
++ char *startp, *endp;
++ int i;
++ unsigned long val;
++ ssize_t retval = count;
++
++ if (*offset)
++ return -EINVAL;
++ if (count > TMPBUFSIZE - 1)
++ return -EINVAL;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ if (copy_from_user(tmpbuf, buf, count)) {
++ kfree(tmpbuf);
++ return -EFAULT;
++ }
++ tmpbuf[count] = 0;
++
++ mutex_lock(&adom_mutex);
++
++ startp = tmpbuf;
++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
++ val = simple_strtoul(startp, &endp, 0);
++ if (endp == startp)
++ break;
++ while (ispunct(*endp) || isspace(*endp))
++ endp++;
++ active_domains[i] = val;
++ if (active_domains[i] != val)
++ /* Overflow, force error below */
++ i = MAX_OPROF_DOMAINS + 1;
++ startp = endp;
++ }
++ /* Force error on trailing junk */
++ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
++
++ kfree(tmpbuf);
++
++ if (adomains > MAX_OPROF_DOMAINS
++ || oprofile_set_active(active_domains, adomains)) {
++ adomains = 0;
++ retval = -EINVAL;
++ }
++
++ mutex_unlock(&adom_mutex);
++ return retval;
++}
++
++static ssize_t adomain_read(struct file * file, char __user * buf,
++ size_t count, loff_t * offset)
++{
++ char * tmpbuf;
++ size_t len;
++ int i;
++ ssize_t retval;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ mutex_lock(&adom_mutex);
++
++ len = 0;
++ for (i = 0; i < adomains; i++)
++ len += snprintf(tmpbuf + len,
++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
++ "%u ", active_domains[i]);
++ WARN_ON(len > TMPBUFSIZE);
++ if (len != 0 && len <= TMPBUFSIZE)
++ tmpbuf[len-1] = '\n';
++
++ mutex_unlock(&adom_mutex);
++
++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
++
++ kfree(tmpbuf);
++ return retval;
++}
++
++
++static struct file_operations active_domain_ops = {
++ .read = adomain_read,
++ .write = adomain_write,
++};
++
++static unsigned int pdomains = 0;
++static int passive_domains[MAX_OPROF_DOMAINS];
++static DEFINE_MUTEX(pdom_mutex);
++
++static ssize_t pdomain_write(struct file * file, char const __user * buf,
++ size_t count, loff_t * offset)
++{
++ char *tmpbuf;
++ char *startp, *endp;
++ int i;
++ unsigned long val;
++ ssize_t retval = count;
++
++ if (*offset)
++ return -EINVAL;
++ if (count > TMPBUFSIZE - 1)
++ return -EINVAL;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ if (copy_from_user(tmpbuf, buf, count)) {
++ kfree(tmpbuf);
++ return -EFAULT;
++ }
++ tmpbuf[count] = 0;
++
++ mutex_lock(&pdom_mutex);
++
++ startp = tmpbuf;
++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
++ val = simple_strtoul(startp, &endp, 0);
++ if (endp == startp)
++ break;
++ while (ispunct(*endp) || isspace(*endp))
++ endp++;
++ passive_domains[i] = val;
++ if (passive_domains[i] != val)
++ /* Overflow, force error below */
++ i = MAX_OPROF_DOMAINS + 1;
++ startp = endp;
++ }
++ /* Force error on trailing junk */
++ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
++
++ kfree(tmpbuf);
++
++ if (pdomains > MAX_OPROF_DOMAINS
++ || oprofile_set_passive(passive_domains, pdomains)) {
++ pdomains = 0;
++ retval = -EINVAL;
++ }
++
++ mutex_unlock(&pdom_mutex);
++ return retval;
++}
++
++static ssize_t pdomain_read(struct file * file, char __user * buf,
++ size_t count, loff_t * offset)
++{
++ char * tmpbuf;
++ size_t len;
++ int i;
++ ssize_t retval;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ mutex_lock(&pdom_mutex);
++
++ len = 0;
++ for (i = 0; i < pdomains; i++)
++ len += snprintf(tmpbuf + len,
++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
++ "%u ", passive_domains[i]);
++ WARN_ON(len > TMPBUFSIZE);
++ if (len != 0 && len <= TMPBUFSIZE)
++ tmpbuf[len-1] = '\n';
++
++ mutex_unlock(&pdom_mutex);
++
++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
++
++ kfree(tmpbuf);
++ return retval;
++}
++
++static struct file_operations passive_domain_ops = {
++ .read = pdomain_read,
++ .write = pdomain_write,
++};
++
+ void oprofile_create_files(struct super_block * sb, struct dentry * root)
+ {
+ oprofilefs_create_file(sb, root, "enable", &enable_fops);
+ oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
+ oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
+ oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
+ oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
+diff -pruN ../orig-linux-2.6.18/include/linux/oprofile.h ./include/linux/oprofile.h
+--- ../orig-linux-2.6.18/include/linux/oprofile.h 2006-09-20 04:42:06.000000000 +0100
++++ ./include/linux/oprofile.h 2007-01-12 18:18:50.000000000 +0000
+@@ -16,6 +16,8 @@
+ #include <linux/types.h>
+ #include <linux/spinlock.h>
+ #include <asm/atomic.h>
++
++#include <xen/interface/xenoprof.h>
+
+ struct super_block;
+ struct dentry;
+@@ -27,6 +29,11 @@ struct oprofile_operations {
+ /* create any necessary configuration files in the oprofile fs.
+ * Optional. */
+ int (*create_files)(struct super_block * sb, struct dentry * root);
++ /* setup active domains with Xen */
++ int (*set_active)(int *active_domains, unsigned int adomains);
++ /* setup passive domains with Xen */
++ int (*set_passive)(int *passive_domains, unsigned int pdomains);
++
+ /* Do any necessary interrupt setup. Optional. */
+ int (*setup)(void);
+ /* Do any necessary interrupt shutdown. Optional. */
+@@ -78,6 +85,8 @@ void oprofile_add_pc(unsigned long pc, i
+ /* add a backtrace entry, to be called from the ->backtrace callback */
+ void oprofile_add_trace(unsigned long eip);
+
++/* add a domain switch entry */
++int oprofile_add_domain_switch(int32_t domain_id);
+
+ /**
+ * Create a file of the given name as a child of the given root, with
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch
--- a/patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
-
-In file included from arch/i386/kernel/setup.c:46:
-include/linux/crash_dump.h:19:36: warning: extra tokens at end of #ifndef directive
-
-Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
-Cc: Andi Kleen <ak@xxxxxxx>
-Cc: Horms <horms@xxxxxxxxxxxx>
-Cc: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
-Cc: Magnus Damm <magnus.damm@xxxxxxxxx>
-Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
-Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
----
-
- include/linux/crash_dump.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix include/linux/crash_dump.h
---- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix
-+++ a/include/linux/crash_dump.h
-@@ -16,7 +16,7 @@ extern struct proc_dir_entry *proc_vmcor
-
- /* Architecture code defines this if there are other possible ELF
- * machine types, e.g. on bi-arch capable hardware. */
--#ifndef vmcore_elf_check_arch_cross(x)
-+#ifndef vmcore_elf_check_arch_cross
- #define vmcore_elf_check_arch_cross(x) 0
- #endif
-
-_
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch
--- a/patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-From: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
-
-The specific case I am encountering is kdump under Xen with a 64 bit
-hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to
-the hypervisor but the dump kernel is 32 bit for maximum compatibility.
-
-It's possibly less likely to be useful in a purely native scenario but I
-see no reason to disallow it.
-
-Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
-Acked-by: Vivek Goyal <vgoyal@xxxxxxxxxx>
-Cc: Horms <horms@xxxxxxxxxxxx>
-Cc: Magnus Damm <magnus.damm@xxxxxxxxx>
-Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
-Cc: Andi Kleen <ak@xxxxxxx>
-Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
----
-
- fs/proc/vmcore.c | 2 +-
- include/asm-i386/kexec.h | 3 +++
- include/linux/crash_dump.h | 8 ++++++++
- 3 files changed, 12 insertions(+), 1 deletion(-)
-
-diff -puN fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps fs/proc/vmcore.c
---- a/fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps
-+++ a/fs/proc/vmcore.c
-@@ -514,7 +514,7 @@ static int __init parse_crash_elf64_head
- /* Do some basic Verification. */
- if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
- (ehdr.e_type != ET_CORE) ||
-- !elf_check_arch(&ehdr) ||
-+ !vmcore_elf_check_arch(&ehdr) ||
- ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
- ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
- ehdr.e_version != EV_CURRENT ||
-diff -puN include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/asm-i386/kexec.h
---- a/include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps
-+++ a/include/asm-i386/kexec.h
-@@ -47,6 +47,9 @@
- /* The native architecture */
- #define KEXEC_ARCH KEXEC_ARCH_386
-
-+/* We can also handle crash dumps from 64 bit kernel. */
-+#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
-+
- #define MAX_NOTE_BYTES 1024
-
- /* CPU does not save ss and esp on stack if execution is already
-diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/linux/crash_dump.h
---- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps
-+++ a/include/linux/crash_dump.h
-@@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned
- extern const struct file_operations proc_vmcore_operations;
- extern struct proc_dir_entry *proc_vmcore;
-
-+/* Architecture code defines this if there are other possible ELF
-+ * machine types, e.g. on bi-arch capable hardware. */
-+#ifndef vmcore_elf_check_arch_cross(x)
-+#define vmcore_elf_check_arch_cross(x) 0
-+#endif
-+
-+#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
-+
- #endif /* CONFIG_CRASH_DUMP */
- #endif /* LINUX_CRASHDUMP_H */
-_
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/blktap-aio-16_03_06.patch
--- a/patches/linux-2.6.18/blktap-aio-16_03_06.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,294 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/fs/aio.c ./fs/aio.c
---- ../orig-linux-2.6.18/fs/aio.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./fs/aio.c 2007-01-12 16:04:15.000000000 +0000
-@@ -34,6 +34,11 @@
- #include <asm/uaccess.h>
- #include <asm/mmu_context.h>
-
-+#ifdef CONFIG_EPOLL
-+#include <linux/poll.h>
-+#include <linux/eventpoll.h>
-+#endif
-+
- #if DEBUG > 1
- #define dprintk printk
- #else
-@@ -1015,6 +1020,10 @@ put_rq:
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
-
-+#ifdef CONFIG_EPOLL
-+ if (ctx->file && waitqueue_active(&ctx->poll_wait))
-+ wake_up(&ctx->poll_wait);
-+#endif
- if (ret)
- put_ioctx(ctx);
-
-@@ -1024,6 +1033,8 @@ put_rq:
- /* aio_read_evt
- * Pull an event off of the ioctx's event ring. Returns the number of
- * events fetched (0 or 1 ;-)
-+ * If ent parameter is 0, just returns the number of events that would
-+ * be fetched.
- * FIXME: make this use cmpxchg.
- * TODO: make the ringbuffer user mmap()able (requires FIXME).
- */
-@@ -1046,13 +1057,18 @@ static int aio_read_evt(struct kioctx *i
-
- head = ring->head % info->nr;
- if (head != ring->tail) {
-- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
-- *ent = *evp;
-- head = (head + 1) % info->nr;
-- smp_mb(); /* finish reading the event before updatng the head */
-- ring->head = head;
-- ret = 1;
-- put_aio_ring_event(evp, KM_USER1);
-+ if (ent) { /* event requested */
-+ struct io_event *evp =
-+ aio_ring_event(info, head, KM_USER1);
-+ *ent = *evp;
-+ head = (head + 1) % info->nr;
-+ /* finish reading the event before updatng the head */
-+ smp_mb();
-+ ring->head = head;
-+ ret = 1;
-+ put_aio_ring_event(evp, KM_USER1);
-+ } else /* only need to know availability */
-+ ret = 1;
- }
- spin_unlock(&info->ring_lock);
-
-@@ -1235,9 +1251,78 @@ static void io_destroy(struct kioctx *io
-
- aio_cancel_all(ioctx);
- wait_for_all_aios(ioctx);
-+#ifdef CONFIG_EPOLL
-+ /* forget the poll file, but it's up to the user to close it */
-+ if (ioctx->file) {
-+ ioctx->file->private_data = 0;
-+ ioctx->file = 0;
-+ }
-+#endif
- put_ioctx(ioctx); /* once for the lookup */
- }
-
-+#ifdef CONFIG_EPOLL
-+
-+static int aio_queue_fd_close(struct inode *inode, struct file *file)
-+{
-+ struct kioctx *ioctx = file->private_data;
-+ if (ioctx) {
-+ file->private_data = 0;
-+ spin_lock_irq(&ioctx->ctx_lock);
-+ ioctx->file = 0;
-+ spin_unlock_irq(&ioctx->ctx_lock);
-+ }
-+ return 0;
-+}
-+
-+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
-+{ unsigned int pollflags = 0;
-+ struct kioctx *ioctx = file->private_data;
-+
-+ if (ioctx) {
-+
-+ spin_lock_irq(&ioctx->ctx_lock);
-+ /* Insert inside our poll wait queue */
-+ poll_wait(file, &ioctx->poll_wait, wait);
-+
-+ /* Check our condition */
-+ if (aio_read_evt(ioctx, 0))
-+ pollflags = POLLIN | POLLRDNORM;
-+ spin_unlock_irq(&ioctx->ctx_lock);
-+ }
-+
-+ return pollflags;
-+}
-+
-+static const struct file_operations aioq_fops = {
-+ .release = aio_queue_fd_close,
-+ .poll = aio_queue_fd_poll
-+};
-+
-+/* make_aio_fd:
-+ * Create a file descriptor that can be used to poll the event queue.
-+ * Based and piggybacked on the excellent epoll code.
-+ */
-+
-+static int make_aio_fd(struct kioctx *ioctx)
-+{
-+ int error, fd;
-+ struct inode *inode;
-+ struct file *file;
-+
-+ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
-+ if (error)
-+ return error;
-+
-+ /* associate the file with the IO context */
-+ file->private_data = ioctx;
-+ ioctx->file = file;
-+ init_waitqueue_head(&ioctx->poll_wait);
-+ return fd;
-+}
-+#endif
-+
-+
- /* sys_io_setup:
- * Create an aio_context capable of receiving at least nr_events.
- * ctxp must not point to an aio_context that already exists, and
-@@ -1250,18 +1335,30 @@ static void io_destroy(struct kioctx *io
- * resources are available. May fail with -EFAULT if an invalid
- * pointer is passed for ctxp. Will fail with -ENOSYS if not
- * implemented.
-+ *
-+ * To request a selectable fd, the user context has to be initialized
-+ * to 1, instead of 0, and the return value is the fd.
-+ * This keeps the system call compatible, since a non-zero value
-+ * was not allowed so far.
- */
- asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
- {
- struct kioctx *ioctx = NULL;
- unsigned long ctx;
- long ret;
-+ int make_fd = 0;
-
- ret = get_user(ctx, ctxp);
- if (unlikely(ret))
- goto out;
-
- ret = -EINVAL;
-+#ifdef CONFIG_EPOLL
-+ if (ctx == 1) {
-+ make_fd = 1;
-+ ctx = 0;
-+ }
-+#endif
- if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
- ctx, nr_events);
-@@ -1272,8 +1369,12 @@ asmlinkage long sys_io_setup(unsigned nr
- ret = PTR_ERR(ioctx);
- if (!IS_ERR(ioctx)) {
- ret = put_user(ioctx->user_id, ctxp);
-- if (!ret)
-- return 0;
-+#ifdef CONFIG_EPOLL
-+ if (make_fd && ret >= 0)
-+ ret = make_aio_fd(ioctx);
-+#endif
-+ if (ret >= 0)
-+ return ret;
-
- get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
- io_destroy(ioctx);
-diff -pruN ../orig-linux-2.6.18/fs/eventpoll.c ./fs/eventpoll.c
---- ../orig-linux-2.6.18/fs/eventpoll.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./fs/eventpoll.c 2007-01-12 16:04:41.000000000 +0000
-@@ -236,8 +236,6 @@ struct ep_pqueue {
-
- static void ep_poll_safewake_init(struct poll_safewake *psw);
- static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
--static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-- struct eventpoll *ep);
- static int ep_alloc(struct eventpoll **pep);
- static void ep_free(struct eventpoll *ep);
- static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
-@@ -267,7 +265,7 @@ static int ep_events_transfer(struct eve
- static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
- int maxevents, long timeout);
- static int eventpollfs_delete_dentry(struct dentry *dentry);
--static struct inode *ep_eventpoll_inode(void);
-+static struct inode *ep_eventpoll_inode(const struct file_operations *fops);
- static int eventpollfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data, struct vfsmount *mnt);
-@@ -517,7 +515,7 @@ asmlinkage long sys_epoll_create(int siz
- * Creates all the items needed to setup an eventpoll file. That is,
- * a file structure, and inode and a free file descriptor.
- */
-- error = ep_getfd(&fd, &inode, &file, ep);
-+ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops);
- if (error)
- goto eexit_2;
-
-@@ -702,8 +700,8 @@ eexit_1:
- /*
- * Creates the file descriptor to be used by the epoll interface.
- */
--static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-- struct eventpoll *ep)
-+int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+ struct eventpoll *ep, const struct file_operations *fops)
- {
- struct qstr this;
- char name[32];
-@@ -719,7 +717,7 @@ static int ep_getfd(int *efd, struct ino
- goto eexit_1;
-
- /* Allocates an inode from the eventpoll file system */
-- inode = ep_eventpoll_inode();
-+ inode = ep_eventpoll_inode(fops);
- error = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto eexit_2;
-@@ -750,7 +748,7 @@ static int ep_getfd(int *efd, struct ino
-
- file->f_pos = 0;
- file->f_flags = O_RDONLY;
-- file->f_op = &eventpoll_fops;
-+ file->f_op = fops;
- file->f_mode = FMODE_READ;
- file->f_version = 0;
- file->private_data = ep;
-@@ -1569,7 +1567,7 @@ static int eventpollfs_delete_dentry(str
- }
-
-
--static struct inode *ep_eventpoll_inode(void)
-+static struct inode *ep_eventpoll_inode(const struct file_operations *fops)
- {
- int error = -ENOMEM;
- struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
-@@ -1577,7 +1575,7 @@ static struct inode *ep_eventpoll_inode(
- if (!inode)
- goto eexit_1;
-
-- inode->i_fop = &eventpoll_fops;
-+ inode->i_fop = fops;
-
- /*
- * Mark the inode dirty from the very beginning,
-diff -pruN ../orig-linux-2.6.18/include/linux/aio.h ./include/linux/aio.h
---- ../orig-linux-2.6.18/include/linux/aio.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/linux/aio.h 2007-01-12 16:04:15.000000000 +0000
-@@ -191,6 +191,11 @@ struct kioctx {
- struct aio_ring_info ring_info;
-
- struct work_struct wq;
-+#ifdef CONFIG_EPOLL
-+ // poll integration
-+ wait_queue_head_t poll_wait;
-+ struct file *file;
-+#endif
- };
-
- /* prototypes */
-diff -pruN ../orig-linux-2.6.18/include/linux/eventpoll.h ./include/linux/eventpoll.h
---- ../orig-linux-2.6.18/include/linux/eventpoll.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/linux/eventpoll.h 2007-01-12 16:04:15.000000000 +0000
-@@ -90,6 +90,12 @@ static inline void eventpoll_release(str
- eventpoll_release_file(file);
- }
-
-+/*
-+ * called by aio code to create fd that can poll the aio event queueQ
-+ */
-+struct eventpoll;
-+int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+ struct eventpoll *ep, const struct file_operations *fops);
- #else
-
- static inline void eventpoll_init_file(struct file *file) {}
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/fix-ide-cd-pio-mode.patch
--- a/patches/linux-2.6.18/fix-ide-cd-pio-mode.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
---- ../orig-linux-2.6.18/drivers/ide/ide-lib.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./drivers/ide/ide-lib.c 2007-01-12 16:07:37.000000000 +0000
-@@ -408,10 +408,10 @@ void ide_toggle_bounce(ide_drive_t *driv
- {
- u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
-
-- if (!PCI_DMA_BUS_IS_PHYS) {
-- addr = BLK_BOUNCE_ANY;
-- } else if (on && drive->media == ide_disk) {
-- if (HWIF(drive)->pci_dev)
-+ if (on && drive->media == ide_disk) {
-+ if (!PCI_DMA_BUS_IS_PHYS)
-+ addr = BLK_BOUNCE_ANY;
-+ else if (HWIF(drive)->pci_dev)
- addr = HWIF(drive)->pci_dev->dma_mask;
- }
-
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/fixaddr-top.patch
--- a/patches/linux-2.6.18/fixaddr-top.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c
---- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./arch/i386/mm/pgtable.c 2007-01-17 17:19:36.000000000 +0000
-@@ -12,6 +12,7 @@
- #include <linux/slab.h>
- #include <linux/pagemap.h>
- #include <linux/spinlock.h>
-+#include <linux/module.h>
-
- #include <asm/system.h>
- #include <asm/pgtable.h>
-@@ -137,6 +138,10 @@ void set_pmd_pfn(unsigned long vaddr, un
- __flush_tlb_one(vaddr);
- }
-
-+static int nr_fixmaps = 0;
-+unsigned long __FIXADDR_TOP = 0xfffff000;
-+EXPORT_SYMBOL(__FIXADDR_TOP);
-+
- void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
- {
- unsigned long address = __fix_to_virt(idx);
-@@ -146,6 +151,13 @@ void __set_fixmap (enum fixed_addresses
- return;
- }
- set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
-+ nr_fixmaps++;
-+}
-+
-+void set_fixaddr_top(unsigned long top)
-+{
-+ BUG_ON(nr_fixmaps > 0);
-+ __FIXADDR_TOP = top - PAGE_SIZE;
- }
-
- pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-diff -pruN ../orig-linux-2.6.18/include/asm-i386/fixmap.h ./include/asm-i386/fixmap.h
---- ../orig-linux-2.6.18/include/asm-i386/fixmap.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-i386/fixmap.h 2007-01-17 17:19:36.000000000 +0000
-@@ -19,7 +19,7 @@
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap.
- */
--#define __FIXADDR_TOP 0xfffff000
-+extern unsigned long __FIXADDR_TOP;
-
- #ifndef __ASSEMBLY__
- #include <linux/kernel.h>
-@@ -94,6 +94,8 @@ enum fixed_addresses {
- extern void __set_fixmap (enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
-
-+extern void set_fixaddr_top(unsigned long top);
-+
- #define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL)
- /*
-diff -pruN ../orig-linux-2.6.18/include/asm-i386/page.h ./include/asm-i386/page.h
---- ../orig-linux-2.6.18/include/asm-i386/page.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-i386/page.h 2007-01-17 17:19:36.000000000 +0000
-@@ -122,7 +122,7 @@ extern int page_is_ram(unsigned long pag
-
- #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
- #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
--#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
-+#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
- #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
- #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
- #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
--- a/patches/linux-2.6.18/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,382 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c
---- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:23.000000000 +0000
-@@ -20,70 +20,13 @@
- #include <asm/system.h>
-
- #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
--
--#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
--#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
--#define L2_ATTR (_PAGE_PRESENT)
--
--#define LEVEL0_SIZE (1UL << 12UL)
--
--#ifndef CONFIG_X86_PAE
--#define LEVEL1_SIZE (1UL << 22UL)
--static u32 pgtable_level1[1024] PAGE_ALIGNED;
--
--static void identity_map_page(unsigned long address)
--{
-- unsigned long level1_index, level2_index;
-- u32 *pgtable_level2;
--
-- /* Find the current page table */
-- pgtable_level2 = __va(read_cr3());
--
-- /* Find the indexes of the physical address to identity map */
-- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-- level2_index = address / LEVEL1_SIZE;
--
-- /* Identity map the page table entry */
-- pgtable_level1[level1_index] = address | L0_ATTR;
-- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
--
-- /* Flush the tlb so the new mapping takes effect.
-- * Global tlb entries are not flushed but that is not an issue.
-- */
-- load_cr3(pgtable_level2);
--}
--
--#else
--#define LEVEL1_SIZE (1UL << 21UL)
--#define LEVEL2_SIZE (1UL << 30UL)
--static u64 pgtable_level1[512] PAGE_ALIGNED;
--static u64 pgtable_level2[512] PAGE_ALIGNED;
--
--static void identity_map_page(unsigned long address)
--{
-- unsigned long level1_index, level2_index, level3_index;
-- u64 *pgtable_level3;
--
-- /* Find the current page table */
-- pgtable_level3 = __va(read_cr3());
--
-- /* Find the indexes of the physical address to identity map */
-- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
-- level3_index = address / LEVEL2_SIZE;
--
-- /* Identity map the page table entry */
-- pgtable_level1[level1_index] = address | L0_ATTR;
-- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-- set_64bit(&pgtable_level3[level3_index],
-- __pa(pgtable_level2) | L2_ATTR);
--
-- /* Flush the tlb so the new mapping takes effect.
-- * Global tlb entries are not flushed but that is not an issue.
-- */
-- load_cr3(pgtable_level3);
--}
-+static u32 kexec_pgd[1024] PAGE_ALIGNED;
-+#ifdef CONFIG_X86_PAE
-+static u32 kexec_pmd0[1024] PAGE_ALIGNED;
-+static u32 kexec_pmd1[1024] PAGE_ALIGNED;
- #endif
-+static u32 kexec_pte0[1024] PAGE_ALIGNED;
-+static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
- static void set_idt(void *newidt, __u16 limit)
- {
-@@ -127,16 +70,6 @@ static void load_segments(void)
- #undef __STR
- }
-
--typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
-- unsigned long indirection_page,
-- unsigned long reboot_code_buffer,
-- unsigned long start_address,
-- unsigned int has_pae) ATTRIB_NORET;
--
--extern const unsigned char relocate_new_kernel[];
--extern void relocate_new_kernel_end(void);
--extern const unsigned int relocate_new_kernel_size;
--
- /*
- * A architecture hook called to validate the
- * proposed image and prepare the control pages
-@@ -169,25 +102,29 @@ void machine_kexec_cleanup(struct kimage
- */
- NORET_TYPE void machine_kexec(struct kimage *image)
- {
-- unsigned long page_list;
-- unsigned long reboot_code_buffer;
--
-- relocate_new_kernel_t rnk;
-+ unsigned long page_list[PAGES_NR];
-+ void *control_page;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
-- /* Compute some offsets */
-- reboot_code_buffer = page_to_pfn(image->control_code_page)
-- << PAGE_SHIFT;
-- page_list = image->head;
--
-- /* Set up an identity mapping for the reboot_code_buffer */
-- identity_map_page(reboot_code_buffer);
--
-- /* copy it out */
-- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
-- relocate_new_kernel_size);
-+ control_page = page_address(image->control_code_page);
-+ memcpy(control_page, relocate_kernel, PAGE_SIZE);
-+
-+ page_list[PA_CONTROL_PAGE] = __pa(control_page);
-+ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
-+ page_list[PA_PGD] = __pa(kexec_pgd);
-+ page_list[VA_PGD] = (unsigned long)kexec_pgd;
-+#ifdef CONFIG_X86_PAE
-+ page_list[PA_PMD_0] = __pa(kexec_pmd0);
-+ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-+ page_list[PA_PMD_1] = __pa(kexec_pmd1);
-+ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-+#endif
-+ page_list[PA_PTE_0] = __pa(kexec_pte0);
-+ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-+ page_list[PA_PTE_1] = __pa(kexec_pte1);
-+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
- /* The segment registers are funny things, they have both a
- * visible and an invisible part. Whenever the visible part is
-@@ -206,6 +143,6 @@ NORET_TYPE void machine_kexec(struct kim
- set_idt(phys_to_virt(0),0);
-
- /* now call it */
-- rnk = (relocate_new_kernel_t) reboot_code_buffer;
-- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
-+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-+ image->start, cpu_has_pae);
- }
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S
---- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100
-+++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:23.000000000 +0000
-@@ -7,16 +7,138 @@
- */
-
- #include <linux/linkage.h>
-+#include <asm/page.h>
-+#include <asm/kexec.h>
-+
-+/*
-+ * Must be relocatable PIC code callable as a C function
-+ */
-+
-+#define PTR(x) (x << 2)
-+#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-+#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
-+
-+ .text
-+ .align PAGE_ALIGNED
-+ .globl relocate_kernel
-+relocate_kernel:
-+ movl 8(%esp), %ebp /* list of pages */
-+
-+#ifdef CONFIG_X86_PAE
-+ /* map the control page at its virtual address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xc0000000, %eax
-+ shrl $27, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PMD_0)(%ebp), %edx
-+ orl $PAE_PGD_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PMD_0)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x3fe00000, %eax
-+ shrl $18, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_0)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_0)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x001ff000, %eax
-+ shrl $9, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ /* identity map the control page at its physical address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xc0000000, %eax
-+ shrl $27, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PMD_1)(%ebp), %edx
-+ orl $PAE_PGD_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PMD_1)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x3fe00000, %eax
-+ shrl $18, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_1)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_1)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x001ff000, %eax
-+ shrl $9, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+#else
-+ /* map the control page at its virtual address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xffc00000, %eax
-+ shrl $20, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_0)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_0)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x003ff000, %eax
-+ shrl $10, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ /* identity map the control page at its physical address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xffc00000, %eax
-+ shrl $20, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_1)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_1)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x003ff000, %eax
-+ shrl $10, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+#endif
-
-- /*
-- * Must be relocatable PIC code callable as a C function, that once
-- * it starts can not use the previous processes stack.
-- */
-- .globl relocate_new_kernel
- relocate_new_kernel:
- /* read the arguments and say goodbye to the stack */
- movl 4(%esp), %ebx /* page_list */
-- movl 8(%esp), %ebp /* reboot_code_buffer */
-+ movl 8(%esp), %ebp /* list of pages */
- movl 12(%esp), %edx /* start address */
- movl 16(%esp), %ecx /* cpu_has_pae */
-
-@@ -24,11 +146,26 @@ relocate_new_kernel:
- pushl $0
- popfl
-
-- /* set a new stack at the bottom of our page... */
-- lea 4096(%ebp), %esp
-+ /* get physical address of control page now */
-+ /* this is impossible after page table switch */
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
-+
-+ /* switch to new set of page tables */
-+ movl PTR(PA_PGD)(%ebp), %eax
-+ movl %eax, %cr3
-+
-+ /* setup a new stack at the end of the physical control page */
-+ lea 4096(%edi), %esp
-
-- /* store the parameters back on the stack */
-- pushl %edx /* store the start address */
-+ /* jump to identity mapped page */
-+ movl %edi, %eax
-+ addl $(identity_mapped - relocate_kernel), %eax
-+ pushl %eax
-+ ret
-+
-+identity_mapped:
-+ /* store the start address on the stack */
-+ pushl %edx
-
- /* Set cr0 to a known state:
- * 31 0 == Paging disabled
-@@ -113,8 +250,3 @@ relocate_new_kernel:
- xorl %edi, %edi
- xorl %ebp, %ebp
- ret
--relocate_new_kernel_end:
--
-- .globl relocate_new_kernel_size
--relocate_new_kernel_size:
-- .long relocate_new_kernel_end - relocate_new_kernel
-diff -pruN ../orig-linux-2.6.18/include/asm-i386/kexec.h ./include/asm-i386/kexec.h
---- ../orig-linux-2.6.18/include/asm-i386/kexec.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-i386/kexec.h 2007-01-12 16:03:23.000000000 +0000
-@@ -1,6 +1,26 @@
- #ifndef _I386_KEXEC_H
- #define _I386_KEXEC_H
-
-+#define PA_CONTROL_PAGE 0
-+#define VA_CONTROL_PAGE 1
-+#define PA_PGD 2
-+#define VA_PGD 3
-+#define PA_PTE_0 4
-+#define VA_PTE_0 5
-+#define PA_PTE_1 6
-+#define VA_PTE_1 7
-+#ifdef CONFIG_X86_PAE
-+#define PA_PMD_0 8
-+#define VA_PMD_0 9
-+#define PA_PMD_1 10
-+#define VA_PMD_1 11
-+#define PAGES_NR 12
-+#else
-+#define PAGES_NR 8
-+#endif
-+
-+#ifndef __ASSEMBLY__
-+
- #include <asm/fixmap.h>
- #include <asm/ptrace.h>
- #include <asm/string.h>
-@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
- newregs->eip = (unsigned long)current_text_addr();
- }
- }
-+asmlinkage NORET_TYPE void
-+relocate_kernel(unsigned long indirection_page,
-+ unsigned long control_page,
-+ unsigned long start_address,
-+ unsigned int has_pae) ATTRIB_NORET;
-+
-+#endif /* __ASSEMBLY__ */
-
- #endif /* _I386_KEXEC_H */
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
--- a/patches/linux-2.6.18/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,355 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c
---- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:03:49.000000000 +0000
-@@ -15,6 +15,15 @@
- #include <asm/mmu_context.h>
- #include <asm/io.h>
-
-+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-+static u64 kexec_pgd[512] PAGE_ALIGNED;
-+static u64 kexec_pud0[512] PAGE_ALIGNED;
-+static u64 kexec_pmd0[512] PAGE_ALIGNED;
-+static u64 kexec_pte0[512] PAGE_ALIGNED;
-+static u64 kexec_pud1[512] PAGE_ALIGNED;
-+static u64 kexec_pmd1[512] PAGE_ALIGNED;
-+static u64 kexec_pte1[512] PAGE_ALIGNED;
-+
- static void init_level2_page(pmd_t *level2p, unsigned long addr)
- {
- unsigned long end_addr;
-@@ -144,32 +153,19 @@ static void load_segments(void)
- );
- }
-
--typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
-- unsigned long control_code_buffer,
-- unsigned long start_address,
-- unsigned long pgtable) ATTRIB_NORET;
--
--extern const unsigned char relocate_new_kernel[];
--extern const unsigned long relocate_new_kernel_size;
--
- int machine_kexec_prepare(struct kimage *image)
- {
-- unsigned long start_pgtable, control_code_buffer;
-+ unsigned long start_pgtable;
- int result;
-
- /* Calculate the offsets */
- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-- control_code_buffer = start_pgtable + PAGE_SIZE;
-
- /* Setup the identity mapped 64bit page table */
- result = init_pgtable(image, start_pgtable);
- if (result)
- return result;
-
-- /* Place the code in the reboot code buffer */
-- memcpy(__va(control_code_buffer), relocate_new_kernel,
-- relocate_new_kernel_size);
--
- return 0;
- }
-
-@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage
- */
- NORET_TYPE void machine_kexec(struct kimage *image)
- {
-- unsigned long page_list;
-- unsigned long control_code_buffer;
-- unsigned long start_pgtable;
-- relocate_new_kernel_t rnk;
-+ unsigned long page_list[PAGES_NR];
-+ void *control_page;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
-- /* Calculate the offsets */
-- page_list = image->head;
-- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-- control_code_buffer = start_pgtable + PAGE_SIZE;
-+ control_page = page_address(image->control_code_page) + PAGE_SIZE;
-+ memcpy(control_page, relocate_kernel, PAGE_SIZE);
-
-- /* Set the low half of the page table to my identity mapped
-- * page table for kexec. Leave the high half pointing at the
-- * kernel pages. Don't bother to flush the global pages
-- * as that will happen when I fully switch to my identity mapped
-- * page table anyway.
-- */
-- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
-- __flush_tlb();
-+ page_list[PA_CONTROL_PAGE] = __pa(control_page);
-+ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
-+ page_list[PA_PGD] = __pa(kexec_pgd);
-+ page_list[VA_PGD] = (unsigned long)kexec_pgd;
-+ page_list[PA_PUD_0] = __pa(kexec_pud0);
-+ page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
-+ page_list[PA_PMD_0] = __pa(kexec_pmd0);
-+ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-+ page_list[PA_PTE_0] = __pa(kexec_pte0);
-+ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-+ page_list[PA_PUD_1] = __pa(kexec_pud1);
-+ page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
-+ page_list[PA_PMD_1] = __pa(kexec_pmd1);
-+ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-+ page_list[PA_PTE_1] = __pa(kexec_pte1);
-+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
-+ page_list[PA_TABLE_PAGE] =
-+ (unsigned long)__pa(page_address(image->control_code_page));
-
- /* The segment registers are funny things, they have both a
- * visible and an invisible part. Whenever the visible part is
-@@ -222,7 +224,8 @@ NORET_TYPE void machine_kexec(struct kim
- */
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
-+
- /* now call it */
-- rnk = (relocate_new_kernel_t) control_code_buffer;
-- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
-+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-+ image->start);
- }
-diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S
---- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100
-+++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:03:49.000000000 +0000
-@@ -7,31 +7,169 @@
- */
-
- #include <linux/linkage.h>
-+#include <asm/page.h>
-+#include <asm/kexec.h>
-
-- /*
-- * Must be relocatable PIC code callable as a C function, that once
-- * it starts can not use the previous processes stack.
-- */
-- .globl relocate_new_kernel
-+/*
-+ * Must be relocatable PIC code callable as a C function
-+ */
-+
-+#define PTR(x) (x << 3)
-+#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-+
-+ .text
-+ .align PAGE_ALIGNED
- .code64
-+ .globl relocate_kernel
-+relocate_kernel:
-+ /* %rdi indirection_page
-+ * %rsi page_list
-+ * %rdx start address
-+ */
-+
-+ /* map the control page at its virtual address */
-+
-+ movq $0x0000ff8000000000, %r10 /* mask */
-+ mov $(39 - 3), %cl /* bits to shift */
-+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PGD)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PUD_0)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PUD_0)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PMD_0)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PMD_0)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PTE_0)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PTE_0)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ /* identity map the control page at its physical address */
-+
-+ movq $0x0000ff8000000000, %r10 /* mask */
-+ mov $(39 - 3), %cl /* bits to shift */
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PGD)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PUD_1)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PUD_1)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PMD_1)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PMD_1)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PTE_1)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PTE_1)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
- relocate_new_kernel:
-- /* %rdi page_list
-- * %rsi reboot_code_buffer
-+ /* %rdi indirection_page
-+ * %rsi page_list
- * %rdx start address
-- * %rcx page_table
-- * %r8 arg5
-- * %r9 arg6
- */
-
- /* zero out flags, and disable interrupts */
- pushq $0
- popfq
-
-- /* set a new stack at the bottom of our page... */
-- lea 4096(%rsi), %rsp
-+ /* get physical address of control page now */
-+ /* this is impossible after page table switch */
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-+
-+ /* get physical address of page table now too */
-+ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
-+
-+ /* switch to new set of page tables */
-+ movq PTR(PA_PGD)(%rsi), %r9
-+ movq %r9, %cr3
-+
-+ /* setup a new stack at the end of the physical control page */
-+ lea 4096(%r8), %rsp
-+
-+ /* jump to identity mapped page */
-+ addq $(identity_mapped - relocate_kernel), %r8
-+ pushq %r8
-+ ret
-
-- /* store the parameters back on the stack */
-- pushq %rdx /* store the start address */
-+identity_mapped:
-+ /* store the start address on the stack */
-+ pushq %rdx
-
- /* Set cr0 to a known state:
- * 31 1 == Paging enabled
-@@ -136,8 +274,3 @@ relocate_new_kernel:
- xorq %r15, %r15
-
- ret
--relocate_new_kernel_end:
--
-- .globl relocate_new_kernel_size
--relocate_new_kernel_size:
-- .quad relocate_new_kernel_end - relocate_new_kernel
-diff -pruN ../orig-linux-2.6.18/include/asm-x86_64/kexec.h ./include/asm-x86_64/kexec.h
---- ../orig-linux-2.6.18/include/asm-x86_64/kexec.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-x86_64/kexec.h 2007-01-12 16:03:49.000000000 +0000
-@@ -1,6 +1,27 @@
- #ifndef _X86_64_KEXEC_H
- #define _X86_64_KEXEC_H
-
-+#define PA_CONTROL_PAGE 0
-+#define VA_CONTROL_PAGE 1
-+#define PA_PGD 2
-+#define VA_PGD 3
-+#define PA_PUD_0 4
-+#define VA_PUD_0 5
-+#define PA_PMD_0 6
-+#define VA_PMD_0 7
-+#define PA_PTE_0 8
-+#define VA_PTE_0 9
-+#define PA_PUD_1 10
-+#define VA_PUD_1 11
-+#define PA_PMD_1 12
-+#define VA_PMD_1 13
-+#define PA_PTE_1 14
-+#define VA_PTE_1 15
-+#define PA_TABLE_PAGE 16
-+#define PAGES_NR 17
-+
-+#ifndef __ASSEMBLY__
-+
- #include <linux/string.h>
-
- #include <asm/page.h>
-@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru
- newregs->rip = (unsigned long)current_text_addr();
- }
- }
-+
-+NORET_TYPE void
-+relocate_kernel(unsigned long indirection_page,
-+ unsigned long page_list,
-+ unsigned long start_address) ATTRIB_NORET;
-+
-+#endif /* __ASSEMBLY__ */
-+
- #endif /* _X86_64_KEXEC_H */
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch
--- a/patches/linux-2.6.18/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
---- ./arch/ia64/kernel/smp.c.orig 2007-05-02 19:00:01.000000000 +0900
-+++ ./arch/ia64/kernel/smp.c 2007-05-02 19:04:32.000000000 +0900
-@@ -328,10 +328,14 @@ int
- smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
- {
- struct call_data_struct data;
-- int cpus = num_online_cpus()-1;
-+ int cpus;
-
-- if (!cpus)
-+ spin_lock(&call_lock);
-+ cpus = num_online_cpus()-1;
-+ if (!cpus) {
-+ spin_unlock(&call_lock);
- return 0;
-+ }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-@@ -343,8 +347,6 @@ smp_call_function (void (*func) (void *i
- if (wait)
- atomic_set(&data.finished, 0);
-
-- spin_lock(&call_lock);
--
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
- send_IPI_allbutself(IPI_CALL_FUNC);
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch
--- a/patches/linux-2.6.18/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-commit c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4
-Author: Vivek Goyal <vgoyal@xxxxxxxxxx>
-Date: Wed Nov 8 17:44:41 2006 -0800
-
- [PATCH] i386: Force data segment to be 4K aligned
-
- o Currently there is no specific alignment restriction in linker script
- and in some cases it can be placed non 4K aligned addresses. This fails
- kexec which checks that segment to be loaded is page aligned.
-
- o I guess, it does not harm data segment to be 4K aligned.
-
- Signed-off-by: Vivek Goyal <vgoyal@xxxxxxxxxx>
- Signed-off-by: Andi Kleen <ak@xxxxxxx>
- Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
- Signed-off-by: Linus Torvalds <torvalds@xxxxxxxx>
-
-diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
-index adc1f23..c6f84a0 100644
---- a/arch/i386/kernel/vmlinux.lds.S
-+++ b/arch/i386/kernel/vmlinux.lds.S
-@@ -51,6 +51,7 @@ SECTIONS
- __tracedata_end = .;
-
- /* writeable */
-+ . = ALIGN(4096);
- .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
- *(.data)
- CONSTRUCTORS
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
--- a/patches/linux-2.6.18/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S
---- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000
-+++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:20.000000000 +0000
-@@ -17,6 +17,7 @@ PHDRS {
- text PT_LOAD FLAGS(5); /* R_E */
- data PT_LOAD FLAGS(7); /* RWE */
- user PT_LOAD FLAGS(7); /* RWE */
-+ data.init PT_LOAD FLAGS(7); /* RWE */
- note PT_NOTE FLAGS(4); /* R__ */
- }
- SECTIONS
-@@ -131,7 +132,7 @@ SECTIONS
- . = ALIGN(8192); /* init_task */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
-- } :data
-+ }:data.init
-
- . = ALIGN(4096);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/i386-mach-io-check-nmi.patch
--- a/patches/linux-2.6.18/i386-mach-io-check-nmi.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c
---- ../orig-linux-2.6.18/arch/i386/kernel/traps.c 2006-09-20 04:42:06.000000000 +0100
-+++ ./arch/i386/kernel/traps.c 2007-01-12 16:07:49.000000000 +0000
-@@ -642,18 +642,11 @@ static void mem_parity_error(unsigned ch
-
- static void io_check_error(unsigned char reason, struct pt_regs * regs)
- {
-- unsigned long i;
--
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
-- reason = (reason & 0xf) | 8;
-- outb(reason, 0x61);
-- i = 2000;
-- while (--i) udelay(1000);
-- reason &= ~8;
-- outb(reason, 0x61);
-+ clear_io_check_error(reason);
- }
-
- static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-diff -pruN ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h
---- ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h 2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-i386/mach-default/mach_traps.h 2007-01-12 16:07:49.000000000 +0000
-@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig
- outb(reason, 0x61);
- }
-
-+static inline void clear_io_check_error(unsigned char reason)
-+{
-+ unsigned long i;
-+
-+ reason = (reason & 0xf) | 8;
-+ outb(reason, 0x61);
-+ i = 2000;
-+ while (--i) udelay(1000);
-+ reason &= ~8;
-+ outb(reason, 0x61);
-+}
-+
- static inline unsigned char get_nmi_reason(void)
- {
- return inb(0x61);
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/linux-2.6.18-xen-375-748cd890ea7f
--- a/patches/linux-2.6.18/linux-2.6.18-xen-375-748cd890ea7f Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,294 +0,0 @@
-# HG changeset patch
-# User Keir Fraser <keir.fraser@xxxxxxxxxx>
-# Date 1199916724 0
-# Node ID 748cd890ea7f56752311e519e80eece9d850c01a
-# Parent fedc10fba9f1d5ec0c72dbcbca87e508222b4c48
-x86_64: Add TIF_RESTORE_SIGMASK (from upstream Linux)
-
-We need TIF_RESTORE_SIGMASK in order to support ppoll() and pselect()
-system calls. This patch originally came from Andi, and was based
-heavily on David Howells' implementation of same on i386. I fixed a
-typo which was causing do_signal() to use the wrong signal mask.
-
-Signed-off-by: David Woodhouse <dwmw2@xxxxxxxxxxxxx>
-Signed-off-by: Andi Kleen <ak@xxxxxxx>
-
-diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/ia32/ia32_signal.c
---- a/arch/x86_64/ia32/ia32_signal.c Tue Jan 08 09:55:29 2008 +0000
-+++ b/arch/x86_64/ia32/ia32_signal.c Wed Jan 09 22:12:04 2008 +0000
-@@ -113,25 +113,19 @@ int copy_siginfo_from_user32(siginfo_t *
- }
-
- asmlinkage long
--sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
-- struct pt_regs *regs)
--{
-- sigset_t saveset;
--
-+sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
-+{
- mask &= _BLOCKABLE;
- spin_lock_irq(&current->sighand->siglock);
-- saveset = current->blocked;
-+ current->saved_sigmask = current->blocked;
- siginitset(&current->blocked, mask);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
-- regs->rax = -EINTR;
-- while (1) {
-- current->state = TASK_INTERRUPTIBLE;
-- schedule();
-- if (do_signal(regs, &saveset))
-- return -EINTR;
-- }
-+ current->state = TASK_INTERRUPTIBLE;
-+ schedule();
-+ set_thread_flag(TIF_RESTORE_SIGMASK);
-+ return -ERESTARTNOHAND;
- }
-
- asmlinkage long
-@@ -508,11 +502,11 @@ int ia32_setup_frame(int sig, struct k_s
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
- #endif
-
-- return 1;
-+ return 0;
-
- give_sigsegv:
- force_sigsegv(sig, current);
-- return 0;
-+ return -EFAULT;
- }
-
- int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-@@ -595,7 +589,7 @@ int ia32_setup_rt_frame(int sig, struct
- regs->ss = __USER32_DS;
-
- set_fs(USER_DS);
-- regs->eflags &= ~TF_MASK;
-+ regs->eflags &= ~TF_MASK;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
-@@ -604,9 +598,9 @@ int ia32_setup_rt_frame(int sig, struct
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
- #endif
-
-- return 1;
-+ return 0;
-
- give_sigsegv:
- force_sigsegv(sig, current);
-- return 0;
--}
-+ return -EFAULT;
-+}
-diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/kernel/signal.c
---- a/arch/x86_64/kernel/signal.c Tue Jan 08 09:55:29 2008 +0000
-+++ b/arch/x86_64/kernel/signal.c Wed Jan 09 22:12:04 2008 +0000
-@@ -36,37 +36,6 @@ int ia32_setup_rt_frame(int sig, struct
- sigset_t *set, struct pt_regs * regs);
- int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs);
--
--asmlinkage long
--sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
--{
-- sigset_t saveset, newset;
--
-- /* XXX: Don't preclude handling different sized sigset_t's. */
-- if (sigsetsize != sizeof(sigset_t))
-- return -EINVAL;
--
-- if (copy_from_user(&newset, unewset, sizeof(newset)))
-- return -EFAULT;
-- sigdelsetmask(&newset, ~_BLOCKABLE);
--
-- spin_lock_irq(&current->sighand->siglock);
-- saveset = current->blocked;
-- current->blocked = newset;
-- recalc_sigpending();
-- spin_unlock_irq(&current->sighand->siglock);
--#ifdef DEBUG_SIG
-- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
-- saveset, newset, regs, regs->rip);
--#endif
-- regs->rax = -EINTR;
-- while (1) {
-- current->state = TASK_INTERRUPTIBLE;
-- schedule();
-- if (do_signal(regs, &saveset))
-- return -EINTR;
-- }
--}
-
- asmlinkage long
- sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-@@ -341,11 +310,11 @@ static int setup_rt_frame(int sig, struc
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
- #endif
-
-- return 1;
-+ return 0;
-
- give_sigsegv:
- force_sigsegv(sig, current);
-- return 0;
-+ return -EFAULT;
- }
-
- /*
-@@ -408,7 +377,7 @@ handle_signal(unsigned long sig, siginfo
- #endif
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
-
-- if (ret) {
-+ if (ret == 0) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
-@@ -425,11 +394,12 @@ handle_signal(unsigned long sig, siginfo
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
--int do_signal(struct pt_regs *regs, sigset_t *oldset)
-+static void do_signal(struct pt_regs *regs)
- {
- struct k_sigaction ka;
- siginfo_t info;
- int signr;
-+ sigset_t *oldset;
-
- /*
- * We want the common case to go fast, which
-@@ -438,9 +408,11 @@ int do_signal(struct pt_regs *regs, sigs
- * if so.
- */
- if (!user_mode(regs))
-- return 1;
--
-- if (!oldset)
-+ return;
-+
-+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
-+ oldset = &current->saved_sigmask;
-+ else
- oldset = &current->blocked;
-
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-@@ -454,30 +426,46 @@ int do_signal(struct pt_regs *regs, sigs
- set_debugreg(current->thread.debugreg7, 7);
-
- /* Whee! Actually deliver the signal. */
-- return handle_signal(signr, &info, &ka, oldset, regs);
-+ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
-+ /* a signal was successfully delivered; the saved
-+ * sigmask will have been stored in the signal frame,
-+ * and will be restored by sigreturn, so we can simply
-+ * clear the TIF_RESTORE_SIGMASK flag */
-+ clear_thread_flag(TIF_RESTORE_SIGMASK);
-+ }
-+ return;
- }
-
- /* Did we come from a system call? */
- if ((long)regs->orig_rax >= 0) {
- /* Restart the system call - no handlers present */
- long res = regs->rax;
-- if (res == -ERESTARTNOHAND ||
-- res == -ERESTARTSYS ||
-- res == -ERESTARTNOINTR) {
-+ switch (res) {
-+ case -ERESTARTNOHAND:
-+ case -ERESTARTSYS:
-+ case -ERESTARTNOINTR:
- regs->rax = regs->orig_rax;
- regs->rip -= 2;
-- }
-- if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
-+ break;
-+ case -ERESTART_RESTARTBLOCK:
- regs->rax = test_thread_flag(TIF_IA32) ?
- __NR_ia32_restart_syscall :
- __NR_restart_syscall;
- regs->rip -= 2;
-- }
-- }
-- return 0;
--}
--
---void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
-+ break;
-+ }
-+ }
-+
-+ /* if there's no signal to deliver, we just put the saved sigmask
-+ back. */
-+ if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-+ clear_thread_flag(TIF_RESTORE_SIGMASK);
-+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-+ }
-+}
-+
-+void
-+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
- {
- #ifdef DEBUG_SIG
- printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
-@@ -491,8 +479,8 @@ void do_notify_resume(struct pt_regs *re
- }
-
- /* deal with pending signal delivery */
-- if (thread_info_flags & _TIF_SIGPENDING)
-- do_signal(regs,oldset);
-+ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
-+ do_signal(regs);
- }
-
- void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/signal.h
---- a/include/asm-x86_64/signal.h Tue Jan 08 09:55:29 2008 +0000
-+++ b/include/asm-x86_64/signal.h Wed Jan 09 22:12:04 2008 +0000
-@@ -22,10 +22,6 @@ typedef struct {
- typedef struct {
- unsigned long sig[_NSIG_WORDS];
- } sigset_t;
--
--
--struct pt_regs;
--asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-
-
- #else
-diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/thread_info.h
---- a/include/asm-x86_64/thread_info.h Tue Jan 08 09:55:29 2008 +0000
-+++ b/include/asm-x86_64/thread_info.h Wed Jan 09 22:12:04 2008 +0000
-@@ -114,6 +114,7 @@ static inline struct thread_info *stack_
- #define TIF_IRET 5 /* force IRET */
- #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
- #define TIF_SECCOMP 8 /* secure computing */
-+#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
- /* 16 free */
- #define TIF_IA32 17 /* 32bit process */
- #define TIF_FORK 18 /* ret_from_fork */
-@@ -128,6 +129,7 @@ static inline struct thread_info *stack_
- #define _TIF_IRET (1<<TIF_IRET)
- #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
- #define _TIF_SECCOMP (1<<TIF_SECCOMP)
-+#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
- #define _TIF_IA32 (1<<TIF_IA32)
- #define _TIF_FORK (1<<TIF_FORK)
- #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
-diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/unistd.h
---- a/include/asm-x86_64/unistd.h Tue Jan 08 09:55:29 2008 +0000
-+++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000
-@@ -658,6 +658,7 @@ do { \
- #define __ARCH_WANT_SYS_SIGPENDING
- #define __ARCH_WANT_SYS_SIGPROCMASK
- #define __ARCH_WANT_SYS_RT_SIGACTION
-+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
- #define __ARCH_WANT_SYS_TIME
- #define __ARCH_WANT_COMPAT_SYS_TIME
-
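Illustrative aside, not part of the changeset: the do_signal()/do_notify_resume() rework above leans on the standard thread_info flag pattern, where each TIF_* constant is a bit index, each _TIF_* macro is the matching mask, and the resume path tests a union of masks. A minimal standalone sketch of that pattern in plain C (TIF_RESTORE_SIGMASK = 9 as in the hunk; the TIF_SIGPENDING value is assumed, and this is ordinary userspace code, not the kernel's):

    #include <stdio.h>

    #define TIF_SIGPENDING        2                       /* assumed bit index */
    #define TIF_RESTORE_SIGMASK   9                       /* as added above    */
    #define _TIF_SIGPENDING       (1 << TIF_SIGPENDING)
    #define _TIF_RESTORE_SIGMASK  (1 << TIF_RESTORE_SIGMASK)

    int main(void)
    {
            unsigned int flags = _TIF_RESTORE_SIGMASK;    /* only the saved-sigmask bit set */

            /* the widened test in do_notify_resume(): enter do_signal() for either bit */
            if (flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                    printf("would call do_signal()\n");

            /* after a successful delivery the flag is simply cleared */
            flags &= ~_TIF_RESTORE_SIGMASK;
            printf("flags now 0x%x\n", flags);
            return 0;
    }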
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/linux-2.6.18-xen-376-353802ec1caf
--- a/patches/linux-2.6.18/linux-2.6.18-xen-376-353802ec1caf Thu Jan 17
14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-# HG changeset patch
-# User Keir Fraser <keir.fraser@xxxxxxxxxx>
-# Date 1199916752 0
-# Node ID 353802ec1caf399143e48713a04cedd37a106347
-# Parent 748cd890ea7f56752311e519e80eece9d850c01a
-x86_64: Add ppoll/pselect syscalls (from upstream Linux)
-
-Needed TIF_RESTORE_SIGMASK first
-
-Signed-off-by: Andi Kleen <ak@xxxxxxx>
-
-diff -r 748cd890ea7f -r 353802ec1caf arch/x86_64/ia32/ia32entry.S
---- a/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:04 2008 +0000
-+++ b/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:32 2008 +0000
-@@ -703,8 +703,8 @@ ia32_sys_call_table:
- .quad sys_readlinkat /* 305 */
- .quad sys_fchmodat
- .quad sys_faccessat
-- .quad quiet_ni_syscall /* pselect6 for now */
-- .quad quiet_ni_syscall /* ppoll for now */
-+ .quad compat_sys_pselect6
-+ .quad compat_sys_ppoll
- .quad sys_unshare /* 310 */
- .quad compat_sys_set_robust_list
- .quad compat_sys_get_robust_list
-diff -r 748cd890ea7f -r 353802ec1caf include/asm-x86_64/unistd.h
---- a/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000
-+++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:32 2008 +0000
-@@ -600,9 +600,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat)
- #define __NR_faccessat 269
- __SYSCALL(__NR_faccessat, sys_faccessat)
- #define __NR_pselect6 270
--__SYSCALL(__NR_pselect6, sys_ni_syscall) /* for now */
-+__SYSCALL(__NR_pselect6, sys_pselect6)
- #define __NR_ppoll 271
--__SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */
-+__SYSCALL(__NR_ppoll, sys_ppoll)
- #define __NR_unshare 272
- __SYSCALL(__NR_unshare, sys_unshare)
- #define __NR_set_robust_list 273
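Illustrative aside, not part of the changeset: the hunk above simply repoints two syscall-table slots from a "not implemented" stub to real handlers. The table-of-function-pointers idea, reduced to plain C (handler names are made up; the range initializer is a GCC extension, as used by the kernel itself):

    #include <stdio.h>

    typedef long (*sys_call_ptr_t)(void);

    static long ni_syscall(void)  { return -38; }   /* -ENOSYS                */
    static long my_pselect6(void) { return 0;   }   /* stand-in real handlers */
    static long my_ppoll(void)    { return 0;   }

    /* slots 270/271 start out "not implemented", exactly like the old table */
    static sys_call_ptr_t table[272] = { [0 ... 271] = ni_syscall };

    int main(void)
    {
            printf("before wiring: %ld\n", table[270]());  /* -38 */
            table[270] = my_pselect6;                      /* what the patch does */
            table[271] = my_ppoll;
            printf("after wiring:  %ld\n", table[270]());  /* 0 */
            return 0;
    }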
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
--- a/patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c
./arch/i386/kernel/machine_kexec.c
---- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2007-01-12
16:03:23.000000000 +0000
-+++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:37.000000000 +0000
-@@ -28,48 +28,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
- static u32 kexec_pte0[1024] PAGE_ALIGNED;
- static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
--static void set_idt(void *newidt, __u16 limit)
--{
-- struct Xgt_desc_struct curidt;
--
-- /* ia32 supports unaliged loads & stores */
-- curidt.size = limit;
-- curidt.address = (unsigned long)newidt;
--
-- load_idt(&curidt);
--};
--
--
--static void set_gdt(void *newgdt, __u16 limit)
--{
-- struct Xgt_desc_struct curgdt;
--
-- /* ia32 supports unaligned loads & stores */
-- curgdt.size = limit;
-- curgdt.address = (unsigned long)newgdt;
--
-- load_gdt(&curgdt);
--};
--
--static void load_segments(void)
--{
--#define __STR(X) #X
--#define STR(X) __STR(X)
--
-- __asm__ __volatile__ (
-- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
-- "\t1:\n"
-- "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
-- "\tmovl %%eax,%%ds\n"
-- "\tmovl %%eax,%%es\n"
-- "\tmovl %%eax,%%fs\n"
-- "\tmovl %%eax,%%gs\n"
-- "\tmovl %%eax,%%ss\n"
-- ::: "eax", "memory");
--#undef STR
--#undef __STR
--}
--
- /*
- * A architecture hook called to validate the
- * proposed image and prepare the control pages
-@@ -126,23 +84,6 @@ NORET_TYPE void machine_kexec(struct kim
- page_list[PA_PTE_1] = __pa(kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
-- /* The segment registers are funny things, they have both a
-- * visible and an invisible part. Whenever the visible part is
-- * set to a specific selector, the invisible part is loaded
-- * with from a table in memory. At no other time is the
-- * descriptor table in memory accessed.
-- *
-- * I take advantage of this here by force loading the
-- * segments, before I zap the gdt with an invalid value.
-- */
-- load_segments();
-- /* The gdt & idt are now invalid.
-- * If you want to load them you must set up your own idt & gdt.
-- */
-- set_gdt(phys_to_virt(0),0);
-- set_idt(phys_to_virt(0),0);
--
-- /* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start, cpu_has_pae);
- }
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S
./arch/i386/kernel/relocate_kernel.S
---- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2007-01-12
16:03:23.000000000 +0000
-+++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:37.000000000
+0000
-@@ -154,14 +154,45 @@ relocate_new_kernel:
- movl PTR(PA_PGD)(%ebp), %eax
- movl %eax, %cr3
-
-+ /* setup idt */
-+ movl %edi, %eax
-+ addl $(idt_48 - relocate_kernel), %eax
-+ lidtl (%eax)
-+
-+ /* setup gdt */
-+ movl %edi, %eax
-+ addl $(gdt - relocate_kernel), %eax
-+ movl %edi, %esi
-+ addl $((gdt_48 - relocate_kernel) + 2), %esi
-+ movl %eax, (%esi)
-+
-+ movl %edi, %eax
-+ addl $(gdt_48 - relocate_kernel), %eax
-+ lgdtl (%eax)
-+
-+ /* setup data segment registers */
-+ mov $(gdt_ds - gdt), %eax
-+ mov %eax, %ds
-+ mov %eax, %es
-+ mov %eax, %fs
-+ mov %eax, %gs
-+ mov %eax, %ss
-+
- /* setup a new stack at the end of the physical control page */
- lea 4096(%edi), %esp
-
-- /* jump to identity mapped page */
-- movl %edi, %eax
-- addl $(identity_mapped - relocate_kernel), %eax
-- pushl %eax
-- ret
-+ /* load new code segment and jump to identity mapped page */
-+ movl %edi, %esi
-+ xorl %eax, %eax
-+ pushl %eax
-+ pushl %esi
-+ pushl %eax
-+ movl $(gdt_cs - gdt), %eax
-+ pushl %eax
-+ movl %edi, %eax
-+ addl $(identity_mapped - relocate_kernel),%eax
-+ pushl %eax
-+ iretl
-
- identity_mapped:
- /* store the start address on the stack */
-@@ -250,3 +281,20 @@ identity_mapped:
- xorl %edi, %edi
- xorl %ebp, %ebp
- ret
-+
-+ .align 16
-+gdt:
-+ .quad 0x0000000000000000 /* NULL descriptor */
-+gdt_cs:
-+ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
-+gdt_ds:
-+ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
-+gdt_end:
-+
-+gdt_48:
-+ .word gdt_end - gdt - 1 /* limit */
-+ .long 0 /* base - filled in by code above */
-+
-+idt_48:
-+ .word 0 /* limit */
-+ .long 0 /* base */
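Illustrative aside, not part of the changeset: the magic quadwords in the gdt block above (0x00cf9a000000ffff and 0x00cf92000000ffff) pack base, limit and access bits into one 8-byte descriptor. A small standalone decoder for sanity-checking such constants, assuming the classic x86 segment-descriptor layout:

    #include <stdio.h>
    #include <stdint.h>

    static void decode(uint64_t d)
    {
            uint32_t limit  = (d & 0xffff) | ((d >> 32) & 0xf0000);
            uint32_t base   = ((d >> 16) & 0xffffff) | (((d >> 56) & 0xff) << 24);
            uint8_t  access = (d >> 40) & 0xff;
            int g = (d >> 55) & 1;                /* granularity: limit counts 4K pages */

            printf("base=0x%08x limit=0x%05x%s access=0x%02x (%s segment)\n",
                   base, limit, g ? " x4K" : "", access,
                   (access & 0x08) ? "code" : "data");
    }

    int main(void)
    {
            decode(0x00cf9a000000ffffULL);        /* gdt_cs: flat 4GB code at 0 */
            decode(0x00cf92000000ffffULL);        /* gdt_ds: flat 4GB data at 0 */
            return 0;
    }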
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
---
a/patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c
./arch/x86_64/kernel/machine_kexec.c
---- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2007-01-12
16:03:49.000000000 +0000
-+++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:04:02.000000000
+0000
-@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i
- return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
- }
-
--static void set_idt(void *newidt, u16 limit)
--{
-- struct desc_ptr curidt;
--
-- /* x86-64 supports unaliged loads & stores */
-- curidt.size = limit;
-- curidt.address = (unsigned long)newidt;
--
-- __asm__ __volatile__ (
-- "lidtq %0\n"
-- : : "m" (curidt)
-- );
--};
--
--
--static void set_gdt(void *newgdt, u16 limit)
--{
-- struct desc_ptr curgdt;
--
-- /* x86-64 supports unaligned loads & stores */
-- curgdt.size = limit;
-- curgdt.address = (unsigned long)newgdt;
--
-- __asm__ __volatile__ (
-- "lgdtq %0\n"
-- : : "m" (curgdt)
-- );
--};
--
--static void load_segments(void)
--{
-- __asm__ __volatile__ (
-- "\tmovl %0,%%ds\n"
-- "\tmovl %0,%%es\n"
-- "\tmovl %0,%%ss\n"
-- "\tmovl %0,%%fs\n"
-- "\tmovl %0,%%gs\n"
-- : : "a" (__KERNEL_DS) : "memory"
-- );
--}
--
- int machine_kexec_prepare(struct kimage *image)
- {
- unsigned long start_pgtable;
-@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim
- page_list[PA_TABLE_PAGE] =
- (unsigned long)__pa(page_address(image->control_code_page));
-
-- /* The segment registers are funny things, they have both a
-- * visible and an invisible part. Whenever the visible part is
-- * set to a specific selector, the invisible part is loaded
-- * with from a table in memory. At no other time is the
-- * descriptor table in memory accessed.
-- *
-- * I take advantage of this here by force loading the
-- * segments, before I zap the gdt with an invalid value.
-- */
-- load_segments();
-- /* The gdt & idt are now invalid.
-- * If you want to load them you must set up your own idt & gdt.
-- */
-- set_gdt(phys_to_virt(0),0);
-- set_idt(phys_to_virt(0),0);
--
-- /* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
- }
-diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S
./arch/x86_64/kernel/relocate_kernel.S
---- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2007-01-12
16:03:49.000000000 +0000
-+++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:04:02.000000000
+0000
-@@ -159,13 +159,39 @@ relocate_new_kernel:
- movq PTR(PA_PGD)(%rsi), %r9
- movq %r9, %cr3
-
-+ /* setup idt */
-+ movq %r8, %rax
-+ addq $(idt_80 - relocate_kernel), %rax
-+ lidtq (%rax)
-+
-+ /* setup gdt */
-+ movq %r8, %rax
-+ addq $(gdt - relocate_kernel), %rax
-+ movq %r8, %r9
-+ addq $((gdt_80 - relocate_kernel) + 2), %r9
-+ movq %rax, (%r9)
-+
-+ movq %r8, %rax
-+ addq $(gdt_80 - relocate_kernel), %rax
-+ lgdtq (%rax)
-+
-+ /* setup data segment registers */
-+ xorl %eax, %eax
-+ movl %eax, %ds
-+ movl %eax, %es
-+ movl %eax, %fs
-+ movl %eax, %gs
-+ movl %eax, %ss
-+
- /* setup a new stack at the end of the physical control page */
- lea 4096(%r8), %rsp
-
-- /* jump to identity mapped page */
-- addq $(identity_mapped - relocate_kernel), %r8
-- pushq %r8
-- ret
-+ /* load new code segment and jump to identity mapped page */
-+ movq %r8, %rax
-+ addq $(identity_mapped - relocate_kernel), %rax
-+ pushq $(gdt_cs - gdt)
-+ pushq %rax
-+ lretq
-
- identity_mapped:
- /* store the start address on the stack */
-@@ -272,5 +298,19 @@ identity_mapped:
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
--
- ret
-+
-+ .align 16
-+gdt:
-+ .quad 0x0000000000000000 /* NULL descriptor */
-+gdt_cs:
-+ .quad 0x00af9a000000ffff
-+gdt_end:
-+
-+gdt_80:
-+ .word gdt_end - gdt - 1 /* limit */
-+ .quad 0 /* base - filled in by code above */
-+
-+idt_80:
-+ .word 0 /* limit */
-+ .quad 0 /* base */
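Illustrative aside, not part of the changeset: both relocate_kernel variants address labels inside the copied control page as new_base + (label - start), which is why every lidt/lgdt operand is built from an addl/addq of a label difference. The same arithmetic in plain C on a toy blob (names and offsets purely illustrative):

    #include <stdio.h>
    #include <string.h>

    static const char blob[] = "....GDT_HERE....";  /* stand-in for the code page   */
    #define LABEL_OFFSET 4                          /* like (gdt - relocate_kernel) */

    int main(void)
    {
            char page[sizeof(blob)];
            memcpy(page, blob, sizeof(blob));       /* the control-page copy        */

            /* relocated address of the label = new base + fixed offset */
            const char *label = page + LABEL_OFFSET;
            printf("label after relocation: %.8s\n", label);
            return 0;
    }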
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/net-csum.patch
--- a/patches/linux-2.6.18/net-csum.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c
./net/ipv4/netfilter/ip_nat_proto_tcp.c
---- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2007-01-12 16:08:53.000000000
+0000
-@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb,
- if (hdrsize < sizeof(*hdr))
- return 1;
-
-- hdr->check = ip_nat_cheat_check(~oldip, newip,
-+#ifdef CONFIG_XEN
-+ if ((*pskb)->proto_csum_blank)
-+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-+ else
-+#endif
-+ hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(oldport ^ 0xFFFF,
- newport,
- hdr->check));
-diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c
./net/ipv4/netfilter/ip_nat_proto_udp.c
---- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2007-01-12 16:08:53.000000000
+0000
-@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb,
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
-- if (hdr->check) /* 0 is a special case meaning no checksum */
-- hdr->check = ip_nat_cheat_check(~oldip, newip,
-+ if (hdr->check) { /* 0 is a special case meaning no checksum */
-+#ifdef CONFIG_XEN
-+ if ((*pskb)->proto_csum_blank)
-+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-+ else
-+#endif
-+ hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(*portptr ^ 0xFFFF,
- newport,
- hdr->check));
-+ }
- *portptr = newport;
- return 1;
- }
-diff -pruN ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c
./net/ipv4/xfrm4_output.c
---- ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./net/ipv4/xfrm4_output.c 2007-01-12 17:38:34.000000000 +0000
-@@ -18,6 +18,8 @@
- #include <net/xfrm.h>
- #include <net/icmp.h>
-
-+extern int skb_checksum_setup(struct sk_buff *skb);
-+
- static int xfrm4_tunnel_check_size(struct sk_buff *skb)
- {
- int mtu, ret = 0;
-@@ -48,6 +50,10 @@ static int xfrm4_output_one(struct sk_bu
- struct xfrm_state *x = dst->xfrm;
- int err;
-
-+ err = skb_checksum_setup(skb);
-+ if (err)
-+ goto error_nolock;
-+
- if (skb->ip_summed == CHECKSUM_HW) {
- err = skb_checksum_help(skb, 0);
- if (err)
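Illustrative aside, not part of the changeset: the net-csum hunks adjust an existing 16-bit ones'-complement checksum in place rather than recomputing it over the whole packet. The general incremental-update technique (per RFC 1624; this sketch is not the kernel's ip_nat_cheat_check() itself) looks like this in standalone C:

    #include <stdio.h>
    #include <stdint.h>

    /* ones'-complement sum of n 16-bit words, folded and inverted */
    static uint16_t csum(const uint16_t *p, int n)
    {
            uint32_t s = 0;
            while (n--)
                    s += *p++;
            while (s >> 16)
                    s = (s & 0xffff) + (s >> 16);
            return (uint16_t)~s;
    }

    /* fold the change of one 16-bit word into an existing checksum */
    static uint16_t csum_update(uint16_t check, uint16_t oldw, uint16_t neww)
    {
            uint32_t s = (uint16_t)~check;
            s += (uint16_t)~oldw;                 /* adding ~old == subtracting old */
            s += neww;
            while (s >> 16)
                    s = (s & 0xffff) + (s >> 16);
            return (uint16_t)~s;
    }

    int main(void)
    {
            uint16_t data[3] = { 0x1234, 0x5678, 0x9abc };
            uint16_t c = csum(data, 3);

            uint16_t oldw = data[1];
            data[1] = 0x1111;                     /* NAT-style rewrite of one field */

            printf("incremental=0x%04x full=0x%04x\n",
                   csum_update(c, oldw, data[1]), csum(data, 3));
            return 0;
    }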
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/net-gso-5-rcv-mss.patch
--- a/patches/linux-2.6.18/net-gso-5-rcv-mss.patch Thu Jan 17 14:35:38
2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/net/ipv4/tcp_input.c ./net/ipv4/tcp_input.c
---- ../orig-linux-2.6.18/net/ipv4/tcp_input.c 2006-09-20 04:42:06.000000000
+0100
-+++ ./net/ipv4/tcp_input.c 2007-01-12 18:10:16.000000000 +0000
-@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s
- /* skb->len may jitter because of SACKs, even if peer
- * sends good full-sized frames.
- */
-- len = skb->len;
-+ len = skb_shinfo(skb)->gso_size ?: skb->len;
- if (len >= icsk->icsk_ack.rcv_mss) {
- icsk->icsk_ack.rcv_mss = len;
- } else {
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/net-gso-6-linear-segmentation.patch
--- a/patches/linux-2.6.18/net-gso-6-linear-segmentation.patch Thu Jan 17
14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/net/core/skbuff.c ./net/core/skbuff.c
---- ../orig-linux-2.6.18/net/core/skbuff.c 2006-09-20 04:42:06.000000000
+0100
-+++ ./net/core/skbuff.c 2007-01-12 18:10:37.000000000 +0000
-@@ -1945,7 +1945,7 @@ struct sk_buff *skb_segment(struct sk_bu
- do {
- struct sk_buff *nskb;
- skb_frag_t *frag;
-- int hsize, nsize;
-+ int hsize;
- int k;
- int size;
-
-@@ -1956,11 +1956,10 @@ struct sk_buff *skb_segment(struct sk_bu
- hsize = skb_headlen(skb) - offset;
- if (hsize < 0)
- hsize = 0;
-- nsize = hsize + doffset;
-- if (nsize > len + doffset || !sg)
-- nsize = len + doffset;
-+ if (hsize > len || !sg)
-+ hsize = len;
-
-- nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
-+ nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
- if (unlikely(!nskb))
- goto err;
-
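Illustrative aside, not part of the changeset: the skb_segment() hunk boils down to copying min(linear bytes remaining, segment payload) into each new segment's head instead of the separately computed nsize. A toy per-segment loop over a flat buffer shows the clamp (not skb code; the values are arbitrary):

    #include <stdio.h>

    int main(void)
    {
            int headlen = 10;                     /* bytes in the "linear" area */
            int total   = 25;                     /* payload to segment         */
            int mss     = 7;                      /* bytes per segment          */

            for (int offset = 0; offset < total; offset += mss) {
                    int len = total - offset < mss ? total - offset : mss;

                    int hsize = headlen - offset; /* linear bytes still unconsumed */
                    if (hsize < 0)
                            hsize = 0;
                    if (hsize > len)              /* the clamp added by the hunk   */
                            hsize = len;

                    printf("segment@%2d: len=%d head=%d frag=%d\n",
                           offset, len, hsize, len - hsize);
            }
            return 0;
    }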
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/pmd-shared.patch
--- a/patches/linux-2.6.18/pmd-shared.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pageattr.c
./arch/i386/mm/pageattr.c
---- ../orig-linux-2.6.18/arch/i386/mm/pageattr.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./arch/i386/mm/pageattr.c 2007-01-12 18:11:06.000000000 +0000
-@@ -84,7 +84,7 @@ static void set_pmd_pte(pte_t *kpte, uns
- unsigned long flags;
-
- set_pte_atomic(kpte, pte); /* change init_mm */
-- if (PTRS_PER_PMD > 1)
-+ if (HAVE_SHARED_KERNEL_PMD)
- return;
-
- spin_lock_irqsave(&pgd_lock, flags);
-diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c
---- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./arch/i386/mm/pgtable.c 2007-01-12 18:11:06.000000000 +0000
-@@ -214,9 +214,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
- spin_lock_irqsave(&pgd_lock, flags);
- }
-
-- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-- swapper_pg_dir + USER_PTRS_PER_PGD,
-- KERNEL_PGD_PTRS);
-+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD)
-+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-+ swapper_pg_dir + USER_PTRS_PER_PGD,
-+ KERNEL_PGD_PTRS);
- if (PTRS_PER_PMD > 1)
- return;
-
-@@ -248,6 +249,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
- goto out_oom;
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- }
-+
-+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ unsigned long flags;
-+
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-+ if (!pmd)
-+ goto out_oom;
-+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
-+ }
-+
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
-+ pgd_t *kpgd = pgd_offset_k(v);
-+ pud_t *kpud = pud_offset(kpgd, v);
-+ pmd_t *kpmd = pmd_offset(kpud, v);
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ memcpy(pmd, kpmd, PAGE_SIZE);
-+ }
-+ pgd_list_add(pgd);
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ }
-+
- return pgd;
-
- out_oom:
-@@ -262,9 +287,23 @@ void pgd_free(pgd_t *pgd)
- int i;
-
- /* in the PAE case user pgd entries are overwritten before usage */
-- if (PTRS_PER_PMD > 1)
-- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
-- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-+ if (PTRS_PER_PMD > 1) {
-+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ kmem_cache_free(pmd_cache, pmd);
-+ }
-+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ unsigned long flags;
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ pgd_list_del(pgd);
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-+ kmem_cache_free(pmd_cache, pmd);
-+ }
-+ }
-+ }
- /* in the non-PAE case, free_pgtables() clears user pgd entries */
- kmem_cache_free(pgd_cache, pgd);
- }
-diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h
./include/asm-i386/pgtable-2level-defs.h
---- ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h
2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-i386/pgtable-2level-defs.h 2007-01-12 18:11:06.000000000
+0000
-@@ -1,6 +1,8 @@
- #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
- #define _I386_PGTABLE_2LEVEL_DEFS_H
-
-+#define HAVE_SHARED_KERNEL_PMD 0
-+
- /*
- * traditional i386 two-level paging structure:
- */
-diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h
./include/asm-i386/pgtable-3level-defs.h
---- ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h
2006-09-20 04:42:06.000000000 +0100
-+++ ./include/asm-i386/pgtable-3level-defs.h 2007-01-12 18:11:06.000000000
+0000
-@@ -1,6 +1,8 @@
- #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
- #define _I386_PGTABLE_3LEVEL_DEFS_H
-
-+#define HAVE_SHARED_KERNEL_PMD 1
-+
- /*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
--- a/patches/linux-2.6.18/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/entry.S
./arch/i386/kernel/entry.S
---- ../orig-linux-2.6.18/arch/i386/kernel/entry.S 2006-09-20
04:42:06.000000000 +0100
-+++ ./arch/i386/kernel/entry.S 2007-01-12 18:12:31.000000000 +0000
-@@ -269,7 +269,7 @@ ENTRY(sysenter_entry)
- CFI_STARTPROC simple
- CFI_DEF_CFA esp, 0
- CFI_REGISTER esp, ebp
-- movl TSS_sysenter_esp0(%esp),%esp
-+ movl SYSENTER_stack_esp0(%esp),%esp
- sysenter_past_esp:
- /*
- * No need to follow this irqs on/off section: the syscall
-@@ -689,7 +689,7 @@ device_not_available_emulate:
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
-- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
-+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
-@@ -701,7 +701,7 @@ device_not_available_emulate:
- cmpw $__KERNEL_CS,4(%esp); \
- jne ok; \
- label: \
-- movl TSS_sysenter_esp0+offset(%esp),%esp; \
-+ movl SYSENTER_stack_esp0+offset(%esp),%esp; \
- pushfl; \
- pushl $__KERNEL_CS; \
- pushl $sysenter_past_esp
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/series
--- a/patches/linux-2.6.18/series Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
-linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
-git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
-linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
-blktap-aio-16_03_06.patch
-fix-ide-cd-pio-mode.patch
-i386-mach-io-check-nmi.patch
-net-csum.patch
-net-gso-5-rcv-mss.patch
-net-gso-6-linear-segmentation.patch
-pmd-shared.patch
-rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
-xen-hotplug.patch
-xenoprof-generic.patch
-x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
-x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
-git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
-x86-elfnote-as-preprocessor-macro.patch
-fixaddr-top.patch
-git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch
-softlockup-no-idle-hz.patch
-allow-i386-crash-kernels-to-handle-x86_64-dumps.patch
-allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch
-git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch
-linux-2.6.18-xen-375-748cd890ea7f
-linux-2.6.18-xen-376-353802ec1caf
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/softlockup-no-idle-hz.patch
--- a/patches/linux-2.6.18/softlockup-no-idle-hz.patch Thu Jan 17 14:35:38
2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/include/linux/sched.h ./include/linux/sched.h
---- ../orig-linux-2.6.18/include/linux/sched.h 2006-09-20 04:42:06.000000000
+0100
-+++ ./include/linux/sched.h 2007-02-07 01:10:24.000000000 +0000
-@@ -211,10 +211,15 @@ extern void update_process_times(int use
- extern void scheduler_tick(void);
-
- #ifdef CONFIG_DETECT_SOFTLOCKUP
-+extern unsigned long softlockup_get_next_event(void);
- extern void softlockup_tick(void);
- extern void spawn_softlockup_task(void);
- extern void touch_softlockup_watchdog(void);
- #else
-+static inline unsigned long softlockup_get_next_event(void)
-+{
-+ return MAX_JIFFY_OFFSET;
-+}
- static inline void softlockup_tick(void)
- {
- }
-diff -pruN ../orig-linux-2.6.18/kernel/softlockup.c ./kernel/softlockup.c
---- ../orig-linux-2.6.18/kernel/softlockup.c 2006-09-20 04:42:06.000000000
+0100
-+++ ./kernel/softlockup.c 2007-02-07 01:53:22.000000000 +0000
-@@ -40,6 +40,19 @@ void touch_softlockup_watchdog(void)
- }
- EXPORT_SYMBOL(touch_softlockup_watchdog);
-
-+unsigned long softlockup_get_next_event(void)
-+{
-+ int this_cpu = smp_processor_id();
-+ unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
-+
-+ if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
-+ did_panic ||
-+ !per_cpu(watchdog_task, this_cpu))
-+ return MAX_JIFFY_OFFSET;
-+
-+ return max_t(long, 0, touch_timestamp + HZ - jiffies);
-+}
-+
- /*
- * This callback runs from the timer interrupt, and checks
- * whether the watchdog thread has hung or not:
-diff -pruN ../orig-linux-2.6.18/kernel/timer.c ./kernel/timer.c
---- ../orig-linux-2.6.18/kernel/timer.c 2006-09-20 04:42:06.000000000
+0100
-+++ ./kernel/timer.c 2007-02-07 01:29:34.000000000 +0000
-@@ -485,7 +485,9 @@ unsigned long next_timer_interrupt(void)
- if (hr_expires < 3)
- return hr_expires + jiffies;
- }
-- hr_expires += jiffies;
-+ hr_expires = min_t(unsigned long,
-+ softlockup_get_next_event(),
-+ hr_expires) + jiffies;
-
- base = __get_cpu_var(tvec_bases);
- spin_lock(&base->lock);
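Illustrative aside, not part of the changeset: the softlockup hunks cap the next timer event so the watchdog still runs within roughly HZ jiffies of the last touch, using a signed max so the arithmetic survives jiffies wraparound. The core of that calculation, lifted into standalone C (HZ and the sample values are assumptions):

    #include <stdio.h>

    #define HZ 250UL

    /* jiffies until the watchdog must run again; 0 if it is already overdue */
    static unsigned long softlockup_next(unsigned long touch, unsigned long jiffies)
    {
            long delta = (long)(touch + HZ - jiffies);    /* signed: wrap-safe */
            return delta > 0 ? (unsigned long)delta : 0;
    }

    int main(void)
    {
            unsigned long jiffies    = 1000;
            unsigned long hr_expires = 10000;     /* next ordinary timer, far away */

            unsigned long sl = softlockup_next(900, jiffies);
            if (sl < hr_expires)                  /* the min_t() in the hunk */
                    hr_expires = sl;

            printf("sleep %lu jiffies, wake at %lu\n", hr_expires, jiffies + hr_expires);
            return 0;
    }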
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/x86-elfnote-as-preprocessor-macro.patch
--- a/patches/linux-2.6.18/x86-elfnote-as-preprocessor-macro.patch Thu Jan
17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h
./include/linux/elfnote.h
---- ../orig-linux-2.6.18/include/linux/elfnote.h 2007-01-12
18:19:44.000000000 +0000
-+++ ./include/linux/elfnote.h 2007-01-12 18:21:02.000000000 +0000
-@@ -31,22 +31,38 @@
- /*
- * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
- * turn out to be the same size and shape), followed by the name and
-- * desc data with appropriate padding. The 'desc' argument includes
-- * the assembler pseudo op defining the type of the data: .asciz
-- * "hello, world"
-+ * desc data with appropriate padding. The 'desctype' argument is the
-+ * assembler pseudo op defining the type of the data e.g. .asciz while
-+ * 'descdata' is the data itself e.g. "hello, world".
-+ *
-+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
-+ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
- */
--.macro ELFNOTE name type desc:vararg
--.pushsection ".note.\name"
-- .align 4
-- .long 2f - 1f /* namesz */
-- .long 4f - 3f /* descsz */
-- .long \type
--1:.asciz "\name"
--2:.align 4
--3:\desc
--4:.align 4
-+#ifdef __STDC__
-+#define ELFNOTE(name, type, desctype, descdata...) \
-+.pushsection .note.name ; \
-+ .align 4 ; \
-+ .long 2f - 1f /* namesz */ ; \
-+ .long 4f - 3f /* descsz */ ; \
-+ .long type ; \
-+1:.asciz #name ; \
-+2:.align 4 ; \
-+3:desctype descdata ; \
-+4:.align 4 ; \
- .popsection
--.endm
-+#else /* !__STDC__, i.e. -traditional */
-+#define ELFNOTE(name, type, desctype, descdata) \
-+.pushsection .note.name ; \
-+ .align 4 ; \
-+ .long 2f - 1f /* namesz */ ; \
-+ .long 4f - 3f /* descsz */ ; \
-+ .long type ; \
-+1:.asciz "name" ; \
-+2:.align 4 ; \
-+3:desctype descdata ; \
-+4:.align 4 ; \
-+.popsection
-+#endif /* __STDC__ */
- #else /* !__ASSEMBLER__ */
- #include <linux/elf.h>
- /*
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
---
a/patches/linux-2.6.18/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S
./arch/i386/kernel/vmlinux.lds.S
---- ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S 2006-09-20
04:42:06.000000000 +0100
-+++ ./arch/i386/kernel/vmlinux.lds.S 2007-01-12 18:19:44.000000000 +0000
-@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386"
- OUTPUT_ARCH(i386)
- ENTRY(phys_startup_32)
- jiffies = jiffies_64;
-+
-+PHDRS {
-+ text PT_LOAD FLAGS(5); /* R_E */
-+ data PT_LOAD FLAGS(7); /* RWE */
-+ note PT_NOTE FLAGS(4); /* R__ */
-+}
- SECTIONS
- {
- . = __KERNEL_START;
-@@ -26,7 +32,7 @@ SECTIONS
- KPROBES_TEXT
- *(.fixup)
- *(.gnu.warning)
-- } = 0x9090
-+ } :text = 0x9090
-
- _etext = .; /* End of text section */
-
-@@ -48,7 +54,7 @@ SECTIONS
- .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
- *(.data)
- CONSTRUCTORS
-- }
-+ } :data
-
- . = ALIGN(4096);
- __nosave_begin = .;
-@@ -184,4 +190,6 @@ SECTIONS
- STABS_DEBUG
-
- DWARF_DEBUG
-+
-+ NOTES
- }
-diff -pruN ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h
./include/asm-generic/vmlinux.lds.h
---- ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h 2006-09-20
04:42:06.000000000 +0100
-+++ ./include/asm-generic/vmlinux.lds.h 2007-01-12 18:19:44.000000000
+0000
-@@ -194,3 +194,6 @@
- .stab.index 0 : { *(.stab.index) } \
- .stab.indexstr 0 : { *(.stab.indexstr) } \
- .comment 0 : { *(.comment) }
-+
-+#define NOTES \
-+ .notes : { *(.note.*) } :note
-diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h
./include/linux/elfnote.h
---- ../orig-linux-2.6.18/include/linux/elfnote.h 1970-01-01
01:00:00.000000000 +0100
-+++ ./include/linux/elfnote.h 2007-01-12 18:19:44.000000000 +0000
-@@ -0,0 +1,88 @@
-+#ifndef _LINUX_ELFNOTE_H
-+#define _LINUX_ELFNOTE_H
-+/*
-+ * Helper macros to generate ELF Note structures, which are put into a
-+ * PT_NOTE segment of the final vmlinux image. These are useful for
-+ * including name-value pairs of metadata into the kernel binary (or
-+ * modules?) for use by external programs.
-+ *
-+ * Each note has three parts: a name, a type and a desc. The name is
-+ * intended to distinguish the note's originator, so it would be a
-+ * company, project, subsystem, etc; it must be in a suitable form for
-+ * use in a section name. The type is an integer which is used to tag
-+ * the data, and is considered to be within the "name" namespace (so
-+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
-+ * "desc" field is the actual data. There are no constraints on the
-+ * desc field's contents, though typically they're fairly small.
-+ *
-+ * All notes from a given NAME are put into a section named
-+ * .note.NAME. When the kernel image is finally linked, all the notes
-+ * are packed into a single .notes section, which is mapped into the
-+ * PT_NOTE segment. Because notes for a given name are grouped into
-+ * the same section, they'll all be adjacent the output file.
-+ *
-+ * This file defines macros for both C and assembler use. Their
-+ * syntax is slightly different, but they're semantically similar.
-+ *
-+ * See the ELF specification for more detail about ELF notes.
-+ */
-+
-+#ifdef __ASSEMBLER__
-+/*
-+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
-+ * turn out to be the same size and shape), followed by the name and
-+ * desc data with appropriate padding. The 'desc' argument includes
-+ * the assembler pseudo op defining the type of the data: .asciz
-+ * "hello, world"
-+ */
-+.macro ELFNOTE name type desc:vararg
-+.pushsection ".note.\name"
-+ .align 4
-+ .long 2f - 1f /* namesz */
-+ .long 4f - 3f /* descsz */
-+ .long \type
-+1:.asciz "\name"
-+2:.align 4
-+3:\desc
-+4:.align 4
-+.popsection
-+.endm
-+#else /* !__ASSEMBLER__ */
-+#include <linux/elf.h>
-+/*
-+ * Use an anonymous structure which matches the shape of
-+ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
-+ * type of name and desc depend on the macro arguments. "name" must
-+ * be a literal string, and "desc" must be passed by value. You may
-+ * only define one note per line, since __LINE__ is used to generate
-+ * unique symbols.
-+ */
-+#define _ELFNOTE_PASTE(a,b) a##b
-+#define _ELFNOTE(size, name, unique, type, desc) \
-+ static const struct { \
-+ struct elf##size##_note _nhdr; \
-+ unsigned char _name[sizeof(name)] \
-+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
-+ typeof(desc) _desc \
-+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
-+ } _ELFNOTE_PASTE(_note_, unique) \
-+ __attribute_used__ \
-+ __attribute__((section(".note." name), \
-+ aligned(sizeof(Elf##size##_Word)), \
-+ unused)) = { \
-+ { \
-+ sizeof(name), \
-+ sizeof(desc), \
-+ type, \
-+ }, \
-+ name, \
-+ desc \
-+ }
-+#define ELFNOTE(size, name, type, desc) \
-+ _ELFNOTE(size, name, __LINE__, type, desc)
-+
-+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
-+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
-+#endif /* __ASSEMBLER__ */
-+
-+#endif /* _LINUX_ELFNOTE_H */
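Illustrative aside, not part of the changeset: every variant of ELFNOTE above emits the same record, an Elf{32,64}_Nhdr (namesz, descsz, type) followed by the name and the desc, each padded to 4-byte alignment. A hand-rolled mock-up of one such note in plain C, with the padding sized explicitly (this shows the on-disk shape only, not the kernel macro):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* one concrete note: name "XYZCo" (6 bytes with NUL, padded to 8), 4-byte desc */
    struct demo_note {
            uint32_t namesz, descsz, type;        /* the Elf_Nhdr part           */
            char     name[8];                     /* "XYZCo\0" plus 2 pad bytes  */
            uint32_t desc;                        /* already 4-byte aligned      */
    };

    int main(void)
    {
            struct demo_note n = {
                    .namesz = sizeof("XYZCo"),    /* 6: padding is not counted   */
                    .descsz = sizeof(uint32_t),
                    .type   = 42,
                    .desc   = 0xdeadbeef,
            };
            memcpy(n.name, "XYZCo", sizeof("XYZCo"));

            printf("%zu-byte note: namesz=%u descsz=%u type=%u\n",
                   sizeof(n), n.namesz, n.descsz, n.type);
            return 0;
    }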
diff -r c9b32b389e62 -r b17dfd182f7c
patches/linux-2.6.18/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
---
a/patches/linux-2.6.18/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S
./arch/x86_64/kernel/vmlinux.lds.S
---- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2006-09-20
04:42:06.000000000 +0100
-+++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000
-@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86
- OUTPUT_ARCH(i386:x86-64)
- ENTRY(phys_startup_64)
- jiffies_64 = jiffies;
-+PHDRS {
-+ text PT_LOAD FLAGS(5); /* R_E */
-+ data PT_LOAD FLAGS(7); /* RWE */
-+ user PT_LOAD FLAGS(7); /* RWE */
-+ note PT_NOTE FLAGS(4); /* R__ */
-+}
- SECTIONS
- {
- . = __START_KERNEL;
-@@ -31,7 +37,7 @@ SECTIONS
- KPROBES_TEXT
- *(.fixup)
- *(.gnu.warning)
-- } = 0x9090
-+ } :text = 0x9090
- /* out-of-line lock text */
- .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
-
-@@ -57,17 +63,10 @@ SECTIONS
- .data : AT(ADDR(.data) - LOAD_OFFSET) {
- *(.data)
- CONSTRUCTORS
-- }
-+ } :data
-
- _edata = .; /* End of data section */
-
-- __bss_start = .; /* BSS */
-- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-- *(.bss.page_aligned)
-- *(.bss)
-- }
-- __bss_stop = .;
--
- . = ALIGN(PAGE_SIZE);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-@@ -89,7 +88,7 @@ SECTIONS
- #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
- . = VSYSCALL_ADDR;
-- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) }
-+ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
- __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-@@ -132,7 +131,7 @@ SECTIONS
- . = ALIGN(8192); /* init_task */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
-- }
-+ } :data
-
- . = ALIGN(4096);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-@@ -222,6 +221,14 @@ SECTIONS
- . = ALIGN(4096);
- __nosave_end = .;
-
-+ __bss_start = .; /* BSS */
-+ . = ALIGN(4096);
-+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-+ *(.bss.page_aligned)
-+ *(.bss)
-+ }
-+ __bss_stop = .;
-+
- _end = . ;
-
- /* Sections to be discarded */
-@@ -235,4 +242,6 @@ SECTIONS
- STABS_DEBUG
-
- DWARF_DEBUG
-+
-+ NOTES
- }
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/xen-hotplug.patch
--- a/patches/linux-2.6.18/xen-hotplug.patch Thu Jan 17 14:35:38 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/fs/proc/proc_misc.c ./fs/proc/proc_misc.c
---- ../orig-linux-2.6.18/fs/proc/proc_misc.c 2006-09-20 04:42:06.000000000
+0100
-+++ ./fs/proc/proc_misc.c 2007-01-12 18:18:36.000000000 +0000
-@@ -471,7 +471,7 @@ static int show_stat(struct seq_file *p,
- (unsigned long long)cputime64_to_clock_t(irq),
- (unsigned long long)cputime64_to_clock_t(softirq),
- (unsigned long long)cputime64_to_clock_t(steal));
-- for_each_online_cpu(i) {
-+ for_each_possible_cpu(i) {
-
- /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
- user = kstat_cpu(i).cpustat.user;
diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/xenoprof-generic.patch
--- a/patches/linux-2.6.18/xenoprof-generic.patch Thu Jan 17 14:35:38
2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,662 +0,0 @@
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c
./drivers/oprofile/buffer_sync.c
---- ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/buffer_sync.c 2007-01-12 18:19:28.000000000 +0000
-@@ -6,6 +6,10 @@
- *
- * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
- *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
-+ *
- * This is the core of the buffer management. Each
- * CPU buffer is processed and entered into the
- * global event buffer. Such processing is necessary
-@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_
- static DEFINE_SPINLOCK(task_mortuary);
- static void process_task_mortuary(void);
-
-+static int cpu_current_domain[NR_CPUS];
-
- /* Take ownership of the task struct and place it on the
- * list for processing. Only after two full buffer syncs
-@@ -146,6 +151,11 @@ static void end_sync(void)
- int sync_start(void)
- {
- int err;
-+ int i;
-+
-+ for (i = 0; i < NR_CPUS; i++) {
-+ cpu_current_domain[i] = COORDINATOR_DOMAIN;
-+ }
-
- start_cpu_work();
-
-@@ -275,15 +285,31 @@ static void add_cpu_switch(int i)
- last_cookie = INVALID_COOKIE;
- }
-
--static void add_kernel_ctx_switch(unsigned int in_kernel)
-+static void add_cpu_mode_switch(unsigned int cpu_mode)
- {
- add_event_entry(ESCAPE_CODE);
-- if (in_kernel)
-- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
-- else
-- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
-+ switch (cpu_mode) {
-+ case CPU_MODE_USER:
-+ add_event_entry(USER_ENTER_SWITCH_CODE);
-+ break;
-+ case CPU_MODE_KERNEL:
-+ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
-+ break;
-+ case CPU_MODE_XEN:
-+ add_event_entry(XEN_ENTER_SWITCH_CODE);
-+ break;
-+ default:
-+ break;
-+ }
- }
--
-+
-+static void add_domain_switch(unsigned long domain_id)
-+{
-+ add_event_entry(ESCAPE_CODE);
-+ add_event_entry(DOMAIN_SWITCH_CODE);
-+ add_event_entry(domain_id);
-+}
-+
- static void
- add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
- {
-@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc
- * for later lookup from userspace.
- */
- static int
--add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
-+add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
- {
-- if (in_kernel) {
-+ if (cpu_mode >= CPU_MODE_KERNEL) {
- add_sample_entry(s->eip, s->event);
- return 1;
- } else if (mm) {
-@@ -496,15 +522,21 @@ void sync_buffer(int cpu)
- struct mm_struct *mm = NULL;
- struct task_struct * new;
- unsigned long cookie = 0;
-- int in_kernel = 1;
-+ int cpu_mode = 1;
- unsigned int i;
- sync_buffer_state state = sb_buffer_start;
- unsigned long available;
-+ int domain_switch = 0;
-
- mutex_lock(&buffer_mutex);
-
- add_cpu_switch(cpu);
-
-+ /* We need to assign the first samples in this CPU buffer to the
-+ same domain that we were processing at the last sync_buffer */
-+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
-+ add_domain_switch(cpu_current_domain[cpu]);
-+ }
- /* Remember, only we can modify tail_pos */
-
- available = get_slots(cpu_buf);
-@@ -512,16 +544,18 @@ void sync_buffer(int cpu)
- for (i = 0; i < available; ++i) {
- struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
-
-- if (is_code(s->eip)) {
-- if (s->event <= CPU_IS_KERNEL) {
-- /* kernel/userspace switch */
-- in_kernel = s->event;
-+ if (is_code(s->eip) && !domain_switch) {
-+ if (s->event <= CPU_MODE_XEN) {
-+ /* xen/kernel/userspace switch */
-+ cpu_mode = s->event;
- if (state == sb_buffer_start)
- state = sb_sample_start;
-- add_kernel_ctx_switch(s->event);
-+ add_cpu_mode_switch(s->event);
- } else if (s->event == CPU_TRACE_BEGIN) {
- state = sb_bt_start;
- add_trace_begin();
-+ } else if (s->event == CPU_DOMAIN_SWITCH) {
-+ domain_switch = 1;
- } else {
- struct mm_struct * oldmm = mm;
-
-@@ -535,11 +569,21 @@ void sync_buffer(int cpu)
- add_user_ctx_switch(new, cookie);
- }
- } else {
-- if (state >= sb_bt_start &&
-- !add_sample(mm, s, in_kernel)) {
-- if (state == sb_bt_start) {
-- state = sb_bt_ignore;
-- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-+ if (domain_switch) {
-+ cpu_current_domain[cpu] = s->eip;
-+ add_domain_switch(s->eip);
-+ domain_switch = 0;
-+ } else {
-+ if (cpu_current_domain[cpu] !=
-+ COORDINATOR_DOMAIN) {
-+ add_sample_entry(s->eip, s->event);
-+ }
-+ else if (state >= sb_bt_start &&
-+ !add_sample(mm, s, cpu_mode)) {
-+ if (state == sb_bt_start) {
-+ state = sb_bt_ignore;
-+ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-+ }
- }
- }
- }
-@@ -548,6 +592,11 @@ void sync_buffer(int cpu)
- }
- release_mm(mm);
-
-+ /* We reset domain to COORDINATOR at each CPU switch */
-+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
-+ add_domain_switch(COORDINATOR_DOMAIN);
-+ }
-+
- mark_done(cpu);
-
- mutex_unlock(&buffer_mutex);
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c
./drivers/oprofile/cpu_buffer.c
---- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/cpu_buffer.c 2007-01-12 18:18:50.000000000 +0000
-@@ -6,6 +6,10 @@
- *
- * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
- *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
-+ *
- * Each CPU has a local buffer that stores PC value/event
- * pairs. We also log context switches when we notice them.
- * Eventually each CPU's buffer is processed into the global
-@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *);
- #define DEFAULT_TIMER_EXPIRE (HZ / 10)
- static int work_enabled;
-
-+static int32_t current_domain = COORDINATOR_DOMAIN;
-+
- void free_cpu_buffers(void)
- {
- int i;
-@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void)
- goto fail;
-
- b->last_task = NULL;
-- b->last_is_kernel = -1;
-+ b->last_cpu_mode = -1;
- b->tracing = 0;
- b->buffer_size = buffer_size;
- b->tail_pos = 0;
-@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cp
- * collected will populate the buffer with proper
- * values to initialize the buffer
- */
-- cpu_buf->last_is_kernel = -1;
-+ cpu_buf->last_cpu_mode = -1;
- cpu_buf->last_task = NULL;
- }
-
-@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * bu
- * because of the head/tail separation of the writer and reader
- * of the CPU buffer.
- *
-- * is_kernel is needed because on some architectures you cannot
-+ * cpu_mode is needed because on some architectures you cannot
- * tell if you are in kernel or user space simply by looking at
-- * pc. We tag this in the buffer by generating kernel enter/exit
-- * events whenever is_kernel changes
-+ * pc. We tag this in the buffer by generating kernel/user (and xen)
-+ * enter events whenever cpu_mode changes
- */
- static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
-- int is_kernel, unsigned long event)
-+ int cpu_mode, unsigned long event)
- {
- struct task_struct * task;
-
-@@ -180,18 +186,18 @@ static int log_sample(struct oprofile_cp
- return 0;
- }
-
-- is_kernel = !!is_kernel;
--
- task = current;
-
- /* notice a switch from user->kernel or vice versa */
-- if (cpu_buf->last_is_kernel != is_kernel) {
-- cpu_buf->last_is_kernel = is_kernel;
-- add_code(cpu_buf, is_kernel);
-+ if (cpu_buf->last_cpu_mode != cpu_mode) {
-+ cpu_buf->last_cpu_mode = cpu_mode;
-+ add_code(cpu_buf, cpu_mode);
- }
--
-+
- /* notice a task switch */
-- if (cpu_buf->last_task != task) {
-+ /* if not processing other domain samples */
-+ if ((cpu_buf->last_task != task) &&
-+ (current_domain == COORDINATOR_DOMAIN)) {
- cpu_buf->last_task = task;
- add_code(cpu_buf, (unsigned long)task);
- }
-@@ -275,6 +281,25 @@ void oprofile_add_trace(unsigned long pc
- add_sample(cpu_buf, pc, 0);
- }
-
-+int oprofile_add_domain_switch(int32_t domain_id)
-+{
-+ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
-+
-+ /* should have space for switching into and out of domain
-+ (2 slots each) plus one sample and one cpu mode switch */
-+ if (((nr_available_slots(cpu_buf) < 6) &&
-+ (domain_id != COORDINATOR_DOMAIN)) ||
-+ (nr_available_slots(cpu_buf) < 2))
-+ return 0;
-+
-+ add_code(cpu_buf, CPU_DOMAIN_SWITCH);
-+ add_sample(cpu_buf, domain_id, 0);
-+
-+ current_domain = domain_id;
-+
-+ return 1;
-+}
-+
- /*
- * This serves to avoid cpu buffer overflow, and makes sure
- * the task mortuary progresses
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h
./drivers/oprofile/cpu_buffer.h
---- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/cpu_buffer.h 2007-01-12 18:18:50.000000000 +0000
-@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
- volatile unsigned long tail_pos;
- unsigned long buffer_size;
- struct task_struct * last_task;
-- int last_is_kernel;
-+ int last_cpu_mode;
- int tracing;
- struct op_sample * buffer;
- unsigned long sample_received;
-@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu
- void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
-
- /* transient events for the CPU buffer -> event buffer */
--#define CPU_IS_KERNEL 1
--#define CPU_TRACE_BEGIN 2
-+#define CPU_MODE_USER 0
-+#define CPU_MODE_KERNEL 1
-+#define CPU_MODE_XEN 2
-+#define CPU_TRACE_BEGIN 3
-+#define CPU_DOMAIN_SWITCH 4
-
- #endif /* OPROFILE_CPU_BUFFER_H */
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h
./drivers/oprofile/event_buffer.h
---- ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/event_buffer.h 2007-01-12 18:18:50.000000000 +0000
-@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void);
- #define CPU_SWITCH_CODE 2
- #define COOKIE_SWITCH_CODE 3
- #define KERNEL_ENTER_SWITCH_CODE 4
--#define KERNEL_EXIT_SWITCH_CODE 5
-+#define USER_ENTER_SWITCH_CODE 5
- #define MODULE_LOADED_CODE 6
- #define CTX_TGID_CODE 7
- #define TRACE_BEGIN_CODE 8
- #define TRACE_END_CODE 9
-+#define XEN_ENTER_SWITCH_CODE 10
-+#define DOMAIN_SWITCH_CODE 11
-
- #define INVALID_COOKIE ~0UL
- #define NO_COOKIE 0UL
-
-+/* Constant used to refer to coordinator domain (Xen) */
-+#define COORDINATOR_DOMAIN -1
-+
- /* add data to the event buffer */
- void add_event_entry(unsigned long data);
-
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.c
./drivers/oprofile/oprof.c
---- ../orig-linux-2.6.18/drivers/oprofile/oprof.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/oprof.c 2007-01-12 18:18:50.000000000 +0000
-@@ -5,6 +5,10 @@
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
-+ *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
- #include <linux/kernel.h>
-@@ -19,7 +23,7 @@
- #include "cpu_buffer.h"
- #include "buffer_sync.h"
- #include "oprofile_stats.h"
--
-+
- struct oprofile_operations oprofile_ops;
-
- unsigned long oprofile_started;
-@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex);
- */
- static int timer = 0;
-
-+int oprofile_set_active(int active_domains[], unsigned int adomains)
-+{
-+ int err;
-+
-+ if (!oprofile_ops.set_active)
-+ return -EINVAL;
-+
-+ mutex_lock(&start_mutex);
-+ err = oprofile_ops.set_active(active_domains, adomains);
-+ mutex_unlock(&start_mutex);
-+ return err;
-+}
-+
-+int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
-+{
-+ int err;
-+
-+ if (!oprofile_ops.set_passive)
-+ return -EINVAL;
-+
-+ mutex_lock(&start_mutex);
-+ err = oprofile_ops.set_passive(passive_domains, pdomains);
-+ mutex_unlock(&start_mutex);
-+ return err;
-+}
-+
- int oprofile_setup(void)
- {
- int err;
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.h
./drivers/oprofile/oprof.h
---- ../orig-linux-2.6.18/drivers/oprofile/oprof.h 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/oprof.h 2007-01-12 18:18:50.000000000 +0000
-@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_
- void oprofile_timer_init(struct oprofile_operations * ops);
-
- int oprofile_set_backtrace(unsigned long depth);
-+
-+int oprofile_set_active(int active_domains[], unsigned int adomains);
-+int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
-
- #endif /* OPROF_H */
-diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c
./drivers/oprofile/oprofile_files.c
---- ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c 2006-09-20
04:42:06.000000000 +0100
-+++ ./drivers/oprofile/oprofile_files.c 2007-01-12 18:18:50.000000000
+0000
-@@ -5,15 +5,21 @@
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
-+ *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
- #include <linux/fs.h>
- #include <linux/oprofile.h>
-+#include <asm/uaccess.h>
-+#include <linux/ctype.h>
-
- #include "event_buffer.h"
- #include "oprofile_stats.h"
- #include "oprof.h"
--
-+
- unsigned long fs_buffer_size = 131072;
- unsigned long fs_cpu_buffer_size = 8192;
- unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
-@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file *
- static struct file_operations dump_fops = {
- .write = dump_write,
- };
--
-+
-+#define TMPBUFSIZE 512
-+
-+static unsigned int adomains = 0;
-+static int active_domains[MAX_OPROF_DOMAINS + 1];
-+static DEFINE_MUTEX(adom_mutex);
-+
-+static ssize_t adomain_write(struct file * file, char const __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char *tmpbuf;
-+ char *startp, *endp;
-+ int i;
-+ unsigned long val;
-+ ssize_t retval = count;
-+
-+ if (*offset)
-+ return -EINVAL;
-+ if (count > TMPBUFSIZE - 1)
-+ return -EINVAL;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ if (copy_from_user(tmpbuf, buf, count)) {
-+ kfree(tmpbuf);
-+ return -EFAULT;
-+ }
-+ tmpbuf[count] = 0;
-+
-+ mutex_lock(&adom_mutex);
-+
-+ startp = tmpbuf;
-+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
-+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
-+ val = simple_strtoul(startp, &endp, 0);
-+ if (endp == startp)
-+ break;
-+ while (ispunct(*endp) || isspace(*endp))
-+ endp++;
-+ active_domains[i] = val;
-+ if (active_domains[i] != val)
-+ /* Overflow, force error below */
-+ i = MAX_OPROF_DOMAINS + 1;
-+ startp = endp;
-+ }
-+ /* Force error on trailing junk */
-+ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
-+
-+ kfree(tmpbuf);
-+
-+ if (adomains > MAX_OPROF_DOMAINS
-+ || oprofile_set_active(active_domains, adomains)) {
-+ adomains = 0;
-+ retval = -EINVAL;
-+ }
-+
-+ mutex_unlock(&adom_mutex);
-+ return retval;
-+}
-+
-+static ssize_t adomain_read(struct file * file, char __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char * tmpbuf;
-+ size_t len;
-+ int i;
-+ ssize_t retval;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ mutex_lock(&adom_mutex);
-+
-+ len = 0;
-+ for (i = 0; i < adomains; i++)
-+ len += snprintf(tmpbuf + len,
-+ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
-+ "%u ", active_domains[i]);
-+ WARN_ON(len > TMPBUFSIZE);
-+ if (len != 0 && len <= TMPBUFSIZE)
-+ tmpbuf[len-1] = '\n';
-+
-+ mutex_unlock(&adom_mutex);
-+
-+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
-+
-+ kfree(tmpbuf);
-+ return retval;
-+}
-+
-+
-+static struct file_operations active_domain_ops = {
-+ .read = adomain_read,
-+ .write = adomain_write,
-+};
-+
-+static unsigned int pdomains = 0;
-+static int passive_domains[MAX_OPROF_DOMAINS];
-+static DEFINE_MUTEX(pdom_mutex);
-+
-+static ssize_t pdomain_write(struct file * file, char const __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char *tmpbuf;
-+ char *startp, *endp;
-+ int i;
-+ unsigned long val;
-+ ssize_t retval = count;
-+
-+ if (*offset)
-+ return -EINVAL;
-+ if (count > TMPBUFSIZE - 1)
-+ return -EINVAL;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ if (copy_from_user(tmpbuf, buf, count)) {
-+ kfree(tmpbuf);
-+ return -EFAULT;
-+ }
-+ tmpbuf[count] = 0;
-+
-+ mutex_lock(&pdom_mutex);
-+
-+ startp = tmpbuf;
-+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
-+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
-+ val = simple_strtoul(startp, &endp, 0);
-+ if (endp == startp)
-+ break;
-+ while (ispunct(*endp) || isspace(*endp))
-+ endp++;
-+ passive_domains[i] = val;
-+ if (passive_domains[i] != val)
-+ /* Overflow, force error below */
-+ i = MAX_OPROF_DOMAINS + 1;
-+ startp = endp;
-+ }
-+ /* Force error on trailing junk */
-+ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
-+
-+ kfree(tmpbuf);
-+
-+ if (pdomains > MAX_OPROF_DOMAINS
-+ || oprofile_set_passive(passive_domains, pdomains)) {
-+ pdomains = 0;
-+ retval = -EINVAL;
-+ }
-+
-+ mutex_unlock(&pdom_mutex);
-+ return retval;
-+}
-+
-+static ssize_t pdomain_read(struct file * file, char __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char * tmpbuf;
-+ size_t len;
-+ int i;
-+ ssize_t retval;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ mutex_lock(&pdom_mutex);
-+
-+ len = 0;
-+ for (i = 0; i < pdomains; i++)
-+ len += snprintf(tmpbuf + len,
-+ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
-+ "%u ", passive_domains[i]);
-+ WARN_ON(len > TMPBUFSIZE);
-+ if (len != 0 && len <= TMPBUFSIZE)
-+ tmpbuf[len-1] = '\n';
-+
-+ mutex_unlock(&pdom_mutex);
-+
-+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
-+
-+ kfree(tmpbuf);
-+ return retval;
-+}
-+
-+static struct file_operations passive_domain_ops = {
-+ .read = pdomain_read,
-+ .write = pdomain_write,
-+};
-+
- void oprofile_create_files(struct super_block * sb, struct dentry * root)
- {
- oprofilefs_create_file(sb, root, "enable", &enable_fops);
- oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
-+ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
-+ oprofilefs_create_file(sb, root, "passive_domains",
&passive_domain_ops);
- oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
- oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
- oprofilefs_create_ulong(sb, root, "buffer_watershed",
&fs_buffer_watershed);
-diff -pruN ../orig-linux-2.6.18/include/linux/oprofile.h
./include/linux/oprofile.h
---- ../orig-linux-2.6.18/include/linux/oprofile.h 2006-09-20
04:42:06.000000000 +0100
-+++ ./include/linux/oprofile.h 2007-01-12 18:18:50.000000000 +0000
-@@ -16,6 +16,8 @@
- #include <linux/types.h>
- #include <linux/spinlock.h>
- #include <asm/atomic.h>
-+
-+#include <xen/interface/xenoprof.h>
-
- struct super_block;
- struct dentry;
-@@ -27,6 +29,11 @@ struct oprofile_operations {
- /* create any necessary configuration files in the oprofile fs.
- * Optional. */
- int (*create_files)(struct super_block * sb, struct dentry * root);
-+ /* setup active domains with Xen */
-+ int (*set_active)(int *active_domains, unsigned int adomains);
-+ /* setup passive domains with Xen */
-+ int (*set_passive)(int *passive_domains, unsigned int pdomains);
-+
- /* Do any necessary interrupt setup. Optional. */
- int (*setup)(void);
- /* Do any necessary interrupt shutdown. Optional. */
-@@ -78,6 +85,8 @@ void oprofile_add_pc(unsigned long pc, i
- /* add a backtrace entry, to be called from the ->backtrace callback */
- void oprofile_add_trace(unsigned long eip);
-
-+/* add a domain switch entry */
-+int oprofile_add_domain_switch(int32_t domain_id);
-
- /**
- * Create a file of the given name as a child of the given root, with
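Illustrative aside, not part of the changeset: adomain_write() and pdomain_write() above share one parsing loop, walking the buffer with simple_strtoul(), skipping punctuation and whitespace between numbers, and rejecting trailing junk or too many entries. A condensed userspace rendering of that loop (strtoul in place of simple_strtoul; MAX_DOMAINS is a stand-in for MAX_OPROF_DOMAINS, and the int-overflow check is omitted):

    #include <stdio.h>
    #include <stdlib.h>
    #include <ctype.h>

    #define MAX_DOMAINS 8

    /* returns the number of IDs parsed, or -1 on overflow or trailing junk */
    static int parse_domains(const char *buf, int out[MAX_DOMAINS])
    {
            const char *startp = buf;
            char *endp;
            int i;

            /* parse one entry more than the limit so overflow is easy to spot */
            for (i = 0; i <= MAX_DOMAINS; i++) {
                    unsigned long val = strtoul(startp, &endp, 0);
                    if (endp == startp)
                            break;
                    while (ispunct((unsigned char)*endp) || isspace((unsigned char)*endp))
                            endp++;
                    if (i < MAX_DOMAINS)
                            out[i] = (int)val;
                    startp = endp;
            }
            if (i > MAX_DOMAINS || *startp)
                    return -1;
            return i;
    }

    int main(void)
    {
            int ids[MAX_DOMAINS];
            int n = parse_domains("1, 2, 7\n", ids);

            printf("parsed %d ids\n", n);
            for (int k = 0; k < n; k++)
                    printf("  %d\n", ids[k]);
            return 0;
    }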
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog