# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID b0338759544e116337f25d5cdebc59c38a6d40c5
# Parent 78b5e590be34ee3454d970321d7c2f3199a0de41
Big rework of the SHARED_KERNEL_PMD logic. Includes several
bug fixes for PAE, and reverts my previous changeset that
broke non-PAE.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
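
For readers following the logic: pgd_ctor(), pgd_alloc() and pgd_free()
now branch on the pair (PTRS_PER_PMD > 1, HAVE_SHARED_KERNEL_PMD)
instead of on CONFIG_X86_PAE alone. The standalone C sketch below is an
illustrative model distilled from the diff that follows -- it is not
part of the patch, and the file name pgd_ctor_model.c is made up -- and
simply prints the actions the new pgd_ctor() performs in each of the
four configurations:

/* pgd_ctor_model.c -- illustrative model of the reworked pgd_ctor()
 * control flow, distilled from the diff below. Not kernel code. */
#include <stdio.h>

static void pgd_ctor_actions(int pae, int shared_kernel_pmd)
{
	printf("PAE=%d SHARED_KERNEL_PMD=%d:\n", pae, shared_kernel_pmd);
	if (pae) {
		/* PTRS_PER_PMD > 1: under CONFIG_XEN the pgd must sit
		 * below 4GB, hence xen_create_contiguous_region(). */
		printf("  make pgd contiguous below 4GB (Xen only)\n");
		if (shared_kernel_pmd)
			printf("  copy kernel pgd entry from swapper_pg_dir\n");
		else
			printf("  nothing more here; kernel pmd is built in pgd_alloc()\n");
	} else {
		if (!shared_kernel_pmd)
			printf("  take pgd_lock\n");
		printf("  copy kernel entries from swapper_pg_dir\n");
		printf("  zero the user entries\n");
		if (!shared_kernel_pmd)
			printf("  pgd_list_add(pgd), release pgd_lock\n");
	}
}

int main(void)
{
	int pae, shared;

	for (pae = 0; pae <= 1; ++pae)
		for (shared = 0; shared <= 1; ++shared)
			pgd_ctor_actions(pae, shared);
	return 0;
}

Build and run it with any C compiler (e.g. "cc pgd_ctor_model.c &&
./a.out") to print the four-way decision table.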
diff -r 78b5e590be34 -r b0338759544e linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Thu Nov 24 19:57:01 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Thu Nov 24 22:21:48 2005
@@ -708,7 +708,7 @@
panic("pgtable_cache_init(): cannot create pmd cache");
}
pgd_cache = kmem_cache_create("pgd",
-#if 0 /* How the heck _this_ works in native linux ??? */
+#ifndef CONFIG_XEN
PTRS_PER_PGD*sizeof(pgd_t),
PTRS_PER_PGD*sizeof(pgd_t),
#else
@@ -717,7 +717,7 @@
#endif
0,
pgd_ctor,
- pgd_dtor);
+ PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
diff -r 78b5e590be34 -r b0338759544e linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Thu Nov 24 19:57:01 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Thu Nov 24 22:21:48 2005
@@ -28,8 +28,6 @@
#include <asm/hypervisor.h>
static void pgd_test_and_unpin(pgd_t *pgd);
-#define suspend_disable preempt_disable
-#define suspend_enable preempt_enable
void show_mem(void)
{
@@ -279,26 +277,31 @@
{
unsigned long flags;
-#ifdef CONFIG_X86_PAE
- /* Ensure pgd resides below 4GB. */
- int rc = xen_create_contiguous_region((unsigned long)pgd, 0, 32);
- BUG_ON(rc);
+ if (PTRS_PER_PMD > 1) {
+#ifdef CONFIG_XEN
+ /* Ensure pgd resides below 4GB. */
+ int rc = xen_create_contiguous_region(
+ (unsigned long)pgd, 0, 32);
+ BUG_ON(rc);
#endif
-
- if (HAVE_SHARED_KERNEL_PMD) {
+ if (HAVE_SHARED_KERNEL_PMD)
+ memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir, sizeof(pgd_t));
+ } else {
+ if (!HAVE_SHARED_KERNEL_PMD)
+ spin_lock_irqsave(&pgd_lock, flags);
memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
- return;
- }
-
- memset(pgd, 0, PTRS_PER_PGD*sizeof(pgd_t));
-
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ if (!HAVE_SHARED_KERNEL_PMD) {
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+ }
+}
+
+/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
@@ -315,7 +318,7 @@
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- int i = 0;
+ int i;
pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
pgd_test_and_unpin(pgd);
@@ -323,34 +326,31 @@
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ if (!pmd)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
+
if (!HAVE_SHARED_KERNEL_PMD) {
- /* alloc and copy kernel pmd */
unsigned long flags;
pgd_t *copy_pgd = pgd_offset_k(PAGE_OFFSET);
pud_t *copy_pud = pud_offset(copy_pgd, PAGE_OFFSET);
pmd_t *copy_pmd = pmd_offset(copy_pud, PAGE_OFFSET);
pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (0 == pmd)
+ ++i;
+ if (!pmd)
goto out_oom;
spin_lock_irqsave(&pgd_lock, flags);
memcpy(pmd, copy_pmd, PAGE_SIZE);
- spin_unlock_irqrestore(&pgd_lock, flags);
make_lowmem_page_readonly(pmd);
set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
- }
-
- /* alloc user pmds */
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (!pmd)
- goto out_oom;
- suspend_disable();
- if (test_bit(PG_pinned, &virt_to_page(pgd)->flags))
- make_lowmem_page_readonly(pmd);
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- suspend_enable();
- }
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+
return pgd;
out_oom:
@@ -364,28 +364,25 @@
{
int i;
- suspend_disable();
pgd_test_and_unpin(pgd);
/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1) {
for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
- set_pgd(&pgd[i], __pgd(0));
- make_lowmem_page_writable(pmd);
kmem_cache_free(pmd_cache, pmd);
}
if (!HAVE_SHARED_KERNEL_PMD) {
+ unsigned long flags;
pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
- set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(0));
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
make_lowmem_page_writable(pmd);
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
}
-
- suspend_enable();
-
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
}
@@ -510,9 +507,6 @@
static void __pgd_pin(pgd_t *pgd)
{
- /* PAE PGDs with no kernel PMD cannot be pinned. Bail right now. */
- if ((PTRS_PER_PMD > 1) && pgd_none(pgd[USER_PTRS_PER_PGD]))
- return;
pgd_walk(pgd, PAGE_KERNEL_RO);
xen_pgd_pin(__pa(pgd));
set_bit(PG_pinned, &virt_to_page(pgd)->flags);
@@ -527,10 +521,8 @@
static void pgd_test_and_unpin(pgd_t *pgd)
{
- suspend_disable();
if (test_bit(PG_pinned, &virt_to_page(pgd)->flags))
__pgd_unpin(pgd);
- suspend_enable();
}
void mm_pin(struct mm_struct *mm)
diff -r 78b5e590be34 -r b0338759544e patches/linux-2.6.12/pmd-shared.patch
--- a/patches/linux-2.6.12/pmd-shared.patch Thu Nov 24 19:57:01 2005
+++ b/patches/linux-2.6.12/pmd-shared.patch Thu Nov 24 22:21:48 2005
@@ -1,15 +1,3 @@
-diff -urNpP linux-2.6.12/arch/i386/mm/init.c linux-2.6.12.new/arch/i386/mm/init.c
---- linux-2.6.12/arch/i386/mm/init.c 2005-06-17 20:48:29.000000000 +0100
-+++ linux-2.6.12.new/arch/i386/mm/init.c 2005-07-11 16:28:09.778165582 +0100
-@@ -634,7 +634,7 @@ void __init pgtable_cache_init(void)
- PTRS_PER_PGD*sizeof(pgd_t),
- 0,
- pgd_ctor,
-- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
-+ pgd_dtor);
- if (!pgd_cache)
- panic("pgtable_cache_init(): Cannot create pgd cache");
- }
diff -urNpP linux-2.6.12/arch/i386/mm/pageattr.c linux-2.6.12.new/arch/i386/mm/pageattr.c
--- linux-2.6.12/arch/i386/mm/pageattr.c 2005-06-17 20:48:29.000000000 +0100
+++ linux-2.6.12.new/arch/i386/mm/pageattr.c 2005-07-11 16:28:09.775165494 +0100
@@ -23,31 +11,45 @@
spin_lock_irqsave(&pgd_lock, flags);
diff -urNpP linux-2.6.12/arch/i386/mm/pgtable.c linux-2.6.12.new/arch/i386/mm/pgtable.c
---- linux-2.6.12/arch/i386/mm/pgtable.c 2005-06-17 20:48:29.000000000 +0100
-+++ linux-2.6.12.new/arch/i386/mm/pgtable.c 2005-07-11 16:32:01.478023726 +0100
-@@ -199,14 +199,14 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
+--- linux-2.6.12/arch/i386/mm/pgtable.c 2005-11-24 21:51:49.000000000 +0000
++++ linux-2.6.12.new/arch/i386/mm/pgtable.c 2005-11-24 22:06:04.000000000 +0000
+@@ -199,19 +199,22 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
{
unsigned long flags;
- if (PTRS_PER_PMD == 1)
-+ if (!HAVE_SHARED_KERNEL_PMD)
- spin_lock_irqsave(&pgd_lock, flags);
-
- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-
+- spin_lock_irqsave(&pgd_lock, flags);
+-
+- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+-
- if (PTRS_PER_PMD > 1)
-+ if (HAVE_SHARED_KERNEL_PMD)
- return;
-
- pgd_list_add(pgd);
-@@ -214,11 +214,13 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+- return;
+-
+- pgd_list_add(pgd);
+- spin_unlock_irqrestore(&pgd_lock, flags);
+- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
++ if (PTRS_PER_PMD > 1) {
++ if (HAVE_SHARED_KERNEL_PMD)
++ memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
++ swapper_pg_dir, sizeof(pgd_t));
++ } else {
++ if (!HAVE_SHARED_KERNEL_PMD)
++ spin_lock_irqsave(&pgd_lock, flags);
++ memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
++ swapper_pg_dir + USER_PTRS_PER_PGD,
++ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
++ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ pgd_list_add(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ }
++ }
}
--/* never called when PTRS_PER_PMD > 1 */
- void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+ /* never called when PTRS_PER_PMD > 1 */
+@@ -219,6 +222,9 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
{
unsigned long flags; /* can be called from interrupt context */
@@ -57,38 +59,32 @@
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
-@@ -226,12 +228,29 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
-
- pgd_t *pgd_alloc(struct mm_struct *mm)
- {
-- int i;
-+ int i = 0;
- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
-
- if (PTRS_PER_PMD == 1 || !pgd)
- return pgd;
-
+@@ -238,6 +244,24 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
++
+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ /* alloc and copy kernel pmd */
+ unsigned long flags;
+ pgd_t *copy_pgd = pgd_offset_k(PAGE_OFFSET);
+ pud_t *copy_pud = pud_offset(copy_pgd, PAGE_OFFSET);
+ pmd_t *copy_pmd = pmd_offset(copy_pud, PAGE_OFFSET);
+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-+ if (0 == pmd)
++ ++i;
++ if (!pmd)
+ goto out_oom;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ memcpy(pmd, copy_pmd, PAGE_SIZE);
++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
++ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
+ }
+
-+ /* alloc user pmds */
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (!pmd)
-@@ -252,9 +271,16 @@ void pgd_free(pgd_t *pgd)
+ return pgd;
+
+ out_oom:
+@@ -252,9 +276,21 @@ void pgd_free(pgd_t *pgd)
int i;
/* in the PAE case user pgd entries are overwritten before usage */
@@ -101,7 +97,12 @@
+ kmem_cache_free(pmd_cache, pmd);
+ }
+ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
+ pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
++ spin_lock_irqsave(&pgd_lock, flags);
++ pgd_list_del(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+ kmem_cache_free(pmd_cache, pmd);
+ }
+ }