[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86: introduce specialized clear_page()



Both IA64 and PowerPC have specialized (non-memset) versions for this,
so add one for x86 as well, more than doubling the performance of page
clearing on not too old processors (those supporting SSE2).

While the patch also adds an SSE version, this is currently orphaned
as I am not certain about the benefit of special casing idle VCPUs in
a few places (during context switching), so that at least in that
context using %xmmN registers would be possible without crashing and/or
corrupting guest state. The benefit of adding such support could be to
reduce scheduling latency when a VCPU is to transition out of idle, but
is busy doing page cleaning.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-06-18/xen/arch/x86/domain.c
===================================================================
--- 2007-06-18.orig/xen/arch/x86/domain.c       2007-06-04 08:35:35.000000000 
+0200
+++ 2007-06-18/xen/arch/x86/domain.c    2007-06-18 11:57:46.000000000 +0200
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 
         pg = alloc_domheap_page(NULL);
         if ( !pg )
             return -ENOMEM;
-        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+        clear_page(d->arch.mm_arg_xlat_l3);
     }
 
     l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -429,7 +430,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l2 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l2);
     for ( i = 0; i < (1 << pdpt_order); i++ )
         d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
             l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -437,7 +439,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l3 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l3);
     d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
         l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
                             __PAGE_HYPERVISOR);
Index: 2007-06-18/xen/arch/x86/x86_32/Makefile
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_32/Makefile        2006-11-14 
13:51:10.000000000 +0100
+++ 2007-06-18/xen/arch/x86/x86_32/Makefile     2007-06-18 11:57:46.000000000 
+0200
@@ -1,3 +1,4 @@
+obj-y += clear_page.o
 obj-y += domain_page.o
 obj-y += entry.o
 obj-y += gpr_switch.o
Index: 2007-06-18/xen/arch/x86/x86_32/clear_page.S
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2007-06-18/xen/arch/x86/x86_32/clear_page.S 2007-06-18 11:57:46.000000000 
+0200
@@ -0,0 +1,36 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+ENTRY(clear_page_xmm)
+       movl    4(%esp), %edx
+       movl    $PAGE_SIZE / 64, %ecx
+       xorps   %xmm0, %xmm0
+
+0:
+       decl    %ecx
+       movntps %xmm0, (%edx)
+       movntps %xmm0, 16(%edx)
+       movntps %xmm0, 32(%edx)
+       movntps %xmm0, 48(%edx)
+       leal    64(%edx), %edx
+       jnz     0b
+
+       sfence
+       ret
+
+ENTRY(clear_page_sse2)
+       movl    4(%esp), %edx
+       movl    $PAGE_SIZE / 16, %ecx
+       xorl    %eax, %eax
+
+0:
+       decl    %ecx
+       movnti  %eax, (%edx)
+       movnti  %eax, 4(%edx)
+       movnti  %eax, 8(%edx)
+       movnti  %eax, 12(%edx)
+       leal    16(%edx), %edx
+       jnz     0b
+
+       sfence
+       ret
Index: 2007-06-18/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_64/Makefile        2007-02-12 
14:00:54.000000000 +0100
+++ 2007-06-18/xen/arch/x86/x86_64/Makefile     2007-06-18 11:57:46.000000000 
+0200
@@ -1,12 +1,13 @@
 subdir-y += compat
 
+obj-y += clear_page.o
 obj-y += entry.o
-obj-y += compat_kexec.o
 obj-y += gpr_switch.o
 obj-y += mm.o
 obj-y += traps.o
 
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
 obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
Index: 2007-06-18/xen/arch/x86/x86_64/clear_page.S
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2007-06-18/xen/arch/x86/x86_64/clear_page.S 2007-06-18 11:57:46.000000000 
+0200
@@ -0,0 +1,34 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+ENTRY(clear_page_xmm)
+       movl    $PAGE_SIZE / 64, %ecx
+       xorps   %xmm0, %xmm0
+
+0:
+       decl    %ecx
+       movntps %xmm0, (%rdi)
+       movntps %xmm0, 16(%rdi)
+       movntps %xmm0, 32(%rdi)
+       movntps %xmm0, 48(%rdi)
+       leaq    64(%rdi), %rdi
+       jnz     0b
+
+       sfence
+       ret
+
+ENTRY(clear_page_sse2)
+       movl    $PAGE_SIZE / 32, %ecx
+       xorl    %eax, %eax
+
+0:
+       decl    %ecx
+       movnti  %rax, (%rdi)
+       movnti  %rax, 8(%rdi)
+       movnti  %rax, 16(%rdi)
+       movnti  %rax, 24(%rdi)
+       leaq    32(%rdi), %rdi
+       jnz     0b
+
+       sfence
+       ret
Index: 2007-06-18/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2007-06-18.orig/xen/arch/x86/x86_64/mm.c    2007-06-04 08:35:35.000000000 
+0200
+++ 2007-06-18/xen/arch/x86/x86_64/mm.c 2007-06-18 11:57:46.000000000 +0200
@@ -106,7 +106,8 @@ void __init paging_init(void)
     /* Create user-accessible L2 directory to map the MPT for guests. */
     if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
         goto nomem;
-    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+    l3_ro_mpt = page_to_virt(l2_pg);
+    clear_page(l3_ro_mpt);
     l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
               l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
 
@@ -132,7 +133,8 @@ void __init paging_init(void)
             if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
                 goto nomem;
             va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
-            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+            l2_ro_mpt = page_to_virt(l2_pg);
+            clear_page(l2_ro_mpt);
             l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
         l3_ro_mpt = 
l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
         if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
             goto nomem;
-        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+        compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+        clear_page(l2_ro_mpt);
         l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
                   l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
         l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
Index: 2007-06-18/xen/include/asm-x86/page.h
===================================================================
--- 2007-06-18.orig/xen/include/asm-x86/page.h  2007-06-04 08:35:36.000000000 
+0200
+++ 2007-06-18/xen/include/asm-x86/page.h       2007-06-18 11:57:46.000000000 
+0200
@@ -214,9 +214,12 @@ typedef struct { u64 pfn; } pagetable_t;
 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
 #define pagetable_null()        pagetable_from_pfn(0)
-#endif
 
-#define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
+extern void clear_page_xmm(void *);
+extern void clear_page_sse2(void *);
+#define clear_page(_p)      (cpu_has_xmm2 ? \
+                             clear_page_sse2((void *)(_p)) : \
+                             memset((void *)(_p), 0, PAGE_SIZE))
 #define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
 
 #define mfn_valid(mfn)      ((mfn) < max_page)
@@ -244,6 +247,7 @@ typedef struct { u64 pfn; } pagetable_t;
 /* Convert between frame number and address formats.  */
 #define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
 #define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
+#endif
 
 /* High table entries are reserved by the hypervisor. */
 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.