WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] x86: add SSE-based copy_page()

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: add SSE-based copy_page()
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 19 Nov 2008 08:10:23 -0800
Delivery-date: Wed, 19 Nov 2008 08:14:12 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1226491455 0
# Node ID 8de4b4e9a435cea9b8e85863fcb832c213281076
# Parent  8e18dd41c6c7bb0980b29393b275c564cfb96437
x86: add SSE-based copy_page()

On top of the highmem assistance hypercalls added earlier, this provides
a performance improvement of another 12% (measured on Xeon E5345) for
the page copying case.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/Makefile       |    1 
 xen/arch/x86/copy_page.S    |   66 ++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/domain.c       |    3 +-
 xen/arch/x86/domain_build.c |    5 ++-
 xen/include/asm-x86/page.h  |    5 ++-
 5 files changed, 76 insertions(+), 4 deletions(-)

diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/arch/x86/Makefile     Wed Nov 12 12:04:15 2008 +0000
@@ -11,6 +11,7 @@ obj-y += apic.o
 obj-y += apic.o
 obj-y += bitops.o
 obj-y += clear_page.o
+obj-y += copy_page.o
 obj-y += compat.o
 obj-y += delay.o
 obj-y += dmi_scan.o
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/copy_page.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/copy_page.S  Wed Nov 12 12:04:15 2008 +0000
@@ -0,0 +1,66 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define src_reg %esi
+#define dst_reg %edi
+#define WORD_SIZE 4
+#define tmp1_reg %eax
+#define tmp2_reg %edx
+#define tmp3_reg %ebx
+#define tmp4_reg %ebp
+#else
+#define src_reg %rsi
+#define dst_reg %rdi
+#define WORD_SIZE 8
+#define tmp1_reg %r8
+#define tmp2_reg %r9
+#define tmp3_reg %r10
+#define tmp4_reg %r11
+#endif
+
+ENTRY(copy_page_sse2)              /* copy_page_sse2(dst, src): copy PAGE_SIZE bytes, four words per step, using non-temporal stores */
+#ifdef __i386__
+        push    %ebx               /* used below as tmp3_reg; saved so it can be restored before ret */
+        push    %ebp               /* used below as tmp4_reg */
+        push    %esi               /* used below as src_reg */
+        push    %edi               /* used below as dst_reg */
+        mov     6*4(%esp), src_reg /* 4 saved regs + return address sit below the args: second argument (source) */
+        mov     5*4(%esp), dst_reg /* first argument (destination) */
+#endif
+        mov     $PAGE_SIZE/(4*WORD_SIZE)-3, %ecx /* chunks per page minus 3: one chunk is stored after the loop, two iterations run via the jpe path */
+
+        prefetchnta 2*4*WORD_SIZE(src_reg) /* prefetch the source two chunks ahead */
+        mov     (src_reg), tmp1_reg /* preload chunk 0; the loop stores chunk k while loading chunk k+1 */
+        mov     WORD_SIZE(src_reg), tmp2_reg
+        mov     2*WORD_SIZE(src_reg), tmp3_reg
+        mov     3*WORD_SIZE(src_reg), tmp4_reg
+
+0:      prefetchnta 3*4*WORD_SIZE(src_reg) /* three chunks ahead of the chunk currently held in the temporaries */
+1:      add     $4*WORD_SIZE, src_reg /* entry point used by the last two iterations, which skip the prefetch */
+        movnti  tmp1_reg, (dst_reg) /* store the word loaded in the previous step ... */
+        mov     (src_reg), tmp1_reg /* ... and immediately reload the register from the next chunk */
+        dec     %ecx               /* sets the flags tested by jg/jpe below; the movnti/mov/lea in between leave flags untouched */
+        movnti  tmp2_reg, WORD_SIZE(dst_reg)
+        mov     WORD_SIZE(src_reg), tmp2_reg
+        movnti  tmp3_reg, 2*WORD_SIZE(dst_reg)
+        mov     2*WORD_SIZE(src_reg), tmp3_reg
+        movnti  tmp4_reg, 3*WORD_SIZE(dst_reg)
+        lea     4*WORD_SIZE(dst_reg), dst_reg /* advance dst with lea rather than add so the flags from dec survive */
+        mov     3*WORD_SIZE(src_reg), tmp4_reg
+        jg      0b                 /* counter still positive: iterate again, with prefetch */
+        jpe     1b                 /* counter 0 or -1 (low byte 0x00/0xff has even parity): iterate without prefetch, which would point past the page; -2 (0xfe, odd parity) falls through */
+
+        movnti  tmp1_reg, (dst_reg) /* store the final chunk loaded by the last loop iteration */
+        movnti  tmp2_reg, WORD_SIZE(dst_reg)
+        movnti  tmp3_reg, 2*WORD_SIZE(dst_reg)
+        movnti  tmp4_reg, 3*WORD_SIZE(dst_reg)
+
+#ifdef __i386__
+        pop     %edi               /* restore in reverse order of the pushes above */
+        pop     %esi
+        pop     %ebp
+        pop     %ebx
+#endif
+        sfence                     /* fence the preceding non-temporal (movnti) stores before returning to the caller */
+        ret
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/arch/x86/domain.c     Wed Nov 12 12:04:15 2008 +0000
@@ -184,7 +184,8 @@ static int setup_compat_l4(struct vcpu *
     /* This page needs to look like a pagetable so that it can be shadowed */
     pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
 
-    l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+    l4tab = page_to_virt(pg);
+    copy_page(l4tab, idle_pg_table);
     l4tab[0] = l4e_empty();
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_page(pg, __PAGE_HYPERVISOR);
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/arch/x86/domain_build.c       Wed Nov 12 12:04:15 2008 +0000
@@ -455,8 +455,9 @@ int __init construct_dom0(
     /* WARNING: The new domain must have its 'processor' field filled in! */
     l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
-    memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
-    for (i = 0; i < 4; i++) {
+    for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
+        copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+                  idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
         l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
         l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
             l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Wed Nov 12 12:01:35 2008 +0000
+++ b/xen/include/asm-x86/page.h        Wed Nov 12 12:04:15 2008 +0000
@@ -215,7 +215,10 @@ void clear_page_sse2(void *);
 #define clear_page(_p)      (cpu_has_xmm2 ?                             \
                              clear_page_sse2((void *)(_p)) :            \
                              (void)memset((void *)(_p), 0, PAGE_SIZE))
-#define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+void copy_page_sse2(void *, const void *);
+#define copy_page(_t,_f)    (cpu_has_xmm2 ?                             \
+                             copy_page_sse2(_t, _f) :                   \
+                             (void)memcpy(_t, _f, PAGE_SIZE))
 
 #define mfn_valid(mfn)      ((mfn) < max_page)
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] x86: add SSE-based copy_page(), Xen patchbot-unstable <=