xen-ia64-devel

[Xen-ia64-devel] PATCH resend: performance enhancements for BS2000 domU

To: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-ia64-devel] PATCH resend: performance enhancements for BS2000 domU
From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 17 Sep 2007 08:43:49 +0200
Delivery-date: Sun, 16 Sep 2007 23:44:14 -0700
List-help: <mailto:xen-ia64-devel-request@lists.xensource.com?subject=help>
List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>
List-post: <mailto:xen-ia64-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=unsubscribe>
Organization: Fujitsu Siemens Computers
Sender: xen-ia64-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Icedove 1.5.0.10 (X11/20070328)

Hi,

The first version of this patch contained an error (a wrong mask in the VHPT flush).
Please ignore the previous patch; this one should be okay!
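
For illustration only (not part of the patch; the address value is made up), a
minimal C sketch of why vadr & ~PAGE_SIZE is the wrong page-alignment mask
while vadr & ~(PAGE_SIZE - 1) does what the flush needs:

/* illustration only: with 4k pages, ~PAGE_SIZE clears just bit 12,
 * while ~(PAGE_SIZE - 1) clears all offset bits 0..11 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL                        /* assuming 4k pages */

int main(void)
{
    uint64_t vadr  = 0x12345678UL;              /* made-up, unaligned address  */
    uint64_t wrong = vadr & ~PAGE_SIZE;         /* 0x12344678: offset survives */
    uint64_t right = vadr & ~(PAGE_SIZE - 1);   /* 0x12345000: page-aligned    */

    printf("vadr=%#lx wrong=%#lx right=%#lx\n",
           (unsigned long)vadr, (unsigned long)wrong, (unsigned long)right);
    return 0;
}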

This patch enhances the performance of a BS2000 domU. I've changed the
following:
- enable some fast_reflect paths for big-endian guests
- even with a 4k VHPT, use page sizes up to PAGE_SIZE for the inserted entries
  when the guest inserts larger TC entries (see the sketch after this list)
- add KEY_PERMISSION_VECTOR support (reflect key permission faults to the guest)
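
To make the second item concrete, here is a rough standalone C sketch (the
shift values are assumptions chosen only for illustration; the real change is
the clamp to PAGE_SHIFT in translate_domain_pte() and vcpu_itc_no_srlz() in
the patch below):

/* illustration only: effect of clamping the inserted page size to PAGE_SHIFT
 * instead of the domain's vhpt_pg_shift when the guest uses a larger TC entry */
#include <stdio.h>

int main(void)
{
    unsigned int vhpt_pg_shift = 12;   /* assumed: domU runs with a 4k VHPT     */
    unsigned int page_shift    = 14;   /* assumed: hypervisor PAGE_SHIFT (16k)  */
    unsigned int guest_ps      = 16;   /* assumed: guest inserts a 64k TC entry */

    /* old behaviour: clamp to the VHPT page size */
    unsigned int old_ps = guest_ps > vhpt_pg_shift ? vhpt_pg_shift : guest_ps;
    /* new behaviour: clamp to PAGE_SHIFT */
    unsigned int new_ps = guest_ps > page_shift ? page_shift : guest_ps;

    printf("old: %u mappings of %u bytes, new: %u mappings of %u bytes\n",
           1u << (guest_ps - old_ps), 1u << old_ps,
           1u << (guest_ps - new_ps), 1u << new_ps);
    return 0;
}

Fewer, larger insertions reduce the number of TLB misses the guest takes on
such regions.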

BS2000 boot was about 10% faster with these changes.

Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxxx
# Node ID c4cd34eb32371d1d868a767bc7b0920a74e86dcb
# Parent  15d4ba6c29c4949c0fa88f24b68ab88bd352cb52

performance enhancement for big-endian, 4k-pages, protection keys

Signed-off-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>

diff -r 15d4ba6c29c4 -r c4cd34eb3237 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Fri Sep 14 13:49:10 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c  Mon Sep 17 08:34:04 2007 +0200
@@ -293,10 +293,10 @@ __flush_vhpt_range(unsigned long vhpt_ma
 {
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
-       u64 purge_addr = vadr & ~PAGE_SIZE;
+       u64 purge_addr = vadr & ~(PAGE_SIZE - 1);
 
        addr_range += vadr - purge_addr;
-       addr_range = (addr_range + PAGE_SIZE - 1) & ~PAGE_SIZE;
+       addr_range = (addr_range + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
                unsigned int off = ia64_thash(purge_addr) -
# HG changeset patch
# User gross@xxxxxxxxxxxxxxxxxxxx
# Node ID 15d4ba6c29c4949c0fa88f24b68ab88bd352cb52
# Parent  1668a62e1c1fff8dc8d552c4fb459ecb19663157
performance enhancement for big-endian, 4k-pages, protection keys

diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c        Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/faults.c        Fri Sep 14 13:49:10 2007 +0200
@@ -594,6 +594,9 @@ ia64_handle_reflection(unsigned long ifa
                check_lazy_cover = 1;
                vector = IA64_PAGE_NOT_PRESENT_VECTOR;
                break;
+       case 21:
+               vector = IA64_KEY_PERMISSION_VECTOR;
+               break;
        case 22:
                vector = IA64_INST_ACCESS_RIGHTS_VECTOR;
                break;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/hyperprivop.S   Fri Sep 14 13:49:10 2007 +0200
@@ -223,9 +223,6 @@ ENTRY(hyper_ssm_i)
        // give up for now if: ipsr.be==1, ipsr.pp==1
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.sptk.many dispatch_break_fault ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.sptk.many dispatch_break_fault ;;
@@ -268,7 +265,7 @@ ENTRY(hyper_ssm_i)
        // FOR SSM_I ONLY, also turn on psr.i and psr.ic
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC)
 //     movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
-       movl r27=~(IA64_PSR_BE|IA64_PSR_BN);;
+       movl r27=~IA64_PSR_BN;;
        or r30=r30,r28;;
        and r30=r30,r27;;
        mov r20=1
@@ -361,10 +358,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        cmp.ltu p6,p0=r26,r27
 (p6)   br.cond.spnt.few rp;;
        mov r17=cr.ipsr;;
-       // slow path if: ipsr.be==1, ipsr.pp==1
-       extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p6,p0=r21,r0
-(p6)   br.cond.spnt.few rp;;
+       // slow path if: ipsr.pp==1
        extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p6,p0=r21,r0
 (p6)   br.cond.spnt.few rp;;
@@ -453,7 +447,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
 (p7)   dep r17=0,r17,IA64_PSR_CPL0_BIT,2
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
-       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
+       movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
        or r17=r17,r28;;
        and r17=r17,r27
        ld4 r16=[r18];;
@@ -556,9 +550,6 @@ GLOBAL_ENTRY(fast_break_reflect)
 #endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.spnt.few dispatch_break_fault ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.spnt.few dispatch_break_fault ;;
@@ -633,7 +624,7 @@ ENTRY(fast_reflect)
        cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
 (p7)   dep r30=0,r30,IA64_PSR_CPL0_BIT,2
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
-       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+       movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
        or r30=r30,r28;;
        and r30=r30,r27
        // also set shared_mem ipsr.i and ipsr.ic appropriately
@@ -744,9 +735,6 @@ GLOBAL_ENTRY(fast_access_reflect)
 #endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.spnt.few dispatch_reflection ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.spnt.few dispatch_reflection ;;
@@ -794,9 +782,6 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
        cmp.eq p7,p0=r21,r0
 (p7)   br.spnt.few page_fault ;;
        // slow path if strange ipsr or isr bits set
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.spnt.few page_fault ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.spnt.few page_fault ;;
@@ -1068,10 +1053,6 @@ 1:
 1:
        adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r20];;         // r21 = vcr.ipsr
-       extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
-       // if turning on psr.be, give up for now and do it the slow way
-       cmp.ne p7,p0=r22,r0
-(p7)   br.spnt.few slow_vcpu_rfi ;;
        // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
        movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
        and r22=r20,r21
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/ivt.S   Fri Sep 14 13:49:10 2007 +0200
@@ -313,7 +313,6 @@ GLOBAL_ENTRY(dispatch_reflection)
        adds out1=16,sp
        mov out2=cr.isr
        mov out3=cr.iim
-//     mov out3=cr.itir                // TODO: why commented out?
 
        ssm psr.ic | PSR_DEFAULT_BITS
        ;;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/mm.c    Fri Sep 14 13:49:10 2007 +0200
@@ -509,25 +509,22 @@ u64 translate_domain_pte(u64 pteval, u64
        u64 arflags;
        u64 arflags2;
        u64 maflags2;
-       u64 ps;
 
        pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
 
        // FIXME address had better be pre-validated on insert
        mask = ~itir_mask(_itir.itir);
        mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
-       ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift :
-                                          PAGE_SHIFT;
-
-       if (_itir.ps > ps)
-               _itir.ps = ps;
+
+       if (_itir.ps > PAGE_SHIFT)
+               _itir.ps = PAGE_SHIFT;
 
        ((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */
        ((ia64_itir_t*)itir)->ps = _itir.ps;    /* Overwrite ps part! */
 
        pteval2 = lookup_domain_mpa(d, mpaddr, entry);
-       if (ps < PAGE_SHIFT)
-               pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << ps) - 1);
+       if (_itir.ps < PAGE_SHIFT)
+               pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << _itir.ps) - 1);
 
        /* Check access rights.  */
        arflags  = pteval  & _PAGE_AR_MASK;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/vcpu.c  Fri Sep 14 13:49:10 2007 +0200
@@ -1817,7 +1817,7 @@ IA64FAULT vcpu_tpa(VCPU * vcpu, u64 vadr
 
 IA64FAULT vcpu_tak(VCPU * vcpu, u64 vadr, u64 * key)
 {
-       u64 pteval, itir, mask, iha;
+       u64 pteval, itir, iha;
        IA64FAULT fault;
 
        fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
@@ -2319,8 +2319,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
 {
        ia64_itir_t _itir = {.itir = itir};
        unsigned long psr;
-       unsigned long ps = (vcpu->domain == dom0) ? _itir.ps :
-                                                   vcpu->arch.vhpt_pg_shift;
 
        check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);
 
@@ -2329,12 +2327,12 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
                panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use "
                             "smaller page size!\n");
 
-       BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift);
+       BUG_ON(_itir.ps > PAGE_SHIFT);
        vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
        psr = ia64_clear_ic();
        pte &= ~(_PAGE_RV2 | _PAGE_RV1);        // Mask out the reserved bits.
                                        // FIXME: look for bigger mappings
-       ia64_itc(IorD, vaddr, pte, IA64_ITIR_PS_KEY(ps, _itir.key));
+       ia64_itc(IorD, vaddr, pte, _itir.itir);
        ia64_set_psr(psr);
        // ia64_srlz_i(); // no srls req'd, will rfi later
        if (vcpu->domain == dom0 && ((vaddr >> 61) == 7)) {
@@ -2350,7 +2348,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
        // even if domain pagesize is larger than PAGE_SIZE, just put
        // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
        else {
-               _itir.ps = vcpu->arch.vhpt_pg_shift;
                vhpt_insert(vaddr, pte, _itir.itir);
        }
 }
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c  Fri Sep 14 13:49:10 2007 +0200
@@ -293,15 +293,18 @@ __flush_vhpt_range(unsigned long vhpt_ma
 {
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
-
+       u64 purge_addr = vadr & ~PAGE_SIZE;
+
+       addr_range += vadr - purge_addr;
+       addr_range = (addr_range + PAGE_SIZE - 1) & ~PAGE_SIZE;
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
-               unsigned int off = ia64_thash(vadr) -
+               unsigned int off = ia64_thash(purge_addr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= pgsz;
-               vadr += pgsz;
+               purge_addr += pgsz;
        }
 }
 
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel