[Xen-ia64-devel] [PATCH] support of 4k page size for individual guests

To: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-ia64-devel] [PATCH] support of 4k page size for individual guests
From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 16 Aug 2007 12:47:06 +0200
Hi,

This is the patch needed to support 4k (and 8k) page sizes for individual
guests (currently PV only).
"Normal" domUs should not be affected, as the per-vcpu VHPT is reconfigured
only if a domU uses a page size smaller than PAGE_SIZE; a simplified sketch
of that path follows below.
I haven't touched grant pages yet; I believe they still work on a PAGE_SIZE
basis as before, but I haven't verified that.
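
In short, the lazy reconfiguration looks like this (a simplified sketch of
the vcpu_itc_d()/vcpu_rebuild_vhpt() hunks below; the vcpu_itc_i() path is
analogous, and the CONFIG_XEN_IA64_PERVCPU_VHPT guard, translation and
insertion steps are omitted here):

    /* On a TLB insert, a guest page size below the vcpu's current VHPT
     * page size triggers a one-time switch of the per-vcpu VHPT. */
    IA64FAULT vcpu_itc_d(VCPU *vcpu, u64 pte, u64 itir, u64 ifa)
    {
            ia64_itir_t _itir = { .itir = itir };

            if (_itir.ps < vcpu->arch.vhpt_pg_shift)
                    /* e.g. a 4k insert while the VHPT still runs on
                     * PAGE_SHIFT; the old code panicked here */
                    vcpu_rebuild_vhpt(vcpu, _itir.ps);
            /* ... translate_domain_pte() and insert as before ... */
    }

    static void vcpu_rebuild_vhpt(VCPU *vcpu, u64 ps)
    {
            vcpu->arch.vhpt_pg_shift = ps;           /* new per-vcpu shift */
            vcpu_purge_tr_entry(&PSCBX(vcpu, dtlb)); /* drop stale TR shadows */
            vcpu_purge_tr_entry(&PSCBX(vcpu, itlb));
            local_vhpt_flush();                      /* invalidate old VHPT entries */
            load_region_regs(vcpu);                  /* rr.ps picks up the new shift */
    }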

Tested by compiling, booting dom0, booting a Linux domU, and booting a BS2000
domU (BS2000 is our mainframe OS and uses 4k pages).

Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxx
# Node ID 2f58face717cefcaaa9791994ab978f975b14573
# Parent  6b0c965e95a668bf65d475f519b254107cce21a3

4k pagesize support per vcpu

Signed-off-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>

diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/asm-offsets.c       Thu Aug 16 11:33:27 2007 +0200
@@ -72,6 +72,7 @@ void foo(void)
        DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, arch.domain_itm_last));
        DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
        DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
+       DEFINE(IA64_VCPU_VHPT_PG_SHIFT_OFFSET, offsetof (struct vcpu, arch.vhpt_pg_shift));
 
        BLANK();
 
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/domain.c        Thu Aug 16 11:33:27 2007 +0200
@@ -445,6 +445,7 @@ int vcpu_initialise(struct vcpu *v)
            v->arch.ending_rid = d->arch.ending_rid;
            v->arch.breakimm = d->arch.breakimm;
            v->arch.last_processor = INVALID_PROCESSOR;
+           v->arch.vhpt_pg_shift = PAGE_SHIFT;
        }
 
        if (!VMX_DOMAIN(v))
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c        Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/faults.c        Thu Aug 16 11:33:27 2007 +0200
@@ -239,6 +239,8 @@ void ia64_do_page_fault(unsigned long ad
                    (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
                regs->cr_ipsr = vcpu_pl_adjust(regs->cr_ipsr,
                                               IA64_PSR_CPL0_BIT);
+               if (PSCB(current, dcr) & IA64_DCR_BE)
+                       regs->cr_ipsr |= IA64_PSR_BE;
 
                if (PSCB(current, hpsr_dfh))
                        regs->cr_ipsr |= IA64_PSR_DFH;  
@@ -741,7 +743,8 @@ ia64_shadow_fault(unsigned long ifa, uns
        pte = vlfe->page_flags;
        if (vlfe->ti_tag == ia64_ttag(ifa)) {
                /* The VHPT entry is valid.  */
-               gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+               gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >>
+                                        v->arch.vhpt_pg_shift);
                BUG_ON(gpfn == INVALID_M2P_ENTRY);
        } else {
                unsigned long itir, iha;
@@ -757,10 +760,10 @@ ia64_shadow_fault(unsigned long ifa, uns
                /* Try again!  */
                if (fault != IA64_NO_FAULT) {
                        /* This will trigger a dtlb miss.  */
-                       ia64_ptcl(ifa, PAGE_SHIFT << 2);
-                       return;
-               }
-               gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+                       ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
+                       return;
+               }
+               gpfn = ((pte & _PAGE_PPN_MASK) >> v->arch.vhpt_pg_shift);
                if (pte & _PAGE_D)
                        pte |= _PAGE_VIRT_D;
        }
@@ -788,7 +791,7 @@ ia64_shadow_fault(unsigned long ifa, uns
                        /* Purge the TC locally.
                           It will be reloaded from the VHPT iff the
                           VHPT entry is still valid.  */
-                       ia64_ptcl(ifa, PAGE_SHIFT << 2);
+                       ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
 
                        atomic64_inc(&d->arch.shadow_fault_count);
                } else {
@@ -800,6 +803,6 @@ ia64_shadow_fault(unsigned long ifa, uns
                /* We don't know wether or not the fault must be
                   reflected.  The VHPT entry is not valid.  */
                /* FIXME: in metaphysical mode, we could do an ITC now.  */
-               ia64_ptcl(ifa, PAGE_SHIFT << 2);
-       }
-}
+               ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
+       }
+}
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/hyperprivop.S   Thu Aug 16 11:33:27 2007 +0200
@@ -1604,26 +1604,27 @@ ENTRY(hyper_set_rr)
        extr.u r26=r9,IA64_RR_RID,IA64_RR_RID_LEN       // r26 = r9.rid
        movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r20=[r20];;
-       adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
-       ld4 r22=[r21];;
-       adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
-       ld4 r23=[r21];;
-       adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
+       adds r22=IA64_VCPU_STARTING_RID_OFFSET,r20
+       adds r23=IA64_VCPU_ENDING_RID_OFFSET,r20
+       adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20
+       adds r21=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r20;;
+       ld4 r22=[r22]
+       ld4 r23=[r23]
+       ld1 r21=[r21];;
        add r22=r26,r22;;
        cmp.geu p6,p0=r22,r23   // if r9.rid + starting_rid >= ending_rid
 (p6)   br.cond.spnt.few 1f;    // this is an error, but just ignore/return
-       // r21=starting_rid
        adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        shl r25=r25,3;;
        add r20=r20,r25;;
        st8 [r20]=r9;;          // store away exactly what was passed
        // but adjust value actually placed in rr[r8]
        // r22 contains adjusted rid, "mangle" it (see regionreg.c)
-       // and set ps to PAGE_SHIFT and ve to 1
+       // and set ps to v->arch.vhpt_pg_shift and ve to 1
        extr.u r27=r22,0,8
        extr.u r28=r22,8,8
-       extr.u r29=r22,16,8;;
-       dep.z r23=PAGE_SHIFT,IA64_RR_PS,IA64_RR_PS_LEN;;
+       extr.u r29=r22,16,8
+       dep.z r23=r21,IA64_RR_PS,IA64_RR_PS_LEN;;
        dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
        dep r23=r27,r23,24,8;;
        dep r23=r28,r23,16,8;;
@@ -1673,34 +1674,38 @@ ENTRY(hyper_set_rr0_to_rr4)
        ld8 r17=[r17];;
 
        adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17
-       adds r25=IA64_VCPU_ENDING_RID_OFFSET,r17
+       adds r22=IA64_VCPU_ENDING_RID_OFFSET,r17
+       adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r17
        ;; 
-       ld4 r22=[r21] // r22 = current->starting_rid
+       ld4 r21=[r21] // r21 = current->starting_rid
        extr.u r26=r8,IA64_RR_RID,IA64_RR_RID_LEN       // r26 = r8.rid
        extr.u r27=r9,IA64_RR_RID,IA64_RR_RID_LEN       // r27 = r9.rid
-       ld4 r23=[r25] // r23 = current->ending_rid
+       ld4 r22=[r22] // r22 = current->ending_rid
        extr.u r28=r10,IA64_RR_RID,IA64_RR_RID_LEN      // r28 = r10.rid
        extr.u r29=r11,IA64_RR_RID,IA64_RR_RID_LEN      // r29 = r11.rid
        adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17
        extr.u r30=r14,IA64_RR_RID,IA64_RR_RID_LEN      // r30 = r14.rid
+       ld1 r23=[r23] // r23 = current->vhpt_pg_shift
        ;; 
-       add r16=r26,r22
-       add r17=r27,r22
-       add r19=r28,r22
-       add r20=r29,r22
-       add r21=r30,r22 
+       add r16=r26,r21
+       add r17=r27,r21
+       add r19=r28,r21
+       add r20=r29,r21
+       add r21=r30,r21 
+       dep.z r23=r23,IA64_RR_PS,IA64_RR_PS_LEN         // r23 = rr.ps
        ;; 
-       cmp.geu p6,p0=r16,r23   // if r8.rid + starting_rid >= ending_rid
-       cmp.geu p7,p0=r17,r23   // if r9.rid + starting_rid >= ending_rid
-       cmp.geu p8,p0=r19,r23   // if r10.rid + starting_rid >= ending_rid
+       cmp.geu p6,p0=r16,r22   // if r8.rid + starting_rid >= ending_rid
+       cmp.geu p7,p0=r17,r22   // if r9.rid + starting_rid >= ending_rid
+       cmp.geu p8,p0=r19,r22   // if r10.rid + starting_rid >= ending_rid
 (p6)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
 (p7)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
-       cmp.geu p9,p0=r20,r23   // if r11.rid + starting_rid >= ending_rid
+       cmp.geu p9,p0=r20,r22   // if r11.rid + starting_rid >= ending_rid
 (p8)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
 (p9)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
-       cmp.geu p10,p0=r21,r23  // if r14.rid + starting_rid >= ending_rid
+       cmp.geu p10,p0=r21,r22  // if r14.rid + starting_rid >= ending_rid
 (p10)  br.cond.spnt.few 1f     // this is an error, but just ignore/return
-       
+       dep r23=-1,r23,0,1      // add rr.ve
+       ;;
        mov r25=1
        adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        ;;
@@ -1715,13 +1720,11 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r16,0,8
        extr.u r28=r16,8,8
        extr.u r29=r16,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       st8 [r24]=r23           // save for metaphysical
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       st8 [r24]=r25           // save for metaphysical
+       mov rr[r26]=r25
        dv_serialize_data
 
        // rr1
@@ -1730,12 +1733,10 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r17,0,8
        extr.u r28=r17,8,8
        extr.u r29=r17,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 
        // rr2
@@ -1744,12 +1745,10 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r19,0,8
        extr.u r28=r19,8,8
        extr.u r29=r19,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 
        // rr3
@@ -1759,12 +1758,10 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r20,0,8
        extr.u r28=r20,8,8
        extr.u r29=r20,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
        
        // rr4
@@ -1774,49 +1771,43 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r21,0,8
        extr.u r28=r21,8,8
        extr.u r29=r21,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 #else
        // shuffled version
        // rr0
        // uses r27, r28, r29 for mangling
-       //      r23           for mangled value
+       //      r25           for mangled value
        st8 [r22]=r8, 8 // current->rrs[0] = r8
        mov r26=0       // r26=0x0000000000000000
        extr.u r27=r16,0,8
        extr.u r28=r16,8,8
-       extr.u r29=r16,16,8
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       extr.u r25=r17,0,8
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       st8 [r24]=r23           // save for metaphysical
-       mov rr[r26]=r23
+       extr.u r29=r16,16,8;;
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       st8 [r24]=r25           // save for metaphysical
+       mov rr[r26]=r25
        dv_serialize_data
 
        // r16, r24, r25 is usable.
        // rr1
        // uses r25, r28, r29 for mangling
-       //      r23           for mangled value
+       //      r25           for mangled value
+       extr.u r25=r17,0,8
        extr.u r28=r17,8,8
        st8 [r22]=r9, 8 // current->rrs[1] = r9
        extr.u r29=r17,16,8 ;; 
-       dep.z r23=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x2000000000000000
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
        extr.u r24=r19,8,8
        extr.u r16=r19,0,8
-       dep r23=r25,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r25,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 
        // r16, r17, r24, r25 is usable
@@ -1826,10 +1817,8 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r29=r19,16,8
        extr.u r27=r20,0,8
        st8 [r22]=r10, 8 // current->rrs[2] = r10
-       dep.z r17=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x4000000000000000     
-       dep r17=-1,r17,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r17=r16,r17,24,8;;
+       dep r17=r16,r23,24,8;;  // mangling is swapping bytes 1 & 3
        dep r17=r24,r17,16,8;;
        dep r17=r29,r17,8,8;; 
        mov rr[r26]=r17
@@ -1838,18 +1827,16 @@ ENTRY(hyper_set_rr0_to_rr4)
        // r16, r17, r19, r24, r25 is usable
        // rr3
        // uses r27, r28, r29 for mangling
-       //      r23           for mangled value
+       //      r25           for mangled value
        extr.u r28=r20,8,8
        extr.u r29=r20,16,8
        st8 [r22]=r11, 8 // current->rrs[3] = r11
        extr.u r16=r21,0,8
-       dep.z r23=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x6000000000000000
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
        
        // r16, r17, r19, r20, r24, r25
@@ -1859,10 +1846,8 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r17=r21,8,8
        extr.u r24=r21,16,8
        st8 [r22]=r14 // current->rrs[4] = r14
-       dep.z r25=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x8000000000000000
-       dep r25=-1,r25,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r25=r16,r25,24,8;;
+       dep r25=r16,r23,24,8;;  // mangling is swapping bytes 1 & 3
        dep r25=r17,r25,16,8;;
        dep r25=r24,r25,8,8;; 
        mov rr[r26]=r25
@@ -2024,26 +2009,30 @@ ENTRY(hyper_ptc_ga)
        adds r21=1,r21;;
        st4 [r20]=r21;;
 #endif
+       movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r21=[r21];;
+       adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
        mov r28=r8
        extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
        mov r20=1
        shr.u r24=r8,61
-       addl r27=56,r0                  // PAGE_SHIFT<<2 (for ptc.ga)
        movl r26=0x8000000000000000     // INVALID_TI_TAG
        mov r30=ar.lc
        ;;
+       ld1 r22=[r22]                   // current->arch.vhpt_pg_shift
        shl r19=r20,r19
        cmp.eq p7,p0=7,r24
 (p7)   br.spnt.many dispatch_break_fault ;;    // slow way for rr7
        ;;
+       shl r27=r22,2                   // vhpt_pg_shift<<2 (for ptc.ga)
+       shr.u r23=r19,r22               // repeat loop for n pages
        cmp.le p7,p0=r19,r0             // skip flush if size<=0
 (p7)   br.cond.dpnt 2f ;;
-       extr.u r24=r19,0,PAGE_SHIFT
-       shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
-       cmp.ne p7,p0=r24,r0 ;;
+       shl r24=r23,r22;;
+       cmp.ne p7,p0=r24,r23 ;;
 (p7)   adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
        mov ar.lc=r23
-       movl r29=PAGE_SIZE;;
+       shl r29=r20,r22;;               // page_size
 1:
        thash r25=r28 ;;
        adds r25=16,r25 ;;
@@ -2060,10 +2049,8 @@ 2:
        mov ar.lc=r30 ;;
        mov r29=cr.ipsr
        mov r30=cr.iip;;
-       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-       ld8 r27=[r27];;
-       adds r25=IA64_VCPU_DTLB_OFFSET,r27
-       adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+       adds r25=IA64_VCPU_DTLB_OFFSET,r21
+       adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
        ld8 r24=[r25]
        ld8 r27=[r26] ;;
        and r24=-2,r24
@@ -2110,10 +2097,14 @@ hyper_itc_d:
        br.sptk.many dispatch_break_fault ;;
 #else
        // ensure itir.ps >= xen's pagesize
+       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r27=[r27];;
+       adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld1 r22=[r22]
        ld8 r23=[r23];;
        extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;          // r24==logps
-       cmp.gt p7,p0=PAGE_SHIFT,r24
+       cmp.gt p7,p0=r22,r24
 (p7)   br.spnt.many dispatch_break_fault ;;
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
@@ -2121,8 +2112,6 @@ hyper_itc_d:
        extr.u r21=r21,61,3;;
        cmp.eq p7,p0=r21,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
-       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-       ld8 r27=[r27];;
        adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
        ld8 r27=[r27]
 // FIXME: is the global var dom0 always pinned? assume so for now
@@ -2160,18 +2149,24 @@ END(hyper_itc)
 //     r31 == pr
 ENTRY(fast_insert)
        // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
-       mov r19=1;;
-       shl r20=r19,r24;;
+       mov r19=1
+       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       shl r20=r19,r24
+       ld8 r27=[r27];;
+       adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
        adds r20=-1,r20         // r20 == mask
        movl r19=_PAGE_PPN_MASK;;
+       ld1 r23=[r23]
+       mov r25=-1
        and r22=r16,r19;;       // r22 == pteval & _PAGE_PPN_MASK
        andcm r19=r22,r20
+       shl r25=r25,r23         // -1 << current->arch.vhpt_pg_shift
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
        and r20=r21,r20;;
        or r19=r19,r20;;        // r19 == mpaddr
 // FIXME: for now, just do domain0 and skip mpaddr range checks
-       dep r20=r0,r19,0,PAGE_SHIFT
+       and r20=r25,r19
        movl r21=PAGE_PHYS ;;
        or r20=r20,r21 ;;       // r20==return value from lookup_domain_mpa
        // r16=pteval,r20=pteval2
@@ -2208,8 +2203,6 @@ ENTRY(fast_insert)
        // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
        // TR_ENTRY = {page_flags,itir,addr,rid}
        tbit.z p6,p7=r17,0
-       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-       ld8 r27=[r27];;
        adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
 (p6)   adds r27=IA64_VCPU_DTLB_OFFSET,r27
 (p7)   adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/mm.c    Thu Aug 16 11:33:27 2007 +0200
@@ -447,7 +447,7 @@ gmfn_to_mfn_foreign(struct domain *d, un
 // given a domain virtual address, pte and pagesize, extract the metaphysical
 // address, convert the pte for a physical address for (possibly different)
 // Xen PAGE_SIZE and return modified pte.  (NOTE: TLB insert should use
-// PAGE_SIZE!)
+// current->arch.vhpt_pg_shift!)
 u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* itir,
                          struct p2m_entry* entry)
 {
@@ -457,20 +457,25 @@ u64 translate_domain_pte(u64 pteval, u64
        u64 arflags;
        u64 arflags2;
        u64 maflags2;
+       u64 ps;
 
        pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
 
        // FIXME address had better be pre-validated on insert
        mask = ~itir_mask(_itir.itir);
        mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
-
-       if (_itir.ps > PAGE_SHIFT)
-               _itir.ps = PAGE_SHIFT;
+       ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift :
+                                          PAGE_SHIFT;
+
+       if (_itir.ps > ps)
+               _itir.ps = ps;
 
        ((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */
        ((ia64_itir_t*)itir)->ps = _itir.ps;    /* Overwrite ps part! */
 
        pteval2 = lookup_domain_mpa(d, mpaddr, entry);
+       if (ps < PAGE_SHIFT)
+               pteval2 |= address & (PAGE_SIZE - 1) & ~((1L << ps) - 1);
 
        /* Check access rights.  */
        arflags  = pteval  & _PAGE_AR_MASK;
@@ -544,10 +549,11 @@ u64 translate_domain_pte(u64 pteval, u64
                        pteval &= ~_PAGE_D;
        }
     
-       /* Ignore non-addr bits of pteval2 and force PL0->2
+       /* Ignore non-addr bits of pteval2 and force PL0->1
           (PL3 is unaffected) */
-       return (pteval & ~_PAGE_PPN_MASK) |
-              (pteval2 & _PAGE_PPN_MASK) | _PAGE_PL_PRIV;
+       return (pteval & ~(_PAGE_PPN_MASK | _PAGE_PL_MASK)) |
+              (pteval2 & _PAGE_PPN_MASK) |
+              (vcpu_pl_adjust(pteval, 7) & _PAGE_PL_MASK);
 }
 
 // given a current domain metaphysical address, return the physical address
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c     Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/regionreg.c     Thu Aug 16 11:33:27 2007 +0200
@@ -72,7 +72,7 @@ static unsigned long allocate_metaphysic
 
        rrv.rrval = 0;  // Or else may see reserved bit fault
        rrv.rid = d->arch.starting_mp_rid + n;
-       rrv.ps = PAGE_SHIFT;
+       rrv.ps = PAGE_SHIFT;    // only used at domain creation
        rrv.ve = 0;
        /* Mangle metaphysical rid */
        rrv.rrval = vmMangleRID(rrv.rrval);
@@ -254,7 +254,7 @@ int set_one_rr(unsigned long rr, unsigne
        memrrv.rrval = rrv.rrval;
        newrrv.rid = newrid;
        newrrv.ve = 1;  // VHPT now enabled for region 7!!
-       newrrv.ps = PAGE_SHIFT;
+       newrrv.ps = v->arch.vhpt_pg_shift;
 
        if (rreg == 0) {
                v->arch.metaphysical_saved_rr0 = vmMangleRID(newrrv.rrval);
@@ -288,7 +288,7 @@ void init_all_rr(struct vcpu *v)
 
        rrv.rrval = 0;
        //rrv.rrval = v->domain->arch.metaphysical_rr0;
-       rrv.ps = PAGE_SHIFT;
+       rrv.ps = v->arch.vhpt_pg_shift;
        rrv.ve = 1;
 if (!v->vcpu_info) { panic("Stopping in init_all_rr\n"); }
        VCPU(v,rrs[0]) = -1;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/vcpu.c  Thu Aug 16 11:33:27 2007 +0200
@@ -1697,7 +1697,7 @@ IA64FAULT vcpu_translate(VCPU * vcpu, u6
                } else {
                        *pteval = (address & _PAGE_PPN_MASK) |
                                __DIRTY_BITS | _PAGE_PL_PRIV | _PAGE_AR_RWX;
-                       *itir = PAGE_SHIFT << 2;
+                       *itir = vcpu->arch.vhpt_pg_shift << 2;
                        perfc_incr(phys_translate);
                        return IA64_NO_FAULT;
                }
@@ -2290,13 +2290,29 @@ IA64FAULT vcpu_set_dtr(VCPU * vcpu, u64 
  VCPU translation cache access routines
 **************************************************************************/
 
+static void
+vcpu_rebuild_vhpt(VCPU * vcpu, u64 ps)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+       printk("vhpt rebuild: using page_shift %d\n", (int)ps);
+       vcpu->arch.vhpt_pg_shift = ps;
+       vcpu_purge_tr_entry(&PSCBX(vcpu, dtlb));
+       vcpu_purge_tr_entry(&PSCBX(vcpu, itlb));
+       local_vhpt_flush();
+       load_region_regs(vcpu);
+#else
+       panic_domain(NULL, "domain trying to use smaller page size!\n");
+#endif
+}
+
 void
 vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, u64 vaddr, u64 pte,
                  u64 mp_pte, u64 itir, struct p2m_entry *entry)
 {
        ia64_itir_t _itir = {.itir = itir};
        unsigned long psr;
-       unsigned long ps = (vcpu->domain == dom0) ? _itir.ps : PAGE_SHIFT;
+       unsigned long ps = (vcpu->domain == dom0) ? _itir.ps :
+                                                   vcpu->arch.vhpt_pg_shift;
 
        check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);
 
@@ -2305,7 +2321,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
                panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use "
                             "smaller page size!\n");
 
-       BUG_ON(_itir.ps > PAGE_SHIFT);
+       BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift);
        vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
        psr = ia64_clear_ic();
        pte &= ~(_PAGE_RV2 | _PAGE_RV1);        // Mask out the reserved bits.
@@ -2318,7 +2334,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
                // addresses never get flushed.  More work needed if this
                // ever happens.
 //printk("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
-               if (_itir.ps > PAGE_SHIFT)
+               if (_itir.ps > vcpu->arch.vhpt_pg_shift)
                        vhpt_multiple_insert(vaddr, pte, _itir.itir);
                else
                        vhpt_insert(vaddr, pte, _itir.itir);
@@ -2326,7 +2342,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
        // even if domain pagesize is larger than PAGE_SIZE, just put
        // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
        else {
-               _itir.ps = PAGE_SHIFT;
+               _itir.ps = vcpu->arch.vhpt_pg_shift;
                vhpt_insert(vaddr, pte, _itir.itir);
        }
 }
@@ -2338,12 +2354,11 @@ IA64FAULT vcpu_itc_d(VCPU * vcpu, u64 pt
        struct p2m_entry entry;
        ia64_itir_t _itir = {.itir = itir};
 
-       if (_itir.ps < PAGE_SHIFT)
-               panic_domain(NULL, "vcpu_itc_d: domain trying to use "
-                            "smaller page size!\n");
+       if (_itir.ps < vcpu->arch.vhpt_pg_shift)
+               vcpu_rebuild_vhpt(vcpu, _itir.ps);
 
  again:
-       //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+       //itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
        pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
        if (!pteval)
                return IA64_ILLOP_FAULT;
@@ -2367,11 +2382,11 @@ IA64FAULT vcpu_itc_i(VCPU * vcpu, u64 pt
        struct p2m_entry entry;
        ia64_itir_t _itir = {.itir = itir};
 
-       if (_itir.ps < PAGE_SHIFT)
-               panic_domain(NULL, "vcpu_itc_i: domain trying to use "
-                            "smaller page size!\n");
+       if (_itir.ps < vcpu->arch.vhpt_pg_shift)
+               vcpu_rebuild_vhpt(vcpu, _itir.ps);
+
       again:
-       //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+       //itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
        pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
        if (!pteval)
                return IA64_ILLOP_FAULT;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c  Thu Aug 16 11:33:27 2007 +0200
@@ -88,15 +88,16 @@ void vhpt_multiple_insert(unsigned long 
 void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                           unsigned long itir)
 {
+       unsigned char ps = current->arch.vhpt_pg_shift;
        ia64_itir_t _itir = {.itir = itir};
        unsigned long mask = (1L << _itir.ps) - 1;
        int i;
 
-       if (_itir.ps-PAGE_SHIFT > 10 && !running_on_sim) {
+       if (_itir.ps-ps > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
-       if (_itir.ps-PAGE_SHIFT > 2) {
+       if (_itir.ps-ps > 2) {
                // FIXME: Should add counter here to see how often this
                //  happens (e.g. for 16MB pages!) and determine if it
                //  is a performance problem.  On a quick look, it takes
@@ -111,9 +112,9 @@ void vhpt_multiple_insert(unsigned long 
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
-       for (i = 1L << (_itir.ps-PAGE_SHIFT); i > 0; i--) {
+       for (i = 1L << (_itir.ps-ps); i > 0; i--) {
                vhpt_insert(vaddr, pte, _itir.itir);
-               vaddr += PAGE_SIZE;
+               vaddr += (1L << ps);
        }
 }
 
@@ -291,6 +292,7 @@ __flush_vhpt_range(unsigned long vhpt_ma
 __flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
 {
        void *vhpt_base = __va(vhpt_maddr);
+       u64 pgsz = 1L << current->arch.vhpt_pg_shift;
 
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
@@ -298,8 +300,8 @@ __flush_vhpt_range(unsigned long vhpt_ma
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
-               addr_range -= PAGE_SIZE;
-               vadr += PAGE_SIZE;
+               addr_range -= pgsz;
+               vadr += pgsz;
        }
 }
 
@@ -362,7 +364,8 @@ void domain_flush_vtlb_range (struct dom
        // ptc.ga has release semantics.
 
        /* ptc.ga  */
-       platform_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
+       platform_global_tlb_purge(vadr, vadr + addr_range,
+                                 current->arch.vhpt_pg_shift);
        perfc_incr(domain_flush_vtlb_range);
 }
 
@@ -381,6 +384,7 @@ __domain_flush_vtlb_track_entry(struct d
        int cpu;
        int vcpu;
        int local_purge = 1;
+       unsigned char ps = current->arch.vhpt_pg_shift;
        
        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
@@ -413,7 +417,7 @@ __domain_flush_vtlb_track_entry(struct d
                                continue;
 
                        /* Invalidate VHPT entries.  */
-                       vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
+                       vcpu_flush_vhpt_range(v, vaddr, 1L << ps);
 
                        /*
                         * current->processor == v->processor
@@ -427,7 +431,7 @@ __domain_flush_vtlb_track_entry(struct d
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries.  */
-                       cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
+                       cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);
 
                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
@@ -436,12 +440,11 @@ __domain_flush_vtlb_track_entry(struct d
 
        /* ptc.ga  */
        if (local_purge) {
-               ia64_ptcl(vaddr, PAGE_SHIFT << 2);
+               ia64_ptcl(vaddr, ps << 2);
                perfc_incr(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics. */
-               platform_global_tlb_purge(vaddr, vaddr + PAGE_SIZE,
-                                         PAGE_SHIFT);
+               platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
                perfc_incr(domain_flush_vtlb_global);
        }
 
diff -r 6b0c965e95a6 -r 2f58face717c xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/include/asm-ia64/domain.h     Thu Aug 16 11:33:27 2007 +0200
@@ -246,6 +246,7 @@ struct arch_vcpu {
 #define XEN_IA64_PKR_IN_USE    0x1             /* If psr.pk = 1 was set. */
     unsigned char pkr_flags;
 
+    unsigned char       vhpt_pg_shift;         /* PAGE_SHIFT or less */
 #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
     PTA                 pta;
     unsigned long       vhpt_maddr;
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel