Hi,
this is a patch to enhance the performance of a BS2000 domU. I've changed the
following:
- enable some fast_reflects for big-endian guests
- even with a 4k VHPT, use page sizes up to PAGE_SIZE in the entries if the
guest inserts larger TC entries
- added KEY_PERMISSION_VECTOR support (the fault is reflected to the guest)
BS2000 boot was about 10% faster with these changes.
Juergen
--
Juergen Gross Principal Developer
IP SW OS6 Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com
D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxx
# Node ID 15d4ba6c29c4949c0fa88f24b68ab88bd352cb52
# Parent 1668a62e1c1fff8dc8d552c4fb459ecb19663157
performance enhancement for big-endian, 4k-pages, protection keys
Signed-off-by: Juergen Gross juergen.gross@xxxxxxxxxxxxxxxxxxx
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/faults.c Fri Sep 14 13:49:10 2007 +0200
@@ -594,6 +594,9 @@ ia64_handle_reflection(unsigned long ifa
check_lazy_cover = 1;
vector = IA64_PAGE_NOT_PRESENT_VECTOR;
break;
+ case 21:
+ vector = IA64_KEY_PERMISSION_VECTOR;
+ break;
case 22:
vector = IA64_INST_ACCESS_RIGHTS_VECTOR;
break;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/hyperprivop.S Fri Sep 14 13:49:10 2007 +0200
@@ -223,9 +223,6 @@ ENTRY(hyper_ssm_i)
// give up for now if: ipsr.be==1, ipsr.pp==1
mov r30=cr.ipsr
mov r29=cr.iip;;
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.sptk.many dispatch_break_fault ;;
extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
cmp.ne p7,p0=r21,r0
(p7) br.sptk.many dispatch_break_fault ;;
@@ -268,7 +265,7 @@ ENTRY(hyper_ssm_i)
// FOR SSM_I ONLY, also turn on psr.i and psr.ic
movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC)
// movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
- movl r27=~(IA64_PSR_BE|IA64_PSR_BN);;
+ movl r27=~IA64_PSR_BN;;
or r30=r30,r28;;
and r30=r30,r27;;
mov r20=1
@@ -361,10 +358,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
cmp.ltu p6,p0=r26,r27
(p6) br.cond.spnt.few rp;;
mov r17=cr.ipsr;;
- // slow path if: ipsr.be==1, ipsr.pp==1
- extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p6,p0=r21,r0
-(p6) br.cond.spnt.few rp;;
+ // slow path if: ipsr.pp==1
extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
cmp.ne p6,p0=r21,r0
(p6) br.cond.spnt.few rp;;
@@ -453,7 +447,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
(p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
- movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
+ movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
or r17=r17,r28;;
and r17=r17,r27
ld4 r16=[r18];;
@@ -556,9 +550,6 @@ GLOBAL_ENTRY(fast_break_reflect)
#endif
mov r30=cr.ipsr
mov r29=cr.iip;;
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.spnt.few dispatch_break_fault ;;
extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
cmp.ne p7,p0=r21,r0
(p7) br.spnt.few dispatch_break_fault ;;
@@ -633,7 +624,7 @@ ENTRY(fast_reflect)
cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
- movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+ movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
or r30=r30,r28;;
and r30=r30,r27
// also set shared_mem ipsr.i and ipsr.ic appropriately
@@ -744,9 +735,6 @@ GLOBAL_ENTRY(fast_access_reflect)
#endif
mov r30=cr.ipsr
mov r29=cr.iip;;
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.spnt.few dispatch_reflection ;;
extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
cmp.ne p7,p0=r21,r0
(p7) br.spnt.few dispatch_reflection ;;
@@ -794,9 +782,6 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
cmp.eq p7,p0=r21,r0
(p7) br.spnt.few page_fault ;;
// slow path if strange ipsr or isr bits set
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.spnt.few page_fault ;;
extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
cmp.ne p7,p0=r21,r0
(p7) br.spnt.few page_fault ;;
@@ -1068,10 +1053,6 @@ 1:
1:
adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
ld8 r21=[r20];; // r21 = vcr.ipsr
- extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
- // if turning on psr.be, give up for now and do it the slow way
- cmp.ne p7,p0=r22,r0
-(p7) br.spnt.few slow_vcpu_rfi ;;
// if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
and r22=r20,r21
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/ivt.S Fri Sep 14 13:49:10 2007 +0200
@@ -313,7 +313,6 @@ GLOBAL_ENTRY(dispatch_reflection)
adds out1=16,sp
mov out2=cr.isr
mov out3=cr.iim
-// mov out3=cr.itir // TODO: why commented out?
ssm psr.ic | PSR_DEFAULT_BITS
;;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/mm.c Fri Sep 14 13:49:10 2007 +0200
@@ -509,25 +509,22 @@ u64 translate_domain_pte(u64 pteval, u64
u64 arflags;
u64 arflags2;
u64 maflags2;
- u64 ps;
pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
// FIXME address had better be pre-validated on insert
mask = ~itir_mask(_itir.itir);
mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
- ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift :
- PAGE_SHIFT;
-
- if (_itir.ps > ps)
- _itir.ps = ps;
+
+ if (_itir.ps > PAGE_SHIFT)
+ _itir.ps = PAGE_SHIFT;
((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */
((ia64_itir_t*)itir)->ps = _itir.ps; /* Overwrite ps part! */
pteval2 = lookup_domain_mpa(d, mpaddr, entry);
- if (ps < PAGE_SHIFT)
- pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << ps) - 1);
+ if (_itir.ps < PAGE_SHIFT)
+ pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << _itir.ps) - 1);
/* Check access rights. */
arflags = pteval & _PAGE_AR_MASK;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/vcpu.c Fri Sep 14 13:49:10 2007 +0200
@@ -1817,7 +1817,7 @@ IA64FAULT vcpu_tpa(VCPU * vcpu, u64 vadr
IA64FAULT vcpu_tak(VCPU * vcpu, u64 vadr, u64 * key)
{
- u64 pteval, itir, mask, iha;
+ u64 pteval, itir, iha;
IA64FAULT fault;
fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
@@ -2319,8 +2319,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD,
{
ia64_itir_t _itir = {.itir = itir};
unsigned long psr;
- unsigned long ps = (vcpu->domain == dom0) ? _itir.ps :
- vcpu->arch.vhpt_pg_shift;
check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);
@@ -2329,12 +2327,12 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD,
panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use "
"smaller page size!\n");
- BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift);
+ BUG_ON(_itir.ps > PAGE_SHIFT);
vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
psr = ia64_clear_ic();
pte &= ~(_PAGE_RV2 | _PAGE_RV1); // Mask out the reserved bits.
// FIXME: look for bigger mappings
- ia64_itc(IorD, vaddr, pte, IA64_ITIR_PS_KEY(ps, _itir.key));
+ ia64_itc(IorD, vaddr, pte, _itir.itir);
ia64_set_psr(psr);
// ia64_srlz_i(); // no srls req'd, will rfi later
if (vcpu->domain == dom0 && ((vaddr >> 61) == 7)) {
@@ -2350,7 +2348,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD,
// even if domain pagesize is larger than PAGE_SIZE, just put
// PAGE_SIZE mapping in the vhpt for now, else purging is complicated
else {
- _itir.ps = vcpu->arch.vhpt_pg_shift;
vhpt_insert(vaddr, pte, _itir.itir);
}
}
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c Fri Sep 14 13:49:10 2007 +0200
@@ -293,15 +293,18 @@ __flush_vhpt_range(unsigned long vhpt_ma
{
void *vhpt_base = __va(vhpt_maddr);
u64 pgsz = 1L << current->arch.vhpt_pg_shift;
-
+ u64 purge_addr = vadr & ~(PAGE_SIZE - 1); /* round start down to a PAGE_SIZE boundary */
+
+ addr_range += vadr - purge_addr; /* extend length by the amount the start moved down */
+ addr_range = (addr_range + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); /* round length up to whole pages */
while ((long)addr_range > 0) {
/* Get the VHPT entry. */
- unsigned int off = ia64_thash(vadr) -
+ unsigned int off = ia64_thash(purge_addr) -
__va_ul(vcpu_vhpt_maddr(current));
struct vhpt_lf_entry *v = vhpt_base + off;
v->ti_tag = INVALID_TI_TAG;
addr_range -= pgsz;
- vadr += pgsz;
+ purge_addr += pgsz;
}
}
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|