[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] xen-unstable, winxp32 very poor performance on AMD FX-8150, I bisected and changeset is 24770:7f79475d3de7



On Nov 1, 2012, at 1:00 PM, Tim Deegan <tim@xxxxxxx> wrote:

> Hi,
> 
> At 14:59 +0100 on 22 Oct (1350917960), Tim Deegan wrote:
>> At 19:21 +0200 on 20 Oct (1350760876), Peter Maloney wrote:
>>> The change was 8 months ago
>>> 
>>> changeset:   24770:7f79475d3de7
>>> user:        Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>>> date:        Fri Feb 10 16:07:07 2012 +0000
>>> summary:     x86/mm: Make p2m lookups fully synchronized wrt modifications
>> 
>> This change was bad for performnace across the board and most of it has
>> since been either reverted or amended, but clearly we missed something
>> here. 
>> 
>> It's interesting that Win8 isn't slowed down.  I wonder whether that's to
>> do with the way it drives the VGA card -- IIRC it uses a generic VESA
>> driver rather than a Cirrus one. 
> 
> In fact this is to do with the APIC.  On my test system, a busy 2-vcpu
> VM is making about 300k/s accesses to the APIC TPR.  These accesses are
> all trapped and emulated by Xen, and that emulation has got more
> expensive as part of this change.
> 
> Later Windows OSes have a feature called 'lazy IRQL' which makes those
> accesses go away, but sadly that's not been done for WinXP.  On modern
> Intel CPUs, the hardware acceleration for TPR accesses works for XP; on
> AMD it requires the OS to use 'MOV reg32, CR8' to access the TPR instead
> of MMIO, which XP is clearly not doing. :(
> 
> Peter: if you have the option, you might find that installing the PV
> drivers that ship with Citrix XenServer 6.0 makes things work better.
> 
> Andres: even though this load of APIC emulations is pretty extreme, it's
> surprising that the VM runs faster on shadow pagetables!  Any ideas for
> where this slowdown is coming from?

Not any immediate ideas without profiling.

However, most callers of hvmemul_do_io pass a stub zero ram_gpa address. We 
might be madly hitting the p2m locks for no reason there.

How about the following patch, Peter, Tim?
diff -r 5171750d133e xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -60,24 +60,28 @@ static int hvmemul_do_io(
     ioreq_t *p = get_ioreq(curr);
     unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
     p2m_type_t p2mt;
-    struct page_info *ram_page;
+    struct page_info *ram_page = NULL;
     int rc;
 
     /* Check for paged out page */
-    ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE);
-    if ( p2m_is_paging(p2mt) )
+    if ( ram_gpa != INVALID_MFN )
     {
-        if ( ram_page )
-            put_page(ram_page);
-        p2m_mem_paging_populate(curr->domain, ram_gfn);
-        return X86EMUL_RETRY;
-    }
-    if ( p2m_is_shared(p2mt) )
-    {
-        if ( ram_page )
-            put_page(ram_page);
-        return X86EMUL_RETRY;
-    }
+        ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, 
P2M_UNSHARE);
+        if ( p2m_is_paging(p2mt) )
+        {
+            if ( ram_page )
+                put_page(ram_page);
+            p2m_mem_paging_populate(curr->domain, ram_gfn);
+            return X86EMUL_RETRY;
+        }
+        if ( p2m_is_shared(p2mt) )
+        {
+            if ( ram_page )
+                put_page(ram_page);
+            return X86EMUL_RETRY;
+        }
+    } else
+        value = 0; /* for pvalue */
 
     /*
      * Weird-sized accesses have undefined behaviour: we discard writes
@@ -455,7 +459,7 @@ static int __hvmemul_read(
             return X86EMUL_UNHANDLEABLE;
         gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off);
         if ( (off + bytes) <= PAGE_SIZE )
-            return hvmemul_do_mmio(gpa, &reps, bytes, 0,
+            return hvmemul_do_mmio(gpa, &reps, bytes, INVALID_MFN,
                                    IOREQ_READ, 0, p_data);
     }
 
@@ -480,7 +484,8 @@ static int __hvmemul_read(
             addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt);
         if ( rc != X86EMUL_OKAY )
             return rc;
-        return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data);
+        return hvmemul_do_mmio(gpa, &reps, bytes, INVALID_MFN,
+                                IOREQ_READ, 0, p_data);
     case HVMCOPY_gfn_paged_out:
         return X86EMUL_RETRY;
     case HVMCOPY_gfn_shared:
@@ -552,7 +557,7 @@ static int hvmemul_write(
         unsigned int off = addr & (PAGE_SIZE - 1);
         gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off);
         if ( (off + bytes) <= PAGE_SIZE )
-            return hvmemul_do_mmio(gpa, &reps, bytes, 0,
+            return hvmemul_do_mmio(gpa, &reps, bytes, INVALID_MFN,
                                    IOREQ_WRITE, 0, p_data);
     }
 
@@ -573,7 +578,7 @@ static int hvmemul_write(
             addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt);
         if ( rc != X86EMUL_OKAY )
             return rc;
-        return hvmemul_do_mmio(gpa, &reps, bytes, 0,
+        return hvmemul_do_mmio(gpa, &reps, bytes, INVALID_MFN,
                                IOREQ_WRITE, 0, p_data);
     case HVMCOPY_gfn_paged_out:
         return X86EMUL_RETRY;
@@ -804,7 +809,7 @@ static int hvmemul_read_io(
 {
     unsigned long reps = 1;
     *val = 0;
-    return hvmemul_do_pio(port, &reps, bytes, 0, IOREQ_READ, 0, val);
+    return hvmemul_do_pio(port, &reps, bytes, INVALID_MFN, IOREQ_READ, 0, val);
 }
 
 static int hvmemul_write_io(
@@ -814,7 +819,7 @@ static int hvmemul_write_io(
     struct x86_emulate_ctxt *ctxt)
 {
     unsigned long reps = 1;
-    return hvmemul_do_pio(port, &reps, bytes, 0, IOREQ_WRITE, 0, &val);
+    return hvmemul_do_pio(port, &reps, bytes, INVALID_MFN, IOREQ_WRITE, 0, 
&val);
 }
 
 static int hvmemul_read_cr(
diff -r 5171750d133e xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -231,7 +231,7 @@ int handle_pio(uint16_t port, int size, 
     if ( dir == IOREQ_WRITE )
         data = guest_cpu_user_regs()->eax;
 
-    rc = hvmemul_do_pio(port, &reps, size, 0, dir, 0, &data);
+    rc = hvmemul_do_pio(port, &reps, size, INVALID_MFN, dir, 0, &data);
 
     switch ( rc )
     {



> 
> Cheers,
> 
> Tim.


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.