[Xen-devel] [PATCH v2 2/2] x86/vm_event: Allow overwriting Xen's i-cache used for emulation



When emulating instructions, Xen's emulator maintains a small i-cache fetched
from guest memory. This patch extends the vm_event interface to allow
overwriting this i-cache via a buffer returned in the vm_event response.

When responding to a SOFTWARE_BREAKPOINT event (INT3), the monitor subscriber
normally has to remove the INT3 from memory, singlestep, then place the INT3
back to allow the guest to continue execution. This routine, however, is
susceptible to a race condition on multi-vCPU guests: while the INT3 is
temporarily removed, another vCPU may execute through the breakpoint site
unnoticed. By allowing the subscriber to return the i-cache to be used for
emulation, it can side-step the problem entirely by returning a clean buffer
without the INT3 present.
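
For illustration, here is a minimal sketch of what a subscriber's INT3
handler could look like with this change. The ring-buffer plumbing is
omitted, and get_clean_insn() is a hypothetical subscriber-side helper
that fetches the original bytes at the breakpoint site from the
subscriber's own records:

    static void handle_int3(const vm_event_request_t *req,
                            vm_event_response_t *rsp)
    {
        rsp->version = VM_EVENT_INTERFACE_VERSION;
        rsp->vcpu_id = req->vcpu_id;
        rsp->reason  = req->reason;

        /*
         * Have Xen emulate the current instruction from the buffer we
         * provide instead of fetching it from guest memory, so the INT3
         * never has to be removed.
         */
        rsp->flags = VM_EVENT_FLAG_VCPU_PAUSED | VM_EVENT_FLAG_EMULATE |
                     VM_EVENT_FLAG_SET_EMUL_INSN_DATA;

        /* The buffer has to be completely filled (see vm_event.h). */
        get_clean_insn(req->data.regs.x86.rip, rsp->data.emul.insn.data,
                       sizeof(rsp->data.emul.insn.data));
    }

The INT3 can thus stay in guest memory the whole time, and other vCPUs
hitting it concurrently simply generate further SOFTWARE_BREAKPOINT events
instead of racing with its removal.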

As part of this patch we rename hvm_mem_access_emulate_one to
hvm_emulate_one_vm_event to better reflect that it is now used in various
vm_event scenarios, not just in response to mem_access events.
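
For reference, the response flags select the emulation kind as follows
(mirroring the hvm_do_resume() hunk below):

    VM_EVENT_FLAG_SET_EMUL_READ_DATA -> EMUL_KIND_SET_CONTEXT_DATA
    VM_EVENT_FLAG_EMULATE_NOWRITE    -> EMUL_KIND_NOWRITE
    VM_EVENT_FLAG_SET_EMUL_INSN_DATA -> EMUL_KIND_SET_CONTEXT_INSN
    (none of the above)              -> EMUL_KIND_NORMAL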

Signed-off-by: Tamas K Lengyel <tamas.lengyel@xxxxxxxxxxxx>
---
Cc: Paul Durrant <paul.durrant@xxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxx>

v2: rework hvm_mem_access_emulate_one switch statement
    add BUILD_BUG_ON to ensure internal and vm_event buffer sizes match

Note: this patch has now been fully tested and works as intended
---
 xen/arch/x86/hvm/emulate.c        | 37 ++++++++++++++++++++++++-------------
 xen/arch/x86/hvm/hvm.c            |  9 ++++++---
 xen/arch/x86/hvm/vmx/vmx.c        |  1 +
 xen/arch/x86/vm_event.c           |  9 ++++++++-
 xen/common/vm_event.c             |  1 -
 xen/include/asm-x86/hvm/emulate.h |  8 +++++---
 xen/include/asm-x86/vm_event.h    |  5 ++++-
 xen/include/public/vm_event.h     | 16 +++++++++++++++-
 8 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index cc25676..acae998 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -76,9 +76,9 @@ static int set_context_data(void *buffer, unsigned int size)
     if ( curr->arch.vm_event )
     {
         unsigned int safe_size =
-            min(size, curr->arch.vm_event->emul_read_data.size);
+            min(size, curr->arch.vm_event->emul.read.size);
 
-        memcpy(buffer, curr->arch.vm_event->emul_read_data.data, safe_size);
+        memcpy(buffer, curr->arch.vm_event->emul.read.data, safe_size);
         memset(buffer + safe_size, 0, size - safe_size);
         return X86EMUL_OKAY;
     }
@@ -827,7 +827,7 @@ static int hvmemul_read(
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
 
-    if ( unlikely(hvmemul_ctxt->set_context) )
+    if ( unlikely(hvmemul_ctxt->set_context_data) )
         return set_context_data(p_data, bytes);
 
     return __hvmemul_read(
@@ -1029,7 +1029,7 @@ static int hvmemul_cmpxchg(
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
 
-    if ( unlikely(hvmemul_ctxt->set_context) )
+    if ( unlikely(hvmemul_ctxt->set_context_data) )
     {
         int rc = set_context_data(p_new, bytes);
 
@@ -1122,7 +1122,7 @@ static int hvmemul_rep_outs(
     p2m_type_t p2mt;
     int rc;
 
-    if ( unlikely(hvmemul_ctxt->set_context) )
+    if ( unlikely(hvmemul_ctxt->set_context_data) )
         return hvmemul_rep_outs_set_context(src_seg, src_offset, dst_port,
                                             bytes_per_rep, reps, ctxt);
 
@@ -1264,7 +1264,7 @@ static int hvmemul_rep_movs(
     if ( buf == NULL )
         return X86EMUL_UNHANDLEABLE;
 
-    if ( unlikely(hvmemul_ctxt->set_context) )
+    if ( unlikely(hvmemul_ctxt->set_context_data) )
     {
         rc = set_context_data(buf, bytes);
 
@@ -1470,7 +1470,7 @@ static int hvmemul_read_io(
 
     *val = 0;
 
-    if ( unlikely(hvmemul_ctxt->set_context) )
+    if ( unlikely(hvmemul_ctxt->set_context_data) )
         return set_context_data(val, bytes);
 
     return hvmemul_do_pio_buffer(port, bytes, IOREQ_READ, val);
@@ -1793,7 +1793,17 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
         pfec |= PFEC_user_mode;
 
     hvmemul_ctxt->insn_buf_eip = regs->eip;
-    if ( !vio->mmio_insn_bytes )
+
+    if ( unlikely(hvmemul_ctxt->set_context_insn) && curr->arch.vm_event )
+    {
+        BUILD_BUG_ON(sizeof(hvmemul_ctxt->insn_buf) !=
+                     sizeof(curr->arch.vm_event->emul.insn));
+
+        hvmemul_ctxt->insn_buf_bytes = sizeof(curr->arch.vm_event->emul.insn);
+        memcpy(hvmemul_ctxt->insn_buf, &curr->arch.vm_event->emul.insn,
+               hvmemul_ctxt->insn_buf_bytes);
+    }
+    else if ( !vio->mmio_insn_bytes )
     {
         hvmemul_ctxt->insn_buf_bytes =
             hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
@@ -1931,7 +1941,7 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
     return rc;
 }
 
-void hvm_mem_access_emulate_one(enum emul_kind kind, unsigned int trapnr,
+void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
     unsigned int errcode)
 {
     struct hvm_emulate_ctxt ctx = {{ 0 }};
@@ -1944,11 +1954,11 @@ void hvm_mem_access_emulate_one(enum emul_kind kind, unsigned int trapnr,
     case EMUL_KIND_NOWRITE:
         rc = hvm_emulate_one_no_write(&ctx);
         break;
-    case EMUL_KIND_SET_CONTEXT:
-        ctx.set_context = 1;
-        /* Intentional fall-through. */
     default:
+        ctx.set_context_data = (kind == EMUL_KIND_SET_CONTEXT_DATA);
+        ctx.set_context_insn = (kind == EMUL_KIND_SET_CONTEXT_INSN);
         rc = hvm_emulate_one(&ctx);
+        break;
     }
 
     switch ( rc )
@@ -1983,7 +1993,8 @@ void hvm_emulate_prepare(
     hvmemul_ctxt->ctxt.force_writeback = 1;
     hvmemul_ctxt->seg_reg_accessed = 0;
     hvmemul_ctxt->seg_reg_dirty = 0;
-    hvmemul_ctxt->set_context = 0;
+    hvmemul_ctxt->set_context_data = 0;
+    hvmemul_ctxt->set_context_insn = 0;
     hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
     hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
 }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ca96643..7462794 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -489,13 +489,16 @@ void hvm_do_resume(struct vcpu *v)
 
             if ( v->arch.vm_event->emulate_flags &
                  VM_EVENT_FLAG_SET_EMUL_READ_DATA )
-                kind = EMUL_KIND_SET_CONTEXT;
+                kind = EMUL_KIND_SET_CONTEXT_DATA;
             else if ( v->arch.vm_event->emulate_flags &
                       VM_EVENT_FLAG_EMULATE_NOWRITE )
                 kind = EMUL_KIND_NOWRITE;
+            else if ( v->arch.vm_event->emulate_flags &
+                 VM_EVENT_FLAG_SET_EMUL_INSN_DATA )
+                kind = EMUL_KIND_SET_CONTEXT_INSN;
 
-            hvm_mem_access_emulate_one(kind, TRAP_invalid_op,
-                                       HVM_DELIVER_NO_ERROR_CODE);
+            hvm_emulate_one_vm_event(kind, TRAP_invalid_op,
+                                     HVM_DELIVER_NO_ERROR_CODE);
 
             v->arch.vm_event->emulate_flags = 0;
         }
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2759e6f..d214716 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -57,6 +57,7 @@
 #include <asm/altp2m.h>
 #include <asm/event.h>
 #include <asm/monitor.h>
+#include <asm/vm_event.h>
 #include <public/arch-x86/cpuid.h>
 
 static bool_t __initdata opt_force_ept;
diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
index 343b9c8..ca5d515 100644
--- a/xen/arch/x86/vm_event.c
+++ b/xen/arch/x86/vm_event.c
@@ -209,11 +209,18 @@ void vm_event_emulate_check(struct vcpu *v, vm_event_response_t *rsp)
         if ( p2m_mem_access_emulate_check(v, rsp) )
         {
             if ( rsp->flags & VM_EVENT_FLAG_SET_EMUL_READ_DATA )
-                v->arch.vm_event->emul_read_data = rsp->data.emul_read_data;
+                v->arch.vm_event->emul.read = rsp->data.emul.read;
 
             v->arch.vm_event->emulate_flags = rsp->flags;
         }
         break;
+    case VM_EVENT_REASON_SOFTWARE_BREAKPOINT:
+        if ( rsp->flags & VM_EVENT_FLAG_SET_EMUL_INSN_DATA )
+        {
+            v->arch.vm_event->emul.insn = rsp->data.emul.insn;
+            v->arch.vm_event->emulate_flags = rsp->flags;
+        }
+        break;
     default:
         break;
     };
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 907ab40..d8ee7f3 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -398,7 +398,6 @@ void vm_event_resume(struct domain *d, struct vm_event_domain *ved)
          * In some cases the response type needs extra handling, so here
          * we call the appropriate handlers.
          */
-
         /* Check flags which apply only when the vCPU is paused */
         if ( atomic_read(&v->vm_event_pause_count) )
         {
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 3aabcbe..b52f99e 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -34,20 +34,22 @@ struct hvm_emulate_ctxt {
 
     uint32_t intr_shadow;
 
-    bool_t set_context;
+    bool_t set_context_data;
+    bool_t set_context_insn;
 };
 
 enum emul_kind {
     EMUL_KIND_NORMAL,
     EMUL_KIND_NOWRITE,
-    EMUL_KIND_SET_CONTEXT
+    EMUL_KIND_SET_CONTEXT_DATA,
+    EMUL_KIND_SET_CONTEXT_INSN
 };
 
 int hvm_emulate_one(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 int hvm_emulate_one_no_write(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
-void hvm_mem_access_emulate_one(enum emul_kind kind,
+void hvm_emulate_one_vm_event(enum emul_kind kind,
     unsigned int trapnr,
     unsigned int errcode);
 void hvm_emulate_prepare(
diff --git a/xen/include/asm-x86/vm_event.h b/xen/include/asm-x86/vm_event.h
index ebb5d88..ca73f99 100644
--- a/xen/include/asm-x86/vm_event.h
+++ b/xen/include/asm-x86/vm_event.h
@@ -27,7 +27,10 @@
  */
 struct arch_vm_event {
     uint32_t emulate_flags;
-    struct vm_event_emul_read_data emul_read_data;
+    union {
+        struct vm_event_emul_read_data read;
+        struct vm_event_emul_insn_data insn;
+    } emul;
     struct monitor_write_data write_data;
 };
 
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index f756126..ef62932 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -97,6 +97,13 @@
  * Requires the vCPU to be paused already (synchronous events only).
  */
 #define VM_EVENT_FLAG_SET_REGISTERS      (1 << 8)
+/*
+ * An instruction buffer is sent back to the hypervisor in the event response,
+ * to be used by the emulator in place of its i-cache. This flag is only useful
+ * when combined with VM_EVENT_FLAG_EMULATE and is incompatible with also
+ * setting VM_EVENT_FLAG_EMULATE_NOWRITE or VM_EVENT_FLAG_SET_EMUL_READ_DATA.
+ */
+#define VM_EVENT_FLAG_SET_EMUL_INSN_DATA (1 << 9)
 
 /*
  * Reasons for the vm event request
@@ -265,6 +272,10 @@ struct vm_event_emul_read_data {
     uint8_t  data[sizeof(struct vm_event_regs_x86) - sizeof(uint32_t)];
 };
 
+struct vm_event_emul_insn_data {
+    uint8_t data[16]; /* Has to be completely filled */
+};
+
 typedef struct vm_event_st {
     uint32_t version;   /* VM_EVENT_INTERFACE_VERSION */
     uint32_t flags;     /* VM_EVENT_FLAG_* */
@@ -291,7 +302,10 @@ typedef struct vm_event_st {
             struct vm_event_regs_arm arm;
         } regs;
 
-        struct vm_event_emul_read_data emul_read_data;
+        union {
+            struct vm_event_emul_read_data read;
+            struct vm_event_emul_insn_data insn;
+        } emul;
     } data;
 } vm_event_request_t, vm_event_response_t;
 
-- 
2.9.3

