
[Xen-devel] [PATCH v2 07/10] vm_event: Add vm_event_ng interface



In high-throughput introspection scenarios where many monitor
vm_events are generated, the ring buffer can fill up before the monitor
application gets a chance to handle all the requests, blocking other
vcpus, which must then wait for a ring slot to become available.

This patch adds support for an alternative mechanism to handle
synchronous vm_event requests and responses. Because each synchronous
request pauses the vcpu until the corresponding response is handled,
the request/response pair can be stored in a slotted memory buffer
(one slot per vcpu) shared between the hypervisor and the controlling
domain.
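
Each per-vcpu slot acts as a small state machine; roughly (as
implemented by vm_event_channels_put_request() and
vm_event_channels_get_response() below):

    IDLE   --(Xen stores the request, notifies slot->port)--> SUBMIT
    SUBMIT --(the monitor application stores a response)-->   FINISH
    FINISH --(Xen consumes the response when notified)-->     IDLE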

Signed-off-by: Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>
---
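Note (not part of the commit message): below is a minimal consumer
sketch showing how a monitor application could drive the new interface.
It only uses names introduced by this patch (xc_monitor_ng_enable(),
struct vm_event_slot, the STATE_VM_EVENT_SLOT_* values) plus standard
libxenevtchn calls; error handling, the memory barriers needed between
storing the response and updating the slot state, and any actual event
policy are omitted.

    #include <stdlib.h>
    #include <xenctrl.h>
    #include <xenevtchn.h>
    #include <xen/vm_event.h>

    static int monitor_loop(xc_interface *xch, uint32_t domain_id)
    {
        xenforeignmemory_resource_handle *fres;
        struct vm_event_slot *slots;
        xenevtchn_handle *xce = xenevtchn_open(NULL, 0);
        evtchn_port_t *local_port;
        int num_channels, i, rc;

        rc = xc_monitor_ng_enable(xch, domain_id, &fres, &num_channels,
                                  (void **)&slots);
        if ( rc )
            return rc;

        /* Bind a local port to each per-vcpu channel allocated by Xen. */
        local_port = calloc(num_channels, sizeof(*local_port));
        for ( i = 0; i < num_channels; i++ )
            local_port[i] = xenevtchn_bind_interdomain(xce, domain_id,
                                                       slots[i].port);

        for ( ;; )
        {
            xenevtchn_unmask(xce, xenevtchn_pending(xce));

            for ( i = 0; i < num_channels; i++ )
            {
                struct vm_event_slot *slot = &slots[i];

                if ( slot->state != STATE_VM_EVENT_SLOT_SUBMIT )
                    continue;

                /* <event-specific handling of slot->u.req goes here> */

                slot->u.rsp = (vm_event_response_t) {
                    .version = VM_EVENT_INTERFACE_VERSION,
                    .vcpu_id = slot->u.req.vcpu_id,
                    .reason  = slot->u.req.reason,
                    .flags   = slot->u.req.flags & VM_EVENT_FLAG_VCPU_PAUSED,
                };
                slot->state = STATE_VM_EVENT_SLOT_FINISH;

                /* Kicking the channel makes Xen consume the response and
                 * unpause the vcpu (vm_event_channels_resume()). */
                xenevtchn_notify(xce, local_port[i]);
            }
        }

        /* Teardown (not reached in this sketch): */
        xc_monitor_ng_disable(xch, domain_id, &fres);
        xenevtchn_close(xce);
        free(local_port);
        return 0;
    }
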
 tools/libxc/include/xenctrl.h |   9 +
 tools/libxc/xc_mem_paging.c   |   9 +-
 tools/libxc/xc_memshr.c       |   9 +-
 tools/libxc/xc_monitor.c      |  23 +-
 tools/libxc/xc_private.h      |  12 +-
 tools/libxc/xc_vm_event.c     | 100 ++++++-
 xen/arch/x86/mm.c             |   7 +
 xen/common/vm_event.c         | 595 +++++++++++++++++++++++++++++++++++-------
 xen/include/public/domctl.h   |  10 +-
 xen/include/public/memory.h   |   2 +
 xen/include/public/vm_event.h |  16 ++
 xen/include/xen/vm_event.h    |  11 +-
 12 files changed, 684 insertions(+), 119 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index f3af710..1293b0f 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -128,6 +128,7 @@ enum xc_error_code {
 
 typedef enum xc_error_code xc_error_code;
 
+struct xenforeignmemory_resource_handle;
 
 /*
  *  INITIALIZATION FUNCTIONS
@@ -2007,6 +2008,14 @@ int xc_vm_event_get_version(xc_interface *xch);
 void *xc_monitor_enable(xc_interface *xch, uint32_t domain_id, uint32_t *port);
 int xc_monitor_disable(xc_interface *xch, uint32_t domain_id);
 int xc_monitor_resume(xc_interface *xch, uint32_t domain_id);
+
+/* Monitor NG interface */
+int xc_monitor_ng_enable(xc_interface *xch, uint32_t domain_id,
+                         struct xenforeignmemory_resource_handle **fres,
+                         int *num_channels, void **p_addr);
+int xc_monitor_ng_disable(xc_interface *xch, uint32_t domain_id,
+                          struct xenforeignmemory_resource_handle **fres);
+
 /*
  * Get a bitmap of supported monitor events in the form
  * (1 << XEN_DOMCTL_MONITOR_EVENT_*).
diff --git a/tools/libxc/xc_mem_paging.c b/tools/libxc/xc_mem_paging.c
index a88c0cc..978008a 100644
--- a/tools/libxc/xc_mem_paging.c
+++ b/tools/libxc/xc_mem_paging.c
@@ -49,7 +49,7 @@ int xc_mem_paging_enable(xc_interface *xch, uint32_t domain_id,
     return xc_vm_event_control(xch, domain_id,
                                XEN_VM_EVENT_ENABLE,
                                XEN_VM_EVENT_TYPE_PAGING,
-                               port);
+                               0, port);
 }
 
 int xc_mem_paging_disable(xc_interface *xch, uint32_t domain_id)
@@ -57,15 +57,12 @@ int xc_mem_paging_disable(xc_interface *xch, uint32_t domain_id)
     return xc_vm_event_control(xch, domain_id,
                                XEN_VM_EVENT_DISABLE,
                                XEN_VM_EVENT_TYPE_PAGING,
-                               NULL);
+                               0, NULL);
 }
 
 int xc_mem_paging_resume(xc_interface *xch, uint32_t domain_id)
 {
-    return xc_vm_event_control(xch, domain_id,
-                               XEN_VM_EVENT_RESUME,
-                               XEN_VM_EVENT_TYPE_PAGING,
-                               NULL);
+    return xc_vm_event_resume(xch, domain_id, XEN_VM_EVENT_TYPE_PAGING, 0);
 }
 
 int xc_mem_paging_nominate(xc_interface *xch, uint32_t domain_id, uint64_t gfn)
diff --git a/tools/libxc/xc_memshr.c b/tools/libxc/xc_memshr.c
index 1c4a706..44d4f23 100644
--- a/tools/libxc/xc_memshr.c
+++ b/tools/libxc/xc_memshr.c
@@ -54,7 +54,7 @@ int xc_memshr_ring_enable(xc_interface *xch,
     return xc_vm_event_control(xch, domid,
                                XEN_VM_EVENT_ENABLE,
                                XEN_VM_EVENT_TYPE_SHARING,
-                               port);
+                               0, port);
 }
 
 int xc_memshr_ring_disable(xc_interface *xch, 
@@ -63,7 +63,7 @@ int xc_memshr_ring_disable(xc_interface *xch,
     return xc_vm_event_control(xch, domid,
                                XEN_VM_EVENT_DISABLE,
                                XEN_VM_EVENT_TYPE_SHARING,
-                               NULL);
+                               0, NULL);
 }
 
 static int xc_memshr_memop(xc_interface *xch, uint32_t domid,
@@ -203,10 +203,7 @@ int xc_memshr_range_share(xc_interface *xch,
 int xc_memshr_domain_resume(xc_interface *xch,
                             uint32_t domid)
 {
-    return xc_vm_event_control(xch, domid,
-                               XEN_VM_EVENT_RESUME,
-                               XEN_VM_EVENT_TYPE_SHARING,
-                               NULL);
+    return xc_vm_event_resume(xch, domid, XEN_VM_EVENT_TYPE_SHARING, 0);
 }
 
 int xc_memshr_debug_gfn(xc_interface *xch,
diff --git a/tools/libxc/xc_monitor.c b/tools/libxc/xc_monitor.c
index f05b53d..d8d62c4 100644
--- a/tools/libxc/xc_monitor.c
+++ b/tools/libxc/xc_monitor.c
@@ -33,15 +33,12 @@ int xc_monitor_disable(xc_interface *xch, uint32_t domain_id)
     return xc_vm_event_control(xch, domain_id,
                                XEN_VM_EVENT_DISABLE,
                                XEN_VM_EVENT_TYPE_MONITOR,
-                               NULL);
+                               0, NULL);
 }
 
 int xc_monitor_resume(xc_interface *xch, uint32_t domain_id)
 {
-    return xc_vm_event_control(xch, domain_id,
-                               XEN_VM_EVENT_RESUME,
-                               XEN_VM_EVENT_TYPE_MONITOR,
-                               NULL);
+    return xc_vm_event_resume(xch, domain_id, XEN_VM_EVENT_TYPE_MONITOR, 0);
 }
 
 int xc_monitor_get_capabilities(xc_interface *xch, uint32_t domain_id,
@@ -246,6 +243,22 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
     return do_domctl(xch, &domctl);
 }
 
+int xc_monitor_ng_enable(xc_interface *xch, uint32_t domain_id,
+                         xenforeignmemory_resource_handle **fres,
+                         int *num_channels, void **p_addr)
+{
+    return xc_vm_event_ng_enable(xch, domain_id, XEN_VM_EVENT_TYPE_MONITOR,
+                                 fres, num_channels, p_addr);
+}
+
+
+int xc_monitor_ng_disable(xc_interface *xch, uint32_t domain_id,
+                          xenforeignmemory_resource_handle **fres)
+{
+    return xc_vm_event_ng_disable(xch, domain_id, XEN_VM_EVENT_TYPE_MONITOR,
+                                  fres);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index e4f7c3a..9cd6069 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -412,13 +412,23 @@ int xc_ffs64(uint64_t x);
  * vm_event operations. Internal use only.
  */
 int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned int op,
-                        unsigned int type, uint32_t *port);
+                        unsigned int type, unsigned int flags, uint32_t *port);
+int xc_vm_event_resume(xc_interface *xch, uint32_t domain_id, unsigned int type,
+                       unsigned int flags);
 /*
  * Enables vm_event and returns the mapped ring page indicated by type.
  * type can be XEN_VM_EVENT_TYPE_(PAGING/MONITOR/SHARING)
  */
 void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type,
                          uint32_t *port);
+/*
+ * Enables/Disables vm_event using the new interface.
+ */
+int xc_vm_event_ng_enable(xc_interface *xch, uint32_t domain_id, int type,
+                          xenforeignmemory_resource_handle **fres,
+                          int *num_channels, void **p_addr);
+int xc_vm_event_ng_disable(xc_interface *xch, uint32_t domain_id, int type,
+                          xenforeignmemory_resource_handle **fres);
 
 int do_dm_op(xc_interface *xch, uint32_t domid, unsigned int nr_bufs, ...);
 
diff --git a/tools/libxc/xc_vm_event.c b/tools/libxc/xc_vm_event.c
index 044bf71..d070e64 100644
--- a/tools/libxc/xc_vm_event.c
+++ b/tools/libxc/xc_vm_event.c
@@ -22,8 +22,12 @@
 
 #include "xc_private.h"
 
+#ifndef PFN_UP
+#define PFN_UP(x)     (((x) + XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT)
+#endif /* PFN_UP */
+
 int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned int op,
-                        unsigned int type, uint32_t *port)
+                        unsigned int type, unsigned int flags, uint32_t *port)
 {
     DECLARE_DOMCTL;
     int rc;
@@ -32,6 +36,7 @@ int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned int op,
     domctl.domain = domain_id;
     domctl.u.vm_event_op.op = op;
     domctl.u.vm_event_op.type = type;
+    domctl.u.vm_event_op.flags = flags;
 
     rc = do_domctl(xch, &domctl);
     if ( !rc && port )
@@ -113,7 +118,7 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type,
         goto out;
     }
 
-    rc1 = xc_vm_event_control(xch, domain_id, XEN_VM_EVENT_ENABLE, type, port);
+    rc1 = xc_vm_event_control(xch, domain_id, XEN_VM_EVENT_ENABLE, type, 0, port);
     if ( rc1 != 0 )
     {
         PERROR("Failed to enable vm_event\n");
@@ -164,6 +169,97 @@ int xc_vm_event_get_version(xc_interface *xch)
     return rc;
 }
 
+int xc_vm_event_resume(xc_interface *xch, uint32_t domain_id,
+                       unsigned int type, unsigned int flags)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_vm_event_op;
+    domctl.domain = domain_id;
+    domctl.u.vm_event_op.op = XEN_VM_EVENT_RESUME;
+    domctl.u.vm_event_op.type = type;
+    domctl.u.vm_event_op.flags = flags;
+    domctl.u.vm_event_op.u.resume.vcpu_id = 0;
+
+    return do_domctl(xch, &domctl);
+}
+
+int xc_vm_event_ng_enable(xc_interface *xch, uint32_t domain_id, int type,
+                          xenforeignmemory_resource_handle **fres,
+                          int *num_channels, void **p_addr)
+{
+    int rc1, rc2;
+    xc_dominfo_t info;
+    unsigned long nr_frames;
+
+    if ( !fres || !num_channels || !p_addr )
+        return -EINVAL;
+
+    /* Get the number of vcpus */
+    if ( xc_domain_getinfo(xch, domain_id, 1, &info) != 1 ||
+         info.domid != domain_id )
+    {
+        PERROR("xc_domain_getinfo failed.\n");
+        return -ESRCH;
+    }
+
+    *num_channels = info.max_vcpu_id + 1;
+
+    rc1 = xc_domain_pause(xch, domain_id);
+    if ( rc1 )
+    {
+        PERROR("Unable to pause domain\n");
+        return rc1;
+    }
+
+    rc1 = xc_vm_event_control(xch, domain_id, XEN_VM_EVENT_ENABLE,
+                              type, XEN_VM_EVENT_FLAGS_NG_OP, NULL);
+    if ( rc1 )
+    {
+        PERROR("Failed to enable vm_event\n");
+        goto out;
+    }
+
+    nr_frames = PFN_UP(*num_channels * sizeof(struct vm_event_slot));
+
+    *fres = xenforeignmemory_map_resource(xch->fmem, domain_id,
+                                          XENMEM_resource_vm_event,
+                                          type, 0,
+                                          nr_frames, p_addr,
+                                          PROT_READ | PROT_WRITE, 0);
+    if ( !*fres )
+    {
+        xc_vm_event_control(xch, domain_id, XEN_VM_EVENT_DISABLE,
+                            type, XEN_VM_EVENT_FLAGS_NG_OP, NULL);
+        ERROR("Failed to map vm_event resource");
+        rc1 = -errno;
+        goto out;
+    }
+
+out:
+    rc2 = xc_domain_unpause(xch, domain_id);
+    if ( rc1 || rc2 )
+    {
+        if ( rc2 )
+            PERROR("Unable to pause domain\n");
+
+        if ( rc1 == 0 )
+            rc1 = rc2;
+    }
+
+    return rc1;
+}
+
+int xc_vm_event_ng_disable(xc_interface *xch, uint32_t domain_id, int type,
+                           xenforeignmemory_resource_handle **fres)
+{
+    xenforeignmemory_unmap_resource(xch->fmem, *fres);
+    *fres = NULL;
+
+    return xc_vm_event_control(xch, domain_id, XEN_VM_EVENT_DISABLE,
+                              type, XEN_VM_EVENT_FLAGS_NG_OP, NULL);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index df2c013..768df4f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -119,6 +119,7 @@
 #include <xen/efi.h>
 #include <xen/grant_table.h>
 #include <xen/hypercall.h>
+#include <xen/vm_event.h>
 #include <asm/paging.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
@@ -4555,6 +4556,12 @@ int arch_acquire_resource(struct domain *d, unsigned int type,
     }
 #endif
 
+    case XENMEM_resource_vm_event:
+        rc = vm_event_ng_get_frames(d, id, frame, nr_frames, mfn_list);
+        if ( !rc )
+            *flags |= XENMEM_rsrc_acq_caller_owned;
+        break;
+
     default:
         rc = -EOPNOTSUPP;
         break;
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index e6a7a29..3f9be97 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -25,6 +25,7 @@
 #include <xen/wait.h>
 #include <xen/vm_event.h>
 #include <xen/mem_access.h>
+#include <xen/vmap.h>
 #include <asm/p2m.h>
 #include <asm/monitor.h>
 #include <asm/vm_event.h>
@@ -35,6 +36,78 @@
 #define xen_rmb()  smp_rmb()
 #define xen_wmb()  smp_wmb()
 
+static int vm_event_ring_pfn_param(uint32_t type)
+{
+    switch ( type )
+    {
+#ifdef CONFIG_HAS_MEM_PAGING
+    case XEN_VM_EVENT_TYPE_PAGING:
+        return HVM_PARAM_PAGING_RING_PFN;
+#endif
+    case XEN_VM_EVENT_TYPE_MONITOR:
+        return HVM_PARAM_MONITOR_RING_PFN;
+#ifdef CONFIG_HAS_MEM_SHARING
+    case XEN_VM_EVENT_TYPE_SHARING:
+        return HVM_PARAM_SHARING_RING_PFN;
+#endif
+    }
+
+    ASSERT_UNREACHABLE();
+    return -1;
+}
+
+static int vm_event_pause_flag(uint32_t type)
+{
+    switch ( type )
+    {
+#ifdef CONFIG_HAS_MEM_PAGING
+    case XEN_VM_EVENT_TYPE_PAGING:
+        return _VPF_mem_paging;
+#endif
+    case XEN_VM_EVENT_TYPE_MONITOR:
+        return _VPF_mem_access;
+#ifdef CONFIG_HAS_MEM_SHARING
+    case XEN_VM_EVENT_TYPE_SHARING:
+        return _VPF_mem_sharing;
+#endif
+    }
+
+    ASSERT_UNREACHABLE();
+    return -1;
+}
+
+#ifdef CONFIG_HAS_MEM_PAGING
+static void mem_paging_notification(struct vcpu *v, unsigned int port);
+#endif
+static void monitor_notification(struct vcpu *v, unsigned int port);
+#ifdef CONFIG_HAS_MEM_SHARING
+static void mem_sharing_notification(struct vcpu *v, unsigned int port);
+#endif
+
+static xen_event_channel_notification_t vm_event_notification_fn(uint32_t type)
+{
+    switch ( type )
+    {
+#ifdef CONFIG_HAS_MEM_PAGING
+    case XEN_VM_EVENT_TYPE_PAGING:
+        return mem_paging_notification;
+#endif
+    case XEN_VM_EVENT_TYPE_MONITOR:
+        return monitor_notification;
+#ifdef CONFIG_HAS_MEM_SHARING
+    case XEN_VM_EVENT_TYPE_SHARING:
+        return mem_sharing_notification;
+#endif
+    }
+
+    ASSERT_UNREACHABLE();
+    return NULL;
+}
+
+/*
+ * VM event ring implementation
+ */
+
 #define to_ring(_ved) container_of((_ved), struct vm_event_ring_domain, ved)
 
 /* VM event ring implementation */
@@ -67,12 +140,12 @@ static const struct vm_event_ops vm_event_ring_ops;
 static int vm_event_ring_enable(
     struct domain *d,
     struct xen_domctl_vm_event_op *vec,
-    struct vm_event_domain **p_ved,
-    int pause_flag,
-    int param,
-    xen_event_channel_notification_t notification_fn)
+    struct vm_event_domain **p_ved)
 {
     int rc;
+    int param = vm_event_ring_pfn_param(vec->type);
+    int pause_flag = vm_event_pause_flag(vec->type);
+    xen_event_channel_notification_t fn = vm_event_notification_fn(vec->type);
     unsigned long ring_gfn = d->arch.hvm.params[param];
     struct vm_event_ring_domain *impl;
 
@@ -111,8 +184,7 @@ static int vm_event_ring_enable(
                     (vm_event_sring_t *)impl->ring_page,
                     PAGE_SIZE);
 
-    rc = alloc_unbound_xen_event_channel(d, 0, current->domain->domain_id,
-                                         notification_fn);
+    rc = alloc_unbound_xen_event_channel(d, 0, current->domain->domain_id, fn);
     if ( rc < 0 )
         goto err;
 
@@ -242,6 +314,7 @@ static int vm_event_ring_disable(struct vm_event_domain **p_ved)
 
     xfree(impl);
     *p_ved = NULL;
+
     return 0;
 }
 
@@ -365,6 +438,51 @@ static int vm_event_ring_get_response(struct vm_event_ring_domain *impl,
     return rc;
 }
 
+static void vm_event_handle_response(struct domain *d, struct vcpu *v,
+                                     vm_event_response_t *rsp)
+{
+    /* Check flags which apply only when the vCPU is paused */
+    if ( atomic_read(&v->vm_event_pause_count) )
+    {
+#ifdef CONFIG_HAS_MEM_PAGING
+        if ( rsp->reason == VM_EVENT_REASON_MEM_PAGING )
+            p2m_mem_paging_resume(d, rsp);
+#endif
+
+        /*
+         * Check emulation flags in the arch-specific handler only, as it
+         * has to set arch-specific flags when supported, and to avoid
+         * bitmask overhead when it isn't supported.
+         */
+        vm_event_emulate_check(v, rsp);
+
+        /*
+         * Check in arch-specific handler to avoid bitmask overhead when
+         * not supported.
+         */
+        vm_event_register_write_resume(v, rsp);
+
+        /*
+         * Check in arch-specific handler to avoid bitmask overhead when
+         * not supported.
+         */
+        vm_event_toggle_singlestep(d, v, rsp);
+
+        /* Check for altp2m switch */
+        if ( rsp->flags & VM_EVENT_FLAG_ALTERNATE_P2M )
+            p2m_altp2m_check(v, rsp->altp2m_idx);
+
+        if ( rsp->flags & VM_EVENT_FLAG_SET_REGISTERS )
+            vm_event_set_registers(v, rsp);
+
+        if ( rsp->flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT )
+            vm_event_monitor_next_interrupt(v);
+
+        if ( rsp->flags & VM_EVENT_FLAG_VCPU_PAUSED )
+            vm_event_vcpu_unpause(v);
+    }
+}
+
 /*
  * Pull all responses from the given ring and unpause the corresponding vCPU
  * if required. Based on the response type, here we can also call custom
@@ -373,22 +491,20 @@ static int vm_event_ring_get_response(struct vm_event_ring_domain *impl,
  * Note: responses are handled the same way regardless of which ring they
  * arrive on.
  */
-static int vm_event_ring_resume(struct vm_event_ring_domain *impl)
+static int vm_event_ring_resume(struct vm_event_domain *ved, struct vcpu *v)
 {
     vm_event_response_t rsp;
-
-    if ( unlikely(!impl || !vm_event_check(&impl->ved)) )
-         return -ENODEV;
+    struct vm_event_ring_domain *impl = to_ring(ved);
 
     /*
-     * vm_event_resume() runs in either XEN_VM_EVENT_* domctls, or
+     * vm_event_ring_resume() runs in either XEN_VM_EVENT_* domctls, or
      * EVTCHN_send context from the introspection consumer. Both contexts
      * are guaranteed not to be the subject of vm_event responses.
      * While we could ASSERT(v != current) for each VCPU in d in the loop
      * below, this covers the case where we would need to iterate over all
      * of them more succintly.
      */
-    ASSERT(impl->ved.d != current->domain);
+    ASSERT(ved->d != current->domain);
 
     /* Pull all responses off the ring. */
     while ( vm_event_ring_get_response(impl, &rsp) )
@@ -402,7 +518,7 @@ static int vm_event_ring_resume(struct vm_event_ring_domain *impl)
         }
 
         /* Validate the vcpu_id in the response. */
-        v = domain_vcpu(impl->ved.d, rsp.vcpu_id);
+        v = domain_vcpu(ved->d, rsp.vcpu_id);
         if ( !v )
             continue;
 
@@ -410,47 +526,7 @@ static int vm_event_ring_resume(struct vm_event_ring_domain *impl)
          * In some cases the response type needs extra handling, so here
          * we call the appropriate handlers.
          */
-
-        /* Check flags which apply only when the vCPU is paused */
-        if ( atomic_read(&v->vm_event_pause_count) )
-        {
-#ifdef CONFIG_HAS_MEM_PAGING
-            if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
-                p2m_mem_paging_resume(impl->ved.d, &rsp);
-#endif
-
-            /*
-             * Check emulation flags in the arch-specific handler only, as it
-             * has to set arch-specific flags when supported, and to avoid
-             * bitmask overhead when it isn't supported.
-             */
-            vm_event_emulate_check(v, &rsp);
-
-            /*
-             * Check in arch-specific handler to avoid bitmask overhead when
-             * not supported.
-             */
-            vm_event_register_write_resume(v, &rsp);
-
-            /*
-             * Check in arch-specific handler to avoid bitmask overhead when
-             * not supported.
-             */
-            vm_event_toggle_singlestep(impl->ved.d, v, &rsp);
-
-            /* Check for altp2m switch */
-            if ( rsp.flags & VM_EVENT_FLAG_ALTERNATE_P2M )
-                p2m_altp2m_check(v, rsp.altp2m_idx);
-
-            if ( rsp.flags & VM_EVENT_FLAG_SET_REGISTERS )
-                vm_event_set_registers(v, &rsp);
-
-            if ( rsp.flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT )
-                vm_event_monitor_next_interrupt(v);
-
-            if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
-                vm_event_vcpu_unpause(v);
-        }
+        vm_event_handle_response(ved->d, v, &rsp);
     }
 
     return 0;
@@ -535,59 +611,361 @@ static int vm_event_ring_claim_slot(struct vm_event_domain *ved, bool allow_slee
         return vm_event_ring_grab_slot(to_ring(ved), current->domain != ved->d);
 }
 
-#ifdef CONFIG_HAS_MEM_PAGING
-/* Registered with Xen-bound event channel for incoming notifications. */
-static void mem_paging_notification(struct vcpu *v, unsigned int port)
+static void vm_event_ring_cleanup(struct vm_event_domain *ved)
 {
-    vm_event_ring_resume(to_ring(v->domain->vm_event_paging));
+    struct vm_event_ring_domain *impl = to_ring(ved);
+    /* Destroying the wait queue head means waking up all
+     * queued vcpus. This will drain the list, allowing
+     * the disable routine to complete. It will also drop
+     * all domain refs the wait-queued vcpus are holding.
+     * Finally, because this code path involves previously
+     * pausing the domain (domain_kill), unpausing the
+     * vcpus causes no harm. */
+    destroy_waitqueue_head(&impl->wq);
 }
-#endif
 
-/* Registered with Xen-bound event channel for incoming notifications. */
-static void monitor_notification(struct vcpu *v, unsigned int port)
+/*
+ * VM event NG (new generation)
+ */
+#define to_channels(_ved) container_of((_ved), \
+                                        struct vm_event_channels_domain, ved)
+
+struct vm_event_channels_domain
+{
+    /* VM event domain */
+    struct vm_event_domain ved;
+    /* shared channels buffer */
+    struct vm_event_slot *slots;
+    /* the buffer size (number of frames) */
+    unsigned int nr_frames;
+    /* buffer's mfn list */
+    mfn_t mfn[0];
+};
+
+static const struct vm_event_ops vm_event_channels_ops;
+
+static void vm_event_channels_free_buffer(struct vm_event_channels_domain *impl)
 {
-    vm_event_ring_resume(to_ring(v->domain->vm_event_monitor));
+    int i;
+
+    vunmap(impl->slots);
+    impl->slots = NULL;
+
+    for ( i = 0; i < impl->nr_frames; i++ )
+        free_domheap_page(mfn_to_page(impl->mfn[i]));
 }
 
-#ifdef CONFIG_HAS_MEM_SHARING
-/* Registered with Xen-bound event channel for incoming notifications. */
-static void mem_sharing_notification(struct vcpu *v, unsigned int port)
+static int vm_event_channels_alloc_buffer(struct vm_event_channels_domain *impl)
 {
-    vm_event_ring_resume(to_ring(v->domain->vm_event_share));
+    int i = 0;
+
+    impl->slots = vzalloc(impl->nr_frames * PAGE_SIZE);
+    if ( !impl->slots )
+        return -ENOMEM;
+
+    for ( i = 0; i < impl->nr_frames; i++ )
+        impl->mfn[i] = vmap_to_mfn(impl->slots + i * PAGE_SIZE);
+
+    for ( i = 0; i < impl->nr_frames; i++ )
+        share_xen_page_with_guest(mfn_to_page(impl->mfn[i]), current->domain,
+                                  SHARE_rw);
+
+    return 0;
 }
+
+static int vm_event_channels_enable(
+    struct domain *d,
+    struct xen_domctl_vm_event_op *vec,
+    struct vm_event_domain **p_ved)
+{
+    int rc, i = 0;
+    xen_event_channel_notification_t fn = vm_event_notification_fn(vec->type);
+    unsigned int nr_frames = PFN_UP(d->max_vcpus * sizeof(struct vm_event_slot));
+    struct vm_event_channels_domain *impl;
+
+    if ( *p_ved )
+        return -EBUSY;
+
+    impl = _xzalloc(sizeof(struct vm_event_channels_domain) +
+                           nr_frames * sizeof(mfn_t),
+                    __alignof__(struct vm_event_channels_domain));
+    if ( unlikely(!impl) )
+        return -ENOMEM;
+
+    spin_lock_init(&impl->ved.lock);
+
+    impl->nr_frames = nr_frames;
+    impl->ved.d = d;
+    impl->ved.ops = &vm_event_channels_ops;
+
+    rc = vm_event_init_domain(d);
+    if ( rc < 0 )
+        goto err;
+
+    rc = vm_event_channels_alloc_buffer(impl);
+    if ( rc )
+        goto err;
+
+    for ( i = 0; i < d->max_vcpus; i++ )
+    {
+        rc = alloc_unbound_xen_event_channel(d, i, current->domain->domain_id, fn);
+        if ( rc < 0 )
+            goto err;
+
+        impl->slots[i].port = rc;
+        impl->slots[i].state = STATE_VM_EVENT_SLOT_IDLE;
+    }
+
+    *p_ved = &impl->ved;
+
+    return 0;
+
+err:
+    while ( --i >= 0 )
+        evtchn_close(d, impl->slots[i].port, 0);
+    xfree(impl);
+
+    return rc;
+}
+
+static int vm_event_channels_disable(struct vm_event_domain **p_ved)
+{
+    struct vcpu *v;
+    struct domain *d = (*p_ved)->d;
+    struct vm_event_channels_domain *impl = to_channels(*p_ved);
+    int i;
+
+    spin_lock(&impl->ved.lock);
+
+    for_each_vcpu( impl->ved.d, v )
+    {
+        if ( atomic_read(&v->vm_event_pause_count) )
+            vm_event_vcpu_unpause(v);
+    }
+
+    for ( i = 0; i < impl->ved.d->max_vcpus; i++ )
+        evtchn_close(impl->ved.d, impl->slots[i].port, 0);
+
+    vm_event_channels_free_buffer(impl);
+
+    vm_event_cleanup_domain(d);
+
+    spin_unlock(&impl->ved.lock);
+
+    xfree(impl);
+    *p_ved = NULL;
+
+    return 0;
+}
+
+static bool vm_event_channels_check(struct vm_event_domain *ved)
+{
+    return to_channels(ved)->slots != NULL;
+}
+
+static void vm_event_channels_cleanup(struct vm_event_domain *ved)
+{
+}
+
+static int vm_event_channels_claim_slot(struct vm_event_domain *ved,
+                                        bool allow_sleep)
+{
+    return 0;
+}
+
+static void vm_event_channels_cancel_slot(struct vm_event_domain *ved)
+{
+}
+
+static void vm_event_channels_put_request(struct vm_event_domain *ved,
+                                          vm_event_request_t *req)
+{
+    struct vm_event_channels_domain *impl = to_channels(ved);
+    struct vm_event_slot *slot;
+
+    ASSERT( req->vcpu_id >= 0 && req->vcpu_id < ved->d->max_vcpus );
+
+    slot = &impl->slots[req->vcpu_id];
+
+    if ( current->domain != ved->d )
+    {
+        req->flags |= VM_EVENT_FLAG_FOREIGN;
+#ifndef NDEBUG
+        if ( !(req->flags & VM_EVENT_FLAG_VCPU_PAUSED) )
+            gdprintk(XENLOG_G_WARNING, "d%dv%d was not paused.\n",
+                     ved->d->domain_id, req->vcpu_id);
 #endif
+    }
+
+    req->version = VM_EVENT_INTERFACE_VERSION;
+
+    spin_lock(&impl->ved.lock);
+    if ( slot->state != STATE_VM_EVENT_SLOT_IDLE )
+    {
+        gdprintk(XENLOG_G_WARNING, "The VM event slot for d%dv%d is not 
IDLE.\n",
+                 impl->ved.d->domain_id, req->vcpu_id);
+        spin_unlock(&impl->ved.lock);
+        return;
+    }
+
+    slot->u.req = *req;
+    slot->state = STATE_VM_EVENT_SLOT_SUBMIT;
+    spin_unlock(&impl->ved.lock);
+    notify_via_xen_event_channel(impl->ved.d, slot->port);
+}
+
+static int vm_event_channels_get_response(struct vm_event_channels_domain *impl,
+                                          struct vcpu *v, vm_event_response_t *rsp)
+{
+    struct vm_event_slot *slot = &impl->slots[v->vcpu_id];
+    int rc = 0;
+
+    ASSERT( slot != NULL );
+    spin_lock(&impl->ved.lock);
+
+    if ( slot->state != STATE_VM_EVENT_SLOT_FINISH )
+    {
+        gdprintk(XENLOG_G_WARNING, "The VM event slot state for d%dv%d is 
invalid.\n",
+                 impl->ved.d->domain_id, v->vcpu_id);
+        rc = -1;
+        goto out;
+    }
+
+    *rsp = slot->u.rsp;
+    slot->state = STATE_VM_EVENT_SLOT_IDLE;
+
+out:
+    spin_unlock(&impl->ved.lock);
+
+    return rc;
+}
+
+static int vm_event_channels_resume(struct vm_event_domain *ved, struct vcpu *v)
+{
+    vm_event_response_t rsp;
+    struct vm_event_channels_domain *impl = to_channels(ved);
+
+    ASSERT(ved->d != current->domain);
+
+    if ( vm_event_channels_get_response(impl, v, &rsp) ||
+         rsp.version != VM_EVENT_INTERFACE_VERSION ||
+         rsp.vcpu_id != v->vcpu_id )
+        return -1;
+
+    vm_event_handle_response(ved->d, v, &rsp);
+
+    return 0;
+}
+
+int vm_event_ng_get_frames(struct domain *d, unsigned int id,
+                           unsigned long frame, unsigned int nr_frames,
+                           xen_pfn_t mfn_list[])
+{
+    struct vm_event_domain *ved;
+    int i;
+
+    switch ( id )
+    {
+    case XEN_VM_EVENT_TYPE_MONITOR:
+        ved = d->vm_event_monitor;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+
+    if ( !vm_event_check(ved) )
+        return -EINVAL;
+
+    if ( frame != 0 || nr_frames != to_channels(ved)->nr_frames )
+        return -EINVAL;
+
+    spin_lock(&ved->lock);
+
+    for ( i = 0; i < to_channels(ved)->nr_frames; i++ )
+        mfn_list[i] = mfn_x(to_channels(ved)->mfn[i]);
+
+    spin_unlock(&ved->lock);
+
+    return 0;
+}
+
+/*
+ * vm_event implementation agnostic functions
+ */
 
 /* Clean up on domain destruction */
 void vm_event_cleanup(struct domain *d)
 {
 #ifdef CONFIG_HAS_MEM_PAGING
     if ( vm_event_check(d->vm_event_paging) )
-        d->vm_event_paging->ops->cleanup(&d->vm_event_paging);
+    {
+        d->vm_event_paging->ops->cleanup(d->vm_event_paging);
+        d->vm_event_paging->ops->disable(&d->vm_event_paging);
+    }
 #endif
 
     if ( vm_event_check(d->vm_event_monitor) )
-        d->vm_event_monitor->ops->cleanup(&d->vm_event_monitor);
+    {
+        d->vm_event_monitor->ops->cleanup(d->vm_event_monitor);
+        d->vm_event_monitor->ops->disable(&d->vm_event_monitor);
+    }
 
 #ifdef CONFIG_HAS_MEM_SHARING
     if ( vm_event_check(d->vm_event_share) )
-        d->vm_event_share->ops->cleanup(&d->vm_event_share);
+    {
+        d->vm_event_share->ops->cleanup(d->vm_event_share);
+        d->vm_event_share->ops->disable(&d->vm_event_share);
+    }
 #endif
 }
 
-static void vm_event_ring_cleanup(struct vm_event_domain **_ved)
+static int vm_event_enable(struct domain *d,
+                           struct xen_domctl_vm_event_op *vec,
+                           struct vm_event_domain **p_ved)
 {
-    struct vm_event_ring_domain *impl = to_ring(*_ved);
-    /* Destroying the wait queue head means waking up all
-     * queued vcpus. This will drain the list, allowing
-     * the disable routine to complete. It will also drop
-     * all domain refs the wait-queued vcpus are holding.
-     * Finally, because this code path involves previously
-     * pausing the domain (domain_kill), unpausing the
-     * vcpus causes no harm. */
-    destroy_waitqueue_head(&impl->wq);
-    (void)vm_event_ring_disable(_ved);
+    return ( vec->flags & XEN_VM_EVENT_FLAGS_NG_OP ) ?
+        vm_event_channels_enable(d, vec, p_ved) :
+        vm_event_ring_enable(d, vec, p_ved);
 }
 
+static int vm_event_resume(struct vm_event_domain *ved, struct vcpu *v)
+{
+    if ( !vm_event_check(ved) )
+         return -ENODEV;
+
+    if ( !v )
+        return -EINVAL;
+
+    return ved->ops->resume(ved, v);
+}
+
+#ifdef CONFIG_HAS_MEM_PAGING
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_paging_notification(struct vcpu *v, unsigned int port)
+{
+    vm_event_resume(v->domain->vm_event_paging, v);
+}
+#endif
+
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void monitor_notification(struct vcpu *v, unsigned int port)
+{
+    vm_event_resume(v->domain->vm_event_monitor, v);
+}
+
+#ifdef CONFIG_HAS_MEM_SHARING
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_sharing_notification(struct vcpu *v, unsigned int port)
+{
+    vm_event_resume(v->domain->vm_event_share, v);
+}
+#endif
+
+/*
+ * vm_event domctl interface
+ */
+
 int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
 {
     int rc;
@@ -632,6 +1010,13 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
     {
         rc = -EINVAL;
 
+        /*
+         * The NG interface is only supported by XEN_VM_EVENT_TYPE_MONITOR
+         * for now.
+         */
+        if ( vec->flags & XEN_VM_EVENT_FLAGS_NG_OP )
+            break;
+
         switch( vec->op )
         {
         case XEN_VM_EVENT_ENABLE:
@@ -657,9 +1042,7 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
                 break;
 
             /* domain_pause() not required here, see XSA-99 */
-            rc = vm_event_ring_enable(d, vec, &d->vm_event_paging, 
_VPF_mem_paging,
-                                 HVM_PARAM_PAGING_RING_PFN,
-                                 mem_paging_notification);
+            rc = vm_event_enable(d, vec, &d->vm_event_paging);
         }
         break;
 
@@ -667,12 +1050,13 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
             if ( !vm_event_check(d->vm_event_paging) )
                 break;
             domain_pause(d);
-            rc = vm_event_ring_disable(&d->vm_event_paging);
+            rc = d->vm_event_paging->ops->disable(&d->vm_event_paging);
             domain_unpause(d);
             break;
 
         case XEN_VM_EVENT_RESUME:
-            rc = vm_event_ring_resume(to_ring(d->vm_event_paging));
+            rc = vm_event_resume(d->vm_event_paging,
+                                 domain_vcpu(d, vec->u.resume.vcpu_id));
             break;
 
         default:
@@ -694,22 +1078,23 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
             rc = arch_monitor_init_domain(d);
             if ( rc )
                 break;
-            rc = vm_event_ring_enable(d, vec, &d->vm_event_monitor, 
_VPF_mem_access,
-                                 HVM_PARAM_MONITOR_RING_PFN,
-                                 monitor_notification);
+
+            rc = vm_event_enable(d, vec, &d->vm_event_monitor);
+
             break;
 
         case XEN_VM_EVENT_DISABLE:
             if ( !vm_event_check(d->vm_event_monitor) )
                 break;
             domain_pause(d);
-            rc = vm_event_ring_disable(&d->vm_event_monitor);
+            rc = d->vm_event_monitor->ops->disable(&d->vm_event_monitor);
             arch_monitor_cleanup_domain(d);
             domain_unpause(d);
             break;
 
         case XEN_VM_EVENT_RESUME:
-            rc = vm_event_ring_resume(to_ring(d->vm_event_monitor));
+            rc = vm_event_resume(d->vm_event_monitor,
+                                 domain_vcpu(d, vec->u.resume.vcpu_id));
             break;
 
         default:
@@ -724,6 +1109,13 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
     {
         rc = -EINVAL;
 
+        /*
+         * The NG interface is only supported by XEN_VM_EVENT_TYPE_MONITOR
+         * for now.
+         */
+        if ( vec->flags & XEN_VM_EVENT_FLAGS_NG_OP )
+            break;
+
         switch( vec->op )
         {
         case XEN_VM_EVENT_ENABLE:
@@ -738,21 +1130,20 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec)
                 break;
 
             /* domain_pause() not required here, see XSA-99 */
-            rc = vm_event_ring_enable(d, vec, &d->vm_event_share, 
_VPF_mem_sharing,
-                                 HVM_PARAM_SHARING_RING_PFN,
-                                 mem_sharing_notification);
+            rc = vm_event_enable(d, vec, &d->vm_event_share);
             break;
 
         case XEN_VM_EVENT_DISABLE:
             if ( !vm_event_check(d->vm_event_share) )
                 break;
             domain_pause(d);
-            rc = vm_event_ring_disable(&d->vm_event_share);
+            rc = d->vm_event_share->ops->disable(&d->vm_event_share);
             domain_unpause(d);
             break;
 
         case XEN_VM_EVENT_RESUME:
-            rc = vm_event_ring_resume(to_ring(d->vm_event_share));
+            rc = vm_event_resume(d->vm_event_share,
+                                 domain_vcpu(d, vec->u.resume.vcpu_id));
             break;
 
         default:
@@ -809,7 +1200,19 @@ static const struct vm_event_ops vm_event_ring_ops = {
     .cleanup = vm_event_ring_cleanup,
     .claim_slot = vm_event_ring_claim_slot,
     .cancel_slot = vm_event_ring_cancel_slot,
-    .put_request = vm_event_ring_put_request
+    .disable = vm_event_ring_disable,
+    .put_request = vm_event_ring_put_request,
+    .resume = vm_event_ring_resume,
+};
+
+static const struct vm_event_ops vm_event_channels_ops = {
+    .check = vm_event_channels_check,
+    .cleanup = vm_event_channels_cleanup,
+    .claim_slot = vm_event_channels_claim_slot,
+    .cancel_slot = vm_event_channels_cancel_slot,
+    .disable = vm_event_channels_disable,
+    .put_request = vm_event_channels_put_request,
+    .resume = vm_event_channels_resume,
 };
 
 /*
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 234d8c5..fc7420c 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -38,7 +38,7 @@
 #include "hvm/save.h"
 #include "memory.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000011
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000012
 
 /*
  * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -781,12 +781,20 @@ struct xen_domctl_gdbsx_domstatus {
 struct xen_domctl_vm_event_op {
     uint32_t       op;           /* XEN_VM_EVENT_* */
     uint32_t       type;         /* XEN_VM_EVENT_TYPE_* */
+ /* Use the NG interface */
+#define _XEN_VM_EVENT_FLAGS_NG_OP         0
+#define XEN_VM_EVENT_FLAGS_NG_OP          (1U << _XEN_VM_EVENT_FLAGS_NG_OP)
+    uint32_t       flags;
 
     union {
         struct {
             uint32_t port;       /* OUT: event channel for ring */
         } enable;
 
+        struct {
+            uint32_t vcpu_id;    /* IN: vcpu_id */
+        } resume;
+
         uint32_t version;
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 68ddadb..2e8912e 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -612,6 +612,7 @@ struct xen_mem_acquire_resource {
 
 #define XENMEM_resource_ioreq_server 0
 #define XENMEM_resource_grant_table 1
+#define XENMEM_resource_vm_event 2
 
     /*
      * IN - a type-specific resource identifier, which must be zero
@@ -619,6 +620,7 @@ struct xen_mem_acquire_resource {
      *
      * type == XENMEM_resource_ioreq_server -> id == ioreq server id
      * type == XENMEM_resource_grant_table -> id defined below
+     * type == XENMEM_resource_vm_event -> id == vm_event type
      */
     uint32_t id;
 
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index c48bc21..2f2160b 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -421,6 +421,22 @@ typedef struct vm_event_st {
 
 DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t);
 
+/* VM Event slot state */
+#define STATE_VM_EVENT_SLOT_IDLE     0 /* the slot data is invalid */
+#define STATE_VM_EVENT_SLOT_SUBMIT   1 /* a request was submitted */
+#define STATE_VM_EVENT_SLOT_FINISH   2 /* a response was issued */
+
+struct vm_event_slot
+{
+    uint32_t port;      /* evtchn for notifications to/from helper */
+    uint32_t state:4;
+    uint32_t pad:28;
+    union {
+        vm_event_request_t req;
+        vm_event_response_t rsp;
+    } u;
+};
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 #endif /* _XEN_PUBLIC_VM_EVENT_H */
 
diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
index 21a3f50..0468269 100644
--- a/xen/include/xen/vm_event.h
+++ b/xen/include/xen/vm_event.h
@@ -30,14 +30,17 @@
 
 struct domain;
 struct vm_event_domain;
+struct xen_domctl_vm_event_op;
 
 struct vm_event_ops
 {
     bool (*check)(struct vm_event_domain *ved);
-    void (*cleanup)(struct vm_event_domain **_ved);
-    int (*claim_slot)(struct vm_event_domain *ved, bool allow_sleep);
+    void (*cleanup)(struct vm_event_domain *p_ved);
+    int  (*claim_slot)(struct vm_event_domain *ved, bool allow_sleep);
     void (*cancel_slot)(struct vm_event_domain *ved);
+    int  (*disable)(struct vm_event_domain **p_ved);
     void (*put_request)(struct vm_event_domain *ved, vm_event_request_t *req);
+    int  (*resume)(struct vm_event_domain *ved, struct vcpu *v);
 };
 
 struct vm_event_domain
@@ -111,6 +114,10 @@ static inline void vm_event_put_request(struct vm_event_domain *ved,
 
 int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec);
 
+int vm_event_ng_get_frames(struct domain *d, unsigned int id,
+                           unsigned long frame, unsigned int nr_frames,
+                           xen_pfn_t mfn_list[]);
+
 void vm_event_vcpu_pause(struct vcpu *v);
 void vm_event_vcpu_unpause(struct vcpu *v);
 
-- 
2.7.4

