
Re: [Xen-devel] [RFC PATCH 4/6] vm_event: Use slotted channels for sync requests.



> -----Original Message-----
> From: Xen-devel [mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxxx] On Behalf
> Of Petre Pircalabu
> Sent: 19 December 2018 18:52
> To: xen-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>; Stefano Stabellini
> <sstabellini@xxxxxxxxxx>; Wei Liu <wei.liu2@xxxxxxxxxx>; Razvan Cojocaru
> <rcojocaru@xxxxxxxxxxxxxxx>; Konrad Rzeszutek Wilk
> <konrad.wilk@xxxxxxxxxx>; George Dunlap <George.Dunlap@xxxxxxxxxx>; Andrew
> Cooper <Andrew.Cooper3@xxxxxxxxxx>; Ian Jackson <Ian.Jackson@xxxxxxxxxx>;
> Tim (Xen.org) <tim@xxxxxxx>; Julien Grall <julien.grall@xxxxxxx>; Tamas K
> Lengyel <tamas@xxxxxxxxxxxxx>; Jan Beulich <jbeulich@xxxxxxxx>; Roger Pau
> Monne <roger.pau@xxxxxxxxxx>
> Subject: [Xen-devel] [RFC PATCH 4/6] vm_event: Use slotted channels for
> sync requests.
> 
> In high-throughput introspection scenarios where lots of monitor
> vm_events are generated, the ring buffer can fill up before the monitor
> application gets a chance to handle all the requests, thus blocking
> other vCPUs, which then have to wait for a slot to become available.
> 
> This patch adds support for a different mechanism to handle synchronous
> vm_event requests / responses. As each synchronous request pauses the
> vCPU until the corresponding response is handled, it can be stored in
> a slotted memory buffer (one slot per vCPU) shared between the hypervisor
> and the controlling domain. The asynchronous vm_event requests will be
> sent to the controlling domain using a ring buffer, but without blocking
> the vCPU, as no response is required.
> 
> The memory for the asynchronous ring and the synchronous channels will
> be allocated from the domheap and mapped into the controlling domain
> using the foreignmemory_map_resource interface. Unlike the current
> implementation, the allocated pages are not part of the target DomU, so
> they will not be reclaimed when the vm_event domain is disabled.

Why re-invent the wheel here? The ioreq infrastructure already does pretty much 
everything you need AFAICT.

  Paul
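
For context, a rough, untested consumer-side sketch of how a monitor
application might wire up the proposed interface, based on the
xc_monitor_enable_ex() prototype added below. Only the libxc and
libxenevtchn calls are real; the helper name, MAX_VCPUS bound and elided
error handling are illustrative:

    #include <xenctrl.h>
    #include <xenevtchn.h>
    #include <xenforeignmemory.h>

    #define MAX_VCPUS 128 /* illustrative bound for the ports array */

    static xenforeignmemory_resource_handle *setup_monitor(
        xc_interface *xch, xenevtchn_handle *xce, uint32_t domain_id,
        uint32_t nr_vcpus, void **async_ring, void **sync_buf)
    {
        uint32_t async_port, sync_ports[MAX_VCPUS];
        xenforeignmemory_resource_handle *fres;

        if ( nr_vcpus > MAX_VCPUS )
            return NULL;

        /* One ring frame for async events + per-vCPU slots for sync events. */
        fres = xc_monitor_enable_ex(xch, domain_id, async_ring, 1, &async_port,
                                    sync_buf, sync_ports, nr_vcpus);
        if ( !fres )
            return NULL;

        /*
         * Bind the async notification channel and one channel per vCPU
         * (the returned local ports would be kept for the event loop).
         */
        xenevtchn_bind_interdomain(xce, domain_id, async_port);
        for ( uint32_t i = 0; i < nr_vcpus; i++ )
            xenevtchn_bind_interdomain(xce, domain_id, sync_ports[i]);

        return fres;
    }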

> 
> Signed-off-by: Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>
> ---
>  tools/libxc/include/xenctrl.h |  11 +
>  tools/libxc/xc_monitor.c      |  36 +++
>  tools/libxc/xc_private.h      |  14 ++
>  tools/libxc/xc_vm_event.c     |  74 +++++-
>  xen/arch/x86/mm.c             |   7 +
>  xen/common/vm_event.c         | 515
> ++++++++++++++++++++++++++++++++++++++----
>  xen/include/public/domctl.h   |  25 +-
>  xen/include/public/memory.h   |   2 +
>  xen/include/public/vm_event.h |  15 ++
>  xen/include/xen/vm_event.h    |   4 +
>  10 files changed, 660 insertions(+), 43 deletions(-)
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index de0b990..fad8bc4 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -2012,6 +2012,17 @@ int xc_get_mem_access(xc_interface *xch, uint32_t
> domain_id,
>   * Caller has to unmap this page when done.
>   */
>  void *xc_monitor_enable(xc_interface *xch, uint32_t domain_id, uint32_t
> *port);
> +
> +struct xenforeignmemory_resource_handle *xc_monitor_enable_ex(
> +    xc_interface *xch,
> +    uint32_t domain_id,
> +    void **_ring_buffer,
> +    uint32_t ring_frames,
> +    uint32_t *ring_port,
> +    void **_sync_buffer,
> +    uint32_t *sync_ports,
> +    uint32_t nr_sync_channels);
> +
>  int xc_monitor_disable(xc_interface *xch, uint32_t domain_id);
>  int xc_monitor_resume(xc_interface *xch, uint32_t domain_id);
>  /*
> diff --git a/tools/libxc/xc_monitor.c b/tools/libxc/xc_monitor.c
> index 718fe8b..4ceb528 100644
> --- a/tools/libxc/xc_monitor.c
> +++ b/tools/libxc/xc_monitor.c
> @@ -49,6 +49,42 @@ void *xc_monitor_enable(xc_interface *xch, uint32_t
> domain_id, uint32_t *port)
>      return buffer;
>  }
> 
> +struct xenforeignmemory_resource_handle *xc_monitor_enable_ex(
> +    xc_interface *xch,
> +    uint32_t domain_id,
> +    void **_ring_buffer,
> +    uint32_t ring_frames,
> +    uint32_t *ring_port,
> +    void **_sync_buffer,
> +    uint32_t *sync_ports,
> +    uint32_t nr_sync_channels)
> +{
> +    xenforeignmemory_resource_handle *fres;
> +    int saved_errno;
> +
> +    /* Pause the domain for ring page setup */
> +    if ( xc_domain_pause(xch, domain_id) )
> +    {
> +        PERROR("Unable to pause domain\n");
> +        return NULL;
> +    }
> +
> +    fres = xc_vm_event_enable_ex(xch, domain_id,
> XEN_VM_EVENT_TYPE_MONITOR,
> +                                _ring_buffer, ring_frames, ring_port,
> +                                _sync_buffer, sync_ports,
> nr_sync_channels);
> +
> +    saved_errno = errno;
> +    if ( xc_domain_unpause(xch, domain_id) )
> +    {
> +        if ( fres )
> +            saved_errno = errno;
> +        PERROR("Unable to unpause domain");
> +    }
> +
> +    errno = saved_errno;
> +    return fres;
> +}
> +
>  int xc_monitor_disable(xc_interface *xch, uint32_t domain_id)
>  {
>      return xc_vm_event_control(xch, domain_id,
> diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
> index 482451c..1f70223 100644
> --- a/tools/libxc/xc_private.h
> +++ b/tools/libxc/xc_private.h
> @@ -420,6 +420,20 @@ int xc_vm_event_control(xc_interface *xch, uint32_t
> domain_id, unsigned int op,
>  void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type,
>                           uint32_t *port);
> 
> +/*
> + * Enables vm_event for using the xenforeignmemory_map_resource
> interface.
> + * The vm_event type can be XEN_VM_EVENT_TYPE_(PAGING/MONITOR/SHARING).
> + *
> + * The function returns:
> + *  - A ring for asynchronous vm_events.
> + *  - A slotted buffer for synchronous vm_events (one slot per vcpu)
> + *  - xenforeignmemory_resource_handle used exclusively for resource
> cleanup
> + */
> +xenforeignmemory_resource_handle *xc_vm_event_enable_ex(xc_interface
> *xch,
> +    uint32_t domain_id, int type,
> +    void **_ring_buffer, uint32_t ring_frames, uint32_t *ring_port,
> +    void **_sync_buffer, uint32_t *sync_ports, uint32_t
> nr_sync_channels);
> +
>  int do_dm_op(xc_interface *xch, uint32_t domid, unsigned int nr_bufs,
> ...);
> 
>  #endif /* __XC_PRIVATE_H__ */
> diff --git a/tools/libxc/xc_vm_event.c b/tools/libxc/xc_vm_event.c
> index 4fc2548..0a976b4 100644
> --- a/tools/libxc/xc_vm_event.c
> +++ b/tools/libxc/xc_vm_event.c
> @@ -22,6 +22,12 @@
> 
>  #include "xc_private.h"
> 
> +#include <xen/vm_event.h>
> +
> +#ifndef PFN_UP
> +#define PFN_UP(x)     (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
> +#endif /* PFN_UP */
> +
>  int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned
> int op,
>                          unsigned int type)
>  {
> @@ -120,7 +126,7 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t
> domain_id, int type,
>          goto out;
>      }
> 
> -    *port = domctl.u.vm_event_op.port;
> +    *port = domctl.u.vm_event_op.u.enable.port;
> 
>      /* Remove the ring_pfn from the guest's physmap */
>      rc = xc_domain_decrease_reservation_exact(xch, domain_id, 1, 0,
> &ring_pfn);
> @@ -138,6 +144,72 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t
> domain_id, int type,
>      return ring_page;
>  }
> 
> +xenforeignmemory_resource_handle *xc_vm_event_enable_ex(xc_interface
> *xch,
> +    uint32_t domain_id, int type,
> +    void **_ring_buffer, uint32_t ring_frames, uint32_t *ring_port,
> +    void **_sync_buffer, uint32_t *sync_ports, uint32_t nr_sync_channels)
> +{
> +    DECLARE_DOMCTL;
> +    DECLARE_HYPERCALL_BOUNCE(sync_ports, nr_sync_channels *
> sizeof(uint32_t),
> +                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> +    xenforeignmemory_resource_handle *fres;
> +    unsigned long nr_frames;
> +    void *buffer;
> +
> +    if ( !_ring_buffer || !ring_port || !_sync_buffer || !sync_ports )
> +    {
> +        errno = EINVAL;
> +        return NULL;
> +    }
> +
> +    nr_frames = ring_frames + PFN_UP(nr_sync_channels * sizeof(struct
> vm_event_slot));
> +
> +    fres = xenforeignmemory_map_resource(xch->fmem, domain_id,
> +                                         XENMEM_resource_vm_event, type,
> 0,
> +                                         nr_frames, &buffer,
> +                                         PROT_READ | PROT_WRITE, 0);
> +    if ( !fres )
> +    {
> +        PERROR("Could not map the vm_event pages\n");
> +        return NULL;
> +    }
> +
> +    domctl.cmd = XEN_DOMCTL_vm_event_op;
> +    domctl.domain = domain_id;
> +    domctl.u.vm_event_op.op = XEN_VM_EVENT_GET_PORTS;
> +    domctl.u.vm_event_op.type = type;
> +
> +    if ( xc_hypercall_bounce_pre(xch, sync_ports) )
> +    {
> +        PERROR("Could not bounce memory for XEN_DOMCTL_vm_event_op");
> +        errno = ENOMEM;
> +        return NULL;
> +    }
> +
> +    set_xen_guest_handle(domctl.u.vm_event_op.u.get_ports.sync,
> sync_ports);
> +
> +    if ( do_domctl(xch, &domctl) )
> +    {
> +        PERROR("Failed to get vm_event ports\n");
> +        goto out;
> +    }
> +
> +    xc_hypercall_bounce_post(xch, sync_ports);
> +    *ring_port = domctl.u.vm_event_op.u.get_ports.async;
> +
> +    *_sync_buffer = buffer + ring_frames * PAGE_SIZE;
> +    *_ring_buffer = buffer;
> +
> +    return fres;
> +
> +out:
> +    xc_hypercall_bounce_post(xch, sync_ports);
> +    if ( fres )
> +        xenforeignmemory_unmap_resource(xch->fmem, fres);
> +    return NULL;
> +}
> +
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 1431f34..256c63b 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -103,6 +103,7 @@
>  #include <xen/efi.h>
>  #include <xen/grant_table.h>
>  #include <xen/hypercall.h>
> +#include <xen/vm_event.h>
>  #include <asm/paging.h>
>  #include <asm/shadow.h>
>  #include <asm/page.h>
> @@ -4469,6 +4470,12 @@ int arch_acquire_resource(struct domain *d,
> unsigned int type,
>      }
>  #endif
> 
> +    case XENMEM_resource_vm_event:
> +    {
> +        rc = vm_event_get_frames(d, id, frame, nr_frames, mfn_list);
> +        break;
> +    }
> +
>      default:
>          rc = -EOPNOTSUPP;
>          break;
> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> index 77da41b..a2712a0 100644
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -28,6 +28,8 @@
>  #include <asm/p2m.h>
>  #include <asm/monitor.h>
>  #include <asm/vm_event.h>
> +#include <xen/guest_access.h>
> +#include <xen/vmap.h>
>  #include <xsm/xsm.h>
> 
>  /* for public/io/ring.h macros */
> @@ -40,6 +42,7 @@
>  #define vm_event_unlock(_ved)     spin_unlock(&(_ved)->lock)
> 
>  #define to_vm_event_domain_ring(_ved) container_of(_ved, struct
> vm_event_domain_ring, ved)
> +#define to_vm_event_domain_channel(_ved) container_of(_ved, struct
> vm_event_domain_channel, ved)
> 
>  struct vm_event_domain
>  {
> @@ -48,7 +51,8 @@ struct vm_event_domain
>      int (*claim_slot)(struct vm_event_domain *ved, bool allow_sleep);
>      void (*release_slot)(struct vm_event_domain *ved);
>      void (*put_request)(struct vm_event_domain *ved, vm_event_request_t
> *req);
> -    int (*get_response)(struct vm_event_domain *ved, vm_event_response_t
> *rsp);
> +    int (*get_response)(struct vm_event_domain *ved, struct vcpu *v,
> +                        unsigned int port, vm_event_response_t *rsp);
>      int (*disable)(struct vm_event_domain **_ved);
> 
>      /* The domain associated with the VM event */
> @@ -58,11 +62,6 @@ struct vm_event_domain
>      spinlock_t lock;
>  };
> 
> -bool vm_event_check(struct vm_event_domain *ved)
> -{
> -    return (ved && ved->check(ved));
> -}
> -
>  /* VM event domain ring implementation */
>  struct vm_event_domain_ring
>  {
> @@ -78,22 +77,57 @@ struct vm_event_domain_ring
>      vm_event_front_ring_t front_ring;
>      /* event channel port (vcpu0 only) */
>      int xen_port;
> -    /* vm_event bit for vcpu->pause_flags */
> -    int pause_flag;
>      /* list of vcpus waiting for room in the ring */
>      struct waitqueue_head wq;
>      /* the number of vCPUs blocked */
>      unsigned int blocked;
> +    /* vm_event bit for vcpu->pause_flags */
> +    int pause_flag;
>      /* The last vcpu woken up */
>      unsigned int last_vcpu_wake_up;
>  };
> 
> +struct vm_event_buffer
> +{
> +    void *va;
> +    unsigned int nr_frames;
> +    mfn_t mfn[0];
> +};
> +
> +struct vm_event_domain_channel
> +{
> +    /* VM event domain */
> +    struct vm_event_domain ved;
> +    /* ring for asynchronous vm events */
> +    struct vm_event_buffer *ring;
> +    /* front-end ring */
> +    vm_event_front_ring_t front_ring;
> +    /* per vcpu channels for synchronous vm events */
> +    struct vm_event_buffer *channels;
> +    /*
> +     * event channels ports
> +     * - one per vcpu for the synchronous channels.
> +     * - one for the asynchronous ring.
> +     */
> +    uint32_t xen_ports[0];
> +};
> +
> +bool vm_event_check(struct vm_event_domain *ved)
> +{
> +    return (ved && ved->check(ved));
> +}
> +
>  static bool vm_event_ring_check(struct vm_event_domain *ved)
>  {
>      struct vm_event_domain_ring *impl = to_vm_event_domain_ring(ved);
>      return impl->ring_page != NULL;
>  }
> 
> +static bool is_vm_event_domain_ring(struct vm_event_domain *ved)
> +{
> +    return ved->check == vm_event_ring_check;
> +}
> +
>  static unsigned int vm_event_ring_available(struct vm_event_domain_ring
> *ved)
>  {
>      int avail_req = RING_FREE_REQUESTS(&ved->front_ring);
> @@ -317,12 +351,15 @@ static void vm_event_ring_put_request(struct
> vm_event_domain *ved,
>      notify_via_xen_event_channel(d, impl->xen_port);
>  }
> 
> -static int vm_event_ring_get_response(struct vm_event_domain *ved,
> -                                      vm_event_response_t *rsp)
> +static int vm_event_ring_get_response(
> +    struct vm_event_domain *ved,
> +    struct vcpu *v,
> +    unsigned int port,
> +    vm_event_response_t *rsp)
>  {
>      vm_event_front_ring_t *front_ring;
>      RING_IDX rsp_cons;
> -    struct vm_event_domain_ring *impl = (struct vm_event_domain_ring
> *)ved;
> +    struct vm_event_domain_ring *impl = to_vm_event_domain_ring(ved);
> 
>      vm_event_lock(ved);
> 
> @@ -332,7 +369,7 @@ static int vm_event_ring_get_response(struct
> vm_event_domain *ved,
>      if ( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
>      {
>          vm_event_unlock(ved);
> -        return 0;
> +        return -1;
>      }
> 
>      /* Copy response */
> @@ -353,6 +390,35 @@ static int vm_event_ring_get_response(struct
> vm_event_domain *ved,
>  }
> 
>  /*
> + * The response is received only from the sync channels
> + */
> +static int vm_event_channel_get_response(
> +    struct vm_event_domain *ved,
> +    struct vcpu *v,
> +    unsigned int port,
> +    vm_event_response_t *rsp)
> +{
> +    struct vm_event_domain_channel *impl =
> to_vm_event_domain_channel(ved);
> +    struct vm_event_slot *slot = impl->channels->va + v->vcpu_id *
> sizeof(struct vm_event_slot);
> +
> +    vm_event_lock(ved);
> +
> +    if ( slot->state != VM_EVENT_SLOT_STATE_FINISH )
> +    {
> +        gdprintk(XENLOG_G_WARNING, "The VM event slot state for d%dv%d is
> invalid.\n",
> +                 ved->d->domain_id, v->vcpu_id);
> +        vm_event_unlock(ved);
> +        return -1;
> +    }
> +
> +    memcpy(rsp, &slot->u.rsp, sizeof(*rsp));
> +    slot->state = VM_EVENT_SLOT_STATE_IDLE;
> +
> +    vm_event_unlock(ved);
> +    return 0;
> +}
> +
> +/*
>   * Pull all responses from the given ring and unpause the corresponding
> vCPU
>   * if required. Based on the response type, here we can also call custom
>   * handlers.
> @@ -360,10 +426,11 @@ static int vm_event_ring_get_response(struct
> vm_event_domain *ved,
>   * Note: responses are handled the same way regardless of which ring they
>   * arrive on.
>   */
> -static int vm_event_resume(struct vm_event_domain *ved)
> +static int vm_event_resume(struct vm_event_domain *ved, struct vcpu *v,
> unsigned int port)
>  {
>      vm_event_response_t rsp;
>      struct domain *d;
> +    int rc;
> 
>      if (! vm_event_check(ved))
>          return -ENODEV;
> @@ -380,22 +447,25 @@ static int vm_event_resume(struct vm_event_domain
> *ved)
>       */
>      ASSERT(d != current->domain);
> 
> -    /* Pull all responses off the ring. */
> -    while ( ved->get_response(ved, &rsp) )
> +    /* Loop until all available responses are read. */
> +    do
>      {
> -        struct vcpu *v;
> +        struct vcpu *rsp_v;
> +        rc = ved->get_response(ved, v, port, &rsp);
> +        if ( rc < 0 )
> +            break;
> 
>          if ( rsp.version != VM_EVENT_INTERFACE_VERSION )
>          {
>              printk(XENLOG_G_WARNING "vm_event interface version
> mismatch\n");
> -            continue;
> +            goto end_loop;
>          }
> 
>          /* Validate the vcpu_id in the response. */
>          if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
> -            continue;
> +            goto end_loop;
> 
> -        v = d->vcpu[rsp.vcpu_id];
> +        rsp_v = d->vcpu[rsp.vcpu_id];
> 
>          /*
>           * In some cases the response type needs extra handling, so here
> @@ -403,7 +473,7 @@ static int vm_event_resume(struct vm_event_domain
> *ved)
>           */
> 
>          /* Check flags which apply only when the vCPU is paused */
> -        if ( atomic_read(&v->vm_event_pause_count) )
> +        if ( atomic_read(&rsp_v->vm_event_pause_count) )
>          {
>  #ifdef CONFIG_HAS_MEM_PAGING
>              if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
> @@ -415,34 +485,36 @@ static int vm_event_resume(struct vm_event_domain
> *ved)
>               * has to set arch-specific flags when supported, and to
> avoid
>               * bitmask overhead when it isn't supported.
>               */
> -            vm_event_emulate_check(v, &rsp);
> +            vm_event_emulate_check(rsp_v, &rsp);
> 
>              /*
>               * Check in arch-specific handler to avoid bitmask overhead
> when
>               * not supported.
>               */
> -            vm_event_register_write_resume(v, &rsp);
> +            vm_event_register_write_resume(rsp_v, &rsp);
> 
>              /*
>               * Check in arch-specific handler to avoid bitmask overhead
> when
>               * not supported.
>               */
> -            vm_event_toggle_singlestep(d, v, &rsp);
> +            vm_event_toggle_singlestep(d, rsp_v, &rsp);
> 
>              /* Check for altp2m switch */
>              if ( rsp.flags & VM_EVENT_FLAG_ALTERNATE_P2M )
> -                p2m_altp2m_check(v, rsp.altp2m_idx);
> +                p2m_altp2m_check(rsp_v, rsp.altp2m_idx);
> 
>              if ( rsp.flags & VM_EVENT_FLAG_SET_REGISTERS )
> -                vm_event_set_registers(v, &rsp);
> +                vm_event_set_registers(rsp_v, &rsp);
> 
>              if ( rsp.flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT )
> -                vm_event_monitor_next_interrupt(v);
> +                vm_event_monitor_next_interrupt(rsp_v);
> 
>              if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
> -                vm_event_vcpu_unpause(v);
> +                vm_event_vcpu_unpause(rsp_v);
>          }
> +end_loop: ;
>      }
> +    while ( rc > 0 );
> 
>      return 0;
>  }
> @@ -527,28 +599,28 @@ int __vm_event_claim_slot(struct vm_event_domain
> *ved, bool allow_sleep)
>      if ( !vm_event_check(ved) )
>          return -EOPNOTSUPP;
> 
> -    return ved->claim_slot(ved, allow_sleep);
> +    return (ved->claim_slot) ? ved->claim_slot(ved, allow_sleep) : 0;
>  }
> 
>  #ifdef CONFIG_HAS_MEM_PAGING
>  /* Registered with Xen-bound event channel for incoming notifications. */
>  static void mem_paging_notification(struct vcpu *v, unsigned int port)
>  {
> -    vm_event_resume(v->domain->vm_event_paging);
> +    vm_event_resume(v->domain->vm_event_paging, v, port);
>  }
>  #endif
> 
>  /* Registered with Xen-bound event channel for incoming notifications. */
>  static void monitor_notification(struct vcpu *v, unsigned int port)
>  {
> -    vm_event_resume(v->domain->vm_event_monitor);
> +    vm_event_resume(v->domain->vm_event_monitor, v, port);
>  }
> 
>  #ifdef CONFIG_HAS_MEM_SHARING
>  /* Registered with Xen-bound event channel for incoming notifications. */
>  static void mem_sharing_notification(struct vcpu *v, unsigned int port)
>  {
> -    vm_event_resume(v->domain->vm_event_share);
> +    vm_event_resume(v->domain->vm_event_share, v, port);
>  }
>  #endif
> 
> @@ -565,19 +637,24 @@ void vm_event_cleanup(struct domain *d)
>           * Finally, because this code path involves previously
>           * pausing the domain (domain_kill), unpausing the
>           * vcpus causes no harm. */
> -        destroy_waitqueue_head(&to_vm_event_domain_ring(d-
> >vm_event_paging)->wq);
> +        if ( is_vm_event_domain_ring(d->vm_event_paging) )
> +            destroy_waitqueue_head(&to_vm_event_domain_ring(d-
> >vm_event_paging)->wq);
>          (void)vm_event_disable(&d->vm_event_paging);
>      }
>  #endif
> +
>      if ( vm_event_check(d->vm_event_monitor) )
>      {
> -        destroy_waitqueue_head(&to_vm_event_domain_ring(d-
> >vm_event_monitor)->wq);
> +        if ( is_vm_event_domain_ring(d->vm_event_monitor) )
> +            destroy_waitqueue_head(&to_vm_event_domain_ring(d-
> >vm_event_monitor)->wq);
>          (void)vm_event_disable(&d->vm_event_monitor);
>      }
> +
>  #ifdef CONFIG_HAS_MEM_SHARING
>      if ( vm_event_check(d->vm_event_share) )
>      {
> -        destroy_waitqueue_head(&to_vm_event_domain_ring(d-
> >vm_event_share)->wq);
> +        if ( is_vm_event_domain_ring(d->vm_event_share) )
> +            destroy_waitqueue_head(&to_vm_event_domain_ring(d-
> >vm_event_share)->wq);
>          (void)vm_event_disable(&d->vm_event_share);
>      }
>  #endif
> @@ -641,7 +718,7 @@ static int vm_event_ring_enable(
>      if ( rc < 0 )
>          goto err;
> 
> -    impl->xen_port = vec->port = rc;
> +    impl->xen_port = vec->u.enable.port = rc;
> 
>      /* Prepare ring buffer */
>      FRONT_RING_INIT(&impl->front_ring,
> @@ -668,6 +745,294 @@ static int vm_event_ring_enable(
>      return rc;
>  }
> 
> +/*
> + * Helper functions for allocating / freeing vm_event buffers
> + */
> +static int vm_event_alloc_buffer(struct domain *d, unsigned int
> nr_frames,
> +                                 struct vm_event_buffer **_veb)
> +{
> +    struct vm_event_buffer *veb;
> +    int i = 0, rc;
> +
> +    veb = _xzalloc(sizeof(struct vm_event_buffer) + nr_frames *
> sizeof(mfn_t),
> +                   __alignof__(struct vm_event_buffer));
> +    if ( unlikely(!veb) )
> +    {
> +        rc = -ENOMEM;
> +        goto err;
> +    }
> +
> +    veb->nr_frames = nr_frames;
> +
> +    for ( i = 0; i < nr_frames; i++ )
> +    {
> +        struct page_info *page = alloc_domheap_page(d, 0);
> +
> +        if ( !page )
> +        {
> +            rc = -ENOMEM;
> +            goto err;
> +        }
> +
> +        if ( !get_page_and_type(page, d, PGT_writable_page) )
> +        {
> +            domain_crash(d);
> +            rc = -ENODATA;
> +            goto err;
> +        }
> +
> +        veb->mfn[i] = page_to_mfn(page);
> +    }
> +
> +    veb->va = vmap(veb->mfn, nr_frames);
> +    if ( !veb->va )
> +    {
> +        rc = -ENOMEM;
> +        goto err;
> +    }
> +
> +    for( i = 0; i < nr_frames; i++ )
> +        clear_page(veb->va + i * PAGE_SIZE);
> +
> +    *_veb = veb;
> +    return 0;
> +
> +err:
> +    while ( --i >= 0 )
> +    {
> +        struct page_info *page = mfn_to_page(veb->mfn[i]);
> +
> +        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
> +            put_page(page);
> +        put_page_and_type(page);
> +    }
> +
> +    xfree(veb);
> +    return rc;
> +}
> +
> +static void vm_event_free_buffer(struct vm_event_buffer **_veb)
> +{
> +    struct vm_event_buffer *veb = *_veb;
> +
> +    if ( !veb )
> +        return;
> +
> +    if ( veb->va )
> +    {
> +        int i;
> +
> +        vunmap(veb->va);
> +        for ( i = 0; i < veb->nr_frames; i++ )
> +        {
> +            struct page_info *page = mfn_to_page(veb->mfn[i]);
> +
> +            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
> +                put_page(page);
> +            put_page_and_type(page);
> +        }
> +    }
> +    XFREE(*_veb);
> +}
> +
> +static bool vm_event_channel_check(struct vm_event_domain *ved)
> +{
> +    struct vm_event_domain_channel *impl =
> to_vm_event_domain_channel(ved);
> +    return impl->ring->va != NULL && impl->channels->va != NULL;
> +}
> +
> +static void vm_event_channel_put_request(struct vm_event_domain *ved,
> +                                         vm_event_request_t *req)
> +{
> +    struct vcpu *curr = current;
> +    struct vm_event_domain_channel *impl =
> to_vm_event_domain_channel(ved);
> +    struct domain *d;
> +    struct vm_event_slot *slot;
> +    bool sync;
> +
> +    if ( !vm_event_check(ved) )
> +        return;
> +
> +    d = ved->d;
> +    slot = impl->channels->va + req->vcpu_id * sizeof(struct
> vm_event_slot);
> +
> +    if ( curr->domain != d )
> +    {
> +        req->flags |= VM_EVENT_FLAG_FOREIGN;
> +#ifndef NDEBUG
> +        if ( !(req->flags & VM_EVENT_FLAG_VCPU_PAUSED) )
> +            gdprintk(XENLOG_G_WARNING, "d%dv%d was not paused.\n",
> +                     d->domain_id, req->vcpu_id);
> +#endif
> +    }
> +
> +    req->version = VM_EVENT_INTERFACE_VERSION;
> +
> +    sync = req->flags & VM_EVENT_FLAG_VCPU_PAUSED;
> +
> +    vm_event_lock(ved);
> +
> +    if ( sync )
> +    {
> +        if ( slot->state != VM_EVENT_SLOT_STATE_IDLE )
> +        {
> +            gdprintk(XENLOG_G_WARNING, "The VM event slot for d%dv%d is
> not IDLE.\n",
> +                     d->domain_id, req->vcpu_id);
> +            vm_event_unlock(ved);
> +            return;
> +        }
> +        memcpy( &slot->u.req, req, sizeof(*req) );
> +        slot->state = VM_EVENT_SLOT_STATE_SUBMIT;
> +    }
> +    else
> +    {
> +        vm_event_front_ring_t *front_ring;
> +        RING_IDX req_prod;
> +
> +        /* Due to the reservations, this step must succeed. */
> +        front_ring = &impl->front_ring;
> +
> +        /* Copy request */
> +        req_prod = front_ring->req_prod_pvt;
> +        memcpy(RING_GET_REQUEST(front_ring, req_prod), req,
> sizeof(*req));
> +        req_prod++;
> +
> +        /* Update ring */
> +        front_ring->req_prod_pvt = req_prod;
> +        RING_PUSH_REQUESTS(front_ring);
> +    }
> +
> +    vm_event_unlock(ved);
> +
> +    notify_via_xen_event_channel(d, impl->xen_ports[(sync) ? req->vcpu_id
> : d->max_vcpus]);
> +}
> +
> +static int vm_event_channel_disable(struct vm_event_domain **_ved)
> +{
> +    struct vm_event_domain_channel *ved =
> to_vm_event_domain_channel(*_ved);
> +    struct domain *d = ved->ved.d;
> +    struct vcpu *v;
> +    int i;
> +
> +    vm_event_lock(&ved->ved);
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        if ( atomic_read(&v->vm_event_pause_count) )
> +            vm_event_vcpu_unpause(v);
> +        /*
> +        if ( test_and_clear_bit(ved->ved.pause_flag, &v->pause_flags) )
> +        {
> +            vcpu_unpause(v);
> +        }
> +        */
> +    }
> +
> +    /* Free domU's event channels and leave the other one unbound */
> +    for ( i = 0; i < d->max_vcpus; i++ )
> +        evtchn_close(d, ved->xen_ports[i], 0);
> +    evtchn_close(d, ved->xen_ports[d->max_vcpus], 0);
> +
> +    vm_event_free_buffer(&ved->ring);
> +    vm_event_free_buffer(&ved->channels);
> +
> +    vm_event_cleanup_domain(d);
> +
> +    vm_event_unlock(&ved->ved);
> +
> +    XFREE(*_ved);
> +
> +    return 0;
> +}
> +
> +static int vm_event_channel_enable(
> +    struct domain *d,
> +    struct vm_event_domain **_ved,
> +    unsigned int nr_frames,
> +    xen_event_channel_notification_t notification_fn)
> +{
> +    int i = 0, rc;
> +    struct vm_event_domain_channel *impl;
> +    unsigned int nr_ring_frames, nr_channel_frames;
> +
> +    if ( *_ved )
> +        return -EBUSY;
> +
> +    if ( nr_frames <= PFN_UP(d->max_vcpus * sizeof(struct vm_event_slot))
> )
> +        return -EINVAL;
> +
> +    impl = _xzalloc(sizeof(struct vm_event_domain_channel) +
> +                        ( d->max_vcpus + 1 ) * sizeof(uint32_t),
> +                    __alignof__(struct vm_event_domain_channel));
> +    if ( !impl )
> +        return -ENOMEM;
> +
> +    impl->ved.d = d;
> +    impl->ved.check = vm_event_channel_check;
> +    impl->ved.claim_slot = NULL;
> +    impl->ved.release_slot = NULL;
> +    impl->ved.put_request = vm_event_channel_put_request;
> +    impl->ved.get_response = vm_event_channel_get_response;
> +    impl->ved.disable = vm_event_channel_disable;
> +
> +    nr_channel_frames = PFN_UP(d->max_vcpus *
> sizeof(vm_event_request_t));
> +    nr_ring_frames = nr_frames - nr_channel_frames;
> +
> +    vm_event_lock_init(&impl->ved);
> +    vm_event_lock(&impl->ved);
> +
> +    rc = vm_event_init_domain(d);
> +    if ( rc < 0 )
> +        goto err;
> +
> +    rc = vm_event_alloc_buffer(d, nr_ring_frames, &impl->ring);
> +    if ( rc )
> +        goto err;
> +
> +    /* Allocate event channel for the async ring*/
> +    rc = alloc_unbound_xen_event_channel(d, 0, current->domain-
> >domain_id,
> +                                         notification_fn);
> +    if ( rc < 0 )
> +        goto err;
> +
> +    impl->xen_ports[d->max_vcpus] = rc;
> +
> +    /* Prepare ring buffer */
> +    FRONT_RING_INIT(&impl->front_ring,
> +                    (vm_event_sring_t *)impl->ring->va,
> +                    impl->ring->nr_frames * PAGE_SIZE);
> +
> +    rc = vm_event_alloc_buffer(d, nr_channel_frames, &impl->channels);
> +    if ( rc != 0)
> +        goto err;
> +
> +    for ( i = 0; i < d->max_vcpus; i++)
> +    {
> +        rc = alloc_unbound_xen_event_channel(d, i, current->domain-
> >domain_id,
> +                                             notification_fn);
> +        if ( rc < 0 )
> +            goto err;
> +
> +        impl->xen_ports[i] = rc;
> +    }
> +
> +    *_ved = &impl->ved;
> +
> +    vm_event_unlock(&impl->ved);
> +    return 0;
> +
> +err:
> +    while (i--)
> +        evtchn_close(d, impl->xen_ports[i], 0);
> +    evtchn_close(d, impl->xen_ports[d->max_vcpus], 0);
> +    vm_event_free_buffer(&impl->ring);
> +    vm_event_free_buffer(&impl->channels);
> +    vm_event_cleanup_domain(d);
> +    vm_event_unlock(&impl->ved);
> +    xfree(impl);
> +    return rc;
> +}
> +
>  int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
>                      XEN_GUEST_HANDLE_PARAM(void) u_domctl)
>  {
> @@ -748,7 +1113,9 @@ int vm_event_domctl(struct domain *d, struct
> xen_domctl_vm_event_op *vec,
>              break;
> 
>          case XEN_VM_EVENT_RESUME:
> -            rc = vm_event_resume(d->vm_event_paging);
> +            if ( vm_event_check(d->vm_event_paging) &&
> +                 is_vm_event_domain_ring(d->vm_event_paging) )
> +                rc = vm_event_resume(d->vm_event_paging, NULL, 0);
>              break;
> 
>          default:
> @@ -786,7 +1153,30 @@ int vm_event_domctl(struct domain *d, struct
> xen_domctl_vm_event_op *vec,
>              break;
> 
>          case XEN_VM_EVENT_RESUME:
> -            rc = vm_event_resume(d->vm_event_monitor);
> +            if ( vm_event_check(d->vm_event_monitor) &&
> +                 is_vm_event_domain_ring(d->vm_event_monitor) )
> +                rc = vm_event_resume(d->vm_event_monitor, NULL, 0);
> +            break;
> +
> +        case XEN_VM_EVENT_GET_PORTS:
> +            if ( !vm_event_check(d->vm_event_monitor) )
> +                break;
> +
> +            if ( !is_vm_event_domain_ring(d->vm_event_monitor) )
> +            {
> +                struct vm_event_domain_channel *impl =
> to_vm_event_domain_channel(d->vm_event_monitor);
> +
> +                if ( copy_to_guest(vec->u.get_ports.sync,
> +                                   impl->xen_ports,
> +                                   d->max_vcpus) != 0 )
> +                {
> +                    rc = -EFAULT;
> +                    break;
> +                }
> +
> +                vec->u.get_ports.async = impl->xen_ports[d->max_vcpus];
> +                rc = 0;
> +            }
>              break;
> 
>          default:
> @@ -830,7 +1220,10 @@ int vm_event_domctl(struct domain *d, struct
> xen_domctl_vm_event_op *vec,
>              break;
> 
>          case XEN_VM_EVENT_RESUME:
> -            rc = vm_event_resume(d->vm_event_share);
> +            if ( vm_event_check(d->vm_event_monitor) &&
> +                 is_vm_event_domain_ring(d->vm_event_monitor) )
> +                rc = vm_event_resume(d->vm_event_share, NULL, 0);
> +            break;
> 
>          default:
>              rc = -ENOSYS;
> @@ -847,6 +1240,52 @@ int vm_event_domctl(struct domain *d, struct
> xen_domctl_vm_event_op *vec,
>      return rc;
>  }
> 
> +int vm_event_get_frames(struct domain *d, unsigned int id,
> +                        unsigned long frame, unsigned int nr_frames,
> +                        xen_pfn_t mfn_list[])
> +{
> +    int rc = 0, i, j;
> +    struct vm_event_domain **_ved;
> +    struct vm_event_domain_channel *impl;
> +    xen_event_channel_notification_t fn;
> +
> +    switch ( id )
> +    {
> +    case XEN_VM_EVENT_TYPE_MONITOR:
> +        /* domain_pause() not required here, see XSA-99 */
> +        rc = arch_monitor_init_domain(d);
> +        if ( rc )
> +            return rc;
> +        _ved = &d->vm_event_monitor;
> +        fn = monitor_notification;
> +        break;
> +
> +    default:
> +        return -ENOSYS;
> +    }
> +
> +    rc = vm_event_channel_enable(d, _ved, nr_frames, fn);
> +    if ( rc )
> +    {
> +        switch ( id )
> +        {
> +            case XEN_VM_EVENT_TYPE_MONITOR:
> +                arch_monitor_cleanup_domain(d);
> +                break;
> +        }
> +        return rc;
> +    }
> +
> +    impl = to_vm_event_domain_channel(*_ved);
> +    j = 0;
> +    for ( i = 0; i < impl->ring->nr_frames; i++ )
> +        mfn_list[j++] = mfn_x(impl->ring->mfn[i]);
> +    for ( i = 0; i < impl->channels->nr_frames; i++ )
> +        mfn_list[j++] = mfn_x(impl->channels->mfn[i]);
> +
> +    return rc;
> +}
> +
>  void vm_event_vcpu_pause(struct vcpu *v)
>  {
>      ASSERT(v == current);
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 26b1a55..78262a1 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -38,7 +38,7 @@
>  #include "hvm/save.h"
>  #include "memory.h"
> 
> -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000011
> +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000012
> 
>  /*
>   * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
> @@ -836,6 +836,7 @@ struct xen_domctl_gdbsx_domstatus {
>  #define XEN_VM_EVENT_ENABLE               0
>  #define XEN_VM_EVENT_DISABLE              1
>  #define XEN_VM_EVENT_RESUME               2
> +#define XEN_VM_EVENT_GET_PORTS            3
> 
>  /*
>   * Use for teardown/setup of helper<->hypervisor interface for paging,
> @@ -843,10 +844,26 @@ struct xen_domctl_gdbsx_domstatus {
>   */
>  /* XEN_DOMCTL_vm_event_op */
>  struct xen_domctl_vm_event_op {
> -    uint32_t        op;           /* XEN_VM_EVENT_* */
> -    uint32_t        type;         /* XEN_VM_EVENT_TYPE_* */
> +    /* IN: Xen vm_event opcode (XEN_VM_EVENT_*) */
> +    uint32_t            op;
> +    /* IN: Xen vm event ring type (XEN_VM_EVENT_TYPE_*) */
> +    uint32_t            type;
> 
> -    uint32_t        port;         /* OUT: event channel for ring */
> +    union {
> +        struct {
> +            /* OUT: remote port for event channel ring */
> +            uint32_t    port;
> +        } enable;
> +        struct {
> +            /* OUT: remote port for the async event channel ring */
> +            uint32_t    async;
> +            /*
> +             * OUT: remote ports for the sync event vm_event channels
> +             * The number for ports will be equal with the vcpu count.
> +             */
> +            XEN_GUEST_HANDLE_64(uint32) sync;
> +        } get_ports;
> +    } u;
>  };
> 
>  /*
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 8638023..cfd280d 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -612,6 +612,7 @@ struct xen_mem_acquire_resource {
> 
>  #define XENMEM_resource_ioreq_server 0
>  #define XENMEM_resource_grant_table 1
> +#define XENMEM_resource_vm_event 2
> 
>      /*
>       * IN - a type-specific resource identifier, which must be zero
> @@ -619,6 +620,7 @@ struct xen_mem_acquire_resource {
>       *
>       * type == XENMEM_resource_ioreq_server -> id == ioreq server id
>       * type == XENMEM_resource_grant_table -> id defined below
> +     * type == XENMEM_resource_vm_event -> id == vm_event type
>       */
>      uint32_t id;
> 
> diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> index b2bafc0..499fbbc 100644
> --- a/xen/include/public/vm_event.h
> +++ b/xen/include/public/vm_event.h
> @@ -388,6 +388,21 @@ typedef struct vm_event_st {
> 
>  DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t);
> 
> +struct vm_event_slot
> +{
> +    uint32_t state;
> +    union {
> +        vm_event_request_t req;
> +        vm_event_response_t rsp;
> +    } u;
> +};
> +
> +enum vm_event_slot_state {
> +    VM_EVENT_SLOT_STATE_IDLE,   /* no contents */
> +    VM_EVENT_SLOT_STATE_SUBMIT, /* request ready */
> +    VM_EVENT_SLOT_STATE_FINISH, /* response ready */
> +};
> +
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  #endif /* _XEN_PUBLIC_VM_EVENT_H */
> 
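A minimal, untested sketch of how a consumer could service one of these
per-vCPU slots once the corresponding sync event channel fires. sync_buf is
the buffer returned by xc_monitor_enable_ex(), handle_request() is a
hypothetical application callback, and <xenevtchn.h> plus this header are
assumed to be included:

    static void service_slot(void *sync_buf, uint32_t vcpu_id,
                             xenevtchn_handle *xce, evtchn_port_t local_port)
    {
        struct vm_event_slot *slot =
            (struct vm_event_slot *)sync_buf + vcpu_id;
        vm_event_request_t req;
        vm_event_response_t rsp = { 0 };

        if ( slot->state != VM_EVENT_SLOT_STATE_SUBMIT )
            return; /* spurious notification, nothing to handle */

        req = slot->u.req;          /* copy out before reusing the union */
        handle_request(&req, &rsp); /* hypothetical application callback */

        rsp.version = VM_EVENT_INTERFACE_VERSION;
        rsp.vcpu_id = req.vcpu_id;
        rsp.flags = req.flags & VM_EVENT_FLAG_VCPU_PAUSED; /* echo to unpause */

        slot->u.rsp = rsp;
        slot->state = VM_EVENT_SLOT_STATE_FINISH; /* hand the slot back */

        /* Kick the hypervisor so vm_event_resume() consumes the response. */
        xenevtchn_notify(xce, local_port);
    }

The state machine is: Xen writes the request and moves the slot
IDLE -> SUBMIT, the consumer replies and moves it SUBMIT -> FINISH, and
vm_event_channel_get_response() copies the response and moves it back to
IDLE.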
> diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
> index a5c82d6..d4bd184 100644
> --- a/xen/include/xen/vm_event.h
> +++ b/xen/include/xen/vm_event.h
> @@ -64,6 +64,10 @@ void vm_event_put_request(struct vm_event_domain *ved,
>  int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
>                      XEN_GUEST_HANDLE_PARAM(void) u_domctl);
> 
> +int vm_event_get_frames(struct domain *d, unsigned int id,
> +                        unsigned long frame, unsigned int nr_frames,
> +                        xen_pfn_t mfn_list[]);
> +
>  void vm_event_vcpu_pause(struct vcpu *v);
>  void vm_event_vcpu_unpause(struct vcpu *v);
> 
> --
> 2.7.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxxx
> https://lists.xenproject.org/mailman/listinfo/xen-devel