|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] mem_event: use wait queue when ring is full
> Date: Mon, 05 Dec 2011 12:19:03 +0100
> From: Olaf Hering <olaf@xxxxxxxxx>
> To: xen-devel@xxxxxxxxxxxxxxxxxxx
> Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is
> full
> Message-ID: <cd163bcd0f066e52ee74.1323083943@xxxxxxxxxxxx>
> Content-Type: text/plain; charset="us-ascii"
>
> # HG changeset patch
> # User Olaf Hering <olaf@xxxxxxxxx>
> # Date 1323083831 -3600
> # Node ID cd163bcd0f066e52ee74e42090188d632f0086bf
> # Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
>
>
> v5:
> - rename mem_event_check_ring() to mem_event_claim_slot()
> - rename mem_event_put_req_producers() to mem_event_release_slot()
> - add local/foreign request accounting
> - keep room for at least one guest request
>
> v4:
> - fix off-by-one bug in _mem_event_put_request
> - add mem_event_wake_requesters() and use wake_up_nr()
> - rename mem_event_mark_and_pause() and mem_event_mark_and_pause()
> functions
> - req_producers counts foreign request producers, rename member
>
> v3:
> - rename ->mem_event_bit to ->bit
> - remove me_ from new VPF_ defines
>
> v2:
> - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise
> the
> vcpu will not wake_up after a wait_event because the pause_count was
> increased twice. Fixes guest hangs.
> - update free space check in _mem_event_put_request()
> - simplify mem_event_put_request()
>
> Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p
> if ( (p & HVMPME_onchangeonly) && (value == old) )
> return 1;
>
> - rc = mem_event_check_ring(d, &d->mem_event->access);
> - if ( rc )
> + rc = mem_event_claim_slot(d, &d->mem_event->access);
> + if ( rc < 0 )
> return rc;
>
> memset(&req, 0, sizeof(req));
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -23,6 +23,7 @@
>
> #include <asm/domain.h>
> #include <xen/event.h>
> +#include <xen/wait.h>
> #include <asm/p2m.h>
> #include <asm/mem_event.h>
> #include <asm/mem_paging.h>
> @@ -39,6 +40,7 @@
>
> static int mem_event_enable(struct domain *d,
> xen_domctl_mem_event_op_t *mec,
> + int bit,
> struct mem_event_domain *med)
> {
> int rc;
> @@ -107,8 +109,12 @@ static int mem_event_enable(struct domai
>
> mem_event_ring_lock_init(med);
>
> + med->bit = bit;
I think it's been asked before for this to have a more expressive name.
> +
> + init_waitqueue_head(&med->wq);
> +
> /* Wake any VCPUs paused for memory events */
> - mem_event_unpause_vcpus(d);
> + mem_event_wake_waiters(d, med);
>
> return 0;
>
> @@ -124,6 +130,9 @@ static int mem_event_enable(struct domai
>
> static int mem_event_disable(struct mem_event_domain *med)
> {
> + if (!list_empty(&med->wq.list))
> + return -EBUSY;
> +
What does the caller do with EBUSY? Retry?
> unmap_domain_page(med->ring_page);
> med->ring_page = NULL;
>
> @@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_
> return 0;
> }
>
> -void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req)
> +static int _mem_event_put_request(struct domain *d,
> + struct mem_event_domain *med,
> + mem_event_request_t *req)
> {
> mem_event_front_ring_t *front_ring;
> + int free_req, claimed_req;
> RING_IDX req_prod;
>
> mem_event_ring_lock(med);
>
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + /* Foreign requests must succeed because their vcpus can not sleep */
> + claimed_req = med->foreign_producers;
> + if ( !free_req || ( current->domain == d && free_req <= claimed_req )
> ) {
> + mem_event_ring_unlock(med);
> + return 0;
> + }
> +
> front_ring = &med->front_ring;
> req_prod = front_ring->req_prod_pvt;
>
> @@ -147,14 +167,35 @@ void mem_event_put_request(struct domain
> memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
> req_prod++;
>
> + /* Update accounting */
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> +
> /* Update ring */
> - med->req_producers--;
> front_ring->req_prod_pvt = req_prod;
> RING_PUSH_REQUESTS(front_ring);
>
> mem_event_ring_unlock(med);
>
> notify_via_xen_event_channel(d, med->xen_port);
> +
> + return 1;
> +}
> +
> +void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med,
> + mem_event_request_t *req)
> +{
> + /* Go to sleep if request came from guest */
> + if (current->domain == d) {
> + wait_event(med->wq, _mem_event_put_request(d, med, req));
> + return;
> + }
> + /* Ring was full anyway, unable to sleep in non-guest context */
> + if (!_mem_event_put_request(d, med, req))
> + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n",
> d->domain_id,
> + req->type, req->flags, (unsigned long)req->gfn);
> }
>
> void mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp)
> @@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e
> mem_event_ring_unlock(med);
> }
>
> -void mem_event_unpause_vcpus(struct domain *d)
> +/**
> + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
> + * ring. Only as many as can place another request in the ring will
> + * resume execution.
> + */
> +void mem_event_wake_requesters(struct mem_event_domain *med)
> +{
> + int free_req;
> +
> + mem_event_ring_lock(med);
> +
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> + if ( free_req )
> + wake_up_nr(&med->wq, free_req);
> +
> + mem_event_ring_unlock(med);
> +}
> +
> +/**
> + * mem_event_wake_waiters - Wake all vcpus waiting for the ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
> + * become available.
> + */
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med)
> {
> struct vcpu *v;
>
> for_each_vcpu ( d, v )
> - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
> + if ( test_and_clear_bit(med->bit, &v->pause_flags) )
> vcpu_wake(v);
> }
>
> -void mem_event_mark_and_pause(struct vcpu *v)
> +/**
> + * mem_event_mark_and_sleep - Put vcpu to sleep
> + * @v: guest vcpu
> + * @med: mem_event ring
> + *
> + * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
> + * The vcpu will resume execution in mem_event_wake_waiters().
> + */
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med)
> {
> - set_bit(_VPF_mem_event, &v->pause_flags);
> + set_bit(med->bit, &v->pause_flags);
> vcpu_sleep_nosync(v);
> }
>
> -void mem_event_put_req_producers(struct mem_event_domain *med)
> +/**
> + * mem_event_release_slot - Release a claimed slot
> + * @med: mem_event ring
> + *
> + * mem_event_release_slot() releases a claimed slot in the mem_event
> ring.
> + */
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med)
> {
> mem_event_ring_lock(med);
> - med->req_producers--;
> + if ( current->domain == d )
> + med->target_producers--;
> + else
> + med->foreign_producers--;
> mem_event_ring_unlock(med);
> }
>
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
> +/**
> + * mem_event_claim_slot - Check state of a mem_event ring
> + * @d: guest domain
> + * @med: mem_event ring
> + *
> + * Return codes: < 0: the ring is not yet configured
> + * 0: the ring has some room
> + * > 0: the ring is full
> + *
> + * mem_event_claim_slot() checks the state of the given mem_event ring.
> + * If the current vcpu belongs to the guest domain, the function assumes
> that
> + * mem_event_put_request() will sleep until the ring has room again.
> + * A guest can always place at least one request.
> + *
> + * If the current vcpu does not belong to the target domain the caller
> must try
> + * again until there is room. A slot is claimed and the caller can place
> a
> + * request. If the caller does not need to send a request, the claimed
> slot has
> + * to be released with mem_event_release_slot().
> + */
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
> {
> - struct vcpu *curr = current;
> - int free_requests;
> + int free_req;
> int ring_full = 1;
>
> if ( !med->ring_page )
> @@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain *
>
> mem_event_ring_lock(med);
>
> - free_requests = RING_FREE_REQUESTS(&med->front_ring);
> - if ( med->req_producers < free_requests )
> + free_req = RING_FREE_REQUESTS(&med->front_ring);
> +
> + if ( current->domain == d ) {
> + med->target_producers++;
> + ring_full = 0;
> + } else if ( med->foreign_producers + med->target_producers + 1 <
> free_req )
> {
> - med->req_producers++;
> + med->foreign_producers++;
> ring_full = 0;
> }
>
> - if ( ring_full && (curr->domain == d) )
> - mem_event_mark_and_pause(curr);
> -
> mem_event_ring_unlock(med);
>
> return ring_full;
> @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x
> if ( p2m->pod.entry_count )
> break;
>
> - rc = mem_event_enable(d, mec, med);
> + rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
> }
> break;
>
> @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x
> if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> break;
>
> - rc = mem_event_enable(d, mec, med);
> + rc = mem_event_enable(d, mec, _VPF_mem_access, med);
Ok, the idea of bit is that different vcpus will sleep with different
pause flags, depending on the ring they're sleeping on. But this is only
used in wake_waiters, which is not used by all rings. In fact, why do we
need wake_waiters with wait queues?
> }
> break;
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
> #endif
>
>
> -static struct page_info* mem_sharing_alloc_page(struct domain *d,
> - unsigned long gfn)
> +static void mem_sharing_notify_helper(struct domain *d, unsigned long
> gfn)
> {
> - struct page_info* page;
> struct vcpu *v = current;
> - mem_event_request_t req;
> -
> - page = alloc_domheap_page(d, 0);
> - if(page != NULL) return page;
> -
> - memset(&req, 0, sizeof(req));
> - req.type = MEM_EVENT_TYPE_SHARED;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
>
> if ( v->domain != d )
> {
> @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
> gdprintk(XENLOG_ERR,
> "Failed alloc on unshare path for foreign (%d)
> lookup\n",
> d->domain_id);
> - return page;
> + return;
> }
>
> - vcpu_pause_nosync(v);
> - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
> + if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
> + return;
>
> - if(mem_event_check_ring(d, &d->mem_event->share)) return page;
> -
> + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
> req.gfn = gfn;
> req.p2mt = p2m_ram_shared;
> req.vcpu_id = v->vcpu_id;
> mem_event_put_request(d, &d->mem_event->share, &req);
> -
> - return page;
> + vcpu_pause_nosync(v);
> }
>
> unsigned int mem_sharing_get_nr_saved_mfns(void)
> @@ -653,7 +643,7 @@ gfn_found:
> if(ret == 0) goto private_page_found;
>
> old_page = page;
> - page = mem_sharing_alloc_page(d, gfn);
> + page = alloc_domheap_page(d, 0);
> if(!page)
> {
> /* We've failed to obtain memory for private page. Need to re-add
> the
> @@ -661,6 +651,7 @@ gfn_found:
> list_add(&gfn_info->list, &hash_entry->gfns);
> put_gfn(d, gfn);
> shr_unlock();
> + mem_sharing_notify_helper(d, gfn);
This is nice. Do you think PoD could use this, should it ever run into a
ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a
sharing ring (which is bit rotted) we could have an ENOMEM ring with a
utility launched by xencommons listening. The problem, again, is what if
ENOMEM is itself caused by dom0 (e.g. writable mapping of a shared page)
> return -ENOMEM;
> }
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
> */
> void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
> {
> - struct vcpu *v = current;
> - mem_event_request_t req;
> + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn
> };
>
> - /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
> - {
> - /* Send release notification to pager */
> - memset(&req, 0, sizeof(req));
> - req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
> - req.gfn = gfn;
> - req.vcpu_id = v->vcpu_id;
> + /* Send release notification to pager */
> + req.flags = MEM_EVENT_FLAG_DROP_PAGE;
>
> - mem_event_put_request(d, &d->mem_event->paging, &req);
> - }
> + mem_event_put_request(d, &d->mem_event->paging, &req);
> }
>
> /**
> @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> /* Check that there's space on the ring for this request */
> - if ( mem_event_check_ring(d, &d->mem_event->paging) )
> + if ( mem_event_claim_slot(d, &d->mem_event->paging) )
> return;
>
> memset(&req, 0, sizeof(req));
> @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
> else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
> {
> /* gfn is already on its way back and vcpu is not paused */
> - mem_event_put_req_producers(&d->mem_event->paging);
> + mem_event_release_slot(d, &d->mem_event->paging);
> return;
> }
>
> @@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain
> if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
>
> - /* Unpause any domains that were paused because the ring was full */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->paging);
> }
>
> void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned
> long gla,
> @@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long
> p2m_unlock(p2m);
>
> /* Otherwise, check if there is a memory event listener, and send the
> message along */
> - res = mem_event_check_ring(d, &d->mem_event->access);
> + res = mem_event_claim_slot(d, &d->mem_event->access);
> if ( res < 0 )
> {
> /* No listener */
> @@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long
> "Memory access permissions failure, no mem_event
> listener: pausing VCPU %d, dom %d\n",
> v->vcpu_id, d->domain_id);
>
> - mem_event_mark_and_pause(v);
> + mem_event_mark_and_sleep(v, &d->mem_event->access);
> }
> else
> {
> @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain
> if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
> vcpu_unpause(d->vcpu[rsp.vcpu_id]);
>
> - /* Unpause any domains that were paused because the ring was full or
> no listener
> - * was available */
> - mem_event_unpause_vcpus(d);
> + /* Wake vcpus waiting for room in the ring */
> + mem_event_wake_requesters(&d->mem_event->access);
> +
> + /* Unpause all vcpus that were paused because no listener was
> available */
> + mem_event_wake_waiters(d, &d->mem_event->access);
Is this not used in p2m_mem_paging_resume? Why the difference? Why are two
mechanisms needed (wake_requesters, wake_sleepers)?
Thanks
Andres
> }
>
>
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/asm-x86/mem_event.h
> --- a/xen/include/asm-x86/mem_event.h
> +++ b/xen/include/asm-x86/mem_event.h
> @@ -24,13 +24,13 @@
> #ifndef __MEM_EVENT_H__
> #define __MEM_EVENT_H__
>
> -/* Pauses VCPU while marking pause flag for mem event */
> -void mem_event_mark_and_pause(struct vcpu *v);
> -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
> -void mem_event_put_req_producers(struct mem_event_domain *med);
> +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
> +void mem_event_release_slot(struct domain *d, struct mem_event_domain
> *med);
> void mem_event_put_request(struct domain *d, struct mem_event_domain
> *med, mem_event_request_t *req);
> void mem_event_get_response(struct mem_event_domain *med,
> mem_event_response_t *rsp);
> -void mem_event_unpause_vcpus(struct domain *d);
> +void mem_event_wake_requesters(struct mem_event_domain *med);
> +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain
> *med);
> +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain
> *med);
>
> int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
> XEN_GUEST_HANDLE(void) u_domctl);
> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -14,6 +14,7 @@
> #include <xen/nodemask.h>
> #include <xen/radix-tree.h>
> #include <xen/multicall.h>
> +#include <xen/wait.h>
> #include <public/xen.h>
> #include <public/domctl.h>
> #include <public/sysctl.h>
> @@ -183,7 +184,8 @@ struct mem_event_domain
> {
> /* ring lock */
> spinlock_t ring_lock;
> - unsigned int req_producers;
> + unsigned short foreign_producers;
> + unsigned short target_producers;
> /* shared page */
> mem_event_shared_page_t *shared_page;
> /* shared ring page */
> @@ -192,6 +194,10 @@ struct mem_event_domain
> mem_event_front_ring_t front_ring;
> /* event channel port (vcpu0 only) */
> int xen_port;
> + /* mem_event bit for vcpu->pause_flags */
> + int bit;
> + /* list of vcpus waiting for room in the ring */
> + struct waitqueue_head wq;
> };
>
> struct mem_event_per_domain
> @@ -615,9 +621,12 @@ static inline struct domain *next_domain
> /* VCPU affinity has changed: migrating to a new CPU. */
> #define _VPF_migrating 3
> #define VPF_migrating (1UL<<_VPF_migrating)
> - /* VCPU is blocked on memory-event ring. */
> -#define _VPF_mem_event 4
> -#define VPF_mem_event (1UL<<_VPF_mem_event)
> + /* VCPU is blocked due to missing mem_paging ring. */
> +#define _VPF_mem_paging 4
> +#define VPF_mem_paging (1UL<<_VPF_mem_paging)
> + /* VCPU is blocked due to missing mem_access ring. */
> +#define _VPF_mem_access 5
> +#define VPF_mem_access (1UL<<_VPF_mem_access)
>
> static inline int vcpu_runnable(struct vcpu *v)
> {
>
>
>
> ------------------------------
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
>
>
> End of Xen-devel Digest, Vol 82, Issue 66
> *****************************************
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |