Xen project Mailing List

Re: [Xen-devel] [PATCH] mem_event: use wait queue when ring is full

From: "Andres Lagar-Cavilla" <andres@xxxxxxxxxxxxxxxx>

Date: Mon, 5 Dec 2011 07:45:14 -0800

Delivery-date: Mon, 05 Dec 2011 15:46:22 +0000

Domainkey-signature: a=rsa-sha1; c=nofws; d=lagarcavilla.org; h=message-id :in-reply-to:references:date:subject:from:to:cc:reply-to :mime-version:content-type:content-transfer-encoding; q=dns; s= lagarcavilla.org; b=lVA8lB9cVD3L78AYbTGiZsyqLEplrc1qyBVPsk1/vSin EamVMhttht7I/6UrQ+29xrg3HOsYBgzt3iKv5y5RdSaFUb0k0miBKUpWDZHpChUk twJ0fx45fUDiTtGIIpz8BbnKW8S1V1D29iUczZe36vrIe6jXC2mJf3yAMU7j5MQ=

List-id: Xen developer discussion <xen-devel.lists.xensource.com>

> Date: Mon, 05 Dec 2011 12:19:03 +0100 > From: Olaf Hering <olaf@xxxxxxxxx> > To: xen-devel@xxxxxxxxxxxxxxxxxxx > Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is > full > Message-ID: <cd163bcd0f066e52ee74.1323083943@xxxxxxxxxxxx> > Content-Type: text/plain; charset="us-ascii" > > # HG changeset patch > # User Olaf Hering <olaf@xxxxxxxxx> > # Date 1323083831 -3600 > # Node ID cd163bcd0f066e52ee74e42090188d632f0086bf > # Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00 > mem_event: use wait queue when ring is full > > This change is based on an idea/patch from Adin Scannell. > > If the ring is full, put the current vcpu to sleep if it belongs to the > target domain. The wakeup happens in the p2m_*_resume functions. Wakeup > will take the number of free slots into account. > > A request from foreign domain has to succeed once a slot was claimed > because such vcpus can not sleep. > > This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a > full ring will lead to harmless inconsistency in the pager. > > > v5: > - rename mem_event_check_ring() to mem_event_claim_slot() > - rename mem_event_put_req_producers() to mem_event_release_slot() > - add local/foreign request accounting > - keep room for at least one guest request > > v4: > - fix off-by-one bug in _mem_event_put_request > - add mem_event_wake_requesters() and use wake_up_nr() > - rename mem_event_mark_and_pause() and mem_event_mark_and_pause() > functions > - req_producers counts foreign request producers, rename member > > v3: > - rename ->mem_event_bit to ->bit > - remove me_ from new VPF_ defines > > v2: > - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise > the > vcpu will not wake_up after a wait_event because the pause_count was > increased twice. Fixes guest hangs. > - update free space check in _mem_event_put_request() > - simplify mem_event_put_request() > > Signed-off-by: Olaf Hering <olaf@xxxxxxxxx> > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p > if ( (p & HVMPME_onchangeonly) && (value == old) ) > return 1; > > - rc = mem_event_check_ring(d, &d->mem_event->access); > - if ( rc ) > + rc = mem_event_claim_slot(d, &d->mem_event->access); > + if ( rc < 0 ) > return rc; > > memset(&req, 0, sizeof(req)); > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c > --- a/xen/arch/x86/mm/mem_event.c > +++ b/xen/arch/x86/mm/mem_event.c > @@ -23,6 +23,7 @@ > > #include <asm/domain.h> > #include <xen/event.h> > +#include <xen/wait.h> > #include <asm/p2m.h> > #include <asm/mem_event.h> > #include <asm/mem_paging.h> > @@ -39,6 +40,7 @@ > > static int mem_event_enable(struct domain *d, > xen_domctl_mem_event_op_t *mec, > + int bit, > struct mem_event_domain *med) > { > int rc; > @@ -107,8 +109,12 @@ static int mem_event_enable(struct domai > > mem_event_ring_lock_init(med); > > + med->bit = bit; I think it's been asked before for this to have a more expressive name. > + > + init_waitqueue_head(&med->wq); > + > /* Wake any VCPUs paused for memory events */ > - mem_event_unpause_vcpus(d); > + mem_event_wake_waiters(d, med); > > return 0; > > @@ -124,6 +130,9 @@ static int mem_event_enable(struct domai > > static int mem_event_disable(struct mem_event_domain *med) > { > + if (!list_empty(&med->wq.list)) > + return -EBUSY; > + What does the caller do with EBUSY? Retry? > unmap_domain_page(med->ring_page); > med->ring_page = NULL; > > @@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_ > return 0; > } > > -void mem_event_put_request(struct domain *d, struct mem_event_domain > *med, mem_event_request_t *req) > +static int _mem_event_put_request(struct domain *d, > + struct mem_event_domain *med, > + mem_event_request_t *req) > { > mem_event_front_ring_t *front_ring; > + int free_req, claimed_req; > RING_IDX req_prod; > > mem_event_ring_lock(med); > > + free_req = RING_FREE_REQUESTS(&med->front_ring); > + /* Foreign requests must succeed because their vcpus can not sleep */ > + claimed_req = med->foreign_producers; > + if ( !free_req || ( current->domain == d && free_req <= claimed_req ) > ) { > + mem_event_ring_unlock(med); > + return 0; > + } > + > front_ring = &med->front_ring; > req_prod = front_ring->req_prod_pvt; > > @@ -147,14 +167,35 @@ void mem_event_put_request(struct domain > memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req)); > req_prod++; > > + /* Update accounting */ > + if ( current->domain == d ) > + med->target_producers--; > + else > + med->foreign_producers--; > + > /* Update ring */ > - med->req_producers--; > front_ring->req_prod_pvt = req_prod; > RING_PUSH_REQUESTS(front_ring); > > mem_event_ring_unlock(med); > > notify_via_xen_event_channel(d, med->xen_port); > + > + return 1; > +} > + > +void mem_event_put_request(struct domain *d, struct mem_event_domain > *med, > + mem_event_request_t *req) > +{ > + /* Go to sleep if request came from guest */ > + if (current->domain == d) { > + wait_event(med->wq, _mem_event_put_request(d, med, req)); > + return; > + } > + /* Ring was full anyway, unable to sleep in non-guest context */ > + if (!_mem_event_put_request(d, med, req)) > + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", > d->domain_id, > + req->type, req->flags, (unsigned long)req->gfn); > } > > void mem_event_get_response(struct mem_event_domain *med, > mem_event_response_t *rsp) > @@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e > mem_event_ring_unlock(med); > } > > -void mem_event_unpause_vcpus(struct domain *d) > +/** > + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring > + * @d: guest domain > + * @med: mem_event ring > + * > + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the > + * ring. Only as many as can place another request in the ring will > + * resume execution. > + */ > +void mem_event_wake_requesters(struct mem_event_domain *med) > +{ > + int free_req; > + > + mem_event_ring_lock(med); > + > + free_req = RING_FREE_REQUESTS(&med->front_ring); > + if ( free_req ) > + wake_up_nr(&med->wq, free_req); > + > + mem_event_ring_unlock(med); > +} > + > +/** > + * mem_event_wake_waiters - Wake all vcpus waiting for the ring > + * @d: guest domain > + * @med: mem_event ring > + * > + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to > + * become available. > + */ > +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain > *med) > { > struct vcpu *v; > > for_each_vcpu ( d, v ) > - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) ) > + if ( test_and_clear_bit(med->bit, &v->pause_flags) ) > vcpu_wake(v); > } > > -void mem_event_mark_and_pause(struct vcpu *v) > +/** > + * mem_event_mark_and_sleep - Put vcpu to sleep > + * @v: guest vcpu > + * @med: mem_event ring > + * > + * mem_event_mark_and_sleep() tags vcpu and put it to sleep. > + * The vcpu will resume execution in mem_event_wake_waiters(). > + */ > +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain > *med) > { > - set_bit(_VPF_mem_event, &v->pause_flags); > + set_bit(med->bit, &v->pause_flags); > vcpu_sleep_nosync(v); > } > > -void mem_event_put_req_producers(struct mem_event_domain *med) > +/** > + * mem_event_release_slot - Release a claimed slot > + * @med: mem_event ring > + * > + * mem_event_release_slot() releases a claimed slot in the mem_event > ring. > + */ > +void mem_event_release_slot(struct domain *d, struct mem_event_domain > *med) > { > mem_event_ring_lock(med); > - med->req_producers--; > + if ( current->domain == d ) > + med->target_producers--; > + else > + med->foreign_producers--; > mem_event_ring_unlock(med); > } > > -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med) > +/** > + * mem_event_claim_slot - Check state of a mem_event ring > + * @d: guest domain > + * @med: mem_event ring > + * > + * Return codes: < 0: the ring is not yet configured > + * 0: the ring has some room > + * > 0: the ring is full > + * > + * mem_event_claim_slot() checks the state of the given mem_event ring. > + * If the current vcpu belongs to the guest domain, the function assumes > that > + * mem_event_put_request() will sleep until the ring has room again. > + * A guest can always place at least one request. > + * > + * If the current vcpu does not belong to the target domain the caller > must try > + * again until there is room. A slot is claimed and the caller can place > a > + * request. If the caller does not need to send a request, the claimed > slot has > + * to be released with mem_event_release_slot(). > + */ > +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med) > { > - struct vcpu *curr = current; > - int free_requests; > + int free_req; > int ring_full = 1; > > if ( !med->ring_page ) > @@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain * > > mem_event_ring_lock(med); > > - free_requests = RING_FREE_REQUESTS(&med->front_ring); > - if ( med->req_producers < free_requests ) > + free_req = RING_FREE_REQUESTS(&med->front_ring); > + > + if ( current->domain == d ) { > + med->target_producers++; > + ring_full = 0; > + } else if ( med->foreign_producers + med->target_producers + 1 < > free_req ) > { > - med->req_producers++; > + med->foreign_producers++; > ring_full = 0; > } > > - if ( ring_full && (curr->domain == d) ) > - mem_event_mark_and_pause(curr); > - > mem_event_ring_unlock(med); > > return ring_full; > @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x > if ( p2m->pod.entry_count ) > break; > > - rc = mem_event_enable(d, mec, med); > + rc = mem_event_enable(d, mec, _VPF_mem_paging, med); > } > break; > > @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x > if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) > break; > > - rc = mem_event_enable(d, mec, med); > + rc = mem_event_enable(d, mec, _VPF_mem_access, med); Ok, the idea of bit is that different vcpus will sleep with different pause flags, depending on the ring they're sleeping on. But this is only used in wake_waiters, which is not used by all rings. In fact, why do we need wake_waiters with wait queues? > } > break; > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c > @@ -253,18 +253,10 @@ static void mem_sharing_audit(void) > #endif > > > -static struct page_info* mem_sharing_alloc_page(struct domain *d, > - unsigned long gfn) > +static void mem_sharing_notify_helper(struct domain *d, unsigned long > gfn) > { > - struct page_info* page; > struct vcpu *v = current; > - mem_event_request_t req; > - > - page = alloc_domheap_page(d, 0); > - if(page != NULL) return page; > - > - memset(&req, 0, sizeof(req)); > - req.type = MEM_EVENT_TYPE_SHARED; > + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED }; > > if ( v->domain != d ) > { > @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all > gdprintk(XENLOG_ERR, > "Failed alloc on unshare path for foreign (%d) > lookup\n", > d->domain_id); > - return page; > + return; > } > > - vcpu_pause_nosync(v); > - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; > + if (mem_event_claim_slot(d, &d->mem_event->share) < 0) > + return; > > - if(mem_event_check_ring(d, &d->mem_event->share)) return page; > - > + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED; > req.gfn = gfn; > req.p2mt = p2m_ram_shared; > req.vcpu_id = v->vcpu_id; > mem_event_put_request(d, &d->mem_event->share, &req); > - > - return page; > + vcpu_pause_nosync(v); > } > > unsigned int mem_sharing_get_nr_saved_mfns(void) > @@ -653,7 +643,7 @@ gfn_found: > if(ret == 0) goto private_page_found; > > old_page = page; > - page = mem_sharing_alloc_page(d, gfn); > + page = alloc_domheap_page(d, 0); > if(!page) > { > /* We've failed to obtain memory for private page. Need to re-add > the > @@ -661,6 +651,7 @@ gfn_found: > list_add(&gfn_info->list, &hash_entry->gfns); > put_gfn(d, gfn); > shr_unlock(); > + mem_sharing_notify_helper(d, gfn); This is nice. Do you think PoD could use this, should it ever run into a ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a sharing ring (which is bit rotted) we could have an ENOMEM ring with a utility launched by xencommons listening. The problem, again, is what if ENOMEM is itself caused by dom0 (e.g. writable mapping of a shared page) > return -ENOMEM; > } > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain * > */ > void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) > { > - struct vcpu *v = current; > - mem_event_request_t req; > + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn > }; > > - /* Check that there's space on the ring for this request */ > - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0) > - { > - /* Send release notification to pager */ > - memset(&req, 0, sizeof(req)); > - req.flags |= MEM_EVENT_FLAG_DROP_PAGE; > - req.gfn = gfn; > - req.vcpu_id = v->vcpu_id; > + /* Send release notification to pager */ > + req.flags = MEM_EVENT_FLAG_DROP_PAGE; > > - mem_event_put_request(d, &d->mem_event->paging, &req); > - } > + mem_event_put_request(d, &d->mem_event->paging, &req); > } > > /** > @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma > struct p2m_domain *p2m = p2m_get_hostp2m(d); > > /* Check that there's space on the ring for this request */ > - if ( mem_event_check_ring(d, &d->mem_event->paging) ) > + if ( mem_event_claim_slot(d, &d->mem_event->paging) ) > return; > > memset(&req, 0, sizeof(req)); > @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma > else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) > { > /* gfn is already on its way back and vcpu is not paused */ > - mem_event_put_req_producers(&d->mem_event->paging); > + mem_event_release_slot(d, &d->mem_event->paging); > return; > } > > @@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) > vcpu_unpause(d->vcpu[rsp.vcpu_id]); > > - /* Unpause any domains that were paused because the ring was full */ > - mem_event_unpause_vcpus(d); > + /* Wake vcpus waiting for room in the ring */ > + mem_event_wake_requesters(&d->mem_event->paging); > } > > void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned > long gla, > @@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long > p2m_unlock(p2m); > > /* Otherwise, check if there is a memory event listener, and send the > message along */ > - res = mem_event_check_ring(d, &d->mem_event->access); > + res = mem_event_claim_slot(d, &d->mem_event->access); > if ( res < 0 ) > { > /* No listener */ > @@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long > "Memory access permissions failure, no mem_event > listener: pausing VCPU %d, dom %d\n", > v->vcpu_id, d->domain_id); > > - mem_event_mark_and_pause(v); > + mem_event_mark_and_sleep(v, &d->mem_event->access); > } > else > { > @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) > vcpu_unpause(d->vcpu[rsp.vcpu_id]); > > - /* Unpause any domains that were paused because the ring was full or > no listener > - * was available */ > - mem_event_unpause_vcpus(d); > + /* Wake vcpus waiting for room in the ring */ > + mem_event_wake_requesters(&d->mem_event->access); > + > + /* Unpause all vcpus that were paused because no listener was > available */ > + mem_event_wake_waiters(d, &d->mem_event->access); Is this not used in p2m_mem_paging_resume? Why the difference? Why are two mechanisms needed (wake_requesters, wake_sleepers)? Thanks Andres > } > > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/asm-x86/mem_event.h > --- a/xen/include/asm-x86/mem_event.h > +++ b/xen/include/asm-x86/mem_event.h > @@ -24,13 +24,13 @@ > #ifndef __MEM_EVENT_H__ > #define __MEM_EVENT_H__ > > -/* Pauses VCPU while marking pause flag for mem event */ > -void mem_event_mark_and_pause(struct vcpu *v); > -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med); > -void mem_event_put_req_producers(struct mem_event_domain *med); > +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med); > +void mem_event_release_slot(struct domain *d, struct mem_event_domain > *med); > void mem_event_put_request(struct domain *d, struct mem_event_domain > *med, mem_event_request_t *req); > void mem_event_get_response(struct mem_event_domain *med, > mem_event_response_t *rsp); > -void mem_event_unpause_vcpus(struct domain *d); > +void mem_event_wake_requesters(struct mem_event_domain *med); > +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain > *med); > +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain > *med); > > int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, > XEN_GUEST_HANDLE(void) u_domctl); > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -14,6 +14,7 @@ > #include <xen/nodemask.h> > #include <xen/radix-tree.h> > #include <xen/multicall.h> > +#include <xen/wait.h> > #include <public/xen.h> > #include <public/domctl.h> > #include <public/sysctl.h> > @@ -183,7 +184,8 @@ struct mem_event_domain > { > /* ring lock */ > spinlock_t ring_lock; > - unsigned int req_producers; > + unsigned short foreign_producers; > + unsigned short target_producers; > /* shared page */ > mem_event_shared_page_t *shared_page; > /* shared ring page */ > @@ -192,6 +194,10 @@ struct mem_event_domain > mem_event_front_ring_t front_ring; > /* event channel port (vcpu0 only) */ > int xen_port; > + /* mem_event bit for vcpu->pause_flags */ > + int bit; > + /* list of vcpus waiting for room in the ring */ > + struct waitqueue_head wq; > }; > > struct mem_event_per_domain > @@ -615,9 +621,12 @@ static inline struct domain *next_domain > /* VCPU affinity has changed: migrating to a new CPU. */ > #define _VPF_migrating 3 > #define VPF_migrating (1UL<<_VPF_migrating) > - /* VCPU is blocked on memory-event ring. */ > -#define _VPF_mem_event 4 > -#define VPF_mem_event (1UL<<_VPF_mem_event) > + /* VCPU is blocked due to missing mem_paging ring. */ > +#define _VPF_mem_paging 4 > +#define VPF_mem_paging (1UL<<_VPF_mem_paging) > + /* VCPU is blocked due to missing mem_access ring. */ > +#define _VPF_mem_access 5 > +#define VPF_mem_access (1UL<<_VPF_mem_access) > > static inline int vcpu_runnable(struct vcpu *v) > { > > > > ------------------------------ > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel > > > End of Xen-devel Digest, Vol 82, Issue 66 > ***************************************** > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.