
[Xen-devel] [PATCH v5 4/4] xen: introduce XENMEM_exchange_and_pin and XENMEM_unpin



Introduce two new hypercalls XENMEM_exchange_and_pin and XENMEM_unpin.

XENMEM_exchange_and_pin is like XENMEM_exchange, but it also pins the
new pages: their p2m mappings are guaranteed not to change until
XENMEM_unpin is called.  XENMEM_exchange_and_pin returns the DMA frame
numbers of the new pages to the caller, even if the caller is an
autotranslated guest.
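
For illustration only (not part of this patch), a guest kernel might wrap
the call roughly as follows, assuming the usual HYPERVISOR_memory_op() and
set_xen_guest_handle() helpers and the public memory.h definitions:

    /* Hypothetical sketch: swap the page currently mapped at gpfn for a
     * fresh, pinned one mapped at the same gpfn, and return its DMA frame. */
    static int exchange_and_pin_one(xen_pfn_t gpfn, xen_pfn_t *dma_frame)
    {
        xen_pfn_t frame = gpfn;   /* in: GMFN/GPFN; out: DMA frame number */
        struct xen_memory_exchange exch = {
            .in  = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF },
            .out = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF },
        };
        int rc;

        set_xen_guest_handle(exch.in.extent_start, &frame);
        set_xen_guest_handle(exch.out.extent_start, &frame);

        rc = HYPERVISOR_memory_op(XENMEM_exchange_and_pin, &exch);
        if ( rc == 0 )
            *dma_frame = frame;   /* p2m entry for the new page is pinned */
        return rc;
    }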

The only effect of XENMEM_unpin is to "unpin" the previously
pinned pages. Afterwards the p2m mappings can again be changed
transparently by the hypervisor, as usual. The memory remains accessible
to the guest.

Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>


Changes in v5:
- memory_exchange: handle guest_physmap_pin_range failures;
- make i an unsigned long in unpin;
- add nr_unpinned to xen_unpin to report partial success.

Changes in v4:
- rename XENMEM_get_dma_buf to XENMEM_exchange_and_pin;
- rename XENMEM_put_dma_buf to XENMEM_unpin;
- move the pinning before we copy back the mfn to the guest;
- propagate errors returned by guest_physmap_pin_range;
- use xen_memory_exchange_t as parameter for XENMEM_exchange_and_pin;
- use an unsigned iterator in unpin;
- improve the documentation of the new hypercalls;
- add a note about out.address_bits for XENMEM_exchange.
---
 xen/common/memory.c         |  141 +++++++++++++++++++++++++++++++++----------
 xen/include/public/memory.h |   47 ++++++++++++++
 2 files changed, 156 insertions(+), 32 deletions(-)

diff --git a/xen/common/memory.c b/xen/common/memory.c
index 4b2f311..34169c1 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -364,14 +364,14 @@ static void decrease_reservation(struct memop_args *a)
     a->nr_done = i;
 }
 
-static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
+static long memory_exchange(int op, XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
 {
     struct xen_memory_exchange exch;
     PAGE_LIST_HEAD(in_chunk_list);
     PAGE_LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
     xen_pfn_t     gpfn, gmfn, mfn;
-    unsigned long i, j, k = 0; /* gcc ... */
+    unsigned long i = 0, j = 0, k = 0; /* gcc ... */
     unsigned int  memflags = 0;
     long          rc = 0;
     struct domain *d;
@@ -542,54 +542,72 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
         /* Assign each output page to the domain. */
         for ( j = 0; (page = page_list_remove_head(&out_chunk_list)); ++j )
         {
-            if ( assign_pages(d, page, exch.out.extent_order,
-                              MEMF_no_refcount) )
-            {
-                unsigned long dec_count;
-                bool_t drop_dom_ref;
-
-                /*
-                 * Pages in in_chunk_list is stolen without
-                 * decreasing the tot_pages. If the domain is dying when
-                 * assign pages, we need decrease the count. For those pages
-                 * that has been assigned, it should be covered by
-                 * domain_relinquish_resources().
-                 */
-                dec_count = (((1UL << exch.in.extent_order) *
-                              (1UL << in_chunk_order)) -
-                             (j * (1UL << exch.out.extent_order)));
-
-                spin_lock(&d->page_alloc_lock);
-                domain_adjust_tot_pages(d, -dec_count);
-                drop_dom_ref = (dec_count && !d->tot_pages);
-                spin_unlock(&d->page_alloc_lock);
-
-                if ( drop_dom_ref )
-                    put_domain(d);
-
-                free_domheap_pages(page, exch.out.extent_order);
-                goto dying;
-            }
+            unsigned long dec_count;
+            bool_t drop_dom_ref;
 
             if ( __copy_from_guest_offset(&gpfn, exch.out.extent_start,
                                           (i << out_chunk_order) + j, 1) )
             {
                 rc = -EFAULT;
-                continue;
+                goto extent_error;
             }
 
             mfn = page_to_mfn(page);
             guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
 
+            if ( op == XENMEM_exchange_and_pin )
+            {
+                if ( guest_physmap_pin_range(d, gpfn, exch.out.extent_order) )
+                {
+                    rc = -EFAULT;
+                    goto extent_error_physmap;
+                }
+            }
+
             if ( !paging_mode_translate(d) )
             {
                 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                     set_gpfn_from_mfn(mfn + k, gpfn + k);
+            }
+
+            rc = assign_pages(d, page, exch.out.extent_order, MEMF_no_refcount);
+            if ( rc )
+                goto extent_error_physmap;
+
+            if ( op == XENMEM_exchange_and_pin || !paging_mode_translate(d) )
+            {
                 if ( __copy_to_guest_offset(exch.out.extent_start,
                                             (i << out_chunk_order) + j,
                                             &mfn, 1) )
                     rc = -EFAULT;
             }
+
+            continue;
+
+extent_error_physmap:
+            guest_physmap_remove_page(d, gpfn, mfn, exch.out.extent_order);
+extent_error:
+            /*
+             * Pages in in_chunk_list are stolen without decreasing
+             * tot_pages. If the domain is dying when we assign pages,
+             * we need to decrease the count here. Pages that have
+             * already been assigned are covered by
+             * domain_relinquish_resources().
+             */
+            dec_count = (((1UL << exch.in.extent_order) *
+                        (1UL << in_chunk_order)) -
+                    (j * (1UL << exch.out.extent_order)));
+
+            spin_lock(&d->page_alloc_lock);
+            domain_adjust_tot_pages(d, -dec_count);
+            drop_dom_ref = (dec_count && !d->tot_pages);
+            spin_unlock(&d->page_alloc_lock);
+
+            if ( drop_dom_ref )
+                put_domain(d);
+
+            free_domheap_pages(page, exch.out.extent_order);
+            goto dying;
         }
         BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
     }
@@ -627,6 +645,60 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
     return rc;
 }
 
+static long unpin(XEN_GUEST_HANDLE_PARAM(xen_unpin_t) arg)
+{
+    int rc;
+    unsigned long i;
+    struct xen_unpin unpin;
+    xen_pfn_t gpfn;
+    struct domain *d;
+
+    if ( copy_from_guest(&unpin, arg, 1) )
+        return -EFAULT;
+
+    /* Various sanity checks. */
+    if ( /* Extent orders are sensible? */
+         (unpin.in.extent_order > MAX_ORDER) ||
+         /* Sizes of input list do not overflow a long? */
+         ((~0UL >> unpin.in.extent_order) < unpin.in.nr_extents) )
+        return -EFAULT;
+
+    if ( !guest_handle_okay(unpin.in.extent_start, unpin.in.nr_extents) )
+        return -EFAULT;
+
+    d = rcu_lock_domain_by_any_id(unpin.in.domid);
+    if ( d == NULL )
+    {
+        rc = -ESRCH;
+        goto fail;
+    }
+
+    for ( i = 0; i < unpin.in.nr_extents; i++ )
+    {
+        if ( unlikely(__copy_from_guest_offset(
+                      &gpfn, unpin.in.extent_start, i, 1)) )
+        {
+            rc = -EFAULT;
+            goto partial;
+        }
+
+        rc = guest_physmap_unpin_range(d, gpfn, unpin.in.extent_order);
+        if ( rc )
+            goto partial;
+    }
+
+    rc = 0;
+
+partial:
+    unpin.nr_unpinned = i;
+    if ( __copy_field_to_guest(arg, &unpin, nr_unpinned) )
+        rc = -EFAULT;
+
+ fail:
+    rcu_unlock_domain(d);
+    return rc;
+}
+
 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     struct domain *d;
@@ -715,8 +787,13 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_exchange_and_pin:
     case XENMEM_exchange:
-        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
+        rc = memory_exchange(op, guest_handle_cast(arg, xen_memory_exchange_t));
+        break;
+
+    case XENMEM_unpin:
+        rc = unpin(guest_handle_cast(arg, xen_unpin_t));
         break;
 
     case XENMEM_maximum_ram_page:
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..73fdc31 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -105,6 +105,7 @@ struct xen_memory_exchange {
     /*
      * [IN] Details of memory extents to be exchanged (GMFN bases).
      * Note that @in.address_bits is ignored and unused.
+     * @out.address_bits should contain the address mask for the new pages.
      */
     struct xen_memory_reservation in;
 
@@ -459,6 +460,52 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
  * The zero value is appropiate.
  */
 
+#define XENMEM_exchange_and_pin             26
+/*
+ * This hypercall is similar to XENMEM_exchange: it takes the same
+ * struct as an argument and it exchanges the pages passed in with a new
+ * set of pages. The new pages are going to be "pinned": it's guaranteed
+ * that their p2m mapping won't be changed until explicitly "unpinned".
+ * The content of the exchanged pages is lost.
+ * Only normal guest r/w memory can be pinned: no granted pages or
+ * ballooned pages.
+ * If the return code is zero then @out.extent_start provides the DMA frame
+ * numbers of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code:
+ *   -ENOSYS if not implemented
+ *   -EINVAL if the page is already pinned
+ *   -EFAULT if an internal error occurs
+ * On complete success then always @nr_exchanged == @in.nr_extents.  On
+ * partial success @nr_exchanged indicates how much work was done and a
+ * negative error code is returned.
+ */
+
+#define XENMEM_unpin             27
+/*
+ * XENMEM_unpin unpins a set of pages, previously pinned by
+ * XENMEM_exchange_and_pin. After this call the p2m mapping of the pages can
+ * be transparently changed by the hypervisor, as usual. The pages are
+ * still accessible from the guest.
+ */
+struct xen_unpin {
+    /*
+     * [IN] Details of memory extents to be unpinned (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+    /*
+     * [OUT] Number of input extents that were successfully unpinned.
+     *  1. The first @nr_unpinned input extents were successfully
+     *     unpinned.
+     *  2. All other input extents are untouched.
+     *  3. If not all input extents are unpinned then the return code of this
+     *     command will be non-zero.
+     */
+    xen_ulong_t nr_unpinned;
+};
+typedef struct xen_unpin xen_unpin_t;
+DEFINE_XEN_GUEST_HANDLE(xen_unpin_t);
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
-- 
1.7.2.5

