Re: [Xen-devel] [PATCH v9 3/5] xen/mem_sharing: VM forking
On Fri, Feb 21, 2020 at 10:49:21AM -0800, Tamas K Lengyel wrote:
> VM forking is the process of creating a domain with an empty memory space and
> a parent domain specified from which to populate the memory when necessary.
> For the new domain to be functional the VM state is copied over as part of
> the fork operation (HVM params, hap allocation, etc).
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@xxxxxxxxx>
> ---
> v9: remove stale header
> fix tsc incarnation being bumped on set
> ---
> xen/arch/x86/domain.c | 11 ++
> xen/arch/x86/hvm/hvm.c | 2 +-
> xen/arch/x86/mm/mem_sharing.c | 222 ++++++++++++++++++++++++++++++
> xen/arch/x86/mm/p2m.c | 11 +-
> xen/include/asm-x86/mem_sharing.h | 17 +++
> xen/include/public/memory.h | 5 +
> xen/include/xen/sched.h | 2 +
> 7 files changed, 268 insertions(+), 2 deletions(-)
>
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index fe63c23676..1ab0ca0942 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2203,6 +2203,17 @@ int domain_relinquish_resources(struct domain *d)
> ret = relinquish_shared_pages(d);
> if ( ret )
> return ret;
> +
> + /*
> + * If the domain is forked, decrement the parent's pause count
> + * and release the domain.
> + */
> + if ( d->parent )
> + {
> + domain_unpause(d->parent);
> + put_domain(d->parent);
> + d->parent = NULL;
> + }
> }
> #endif
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index a339b36a0d..9bfa603f8c 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1915,7 +1915,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
> }
> #endif
>
> - /* Spurious fault? PoD and log-dirty also take this path. */
> + /* Spurious fault? PoD, log-dirty and VM forking also take this path. */
> if ( p2m_is_ram(p2mt) )
> {
> rc = 1;
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index 3835bc928f..ad5db9d8d5 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -22,6 +22,7 @@
>
> #include <xen/types.h>
> #include <xen/domain_page.h>
> +#include <xen/event.h>
> #include <xen/spinlock.h>
> #include <xen/rwlock.h>
> #include <xen/mm.h>
> @@ -36,6 +37,9 @@
> #include <asm/altp2m.h>
> #include <asm/atomic.h>
> #include <asm/event.h>
> +#include <asm/hap.h>
> +#include <asm/hvm/hvm.h>
> +#include <asm/hvm/save.h>
> #include <xsm/xsm.h>
>
> #include "mm-locks.h"
> @@ -1444,6 +1448,194 @@ static inline int mem_sharing_control(struct domain *d, bool enable)
> return 0;
> }
>
> +/*
> + * Forking a page only gets called when the VM faults due to no entry being
> + * in the EPT for the access. Depending on the type of access we either
> + * populate the physmap with a shared entry for read-only access or
> + * fork the page if it's a write access.
> + *
> + * The client p2m is already locked so we only need to lock
> + * the parent's here.
> + */
> +int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing)
> +{
> + int rc = -ENOENT;
> + shr_handle_t handle;
> + struct domain *parent;
Can you constify parent? I assume no changes are made to the parent
domain, just the forked one.
> + struct p2m_domain *p2m;
> + unsigned long gfn_l = gfn_x(gfn);
> + mfn_t mfn, new_mfn;
> + p2m_type_t p2mt;
> + struct page_info *page;
> +
> + if ( !mem_sharing_is_fork(d) )
> + return -ENOENT;
> +
> + parent = d->parent;
You can initialize at declaration time.
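I.e. (assuming the helpers called on it below can cope with a const
pointer):

    const struct domain *parent = d->parent;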
> +
> + if ( !unsharing )
> + {
> + /* For read-only accesses we just add a shared entry to the physmap */
> + while ( parent )
> + {
> + if ( !(rc = nominate_page(parent, gfn, 0, &handle)) )
> + break;
> +
> + parent = parent->parent;
> + }
> +
> + if ( !rc )
> + {
> + /* The client's p2m is already locked */
> + struct p2m_domain *pp2m = p2m_get_hostp2m(parent);
> +
> + p2m_lock(pp2m);
> + rc = add_to_physmap(parent, gfn_l, handle, d, gfn_l, false);
> + p2m_unlock(pp2m);
> +
> + if ( !rc )
> + return 0;
> + }
Don't you need to return here? Otherwise you will fall back into the
unsharing path.
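I.e. something like the below at the end of the !unsharing branch, if
falling through to a full fork on failure is not the intention:

    return rc;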
> + }
> +
> + /*
> + * If it's a write access (ie. unsharing) or if adding a shared entry to
> + * the physmap failed we'll fork the page directly.
> + */
> + p2m = p2m_get_hostp2m(d);
> + parent = d->parent;
> +
> + while ( parent )
> + {
> + mfn = get_gfn_query(parent, gfn_l, &p2mt);
> +
> + if ( mfn_valid(mfn) && p2m_is_any_ram(p2mt) )
This would also cover grants, but I'm not sure how those are handled
by forking, as access to those is granted on a per-domain basis. Ie:
the parent will have access to the grant, but not the child.
> + break;
> +
> + put_gfn(parent, gfn_l);
> + parent = parent->parent;
> + }
> +
> + if ( !parent )
> + return -ENOENT;
> +
> + if ( !(page = alloc_domheap_page(d, 0)) )
> + {
> + put_gfn(parent, gfn_l);
> + return -ENOMEM;
> + }
> +
> + new_mfn = page_to_mfn(page);
> + copy_domain_page(new_mfn, mfn);
> + set_gpfn_from_mfn(mfn_x(new_mfn), gfn_l);
> +
> + put_gfn(parent, gfn_l);
> +
> + return p2m->set_entry(p2m, gfn, new_mfn, PAGE_ORDER_4K, p2m_ram_rw,
So the child p2m is going to be populated using 4K pages exclusively?
Maybe it would make sense to try to use 2M if the parent domain page
is also a 2M page or larger?
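Something along these lines perhaps (a rough sketch only: it assumes gfn
is aligned to the order the parent lookup returns, and it elides the
put_gfn() and M2P updates the real code needs):

    p2m_access_t a;
    unsigned int i, order, page_order;

    /* Query the parent and also fetch the order of its mapping. */
    mfn = get_gfn_type_access(p2m_get_hostp2m(parent), gfn_l, &p2mt, &a,
                              0, &page_order);

    /* Cap at 2M; 1G forks would likely need extra care. */
    order = page_order > PAGE_ORDER_2M ? PAGE_ORDER_2M : page_order;

    if ( !(page = alloc_domheap_pages(d, order, 0)) )
        return -ENOMEM;

    new_mfn = page_to_mfn(page);
    /* Copy each 4K sub-page of the parent's mapping. */
    for ( i = 0; i < (1U << order); i++ )
        copy_domain_page(mfn_add(new_mfn, i), mfn_add(mfn, i));

    return p2m->set_entry(p2m, gfn, new_mfn, order, p2m_ram_rw,
                          p2m->default_access, -1);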
> + p2m->default_access, -1);
> +}
> +
> +static int bring_up_vcpus(struct domain *cd, struct cpupool *cpupool)
> +{
> + int ret;
> + unsigned int i;
> +
> + if ( (ret = cpupool_move_domain(cd, cpupool)) )
> + return ret;
> +
> + for ( i = 0; i < cd->max_vcpus; i++ )
> + {
> + if ( cd->vcpu[i] )
> + continue;
> +
> + if ( !vcpu_create(cd, i) )
> + return -EINVAL;
> + }
> +
> + domain_update_node_affinity(cd);
> + return 0;
> +}
> +
> +static int fork_hap_allocation(struct domain *cd, struct domain *d)
Can you constify the parent domain?
Also, cd and d are not very helpful names; I would just use child and
parent.
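I.e. something like (assuming hap_get_allocation() can take a const
pointer):

    static int fork_hap_allocation(struct domain *child,
                                   const struct domain *parent)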
> +{
> + int rc;
> + bool preempted;
> + unsigned long mb = hap_get_allocation(d);
> +
> + if ( mb == hap_get_allocation(cd) )
> + return 0;
> +
> + paging_lock(cd);
> + rc = hap_set_allocation(cd, mb << (20 - PAGE_SHIFT), &preempted);
> + paging_unlock(cd);
> +
> + if ( rc )
> + return rc;
> +
> + if ( preempted )
> + return -ERESTART;
> +
> + return 0;
You can join all the checks into a single return:
return preempted ? -ERESTART : rc;
> +}
> +
> +static void fork_tsc(struct domain *cd, struct domain *d)
> +{
> + uint32_t tsc_mode;
> + uint32_t gtsc_khz;
> + uint32_t incarnation;
> + uint64_t elapsed_nsec;
> +
> + tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
> + /* Don't bump incarnation on set */
> + tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation - 1);
Why is the incarnation not bumped?
> +}
> +
> +static int mem_sharing_fork(struct domain *d, struct domain *cd)
> +{
> + int rc = -EINVAL;
> +
> + if ( !cd->controller_pause_count )
> + return rc;
-EBUSY might be better here.
> +
> + /*
> + * We only want to get and pause the parent once, not each time this
> + * operation is restarted due to preemption.
> + */
> + if ( !cd->parent_paused )
> + {
> + ASSERT(get_domain(d));
We are trying to avoid such constructs; instead I suggest:
if ( !get_domain(parent) )
{
ASSERT_UNREACHABLE();
return -EBUSY;
}
> + domain_pause(d);
> +
> + cd->parent_paused = true;
> + cd->max_pages = d->max_pages;
> + cd->max_vcpus = d->max_vcpus;
> + }
> +
> + /* this is preemptible so it's the first to get done */
> + if ( (rc = fork_hap_allocation(cd, d)) )
> + goto done;
> +
> + if ( (rc = bring_up_vcpus(cd, d->cpupool)) )
> + goto done;
> +
> + if ( (rc = hvm_copy_context_and_params(cd, d)) )
> + goto done;
> +
> + fork_tsc(cd, d);
> +
> + cd->parent = d;
> +
> + done:
> + if ( rc && rc != -ERESTART )
> + {
> + domain_unpause(d);
> + put_domain(d);
> + cd->parent_paused = false;
> + }
> +
> + return rc;
> +}
> +
> int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
> {
> int rc;
> @@ -1698,6 +1890,36 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
> rc = debug_gref(d, mso.u.debug.u.gref);
> break;
>
> + case XENMEM_sharing_op_fork:
> + {
> + struct domain *pd;
> +
> + rc = -EINVAL;
> + if ( mso.u.fork._pad[0] || mso.u.fork._pad[1] ||
> + mso.u.fork._pad[2] )
> + goto out;
> +
> + rc = rcu_lock_live_remote_domain_by_id(mso.u.fork.parent_domain,
> + &pd);
> + if ( rc )
> + goto out;
> +
> + if ( !mem_sharing_enabled(pd) )
> + {
> + if ( (rc = mem_sharing_control(pd, true)) )
> + goto out;
Please join both conditions:
if ( !mem_sharing_enabled(pd) &&
(rc = mem_sharing_control(pd, true)) )
goto out;
> + }
> +
> + rc = mem_sharing_fork(pd, d);
> +
> + if ( rc == -ERESTART )
> + rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
> + "lh", XENMEM_sharing_op,
> + arg);
> + rcu_unlock_domain(pd);
> + break;
> + }
> +
> default:
> rc = -ENOSYS;
> break;
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index c5f428d67c..7c4d2fd7a0 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -509,6 +509,14 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
>
> mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
>
> + /* Check if we need to fork the page */
> + if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
> + !mem_sharing_fork_page(p2m->domain, gfn, !!(q & P2M_UNSHARE)) )
> + {
> + mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
> + }
No need for the braces.
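I.e.:

    if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
         !mem_sharing_fork_page(p2m->domain, gfn, !!(q & P2M_UNSHARE)) )
        mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);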
Thanks, Roger.