Re: [Xen-devel] [PATCH v6 01/10] xen: vnuma topology and subop hypercalls
On Fri, Jul 18, 2014 at 9:49 AM, Konrad Rzeszutek Wilk
<konrad.wilk@xxxxxxxxxx> wrote:
> On Fri, Jul 18, 2014 at 01:50:00AM -0400, Elena Ufimtseva wrote:
>> Define interface, structures and hypercalls for toolstack to
>> build vnuma topology and for guests that wish to retrieve it.
>> Two subop hypercalls introduced by patch:
>> XEN_DOMCTL_setvnumainfo to define vNUMA domain topology per domain
>> and XENMEM_get_vnumainfo to retrieve that topology by guest.
>>
>> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
>> ---
>> xen/common/domain.c         |  13 ++++
>> xen/common/domctl.c         | 167 +++++++++++++++++++++++++++++++++++++++++++
>> xen/common/memory.c         |  62 ++++++++++++++++
>> xen/include/public/domctl.h |  29 ++++++++
>> xen/include/public/memory.h |  47 +++++++++++-
>> xen/include/xen/domain.h    |  11 +++
>> xen/include/xen/sched.h     |   1 +
>> 7 files changed, 329 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>> index cd64aea..895584a 100644
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -584,6 +584,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
>> return 0;
>> }
>>
>> +void vnuma_destroy(struct vnuma_info *vnuma)
>> +{
>> + if ( vnuma )
>> + {
>> + xfree(vnuma->vmemrange);
>> + xfree(vnuma->vcpu_to_vnode);
>> + xfree(vnuma->vdistance);
>> + xfree(vnuma->vnode_to_pnode);
>> + xfree(vnuma);
>> + }
>> +}
>> +
>> int domain_kill(struct domain *d)
>> {
>> int rc = 0;
>> @@ -602,6 +614,7 @@ int domain_kill(struct domain *d)
>> evtchn_destroy(d);
>> gnttab_release_mappings(d);
>> tmem_destroy(d->tmem_client);
>> + vnuma_destroy(d->vnuma);
>> domain_set_outstanding_pages(d, 0);
>> d->tmem_client = NULL;
>> /* fallthrough */
>> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
>> index c326aba..7464284 100644
>> --- a/xen/common/domctl.c
>> +++ b/xen/common/domctl.c
>> @@ -297,6 +297,144 @@ int vcpuaffinity_params_invalid(const xen_domctl_vcpuaffinity_t *vcpuaff)
>> guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
>> }
>>
>> +/*
>> + * Allocates memory for vNUMA; **vnuma should be NULL.
>> + * The caller has to make sure that the domain has
>> + * max_pages and the number of vcpus set.
>> + * Verifies that no single allocation exceeds PAGE_SIZE.
>> + */
>> +static int vnuma_alloc(struct vnuma_info **vnuma,
>> + unsigned int nr_vnodes,
>> + unsigned int nr_vcpus,
>> + unsigned int dist_size)
>> +{
>> + struct vnuma_info *v;
>> +
>> + if ( vnuma && *vnuma )
>> + return -EINVAL;
>> +
>> + v = *vnuma;
>> + /*
>> + * Check whether any of the xmalloc sizes exceeds PAGE_SIZE.
>> + * If so, consider it an error for now.
>> + */
>> + if ( nr_vnodes > PAGE_SIZE / sizeof(nr_vnodes) ||
>> + nr_vcpus > PAGE_SIZE / sizeof(nr_vcpus) ||
>> + nr_vnodes > PAGE_SIZE / sizeof(struct vmemrange) ||
>> + dist_size > PAGE_SIZE / sizeof(dist_size) )
>> + return -EINVAL;
>> +
>> + v = xzalloc(struct vnuma_info);
>> + if ( !v )
>> + return -ENOMEM;
>> +
>> + v->vdistance = xmalloc_array(unsigned int, dist_size);
>> + v->vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
>> + v->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
>> + v->vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
>> +
>> + if ( v->vdistance == NULL || v->vmemrange == NULL ||
>> + v->vcpu_to_vnode == NULL || v->vnode_to_pnode == NULL )
>> + {
>> + vnuma_destroy(v);
>> + return -ENOMEM;
>> + }
>> +
>> + *vnuma = v;
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * Allocate memory and construct one vNUMA node,
>> + * set default parameters, assign all memory and
>> + * vcpus to this node, set distance to 10.
>> + */
>> +static long vnuma_fallback(const struct domain *d,
>> + struct vnuma_info **vnuma)
>> +{
>> + struct vnuma_info *v;
>> + long ret;
>> +
>> +
>> + /* Will not destroy vNUMA here, destroy before calling this. */
>> + if ( vnuma && *vnuma )
>> + return -EINVAL;
>> +
>> + v = *vnuma;
>> + ret = vnuma_alloc(&v, 1, d->max_vcpus, 1);
>> + if ( ret )
>> + return ret;
>> +
>> + v->vmemrange[0].start = 0;
>> + v->vmemrange[0].end = d->max_pages << PAGE_SHIFT;
>> + v->vdistance[0] = 10;
>> + v->vnode_to_pnode[0] = NUMA_NO_NODE;
>> + memset(v->vcpu_to_vnode, 0, d->max_vcpus * sizeof(*v->vcpu_to_vnode));
>> + v->nr_vnodes = 1;
>> +
>> + *vnuma = v;
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * Construct vNUMA topology from the u_vnuma struct and return
>> + * it in dst.
>> + */
>> +long vnuma_init(const struct xen_domctl_vnuma *u_vnuma,
>> + const struct domain *d,
>> + struct vnuma_info **dst)
>> +{
>> + unsigned int dist_size, nr_vnodes = 0;
>> + long ret;
>> + struct vnuma_info *v = NULL;
>> +
>> + ret = -EINVAL;
>> +
>> + /* If vNUMA topology already set, just exit. */
>> + if ( !u_vnuma || *dst )
>> + return ret;
>> +
>> + nr_vnodes = u_vnuma->nr_vnodes;
>> +
>> + if ( nr_vnodes == 0 )
>> + return ret;
>> +
>> + if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
>> + return ret;
>> +
>> + dist_size = nr_vnodes * nr_vnodes;
>> +
>> + ret = vnuma_alloc(&v, nr_vnodes, d->max_vcpus, dist_size);
>> + if ( ret )
>> + return ret;
>> +
>> + /* On failure, fall back to a single vNUMA node and return success. */
>> + ret = 0;
>> +
>> + if ( copy_from_guest(v->vdistance, u_vnuma->vdistance, dist_size) )
>> + goto vnuma_onenode;
>> + if ( copy_from_guest(v->vmemrange, u_vnuma->vmemrange, nr_vnodes) )
>> + goto vnuma_onenode;
>> + if ( copy_from_guest(v->vcpu_to_vnode, u_vnuma->vcpu_to_vnode,
>> + d->max_vcpus) )
>> + goto vnuma_onenode;
>> + if ( copy_from_guest(v->vnode_to_pnode, u_vnuma->vnode_to_pnode,
>> + nr_vnodes) )
>> + goto vnuma_onenode;
>> +
>> + v->nr_vnodes = nr_vnodes;
>> + *dst = v;
>> +
>> + return ret;
>> +
>> +vnuma_onenode:
>> + vnuma_destroy(v);
>> + return vnuma_fallback(d, dst);
>> +}
>> +
>> long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>> {
>> long ret = 0;
>> @@ -967,6 +1105,35 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>> }
>> break;
>>
>> + case XEN_DOMCTL_setvnumainfo:
>> + {
>> + struct vnuma_info *v = NULL;
>> +
>> + ret = -EFAULT;
>> + if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
>> + guest_handle_is_null(op->u.vnuma.vmemrange) ||
>> + guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
>> + guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
>> + return ret;
>> +
>> + ret = -EINVAL;
>> +
>> + ret = vnuma_init(&op->u.vnuma, d, &v);
>> + if ( ret < 0 || v == NULL )
>> + break;
>> +
>> + /* overwrite vnuma for domain */
>> + if ( !d->vnuma )
>
> You want that in within the domain_lock.
>
> Otherwise a caller (on another CPU) could try to read
> d->vnuma and blow up. Say by using the serial console and
> wanting to read the guest vNUMA topology.
>
>> + vnuma_destroy(d->vnuma);
>> +
>> + domain_lock(d);
>
> I would just do
>
> vnuma_destroy(d->vnuma)
>
> here and remove the 'if' above.
>> + d->vnuma = v;
>> + domain_unlock(d);
>> +
>> + ret = 0;
>> + }
>> + break;
>> +
Agreed and done :)
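
For reference, the restructured hunk should look roughly like this (a sketch of
what v7 will carry, exact context may differ):

    case XEN_DOMCTL_setvnumainfo:
    {
        struct vnuma_info *v = NULL;

        ret = -EFAULT;
        if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
             guest_handle_is_null(op->u.vnuma.vmemrange) ||
             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
            return ret;

        ret = vnuma_init(&op->u.vnuma, d, &v);
        if ( ret < 0 || v == NULL )
            break;

        /* Free any old topology and publish the new one under the
         * domain lock, so a concurrent reader never sees a freed
         * pointer. */
        domain_lock(d);
        vnuma_destroy(d->vnuma);
        d->vnuma = v;
        domain_unlock(d);

        ret = 0;
    }
    break;
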
>> default:
>> ret = arch_do_domctl(op, d, u_domctl);
>> break;
>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>> index c2dd31b..925b9fc 100644
>> --- a/xen/common/memory.c
>> +++ b/xen/common/memory.c
>> @@ -969,6 +969,68 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>
>> break;
>>
>> + case XENMEM_get_vnumainfo:
>> + {
>> + struct vnuma_topology_info topology;
>> + struct domain *d;
>> + unsigned int dom_vnodes = 0;
>> +
>> + /*
>> + * The guest passes nr_vnodes and nr_vcpus, so we know
>> + * how much memory the guest has allocated.
>> + */
>> + if ( copy_from_guest(&topology, arg, 1) ||
>> + guest_handle_is_null(topology.vmemrange.h) ||
>> + guest_handle_is_null(topology.vdistance.h) ||
>> + guest_handle_is_null(topology.vcpu_to_vnode.h) )
>> + return -EFAULT;
>> +
>> + if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
>> + return -ESRCH;
>> +
>> + rc = -EOPNOTSUPP;
>> + if ( d->vnuma == NULL )
>> + goto vnumainfo_out;
>> +
>> + if ( d->vnuma->nr_vnodes == 0 )
>> + goto vnumainfo_out;
>> +
>> + dom_vnodes = d->vnuma->nr_vnodes;
>> +
>> + /*
>> + * The guest's nr_vcpus and nr_vnodes may differ from the domain's
>> + * vNUMA config. Check them here to make sure we don't overflow
>> + * the guest's buffers.
>> + */
>> + rc = -ENOBUFS;
>> + if ( topology.nr_vnodes < dom_vnodes ||
>> + topology.nr_vcpus < d->max_vcpus )
>> + goto vnumainfo_out;
>> +
>> + rc = -EFAULT;
>> +
>> + if ( copy_to_guest(topology.vmemrange.h, d->vnuma->vmemrange,
>> + dom_vnodes) != 0 )
>> + goto vnumainfo_out;
>> +
>> + if ( copy_to_guest(topology.vdistance.h, d->vnuma->vdistance,
>> + dom_vnodes * dom_vnodes) != 0 )
>> + goto vnumainfo_out;
>> +
>> + if ( copy_to_guest(topology.vcpu_to_vnode.h, d->vnuma->vcpu_to_vnode,
>> + d->max_vcpus) != 0 )
>> + goto vnumainfo_out;
>> +
>> + topology.nr_vnodes = dom_vnodes;
>> +
>> + if ( copy_to_guest(arg, &topology, 1) != 0 )
>> + goto vnumainfo_out;
>> + rc = 0;
>> +
>> + vnumainfo_out:
>> + rcu_unlock_domain(d);
>> + break;
>> + }
>> +
>> default:
>> rc = arch_memory_op(cmd, arg);
>> break;
>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>> index 5b11bbf..5ee74f4 100644
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -35,6 +35,7 @@
>> #include "xen.h"
>> #include "grant_table.h"
>> #include "hvm/save.h"
>> +#include "memory.h"
>>
>> #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
>>
>> @@ -934,6 +935,32 @@ struct xen_domctl_vcpu_msrs {
>> };
>> typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
>> +
>> +/*
>> + * Use in XEN_DOMCTL_setvnumainfo to set
>> + * vNUMA domain topology.
>> + */
>> +struct xen_domctl_vnuma {
>> + uint32_t nr_vnodes;
>> + uint32_t _pad;
>> + XEN_GUEST_HANDLE_64(uint) vdistance;
>> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>> +
>> + /*
>> + * vnode to physical NUMA node mapping.
>> + * This is kept on a per-domain basis for
>> + * interested consumers, such as NUMA-aware ballooning.
>> + */
>> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
>> +
>> + /*
>> + * Memory ranges for each vNUMA node.
>> + */
>> + XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
>> +};
>> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
>> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
>> +
>> #endif
>>
>> struct xen_domctl {
>> @@ -1008,6 +1035,7 @@ struct xen_domctl {
>> #define XEN_DOMCTL_cacheflush 71
>> #define XEN_DOMCTL_get_vcpu_msrs 72
>> #define XEN_DOMCTL_set_vcpu_msrs 73
>> +#define XEN_DOMCTL_setvnumainfo 74
>> #define XEN_DOMCTL_gdbsx_guestmemio 1000
>> #define XEN_DOMCTL_gdbsx_pausevcpu 1001
>> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
>> @@ -1068,6 +1096,7 @@ struct xen_domctl {
>> struct xen_domctl_cacheflush cacheflush;
>> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
>> + struct xen_domctl_vnuma vnuma;
>> uint8_t pad[128];
>> } u;
>> };
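
(As an aside: the libxc wrapper added later in this series fills this domctl
roughly as below. This is only a sketch using the internal hypercall
bounce-buffer helpers; the real function name and error unwinding may differ.)

    /* Hypothetical libxc-internal helper, for illustration only. */
    int xc_domain_setvnuma_sketch(xc_interface *xch, uint32_t domid,
                                  uint32_t nr_vnodes, uint32_t nr_vcpus,
                                  vmemrange_t *vmemrange,
                                  unsigned int *vdistance,
                                  unsigned int *vcpu_to_vnode,
                                  unsigned int *vnode_to_pnode)
    {
        int rc;
        DECLARE_DOMCTL;
        DECLARE_HYPERCALL_BOUNCE(vmemrange, sizeof(*vmemrange) * nr_vnodes,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);
        DECLARE_HYPERCALL_BOUNCE(vdistance,
                                 sizeof(*vdistance) * nr_vnodes * nr_vnodes,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);
        DECLARE_HYPERCALL_BOUNCE(vcpu_to_vnode,
                                 sizeof(*vcpu_to_vnode) * nr_vcpus,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);
        DECLARE_HYPERCALL_BOUNCE(vnode_to_pnode,
                                 sizeof(*vnode_to_pnode) * nr_vnodes,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);

        /* Bounce the caller's arrays into hypercall-safe memory. */
        if ( xc_hypercall_bounce_pre(xch, vmemrange) ||
             xc_hypercall_bounce_pre(xch, vdistance) ||
             xc_hypercall_bounce_pre(xch, vcpu_to_vnode) ||
             xc_hypercall_bounce_pre(xch, vnode_to_pnode) )
            return -1; /* error unwinding omitted for brevity */

        domctl.cmd = XEN_DOMCTL_setvnumainfo;
        domctl.domain = (domid_t)domid;
        domctl.u.vnuma.nr_vnodes = nr_vnodes;
        set_xen_guest_handle(domctl.u.vnuma.vmemrange, vmemrange);
        set_xen_guest_handle(domctl.u.vnuma.vdistance, vdistance);
        set_xen_guest_handle(domctl.u.vnuma.vcpu_to_vnode, vcpu_to_vnode);
        set_xen_guest_handle(domctl.u.vnuma.vnode_to_pnode, vnode_to_pnode);

        rc = do_domctl(xch, &domctl);

        xc_hypercall_bounce_post(xch, vmemrange);
        xc_hypercall_bounce_post(xch, vdistance);
        xc_hypercall_bounce_post(xch, vcpu_to_vnode);
        xc_hypercall_bounce_post(xch, vnode_to_pnode);

        return rc;
    }
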
>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>> index 2c57aa0..2c212e1 100644
>> --- a/xen/include/public/memory.h
>> +++ b/xen/include/public/memory.h
>> @@ -521,9 +521,54 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>> * The zero value is appropiate.
>> */
>>
>> +/* vNUMA node memory range */
>> +struct vmemrange {
>> + uint64_t start, end;
>> +};
>> +
>> +typedef struct vmemrange vmemrange_t;
>> +DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
>> +
>> +/*
>> + * vNUMA topology specifies the number of vNUMA nodes, the distance
>> + * table, the memory ranges and the vcpu-to-vnode mapping provided
>> + * for guests. The XENMEM_get_vnumainfo hypercall expects the guest
>> + * to supply nr_vnodes and nr_vcpus to indicate how much memory it
>> + * has allocated. After filling the guest structures, nr_vnodes and
>> + * nr_vcpus are copied back to the guest.
>> + */
>> +struct vnuma_topology_info {
>> + /* IN */
>> + domid_t domid;
>> + /* IN/OUT */
>> + unsigned int nr_vnodes;
>> + unsigned int nr_vcpus;
>> + /* OUT */
>> + union {
>> + XEN_GUEST_HANDLE(uint) h;
>> + uint64_t pad;
>> + } vdistance;
>> + union {
>> + XEN_GUEST_HANDLE(uint) h;
>> + uint64_t pad;
>> + } vcpu_to_vnode;
>> + union {
>> + XEN_GUEST_HANDLE(vmemrange_t) h;
>> + uint64_t pad;
>> + } vmemrange;
>> +};
>> +typedef struct vnuma_topology_info vnuma_topology_info_t;
>> +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
>> +
>> +/*
>> + * XENMEM_get_vnumainfo is used by a guest to get its
>> + * vNUMA topology from the hypervisor.
>> + */
>> +#define XENMEM_get_vnumainfo 26
>> +
>> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>>
>> -/* Next available subop number is 26 */
>> +/* Next available subop number is 27 */
>>
>> #endif /* __XEN_PUBLIC_MEMORY_H__ */
>>
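
(For completeness, a guest is expected to call XENMEM_get_vnumainfo roughly as
in the Linux-flavoured sketch below. The guest patches are separate from this
series, so this is illustration only; the helper name is made up, and
retry-on-ENOBUFS plus actual consumption of the arrays are left out.)

    /* Sketch: guest querying its own vNUMA topology. */
    static int xen_get_vnuma_info_sketch(unsigned int nr_vnodes_hint,
                                         unsigned int nr_vcpus)
    {
        struct vnuma_topology_info topo = { .domid = DOMID_SELF };
        struct vmemrange *vmemrange;
        unsigned int *vdistance, *vcpu_to_vnode;
        int rc = -ENOMEM;

        vmemrange = kcalloc(nr_vnodes_hint, sizeof(*vmemrange), GFP_KERNEL);
        vdistance = kcalloc(nr_vnodes_hint * nr_vnodes_hint,
                            sizeof(*vdistance), GFP_KERNEL);
        vcpu_to_vnode = kcalloc(nr_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
        if ( !vmemrange || !vdistance || !vcpu_to_vnode )
            goto out;

        /* Tell the hypervisor how many entries each buffer can hold. */
        topo.nr_vnodes = nr_vnodes_hint;
        topo.nr_vcpus = nr_vcpus;
        set_xen_guest_handle(topo.vmemrange.h, vmemrange);
        set_xen_guest_handle(topo.vdistance.h, vdistance);
        set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);

        /*
         * -ENOBUFS means the buffers are too small (retry with a larger
         * hint); on success topo.nr_vnodes holds the real vnode count and
         * a real caller would consume the arrays before freeing them.
         */
        rc = HYPERVISOR_memory_op(XENMEM_get_vnumainfo, &topo);
    out:
        kfree(vmemrange);
        kfree(vdistance);
        kfree(vcpu_to_vnode);
        return rc;
    }
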
>> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
>> index bb1c398..d29a84d 100644
>> --- a/xen/include/xen/domain.h
>> +++ b/xen/include/xen/domain.h
>> @@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
>>
>> extern bool_t opt_dom0_vcpus_pin;
>>
>> +/* vnuma topology per domain. */
>> +struct vnuma_info {
>> + unsigned int nr_vnodes;
>> + unsigned int *vdistance;
>> + unsigned int *vcpu_to_vnode;
>> + unsigned int *vnode_to_pnode;
>> + struct vmemrange *vmemrange;
>> +};
>> +
>> +void vnuma_destroy(struct vnuma_info *vnuma);
>> +
>> #endif /* __XEN_DOMAIN_H__ */
>> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
>> index d5bc461..71e4218 100644
>> --- a/xen/include/xen/sched.h
>> +++ b/xen/include/xen/sched.h
>> @@ -447,6 +447,7 @@ struct domain
>> nodemask_t node_affinity;
>> unsigned int last_alloc_node;
>> spinlock_t node_affinity_lock;
>> + struct vnuma_info *vnuma;
>> };
>>
>> struct domain_setup_info
>> --
>> 1.7.10.4
>>
--
Elena