diff -r 9a3efacb9e39 -r c636287eab3c xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Mar 30 18:51:31 2010 -0400 +++ b/xen/arch/x86/mm.c Tue Mar 30 21:18:25 2010 -0400 @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,7 @@ #include #include #include +#include /* * Mapping of first 2 or 4 megabytes of memory. This is mapped with 4kB @@ -4519,6 +4521,193 @@ case XENMEM_get_sharing_freed_pages: return mem_sharing_get_nr_saved_mfns(); + case XENMEM_numa_op: + { + struct xenmem_numa_op memop; + + if ( copy_from_guest(&memop, arg, 1) ) + { + printk("copy_from_guest : arg(%lu)\n", sizeof(memop)); + return -EFAULT; + } + + switch (memop.cmd) + { + case XENMEM_machine_numa_layout: + { + struct xenmem_machine_numa_layout *info; + uint32_t max_nodes; + int ret, i; + + XEN_GUEST_HANDLE(void) node_data_arr; + XEN_GUEST_HANDLE(uint32) node_distance_arr; + + info = &memop.u.minfo; + max_nodes = info->max_nodes; + max_nodes = min_t(uint32_t, max_nodes, num_online_nodes()); + node_data_arr = info->node_data; + node_distance_arr = info->node_distance; + + memset(info, 0, sizeof(*info)); + info->max_nodes = max_nodes; + ret = 0; + if (!guest_handle_is_null(node_distance_arr)) + { + int j; + for (i = 0; i < max_nodes; i++) + for (j = 0; j < max_nodes; j++) + { + uint32_t distance; + distance = (node_online(i) && node_online(j)) ? + node_distance(i, j):~0U; + + if (copy_to_guest_offset(node_distance_arr, + (i*max_nodes+j), &distance, 1)) + { + ret = -EFAULT; + break; + } + } + } + if (!guest_handle_is_null(node_data_arr)) + { + for (i = 0; i < max_nodes; i++) + { + struct xenmem_node_data xnd; + cpumask_t mask; + xnd.node_id = i; + xnd.node_memsize = node_online(i) ? 
+ (node_spanned_pages(i)<bufsize < memnodemapsize)
+                {
+                    printk("Insufficient memnode buf(%u/%lu)\n",
+                                        map->bufsize, memnodemapsize);
+                    return -EFAULT;
+                }
+                map->shift = memnode_shift;
+                map->mapsize = memnodemapsize;
+                if ( !guest_handle_is_null(map->map) )
+                {
+                    if (copy_to_guest(map->map, memnodemap, memnodemapsize))
+                    {
+                        printk("copy_to_guest:map(%lu)\n", memnodemapsize);
+                        return -EFAULT;
+                    }
+                }
+                break;
+            }
+            case XENMEM_get_domain_numa_layout: /* get shares the handler below, which tests memop.cmd */
+            case XENMEM_set_domain_numa_layout:
+            { /* Get or set the per-domain NUMA layout kept in d->numa_layout. */
+                struct xenmem_domain_numa_layout *layout;
+                struct domain *d;
+                int ret;
+
+                layout = &memop.u.dinfo;
+
+                ret = 0;
+                if ((memop.cmd == XENMEM_get_domain_numa_layout) &&
+                        !IS_PRIV(current->domain) &&
+                            (layout->domid !=DOMID_SELF)) /* unprivileged callers may only query themselves */
+                    return -EINVAL;
+
+                if ((memop.cmd == XENMEM_set_domain_numa_layout) &&
+                                            !IS_PRIV(current->domain)) /* set is privileged-only */
+                    return -EINVAL;
+
+                if ((layout->bufsize < sizeof(*d->numa_layout)) ||
+                        guest_handle_is_null(layout->buf)) /* sizeof only; d is not dereferenced here */
+                    return -EFAULT;
+
+                ret = rcu_lock_target_domain_by_id(layout->domid, &d);
+                if (ret)
+                    return ret;
+
+                if (memop.cmd == XENMEM_get_domain_numa_layout)
+                {
+                    if (d->numa_layout == NULL)
+                    {
+                        ret = -EFAULT;
+                        goto domain_numa_unlock;
+                    }
+                    if (copy_to_guest(layout->buf, d->numa_layout, 1))
+                    {
+                        printk("numa domain : copy_to_guest failed\n");
+                        ret = -EFAULT;
+                    }
+                    goto domain_numa_unlock;
+                }
+
+                if (!d->numa_layout) /* NOTE(review): an already-set layout is kept and set returns 0 - confirm intended */
+                {
+                    struct xen_domain_numa_layout *numa_layout;
+                    numa_layout = xmalloc(struct xen_domain_numa_layout);
+                    if (numa_layout == NULL)
+                    {
+                        printk("numa domain : memory allocation failed\n");
+                        ret = -ENOMEM;
+                        goto domain_numa_unlock;
+                    }
+                    memset(numa_layout, 0, sizeof(*numa_layout)); /* size of the struct, not of the pointer */
+                    if (copy_from_guest(numa_layout, layout->buf, 1))
+                    {
+                        printk("numa domain : copy_from_guest failed\n");
+                        xfree(numa_layout);
+                        ret = -EFAULT;
+                        goto domain_numa_unlock;
+                    }
+                    if (numa_layout->version !=
+                                    XEN_DOM_NUMA_INTERFACE_VERSION)
+                    {
+                        printk("numa domain : version mismatch\n");
+                        xfree(numa_layout);
+                        ret = -EFAULT;
+                        goto domain_numa_unlock;
+                    }
+                    if
(d->numa_layout)
+                        xfree(d->numa_layout);
+                    d->numa_layout = numa_layout;
+                }
+domain_numa_unlock:
+                rcu_unlock_domain(d);
+                if (ret)
+                    return ret;
+                break;
+            }
+            default: /* unknown sub-command: fail rather than skip the switch and return 0 */
+                return -EFAULT;
+        }
+
+        if ( copy_to_guest(arg, &memop, 1) )
+        {
+            printk("copy_to_guest : arg(%lu)\n", sizeof(memop));
+            return -EFAULT;
+        }
+
+        return 0;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
diff -r 9a3efacb9e39 -r c636287eab3c xen/common/domain.c
--- a/xen/common/domain.c Tue Mar 30 18:51:31 2010 -0400
+++ b/xen/common/domain.c Tue Mar 30 21:18:25 2010 -0400
@@ -599,6 +599,12 @@

     sched_destroy_domain(d);

+    if (d->numa_layout)
+    {
+        xfree(d->numa_layout);
+        d->numa_layout = NULL;
+    }
+
     /* Free page used by xen oprofile buffer. */
     free_xenoprof_pages(d);

diff -r 9a3efacb9e39 -r c636287eab3c xen/common/domctl.c
--- a/xen/common/domctl.c Tue Mar 30 18:51:31 2010 -0400
+++ b/xen/common/domctl.c Tue Mar 30 21:18:25 2010 -0400
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include

 static DEFINE_SPINLOCK(domctl_lock);
@@ -76,6 +77,40 @@
     bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
 }

+/* Fill *xcpumask from *cpumask: zero it, then convert min(XENCTL_NR_CPUS, NR_CPUS) bits. */
+void cpumask_to_xenctl_cpumask(
+    struct xenctl_cpumask *xcpumask, cpumask_t *cpumask)
+{
+    unsigned int nr_cpus;
+    uint8_t *bytemap;
+
+    bytemap = xcpumask->bits;
+
+    memset(bytemap, 0, sizeof(*xcpumask));
+    nr_cpus =
+        min_t(unsigned int, XENCTL_NR_CPUS, NR_CPUS);
+
+    bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), nr_cpus);
+}
+
+/* Fill *cpumask from *xcpumask, then restrict it to the CPUs in cpu_online_map. */
+void xenctl_cpumask_to_cpumask(
+    cpumask_t *cpumask, struct xenctl_cpumask *xcpumask)
+{
+    unsigned int nr_cpus;
+    uint8_t *bytemap;
+
+    bytemap = xcpumask->bits;
+
+    nr_cpus =
+        min_t(unsigned int, XENCTL_NR_CPUS, NR_CPUS);
+
+    /* Only nr_cpus bits are converted below; clear first so no stale bits remain. */
+    cpus_clear(*cpumask);
+    bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, nr_cpus);
+    cpus_and(*cpumask, *cpumask, cpu_online_map);
+}
+
 static inline int is_free_domid(domid_t dom)
 {
     struct domain *d;
diff -r 9a3efacb9e39 -r c636287eab3c xen/include/public/memory.h
--- a/xen/include/public/memory.h Tue Mar 30 18:51:31 2010 -0400
+++
b/xen/include/public/memory.h Tue Mar 30 21:18:25 2010 -0400 @@ -270,6 +270,107 @@ typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +/* NUMA operations: machine NUMA info and get/set of a domain's NUMA layout */ +#define XENMEM_numa_op 15 + +#define XEN_DOM_NUMA_INTERFACE_VERSION 0x00000001 +#define XENCTL_NR_CPUS 64 +#define XENCTL_BITS_PER_BYTE 8 +#define XENCTL_BITS_TO_BYTES(bits) \ + (((bits)+XENCTL_BITS_PER_BYTE-1)/XENCTL_BITS_PER_BYTE) + +#define XENCTL_DECLARE_BITMAP(name,bits) \ + uint8_t name[XENCTL_BITS_TO_BYTES(bits)] +struct xenctl_cpumask{ XENCTL_DECLARE_BITMAP(bits, XENCTL_NR_CPUS); }; + +#define XENMEM_machine_numa_layout 0x01 +struct xenmem_node_data { + uint32_t node_id; + uint64_t node_memsize; + uint64_t node_memfree; + struct xenctl_cpumask cpu_mask; /* node_to_cpumask */ +}; + +/* NUMA layout for the machine. + * Structure has to fit within a page. */ +struct xenmem_machine_numa_layout { + uint32_t max_nodes; + + /* Only (max_nodes*max_nodes) entries are filled */ + XEN_GUEST_HANDLE(uint32) node_distance; + + /* max_nodes entries of xenmem_node_data type */ + XEN_GUEST_HANDLE(void) node_data; +}; +typedef struct xenmem_machine_numa_layout xenmem_machine_numa_layout_t; +DEFINE_XEN_GUEST_HANDLE(xenmem_machine_numa_layout_t); + +#define XENMEM_machine_nodemap 0x02 +struct xenmem_machine_nodemap { + /* On call, the size of the available buffer */ + uint32_t bufsize; + + /* memnode map parameters */ + int32_t shift; + uint32_t mapsize; + XEN_GUEST_HANDLE(void) map; +}; +typedef struct xenmem_machine_nodemap xenmem_machine_nodemap_t; +DEFINE_XEN_GUEST_HANDLE(xenmem_machine_nodemap_t); + +/* NUMA layout for the domain at the time of startup. + * Structure has to fit within a page. */ +#define XENMEM_set_domain_numa_layout 0x03 +#define XENMEM_get_domain_numa_layout 0x04 + +/* Maximum number of virtual nodes in a domain NUMA layout (array bound below). + * Structure has to fit within a page.
*/ +#define XEN_MAX_VNODES 8 + +struct xen_vnode_data { + uint32_t vnode_id; + uint32_t mnode_id; + uint64_t nr_pages; + struct xenctl_cpumask vcpu_mask; /* vnode_to_vcpumask */ +}; + +#define XEN_DOM_NUMA_CONFINED 0x01 +#define XEN_DOM_NUMA_SPLIT 0x02 +#define XEN_DOM_NUMA_STRIPED 0x03 +struct xen_domain_numa_layout { + uint32_t version; + uint32_t type; + + uint32_t max_vcpus; + uint32_t max_vnodes; + + /* Only (max_vnodes*max_vnodes) entries are filled */ + uint32_t vnode_distance[XEN_MAX_VNODES * XEN_MAX_VNODES]; + /* Only (max_vnodes) entries are filled */ + struct xen_vnode_data vnode_data[XEN_MAX_VNODES]; +}; + +struct xenmem_domain_numa_layout { + domid_t domid; + + uint32_t bufsize; + XEN_GUEST_HANDLE(void) buf; +}; +typedef struct xenmem_domain_numa_layout xenmem_domain_numa_layout_t; +DEFINE_XEN_GUEST_HANDLE( xenmem_domain_numa_layout_t); + +struct xenmem_numa_op { + uint32_t cmd; + union { + struct xenmem_machine_numa_layout minfo; + struct xenmem_machine_nodemap mnodemap; + struct xenmem_domain_numa_layout dinfo; + } u; +}; +typedef struct xenmem_numa_op xenmem_numa_op_t; +DEFINE_XEN_GUEST_HANDLE(xenmem_numa_op_t); + #define XENMEM_set_pod_target 16 #define XENMEM_get_pod_target 17 struct xen_pod_target { diff -r 9a3efacb9e39 -r c636287eab3c xen/include/xen/cpumask.h --- a/xen/include/xen/cpumask.h Tue Mar 30 18:51:31 2010 -0400 +++ b/xen/include/xen/cpumask.h Tue Mar 30 21:18:25 2010 -0400 @@ -429,4 +429,10 @@ void xenctl_cpumap_to_cpumask( cpumask_t *cpumask, struct xenctl_cpumap *enctl_cpumap); +struct xenctl_cpumask; +void cpumask_to_xenctl_cpumask( + struct xenctl_cpumask *xcpumask, cpumask_t *cpumask); +void xenctl_cpumask_to_cpumask( + cpumask_t *cpumask, struct xenctl_cpumask *xcpumask); + #endif /* __XEN_CPUMASK_H */ diff -r 9a3efacb9e39 -r c636287eab3c xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Mar 30 18:51:31 2010 -0400 +++ b/xen/include/xen/sched.h Tue Mar 30 21:18:25 2010 -0400 @@ -303,6 +303,8 @@ /* transcendent memory, 
auto-allocated on first tmem op by each domain */ void *tmem; + /* xen domain numa layout (for numa guests) */ + struct xen_domain_numa_layout *numa_layout; struct lock_profile_qhead profile_head;