[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.



Hi guys
Thank you for reviewing, I will clean these out.

George, after talking to Dario, I think the max number of physical
nodes will not exceed 256. Dario's automatic NUMA
placement work with this number and I think it can be easily u8.
Unless anyone has other thoughts.

Elena

On Mon, Sep 16, 2013 at 11:46 AM, George Dunlap
<George.Dunlap@xxxxxxxxxxxxx> wrote:
> On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@xxxxxxxxx> wrote:
>> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
>
> As long as you're respinning, I think I'd prefer this be named
> something different -- 'v' is almost universally of type "struct vcpu"
> in the Xen code, so this code is a little confusing.  I might use
> "vinfo", or even just take a domain pointer and dereference it
> yourself.
>
>> +{
>> +    if (v->vnuma_memblks) {
>> +        xfree(v->vnuma_memblks);
>> +        v->vnuma_memblks = NULL;
>> +    }
>> +    if (v->vcpu_to_vnode) {
>> +        xfree(v->vcpu_to_vnode);
>> +        v->vcpu_to_vnode = NULL;
>> +    }
>> +    if (v->vdistance) {
>> +        xfree(v->vdistance);
>> +        v->vdistance = NULL;
>> +    }
>> +    if (v->vnode_to_pnode) {
>> +        xfree(v->vnode_to_pnode);
>> +        v->vnode_to_pnode = NULL;
>> +    }
>> +    v->nr_vnodes = 0;
>> +}
>> +
>>  /*
>>   * Local variables:
>>   * mode: C
>> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
>> index 9760d50..e5d05c7 100644
>> --- a/xen/common/domctl.c
>> +++ b/xen/common/domctl.c
>> @@ -29,6 +29,7 @@
>>  #include <asm/page.h>
>>  #include <public/domctl.h>
>>  #include <xsm/xsm.h>
>> +#include <xen/vnuma.h>
>>
>>  static DEFINE_SPINLOCK(domctl_lock);
>>  DEFINE_SPINLOCK(vcpu_alloc_lock);
>> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
>> u_domctl)
>>          ret = set_global_virq_handler(d, virq);
>>      }
>>      break;
>> -
>> +    case XEN_DOMCTL_setvnumainfo:
>> +    {
>> +        unsigned int i, j, nr_vnodes, dist_size;
>> +        unsigned int dist, vmap, vntop;
>> +        vnuma_memblk_t vmemblk;
>> +
>> +        ret = -EINVAL;
>> +        dist = i = j = 0;
>
> I think the zeroing of these is unnecessary -- you zero them /
> initialize them before you use them below.
>
> You need to break this code into "paragraphs" of related changes, with
> a space in between.  For example, maybe here...
>
>> +        nr_vnodes = op->u.vnuma.nr_vnodes;
>> +        if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
>> +            break;
>
> here...
>
>> +        d->vnuma.nr_vnodes = nr_vnodes;
>> +        dist_size = nr_vnodes * nr_vnodes;
>
> here...
>
>> +        if (
>> +        (d->vnuma.vdistance = xmalloc_bytes(
>> +                 sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
>> +        (d->vnuma.vnuma_memblks = xmalloc_bytes(
>> +                 sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
>> +        (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
>> +                 sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
>> +        (d->vnuma.vnode_to_pnode = xmalloc_bytes(
>> +                 sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
>> +            goto err_dom;
>
> here...
>
> Also, for an idiomatic way to do this kind of multiple allocation w/
> error checking, take a look at
> xen/arch/x86/hvm/hvm.c:hvm_domain_initialise()  (to pick a random
> function I've worked with recently).  Look at the
>
>> +        for ( i = 0; i < nr_vnodes; i++ )
>> +            for ( j = 0; j < nr_vnodes; j++ )
>> +            {
>> +                if ( unlikely(__copy_from_guest_offset(&dist,
>> +                                        op->u.vnuma.vdistance,
>> +                                        __vnode_distance_offset(d, i, j), 
>> 1)) )
>> +                    goto err_dom;
>> +                __vnode_distance_set(d, i, j, dist);
>> +            }
>
> here...
>
> Also, in each of these error paths, you leak the structures allocated above.
>
>> +        for ( i = 0; i < nr_vnodes; i++ )
>> +        {
>> +            if ( unlikely(__copy_from_guest_offset(&vmemblk,
>> +                                    op->u.vnuma.vnuma_memblks, i, 1)) )
>> +                goto err_dom;
>> +            d->vnuma.vnuma_memblks[i].start = vmemblk.start;
>> +            d->vnuma.vnuma_memblks[i].end = vmemblk.end;
>> +        }
>
> here...
>
>> +        for ( i = 0; i < d->max_vcpus; i++ )
>> +        {
>> +            if ( unlikely(__copy_from_guest_offset(&vmap,
>> +                           op->u.vnuma.vcpu_to_vnode, i, 1)) )
>> +                goto err_dom;
>> +            d->vnuma.vcpu_to_vnode[i] = vmap;
>> +        }
>
> here...
>
>> +        if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
>> +        {
>> +            for ( i = 0; i < nr_vnodes; i++ )
>> +            {
>> +                if ( unlikely(__copy_from_guest_offset(&vntop,
>> +                                        op->u.vnuma.vnode_to_pnode, i, 1)) )
>> +                    goto err_dom;
>> +                d->vnuma.vnode_to_pnode[i] = vntop;
>> +            }
>> +        }
>> +        else
>> +            for(i = 0; i < nr_vnodes; i++)
>> +                d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
>
> here...
>
>> +        ret = 0;
>
> and maybe here.
>
>> +        break;
>> +err_dom:
>> +        ret = -EINVAL;
>> +    }
>> +    break;
>>      default:
>>          ret = arch_do_domctl(op, d, u_domctl);
>>          break;
>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>> index 50b740f..6dc2452 100644
>> --- a/xen/common/memory.c
>> +++ b/xen/common/memory.c
>> @@ -28,6 +28,7 @@
>>  #include <public/memory.h>
>>  #include <xsm/xsm.h>
>>  #include <xen/trace.h>
>> +#include <xen/vnuma.h>
>>
>>  struct memop_args {
>>      /* INPUT */
>> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, 
>> XEN_GUEST_HANDLE_PARAM(void) arg)
>>          rcu_unlock_domain(d);
>>
>>          break;
>> +    case XENMEM_get_vnuma_info:
>> +    {
>> +        int i, j;
>> +        struct vnuma_topology_info mtopology;
>> +        struct vnuma_topology_info touser_topo;
>> +        struct domain *d;
>> +        unsigned int max_pages, max_vcpus, nr_vnodes;
>> +        vnuma_memblk_t *vblks;
>>
>> +        rc = -EINVAL;
>
> here...
>
>> +        if ( guest_handle_is_null(arg) )
>> +            return rc;
>
> here...
>
>> +        if( copy_from_guest(&mtopology, arg, 1) )
>> +            return -EINVAL;
>
> here... (and so on)
>
> This should be -EFAULT
>
>> +        if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
>> +            return -EINVAL;
>> +        touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
>> +        max_pages = d->max_pages;
>> +        max_vcpus = d->max_vcpus;
>> +        rcu_unlock_domain(d);
>> +
>> +        nr_vnodes = touser_topo.nr_vnodes;
>> +        rc = copy_to_guest(arg, &touser_topo, 1);
>> +        if ( rc )
>> +            return -EFAULT;
>> +        if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
>> +            return -EFAULT;
>> +        vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
>> +        if ( vblks == NULL )
>> +            return -EFAULT;
>
> Uum, what is this for?  It doesn't appear to be used -- if there's a
> copy failure it's freed again, if things succeed it's leaked.
>
>> +        for ( i = 0; i < nr_vnodes; i++ )
>> +        {
>> +                if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
>> +                           &d->vnuma.vnuma_memblks[i], 1) < 0 )
>> +                    goto out;
>
> 4 spaces for indentation, please.
>
>> +        }
>> +        for ( i = 0; i < touser_topo.nr_vnodes; i++ )
>> +            for ( j = 0; j < touser_topo.nr_vnodes; j++ )
>> +            {
>> +                if ( copy_to_guest_offset(mtopology.vdistance,
>> +                                            __vnode_distance_offset(d, i, 
>> j),
>> +                                            &__vnode_distance(d, i, j), 1) )
>> +                    goto out;
>> +            }
>> +        for ( i = 0; i < d->max_vcpus ; i++ )
>> +        {
>> +            if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
>> +                                        &d->vnuma.vcpu_to_vnode[i], 1) )
>> +                    goto out;
>> +        }
>> +        return rc;
>> +out:
>> +        if ( vblks ) xfree(vblks);
>> +        return rc;
>> +        break;
>> +    }
>>      default:
>>          rc = arch_memory_op(op, arg);
>>          break;
>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>> index 4c5b2bb..3574d0a 100644
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -35,6 +35,7 @@
>>  #include "xen.h"
>>  #include "grant_table.h"
>>  #include "hvm/save.h"
>> +#include "memory.h"
>>
>>  #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>>
>> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
>>  typedef struct xen_domctl_set_broken_page_p2m 
>> xen_domctl_set_broken_page_p2m_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>>
>> +struct xen_domctl_vnuma {
>> +    uint16_t nr_vnodes;
>> +    XEN_GUEST_HANDLE_64(uint) vdistance;
>> +    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>> +    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
>> +    XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
>> +};
>> +
>> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
>> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
>> +
>>  struct xen_domctl {
>>      uint32_t cmd;
>>  #define XEN_DOMCTL_createdomain                   1
>> @@ -920,6 +932,7 @@ struct xen_domctl {
>>  #define XEN_DOMCTL_set_broken_page_p2m           67
>>  #define XEN_DOMCTL_setnodeaffinity               68
>>  #define XEN_DOMCTL_getnodeaffinity               69
>> +#define XEN_DOMCTL_setvnumainfo                  70
>>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
>> @@ -979,6 +992,7 @@ struct xen_domctl {
>>          struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>> +        struct xen_domctl_vnuma             vnuma;
>>          uint8_t                             pad[128];
>>      } u;
>>  };
>> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>>
>>  #endif /* __XEN_PUBLIC_DOMCTL_H__ */
>> -
>>  /*
>>   * Local variables:
>>   * mode: C
>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>> index 7a26dee..28f6aaf 100644
>> --- a/xen/include/public/memory.h
>> +++ b/xen/include/public/memory.h
>> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>>   * Caller must be privileged or the hypercall fails.
>>   */
>>  #define XENMEM_claim_pages                  24
>> -
>>  /*
>>   * XENMEM_claim_pages flags - the are no flags at this time.
>>   * The zero value is appropiate.
>>   */
>>
>> +struct vnuma_memblk {
>> +              uint64_t start, end;
>> +};
>> +typedef struct vnuma_memblk vnuma_memblk_t;
>> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
>> +
>> +#define XENMEM_get_vnuma_info               25
>> +
>>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>>
>>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
>> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
>> index a057069..c9d53e3 100644
>> --- a/xen/include/xen/domain.h
>> +++ b/xen/include/xen/domain.h
>> @@ -4,6 +4,7 @@
>>
>>  #include <public/xen.h>
>>  #include <asm/domain.h>
>> +#include <public/memory.h>
>>
>>  typedef union {
>>      struct vcpu_guest_context *nat;
>> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>>
>>  extern bool_t opt_dom0_vcpus_pin;
>>
>> +struct domain_vnuma_info {
>> +    uint16_t nr_vnodes;
>> +    uint *vdistance;
>> +    uint *vcpu_to_vnode;
>> +    uint *vnode_to_pnode;
>> +    vnuma_memblk_t *vnuma_memblks;
>> +};
>
> Are these the right sizes for things to allocate?  Do we really need
> 32 bits to represent distance, vnode and pnode?  Can we use u16 or u8?
>
>  -George



-- 
Elena

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.