
Re: [Xen-devel] [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl



Super. To which branch are you applying these now? (n00b question but have to 
ask)
Andres
On Sep 5, 2012, at 12:21 PM, Konrad Rzeszutek Wilk wrote:

> On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
>> Re-spin of alternative patch after David's feedback.
>> Thanks
>> Andres
> 
> applied. fixed some whitespace issues.
>> 
>> commit ab351a5cef1797935b083c2f6e72800a8949c515
>> Author: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>> Date:   Thu Aug 30 12:23:33 2012 -0400
>> 
>>    xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>> 
>>    PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>>    field for reporting the error code for every frame that could not be
>>    mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>> 
>>    Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
>>    in the mfn array.
>> 
>>    Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
>>    Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>> 
>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>> index 85226cb..5386f20 100644
>> --- a/drivers/xen/privcmd.c
>> +++ b/drivers/xen/privcmd.c
>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>>  */
>> static int gather_array(struct list_head *pagelist,
>>                      unsigned nelem, size_t size,
>> -                    void __user *data)
>> +                    const void __user *data)
>> {
>>      unsigned pageidx;
>>      void *pagedata;
>> @@ -246,61 +246,117 @@ struct mmap_batch_state {
>>      domid_t domain;
>>      unsigned long va;
>>      struct vm_area_struct *vma;
>> -    int err;
>> -
>> -    xen_pfn_t __user *user;
>> +    /* A tristate: 
>> +     *      0 for no errors
>> +     *      1 if at least one error has happened (and no
>> +     *          -ENOENT errors have happened)
>> +     *      -ENOENT if at least 1 -ENOENT has happened.
>> +     */
>> +    int global_error;
>> +    /* An array for individual errors */
>> +    int *err;
>> +
>> +    /* User-space mfn array to store errors in the second pass for V1. */
>> +    xen_pfn_t __user *user_mfn;
>> };
>> 
>> static int mmap_batch_fn(void *data, void *state)
>> {
>>      xen_pfn_t *mfnp = data;
>>      struct mmap_batch_state *st = state;
>> +    int ret;
>> 
>> -    if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> -                                   st->vma->vm_page_prot, st->domain) < 0) {
>> -            *mfnp |= 0xf0000000U;
>> -            st->err++;
>> +    ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> +                                     st->vma->vm_page_prot, st->domain);
>> +
>> +    /* Store error code for second pass. */
>> +    *(st->err++) = ret;
>> +
>> +    /* And see if it affects the global_error. */
>> +    if (ret < 0) {
>> +            if (ret == -ENOENT)
>> +                    st->global_error = -ENOENT;
>> +            else {
>> +                    /* Record that at least one error has happened. */
>> +                    if (st->global_error == 0)
>> +                            st->global_error = 1;
>> +            }
>>      }
>>      st->va += PAGE_SIZE;
>> 
>>      return 0;
>> }
>> 
>> -static int mmap_return_errors(void *data, void *state)
>> +static int mmap_return_errors_v1(void *data, void *state)
>> {
>>      xen_pfn_t *mfnp = data;
>>      struct mmap_batch_state *st = state;
>> -
>> -    return put_user(*mfnp, st->user++);
>> +    int err = *(st->err++);
>> +
>> +    /*
>> +     * V1 encodes the error codes in the 32bit top nibble of the 
>> +     * mfn (with its known limitations vis-a-vis 64 bit callers).
>> +     */
>> +    *mfnp |= (err == -ENOENT) ?
>> +                            PRIVCMD_MMAPBATCH_PAGED_ERROR :
>> +                            PRIVCMD_MMAPBATCH_MFN_ERROR;
>> +    return __put_user(*mfnp, st->user_mfn++);
>> }
>> 
>> static struct vm_operations_struct privcmd_vm_ops;
>> 
>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>> {
>>      int ret;
>> -    struct privcmd_mmapbatch m;
>> +    struct privcmd_mmapbatch_v2 m;
>>      struct mm_struct *mm = current->mm;
>>      struct vm_area_struct *vma;
>>      unsigned long nr_pages;
>>      LIST_HEAD(pagelist);
>> +    int *err_array = NULL;
>>      struct mmap_batch_state state;
>> 
>>      if (!xen_initial_domain())
>>              return -EPERM;
>> 
>> -    if (copy_from_user(&m, udata, sizeof(m)))
>> -            return -EFAULT;
>> +    switch (version) {
>> +    case 1:
>> +            if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>> +                    return -EFAULT;
>> +            /* Returns per-frame error in m.arr. */
>> +            m.err = NULL;
>> +            if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>> +                    return -EFAULT;
>> +            break;
>> +    case 2:
>> +            if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>> +                    return -EFAULT;
>> +            /* Returns per-frame error code in m.err. */
>> +            if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>> +                    return -EFAULT;
>> +            break;
>> +    default:
>> +            return -EINVAL;
>> +    }
>> 
>>      nr_pages = m.num;
>>      if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>>              return -EINVAL;
>> 
>> -    ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>> -                       m.arr);
>> +    ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>> +
>> +    if (ret)
>> +            goto out;
>> +    if (list_empty(&pagelist)) {
>> +            ret = -EINVAL;
>> +            goto out;
>> +    }
>> 
>> -    if (ret || list_empty(&pagelist))
>> +    err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
>> +    if (err_array == NULL) {
>> +            ret = -ENOMEM;
>>              goto out;
>> +    }
>> 
>>      down_write(&mm->mmap_sem);
>> 
>> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>              goto out;
>>      }
>> 
>> -    state.domain = m.dom;
>> -    state.vma = vma;
>> -    state.va = m.addr;
>> -    state.err = 0;
>> +    state.domain        = m.dom;
>> +    state.vma           = vma;
>> +    state.va            = m.addr;
>> +    state.global_error  = 0;
>> +    state.err           = err_array;
>> 
>> -    ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>> -                         &pagelist, mmap_batch_fn, &state);
>> +    /* mmap_batch_fn guarantees ret == 0 */
>> +    BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
>> +                         &pagelist, mmap_batch_fn, &state));
>> 
>>      up_write(&mm->mmap_sem);
>> 
>> -    if (state.err > 0) {
>> -            state.user = m.arr;
>> +    if (state.global_error && (version == 1)) {
>> +            /* Write back errors in second pass. */
>> +            state.user_mfn = (xen_pfn_t *)m.arr;
>> +            state.err      = err_array;
>>              ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>> -                           &pagelist,
>> -                           mmap_return_errors, &state);
>> -    }
>> +                                     &pagelist, mmap_return_errors_v1, &state);
>> +    } else
>> +            ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
>> +
>> +    /* If we have not had any EFAULT-like global errors then set the global
>> +     * error to -ENOENT if necessary. */
>> +    if ((ret == 0) && (state.global_error == -ENOENT))
>> +        ret = -ENOENT;
>> 
>> out:
>> +    kfree(err_array);
>>      free_page_list(&pagelist);
>> 
>>      return ret;
>> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
>>              break;
>> 
>>      case IOCTL_PRIVCMD_MMAPBATCH:
>> -            ret = privcmd_ioctl_mmap_batch(udata);
>> +            ret = privcmd_ioctl_mmap_batch(udata, 1);
>> +            break;
>> +
>> +    case IOCTL_PRIVCMD_MMAPBATCH_V2:
>> +            ret = privcmd_ioctl_mmap_batch(udata, 2);
>>              break;
>> 
>>      default:
>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
>> index 45c1aa1..a853168 100644
>> --- a/include/xen/privcmd.h
>> +++ b/include/xen/privcmd.h
>> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
>>      int num;     /* number of pages to populate */
>>      domid_t dom; /* target domain */
>>      __u64 addr;  /* virtual address */
>> -    xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>> +    xen_pfn_t __user *arr; /* array of mfns - or'd with
>> +                              PRIVCMD_MMAPBATCH_*_ERROR on err */
>> +};
>> +
>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR     0xf0000000U
>> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR   0x80000000U
>> +
>> +struct privcmd_mmapbatch_v2 {
>> +    unsigned int num; /* number of pages to populate */
>> +    domid_t dom;      /* target domain */
>> +    __u64 addr;       /* virtual address */
>> +    const xen_pfn_t __user *arr; /* array of mfns */
>> +    int __user *err;  /* array of error codes */
>> };
>> 
>> /*
>>  * @cmd: IOCTL_PRIVCMD_HYPERCALL
>>  * @arg: &privcmd_hypercall_t
>>  * Return: Value returned from execution of the specified hypercall.
>> + *
>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
>> + * @arg: &struct privcmd_mmapbatch_v2
>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
>> + * each frame).  On an error other than a failed frame remap, -1 is
>> + * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
>> + * if the operation was otherwise successful but any frame failed with
>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>>  */
>> #define IOCTL_PRIVCMD_HYPERCALL                                      \
>>      _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
>> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
>>      _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>> #define IOCTL_PRIVCMD_MMAPBATCH                                      \
>>      _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2                          \
>> +    _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>> 
>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>> 
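For illustration, here is a minimal userspace sketch of how a caller might drive the
new IOCTL_PRIVCMD_MMAPBATCH_V2 interface declared in the header above. The device
path, the mmap() of the privcmd fd to reserve a VA range, and the 4K page size are
assumptions made for the example, not anything taken from the patch; real callers
would normally go through libxc:

/*
 * Hedged sketch only: exercises IOCTL_PRIVCMD_MMAPBATCH_V2 as declared
 * in xen/privcmd.h.  The device path and 4K page size are assumptions.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/privcmd.h>          /* assumed visible to userspace builds */

static int map_foreign_frames(domid_t dom, const xen_pfn_t *mfns,
                              unsigned int num)
{
	struct privcmd_mmapbatch_v2 batch;
	int *err;
	void *va;
	unsigned int i;
	int fd, rc;

	fd = open("/proc/xen/privcmd", O_RDWR);   /* path is an assumption */
	if (fd < 0)
		return -1;

	err = calloc(num, sizeof(*err));
	if (!err) {
		close(fd);
		return -1;
	}

	/* Reserve a VA range for the ioctl to populate with foreign frames. */
	va = mmap(NULL, (size_t)num * 4096, PROT_READ | PROT_WRITE,
		  MAP_SHARED, fd, 0);
	if (va == MAP_FAILED) {
		free(err);
		close(fd);
		return -1;
	}

	memset(&batch, 0, sizeof(batch));
	batch.num  = num;
	batch.dom  = dom;
	batch.addr = (__u64)(unsigned long)va;
	batch.arr  = mfns;	/* input: frames to map */
	batch.err  = err;	/* output: one error code per frame */

	rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
	if (rc < 0 && errno == ENOENT) {
		/* Otherwise successful, but some frames were paged out. */
		for (i = 0; i < num; i++)
			if (err[i] == -ENOENT)
				fprintf(stderr, "frame %u paged out\n", i);
	}

	free(err);
	close(fd);
	return rc;
}

On any other failure (EINVAL, EFAULT, ...) the ioctl returns -1 without valid
per-frame codes, matching the Return: description added to privcmd.h above.
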
>> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
>> 
>>> From: David Vrabel <david.vrabel@xxxxxxxxxx>
>>> 
>>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>>> field for reporting the error code for every frame that could not be
>>> mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>>> 
>>> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
>>> ---
>>> drivers/xen/privcmd.c |   99 +++++++++++++++++++++++++++++++++++++++---------
>>> include/xen/privcmd.h |   23 +++++++++++-
>>> 2 files changed, 102 insertions(+), 20 deletions(-)
>>> 
>>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>>> index ccee0f1..c0e89e7 100644
>>> --- a/drivers/xen/privcmd.c
>>> +++ b/drivers/xen/privcmd.c
>>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>>> */
>>> static int gather_array(struct list_head *pagelist,
>>>                     unsigned nelem, size_t size,
>>> -                   void __user *data)
>>> +                   const void __user *data)
>>> {
>>>     unsigned pageidx;
>>>     void *pagedata;
>>> @@ -248,18 +248,37 @@ struct mmap_batch_state {
>>>     struct vm_area_struct *vma;
>>>     int err;
>>> 
>>> -   xen_pfn_t __user *user;
>>> +   xen_pfn_t __user *user_mfn;
>>> +   int __user *user_err;
>>> };
>>> 
>>> static int mmap_batch_fn(void *data, void *state)
>>> {
>>>     xen_pfn_t *mfnp = data;
>>>     struct mmap_batch_state *st = state;
>>> +   int ret;
>>> 
>>> -   if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>> -                                  st->vma->vm_page_prot, st->domain) < 0) {
>>> -           *mfnp |= 0xf0000000U;
>>> -           st->err++;
>>> +   ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>> +                                    st->vma->vm_page_prot, st->domain);
>>> +   if (ret < 0) {
>>> +           /*
>>> +            * Error reporting is a mess but userspace relies on
>>> +            * it behaving this way.
>>> +            *
>>> +            * V2 needs to a) return the result of each frame's
>>> +            * remap; and b) return -ENOENT if any frame failed
>>> +            * with -ENOENT.
>>> +            *
>>> +            * In this first pass the error code is saved by
>>> +            * overwriting the mfn and an error is indicated in
>>> +            * st->err.
>>> +            *
>>> +            * The second pass by mmap_return_errors() will write
>>> +            * the error codes to user space and get the right
>>> +            * ioctl return value.
>>> +            */
>>> +           *(int *)mfnp = ret;
>>> +           st->err = ret;
>>>     }
>>>     st->va += PAGE_SIZE;
>>> 
>>> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
>>> {
>>>     xen_pfn_t *mfnp = data;
>>>     struct mmap_batch_state *st = state;
>>> +   int ret;
>>> +
>>> +   if (st->user_err) {
>>> +           int err = *(int *)mfnp;
>>> +
>>> +           if (err == -ENOENT)
>>> +                   st->err = err;
>>> 
>>> -   return put_user(*mfnp, st->user++);
>>> +           return __put_user(err, st->user_err++);
>>> +   } else {
>>> +           xen_pfn_t mfn;
>>> +
>>> +           ret = __get_user(mfn, st->user_mfn);
>>> +           if (ret < 0)
>>> +                   return ret;
>>> +
>>> +           mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
>>> +           return __put_user(mfn, st->user_mfn++);
>>> +   }
>>> }
>>> 
>>> static struct vm_operations_struct privcmd_vm_ops;
>>> 
>>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>>> {
>>>     int ret;
>>> -   struct privcmd_mmapbatch m;
>>> +   struct privcmd_mmapbatch_v2 m;
>>>     struct mm_struct *mm = current->mm;
>>>     struct vm_area_struct *vma;
>>>     unsigned long nr_pages;
>>> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>     if (!xen_initial_domain())
>>>             return -EPERM;
>>> 
>>> -   if (copy_from_user(&m, udata, sizeof(m)))
>>> -           return -EFAULT;
>>> +   switch (version) {
>>> +   case 1:
>>> +           if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>>> +                   return -EFAULT;
>>> +           /* Returns per-frame error in m.arr. */
>>> +           m.err = NULL;
>>> +           if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>>> +                   return -EFAULT;
>>> +           break;
>>> +   case 2:
>>> +           if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>>> +                   return -EFAULT;
>>> +           /* Returns per-frame error code in m.err. */
>>> +           if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>>> +                   return -EFAULT;
>>> +           break;
>>> +   default:
>>> +           return -EINVAL;
>>> +   }
>>> 
>>>     nr_pages = m.num;
>>>     if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>>>             return -EINVAL;
>>> 
>>> -   ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>>> -                      m.arr);
>>> +   ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>>> 
>>>     if (ret || list_empty(&pagelist))
>>>             goto out;
>>> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>> 
>>>     up_write(&mm->mmap_sem);
>>> 
>>> -   if (state.err > 0) {
>>> -           state.user = m.arr;
>>> +   if (state.err) {
>>> +           state.err = 0;
>>> +           state.user_mfn = (xen_pfn_t *)m.arr;
>>> +           state.user_err = m.err;
>>>             ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>>> -                          &pagelist,
>>> -                          mmap_return_errors, &state);
>>> -   }
>>> +                                &pagelist,
>>> +                                mmap_return_errors, &state);
>>> +           if (ret >= 0)
>>> +                   ret = state.err;
>>> +   } else if (m.err)
>>> +           __clear_user(m.err, m.num * sizeof(*m.err));
>>> 
>>> out:
>>>     free_page_list(&pagelist);
>>> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
>>>             break;
>>> 
>>>     case IOCTL_PRIVCMD_MMAPBATCH:
>>> -           ret = privcmd_ioctl_mmap_batch(udata);
>>> +           ret = privcmd_ioctl_mmap_batch(udata, 1);
>>> +           break;
>>> +
>>> +   case IOCTL_PRIVCMD_MMAPBATCH_V2:
>>> +           ret = privcmd_ioctl_mmap_batch(udata, 2);
>>>             break;
>>> 
>>>     default:
>>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
>>> index 17857fb..f60d75c 100644
>>> --- a/include/xen/privcmd.h
>>> +++ b/include/xen/privcmd.h
>>> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
>>>     int num;     /* number of pages to populate */
>>>     domid_t dom; /* target domain */
>>>     __u64 addr;  /* virtual address */
>>> -   xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>>> +   xen_pfn_t __user *arr; /* array of mfns - or'd with
>>> +                             PRIVCMD_MMAPBATCH_MFN_ERROR on err */
>>> +};
>>> +
>>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
>>> +
>>> +struct privcmd_mmapbatch_v2 {
>>> +   unsigned int num; /* number of pages to populate */
>>> +   domid_t dom;      /* target domain */
>>> +   __u64 addr;       /* virtual address */
>>> +   const xen_pfn_t __user *arr; /* array of mfns */
>>> +   int __user *err;  /* array of error codes */
>>> };
>>> 
>>> /*
>>> * @cmd: IOCTL_PRIVCMD_HYPERCALL
>>> * @arg: &privcmd_hypercall_t
>>> * Return: Value returned from execution of the specified hypercall.
>>> + *
>>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
>>> + * @arg: &struct privcmd_mmapbatch_v2
>>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
>>> + * each frame).  On an error other than a failed frame remap, -1 is
>>> + * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
>>> + * if the operation was otherwise successful but any frame failed with
>>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>>> */
>>> #define IOCTL_PRIVCMD_HYPERCALL                                     \
>>>     _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
>>> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
>>>     _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>>> #define IOCTL_PRIVCMD_MMAPBATCH                                     \
>>>     _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
>>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2                         \
>>> +   _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>>> 
>>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>>> -- 
>>> 1.7.2.5
>>> 
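As an aside on the legacy interface: the applied version of the patch keeps V1
callers working by or'ing an error marker into the top nibble of each failed mfn.
A hedged sketch of how such a caller might decode that encoding afterwards; it
assumes the original mfn's top nibble was clear, which is exactly the limitation
for 64-bit callers that the code comment alludes to:

/*
 * Hedged sketch only: decodes the V1 top-nibble error encoding written
 * back into the mfn array by mmap_return_errors_v1() above.  Assumes
 * the original mfns had a clear top nibble.
 */
#include <stdio.h>
#include <xen/privcmd.h>          /* assumed visible to userspace builds */

static void report_v1_errors(const xen_pfn_t *arr, unsigned int num)
{
	unsigned int i;

	for (i = 0; i < num; i++) {
		unsigned int top = (unsigned int)arr[i] & 0xf0000000U;

		if (top == PRIVCMD_MMAPBATCH_MFN_ERROR)
			printf("frame %u: remap failed\n", i);
		else if (top == PRIVCMD_MMAPBATCH_PAGED_ERROR)
			printf("frame %u: paged out, retry\n", i);
		/* top nibble clear: frame mapped successfully */
	}
}
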


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
