
Re: [Xen-devel] VMX status report. Xen:26323 & Dom0:3.7.1



On Jan 14, 2013, at 8:59 AM, David Vrabel <david.vrabel@xxxxxxxxxx> wrote:

> On 14/01/13 04:29, Andres Lagar-Cavilla wrote:
>> 
>> Below you'll find pasted an RFC patch to fix this. I've expanded the
>> cc line to add Mats Peterson, who is also looking into some improvements
>> to privcmd (and IanC for general feedback).
>> 
>> The RFC patch cuts down code overall and cleans up logic too. I did
>> change the behavior wrt classic implementations when it comes to
>> handling errors & EFAULT. Instead of doing all the mapping work and then
>> copying back to user, I copy back each individual mapping error as soon
>> as it arises. And short-circuit and quit the whole operation as soon as
>> the first EFAULT arises.
> 
> Which is broken.
Certainly, due to copy-on-write faulting while holding the mmap semaphore. Unfortunately I didn't 
have time last night to post the fix; pardon the noise.
>  Please just look at my v3 patch and implement that method.
The one nit I have with that is that it does an unnecessary get_user() of the 
mfn on the second pass for V1. How about this?
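For context, a minimal user-space sketch (mine, not from the tree) of the V2 contract the
err handling below has to honour. The struct, ioctl and header names are from my reading of
the privcmd uapi header, and map_batch_v2() is a hypothetical helper, so treat the whole
thing as illustrative rather than authoritative:

/*
 * Illustrative only: how a V2 caller sees the per-frame err[] array.
 * Header location varies by toolstack; libxc carries its own copy of privcmd.h.
 * The fd is an open handle on the privcmd device (typically /dev/xen/privcmd).
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xen/privcmd.h>        /* struct privcmd_mmapbatch_v2, xen_pfn_t */

static int map_batch_v2(int fd, unsigned int dom, xen_pfn_t *mfns,
                        int *err, unsigned int num)
{
        size_t len = (size_t)num * getpagesize();
        void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        struct privcmd_mmapbatch_v2 m = {
                .num = num, .dom = dom,
                .addr = (unsigned long)addr, .arr = mfns, .err = err,
        };
        unsigned int i;

        if (addr == MAP_FAILED)
                return -1;

        /* Returns -1/ENOENT if any frame was paged out; details are in err[]. */
        if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &m) < 0 && errno != ENOENT)
                return -1;

        for (i = 0; i < num; i++)       /* 0 on success, -errno otherwise */
                if (err[i])
                        fprintf(stderr, "frame %u: %s\n", i, strerror(-err[i]));
        return 0;
}
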
Andres

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 3421f0d..fc4952d 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -261,11 +261,12 @@ struct mmap_batch_state {
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
-       /* An array for individual errors */
-       int *err;
+       int version;
 
        /* User-space mfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_mfn;
+       /* User-space int array to store errors in the second pass for V2. */
+       int __user *user_err;
 };
 
 /* auto translated dom0 note: if domU being created is PV, then mfn is
@@ -288,7 +289,19 @@ static int mmap_batch_fn(void *data, void *state)
                                         &cur_page);
 
        /* Store error code for second pass. */
-       *(st->err++) = ret;
+       if (st->version == 1) {
+               if (ret < 0) {
+                       /*
+                        * V1 encodes the error codes in the 32bit top nibble of the
+                        * mfn (with its known limitations vis-a-vis 64 bit callers).
+                        */
+                       *mfnp |= (ret == -ENOENT) ?
+                                               PRIVCMD_MMAPBATCH_PAGED_ERROR :
+                                               PRIVCMD_MMAPBATCH_MFN_ERROR;
+               }
+       } else { /* st->version == 2 */
+               *((int *) mfnp) = ret;
+       }
 
        /* And see if it affects the global_error. */
        if (ret < 0) {
@@ -305,20 +318,25 @@ static int mmap_batch_fn(void *data, void *state)
        return 0;
 }
 
-static int mmap_return_errors_v1(void *data, void *state)
+static int mmap_return_errors(void *data, void *state)
 {
-       xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
-       int err = *(st->err++);
 
-       /*
-        * V1 encodes the error codes in the 32bit top nibble of the
-        * mfn (with its known limitations vis-a-vis 64 bit callers).
-        */
-       *mfnp |= (err == -ENOENT) ?
-                               PRIVCMD_MMAPBATCH_PAGED_ERROR :
-                               PRIVCMD_MMAPBATCH_MFN_ERROR;
-       return __put_user(*mfnp, st->user_mfn++);
+       if (st->version == 1) {
+               xen_pfn_t mfnp = *((xen_pfn_t *) data);
+               if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
+                       return __put_user(mfnp, st->user_mfn++);
+               else
+                       st->user_mfn++;
+       } else { /* st->version == 2 */
+               int err = *((int *) data);
+               if (err)
+                       return __put_user(err, st->user_err++);
+               else
+                       st->user_err++;
+       }
+
+       return 0;
 }
 
 /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
@@ -357,7 +375,6 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
-       int *err_array = NULL;
        struct mmap_batch_state state;
 
        if (!xen_initial_domain())
@@ -396,10 +413,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
                goto out;
        }
 
-       err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
-       if (err_array == NULL) {
-               ret = -ENOMEM;
-               goto out;
+       if (version == 2) {
+               /* Zero error array now to only copy back actual errors. */
+               if (clear_user(m.err, sizeof(int) * m.num)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
        }
 
        down_write(&mm->mmap_sem);
@@ -427,7 +446,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
        state.va            = m.addr;
        state.index         = 0;
        state.global_error  = 0;
-       state.err           = err_array;
+       state.version       = version;
 
        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
@@ -435,21 +454,14 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
        up_write(&mm->mmap_sem);
 
-       if (version == 1) {
-               if (state.global_error) {
-                       /* Write back errors in second pass. */
-                       state.user_mfn = (xen_pfn_t *)m.arr;
-                       state.err      = err_array;
-                       ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-                                            &pagelist, mmap_return_errors_v1, &state);
-               } else
-                       ret = 0;
-
-       } else if (version == 2) {
-               ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
-               if (ret)
-                       ret = -EFAULT;
-       }
+       if (state.global_error) {
+               /* Write back errors in second pass. */
+               state.user_mfn = (xen_pfn_t *)m.arr;
+               state.user_err = m.err;
+               ret = traverse_pages(m.num, sizeof(xen_pfn_t),
+                                            &pagelist, mmap_return_errors, &state);
+       } else
+               ret = 0;
 
        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
@@ -457,7 +469,6 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
                ret = -ENOENT;
 
 out:
-       kfree(err_array);
        free_page_list(&pagelist);
 
        return ret;

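As a reminder of the V1 contract the second pass above preserves, a hedged user-space sketch
of how callers decode the top-nibble errors. The mask names are the ones I recall from
privcmd.h, v1_frame_error() is a hypothetical helper, and the known 64-bit caveat from the
comment in the patch applies:

/*
 * Illustrative only: decoding V1 per-frame errors from the mfn slots.
 * Subject to the known top-nibble limitation for large (64-bit) mfns.
 */
#include <errno.h>
#include <xen/privcmd.h>        /* PRIVCMD_MMAPBATCH_*_ERROR, xen_pfn_t */

static int v1_frame_error(xen_pfn_t mfn)
{
        xen_pfn_t bits = mfn & PRIVCMD_MMAPBATCH_MFN_ERROR;

        if (bits == PRIVCMD_MMAPBATCH_PAGED_ERROR)
                return -ENOENT;         /* frame currently paged out */
        if (bits)
                return -EINVAL;         /* mapping failed for another reason */
        return 0;                       /* mapped successfully */
}
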

> 
>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>> index 3421f0d..9433396 100644
>> --- a/drivers/xen/privcmd.c
>> +++ b/drivers/xen/privcmd.c 
> [...]
>> @@ -287,40 +285,35 @@ static int mmap_batch_fn(void *data, void *state)
> [...]
>> +           efault = __put_user(mfn_err, st->user_mfn++);
>> +       } else { /* st->version == 2 */
>> +           efault = __put_user(ret, st->user_err++);
> 
> You can't use __put_user() or any other function accessing user memory
> while holding mmap_sem or you will occasionally deadlock in the page
> fault handler (depending on whether the user page is currently present
> or not).
> 
> David
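
For the archives: that constraint is exactly why the patch above defers all __put_user()
calls to the second pass, after up_write(). A minimal sketch of the pattern, with a
hypothetical helper name, purely illustrative and not code from the patch:

/*
 * Record per-frame results in kernel memory while mmap_sem is held, and
 * only touch user memory after up_write().  A fault on the user page would
 * otherwise re-enter the fault handler, which takes mmap_sem and deadlocks
 * against ourselves.
 */
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>

static int map_then_report(struct mm_struct *mm, int __user *uerr,
                           int *kerr, unsigned int num)
{
        unsigned int i;

        down_write(&mm->mmap_sem);
        for (i = 0; i < num; i++)
                kerr[i] = 0;    /* ...the actual mapping work goes here... */
        up_write(&mm->mmap_sem);

        /* Safe now: the fault handler may take mmap_sem if uerr faults. */
        for (i = 0; i < num; i++)
                if (kerr[i] && put_user(kerr[i], &uerr[i]))
                        return -EFAULT;

        return 0;
}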


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

