# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1187079421 -32400 # Node ID cf7a9141e7f884a14cfab8d3785c95dea85749be # Parent 4dbbedee6bb8d594287940470a61b8c0c56daf9c [xen, xencomm] fix various xencomm invalid racy accesses. - Xencomm should check struct xencomm_desc alignment. - Xencomm should check whether struct xencomm_desc itself (8 bytes) doesn't cross a page boundary. Otherwise a hostile guest kernel can pass a pointer that crosses a page boundary, and then xencomm accesses an unrelated page. - Xencomm shouldn't access struct xencomm_desc::nr_addrs multiple times. Copy it to a local area and use the copy. Otherwise a hostile guest can modify it at the same time. - Xencomm should check whether the struct xencomm_desc::address[] array crosses a page boundary. Otherwise xencomm may access unrelated pages. - Xencomm should get_page()/put_page() after address conversion from paddr to maddr because xen supports SMP and the balloon driver. Otherwise another vcpu may free the page at the same time. Such domain behaviour doesn't make sense; however, nothing prevents it. 
PATCHNAME: fix_xencomm_invalid_racy_access Signed-off-by: Isaku Yamahata diff -r 4dbbedee6bb8 -r cf7a9141e7f8 xen/common/xencomm.c --- a/xen/common/xencomm.c Tue Aug 14 16:46:23 2007 +0900 +++ b/xen/common/xencomm.c Tue Aug 14 17:17:01 2007 +0900 @@ -35,12 +35,149 @@ static int xencomm_debug = 1; /* extreme #endif static void* -xencomm_maddr_to_vaddr(unsigned long maddr) -{ +xencomm_vaddr(unsigned long paddr, struct page_info *page) +{ + return (void*)((paddr & ~PAGE_MASK) | (unsigned long)page_to_virt(page)); +} + +/* get_page() to prevent from another vcpu freeing the page */ +static int +xencomm_get_page(unsigned long paddr, struct page_info **page) +{ + unsigned long maddr = paddr_to_maddr(paddr); if (maddr == 0) - return NULL; - - return maddr_to_virt(maddr); + return -EFAULT; + + *page = maddr_to_page(maddr); + if (get_page(*page, current->domain) == 0) { + if (page_get_owner(*page) != current->domain) { + /* + * This page might be a page granted by another domain, or + * this page is freed with decrease reservation hypercall at + * the same time. + */ + gdprintk(XENLOG_WARNING, + "bad page is passed. paddr 0x%lx maddr 0x%lx\n", + paddr, maddr); + return -EFAULT; + } + + /* Try again. 
*/ + cpu_relax(); + return -EAGAIN; + } + + return 0; +} + +/* check if struct desc doesn't cross page boundry */ +static int +xencomm_desc_cross_page_boundary(unsigned long paddr) +{ + unsigned long offset = paddr & ~PAGE_MASK; + if (offset > PAGE_SIZE - sizeof(struct xencomm_desc)) + return 1; + return 0; +} + +struct xencomm_ctxt { + struct xencomm_desc *desc; + + uint32_t nr_addrs; + struct page_info *page; +}; + +static uint32_t +xencomm_ctxt_nr_addrs(const struct xencomm_ctxt *ctxt) +{ + return ctxt->nr_addrs; +} + +static unsigned long* +xencomm_ctxt_address(struct xencomm_ctxt *ctxt, int i) +{ + return &ctxt->desc->address[i]; +} + +/* check if struct xencomm_desc and address array cross page boundary */ +static int +xencomm_ctxt_cross_page_boundary(struct xencomm_ctxt *ctxt) +{ + unsigned long saddr = (unsigned long)ctxt->desc; + unsigned long eaddr = + (unsigned long)&ctxt->desc->address[ctxt->nr_addrs] - 1; + if ((saddr >> PAGE_SHIFT) != (eaddr >> PAGE_SHIFT)) + return 1; + return 0; +} + +static int +xencomm_ctxt_init(const void* handle, struct xencomm_ctxt *ctxt) +{ + struct page_info *page; + struct xencomm_desc *desc; + int ret; + + /* avoid unaligned access */ + if ((unsigned long)handle % __alignof__(*desc) != 0) + return -EINVAL; + if (xencomm_desc_cross_page_boundary((unsigned long)handle)) + return -EINVAL; + + /* first we need to access the descriptor */ + ret = xencomm_get_page((unsigned long)handle, &page); + if (ret) + return ret; + + desc = xencomm_vaddr((unsigned long)handle, page); + if (desc->magic != XENCOMM_MAGIC) { + printk("%s: error: %p magic was 0x%x\n", __func__, desc, desc->magic); + put_page(page); + return -EINVAL; + } + + ctxt->desc = desc; + ctxt->nr_addrs = desc->nr_addrs; /* copy before use. + * It is possible for a guest domain to + * modify concurrently. 
+ */ + ctxt->page = page; + if (xencomm_ctxt_cross_page_boundary(ctxt)) { + put_page(page); + return -EINVAL; + } + return 0; +} + +static void +xencomm_ctxt_done(struct xencomm_ctxt *ctxt) +{ + put_page(ctxt->page); +} + +static int +xencomm_copy_chunk_from(unsigned long to, unsigned long paddr, + unsigned int len) +{ + struct page_info *page; + + while (1) { + int res; + res = xencomm_get_page(paddr, &page); + if (res != 0) { + if (res == -EAGAIN) + continue; /* Try again. */ + return res; + } + if (xencomm_debug) + printk("%lx[%d] -> %lx\n", + (unsigned long)xencomm_vaddr(paddr, page), len, to); + + memcpy((void *)to, xencomm_vaddr(paddr, page), len); + put_page(page); + return 0; + } + /* NOTREACHED */ } static unsigned long @@ -53,17 +190,14 @@ xencomm_inline_from_guest(void *to, cons while (n > 0) { unsigned int chunksz; - unsigned long src_maddr; unsigned int bytes; chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE); bytes = min(chunksz, n); - src_maddr = paddr_to_maddr(src_paddr); - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to); - memcpy(to, maddr_to_virt(src_maddr), bytes); + if (xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes)) + return n; src_paddr += bytes; to += bytes; n -= bytes; @@ -89,7 +223,7 @@ xencomm_copy_from_guest(void *to, const xencomm_copy_from_guest(void *to, const void *from, unsigned int n, unsigned int skip) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; unsigned int from_pos = 0; unsigned int to_pos = 0; unsigned int i = 0; @@ -97,21 +231,12 @@ xencomm_copy_from_guest(void *to, const if (xencomm_is_inline(from)) return xencomm_inline_from_guest(to, from, n, skip); - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)from)); - if (desc == NULL) + if (xencomm_ctxt_init(from, &ctxt)) return n; - if (desc->magic != XENCOMM_MAGIC) { - printk("%s: error: %p magic was 0x%x\n", - __func__, desc, 
desc->magic); - return n; - } - /* iterate through the descriptor, copying up to a page at a time */ - while ((to_pos < n) && (i < desc->nr_addrs)) { - unsigned long src_paddr = desc->address[i]; + while ((to_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt))) { + unsigned long src_paddr = *xencomm_ctxt_address(&ctxt, i); unsigned int pgoffset; unsigned int chunksz; unsigned int chunk_skip; @@ -130,17 +255,11 @@ xencomm_copy_from_guest(void *to, const skip -= chunk_skip; if (skip == 0 && chunksz > 0) { - unsigned long src_maddr; - unsigned long dest = (unsigned long)to + to_pos; unsigned int bytes = min(chunksz, n - to_pos); - src_maddr = paddr_to_maddr(src_paddr + chunk_skip); - if (src_maddr == 0) - return n - to_pos; - - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest); - memcpy((void *)dest, maddr_to_virt(src_maddr), bytes); + if (xencomm_copy_chunk_from((unsigned long)to + to_pos, + src_paddr + chunk_skip, bytes)) + goto out; from_pos += bytes; to_pos += bytes; } @@ -148,7 +267,34 @@ xencomm_copy_from_guest(void *to, const i++; } +out: + xencomm_ctxt_done(&ctxt); return n - to_pos; +} + +static int +xencomm_copy_chunk_to(unsigned long paddr, unsigned long from, + unsigned int len) +{ + struct page_info *page; + + while (1) { + int res; + res = xencomm_get_page(paddr, &page); + if (res != 0) { + if (res == -EAGAIN) + continue; /* Try again. 
*/ + return res; + } + if (xencomm_debug) + printk("%lx[%d] -> %lx\n", from, len, + (unsigned long)xencomm_vaddr(paddr, page)); + + memcpy(xencomm_vaddr(paddr, page), (void *)from, len); + put_page(page); + return 0; + } + /* NOTREACHED */ } static unsigned long @@ -161,17 +307,14 @@ xencomm_inline_to_guest(void *to, const while (n > 0) { unsigned int chunksz; - unsigned long dest_maddr; unsigned int bytes; chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE); bytes = min(chunksz, n); - dest_maddr = paddr_to_maddr(dest_paddr); - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr); - memcpy(maddr_to_virt(dest_maddr), (void *)from, bytes); + if (xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes)) + return n; dest_paddr += bytes; from += bytes; n -= bytes; @@ -197,7 +340,7 @@ xencomm_copy_to_guest(void *to, const vo xencomm_copy_to_guest(void *to, const void *from, unsigned int n, unsigned int skip) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; unsigned int from_pos = 0; unsigned int to_pos = 0; unsigned int i = 0; @@ -205,20 +348,12 @@ xencomm_copy_to_guest(void *to, const vo if (xencomm_is_inline(to)) return xencomm_inline_to_guest(to, from, n, skip); - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)to)); - if (desc == NULL) + if (xencomm_ctxt_init(to, &ctxt)) return n; - if (desc->magic != XENCOMM_MAGIC) { - printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); - return n; - } - /* iterate through the descriptor, copying up to a page at a time */ - while ((from_pos < n) && (i < desc->nr_addrs)) { - unsigned long dest_paddr = desc->address[i]; + while ((from_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt))) { + unsigned long dest_paddr = *xencomm_ctxt_address(&ctxt, i); unsigned int pgoffset; unsigned int chunksz; unsigned int chunk_skip; @@ -237,17 +372,11 @@ xencomm_copy_to_guest(void *to, const vo skip -= 
chunk_skip; if (skip == 0 && chunksz > 0) { - unsigned long dest_maddr; - unsigned long source = (unsigned long)from + from_pos; unsigned int bytes = min(chunksz, n - from_pos); - dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip); - if (dest_maddr == 0) - return n - from_pos; - - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr); - memcpy(maddr_to_virt(dest_maddr), (void *)source, bytes); + if (xencomm_copy_chunk_to(dest_paddr + chunk_skip, + (unsigned long)from + from_pos, bytes)) + goto out; from_pos += bytes; to_pos += bytes; } @@ -255,6 +384,8 @@ xencomm_copy_to_guest(void *to, const vo i++; } +out: + xencomm_ctxt_done(&ctxt); return n - from_pos; } @@ -268,26 +399,19 @@ static int xencomm_inline_add_offset(voi * exhausted pages to XENCOMM_INVALID. */ int xencomm_add_offset(void **handle, unsigned int bytes) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; int i = 0; if (xencomm_is_inline(*handle)) return xencomm_inline_add_offset(handle, bytes); - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)*handle)); - if (desc == NULL) + if (xencomm_ctxt_init(handle, &ctxt)) return -1; - if (desc->magic != XENCOMM_MAGIC) { - printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); - return -1; - } - /* iterate through the descriptor incrementing addresses */ - while ((bytes > 0) && (i < desc->nr_addrs)) { - unsigned long dest_paddr = desc->address[i]; + while ((bytes > 0) && (i < xencomm_ctxt_nr_addrs(&ctxt))) { + unsigned long *address = xencomm_ctxt_address(&ctxt, i); + unsigned long dest_paddr = *address; unsigned int pgoffset; unsigned int chunksz; unsigned int chunk_skip; @@ -303,34 +427,38 @@ int xencomm_add_offset(void **handle, un chunk_skip = min(chunksz, bytes); if (chunk_skip == chunksz) { /* exhausted this page */ - desc->address[i] = XENCOMM_INVALID; + *address = XENCOMM_INVALID; } else { - desc->address[i] += chunk_skip; + 
*address += chunk_skip; } bytes -= chunk_skip; i++; } + xencomm_ctxt_done(&ctxt); return 0; } int xencomm_handle_is_null(void *handle) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; int i; + int res = 1; if (xencomm_is_inline(handle)) return xencomm_inline_addr(handle) == 0; - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)handle)); - if (desc == NULL) + if (xencomm_ctxt_init(handle, &ctxt)) return 1; - for (i = 0; i < desc->nr_addrs; i++) - if (desc->address[i] != XENCOMM_INVALID) - return 0; - - return 1; -} - + for (i = 0; i < xencomm_ctxt_nr_addrs(&ctxt); i++) { + if (*xencomm_ctxt_address(&ctxt, i) != XENCOMM_INVALID) { + res = 0; + goto out; + } + } + +out: + xencomm_ctxt_done(&ctxt); + return res; +}