diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c 2005-06-04 18:07:26.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c 2005-06-15 14:46:12.000000000 -0400 @@ -109,11 +109,13 @@ retval = copy_ldt(&mm->context, &old_mm->context); up(&old_mm->context.sem); } +#ifndef CONFIG_XEN_SHADOW_MODE if (retval == 0) { spin_lock(&mm_unpinned_lock); list_add(&mm->context.unpinned, &mm_unpinned); spin_unlock(&mm_unpinned_lock); } +#endif return retval; } @@ -134,11 +136,13 @@ kfree(mm->context.ldt); mm->context.size = 0; } +#ifndef CONFIG_XEN_SHADOW_MODE if (!mm->context.pinned) { spin_lock(&mm_unpinned_lock); list_del(&mm->context.unpinned); spin_unlock(&mm_unpinned_lock); } +#endif } static int read_ldt(void __user * ptr, unsigned long bytecount) diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c 2005-06-08 12:43:02.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c 2005-06-15 14:46:12.000000000 -0400 @@ -360,8 +360,10 @@ shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; EXPORT_SYMBOL(HYPERVISOR_shared_info); +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list; EXPORT_SYMBOL(phys_to_machine_mapping); +#endif /* Raw start-of-day parameters from the hypervisor. */ union xen_start_info_union xen_start_info_union; @@ -1158,7 +1160,9 @@ } #endif +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list; +#endif return max_low_pfn; } @@ -1511,6 +1515,7 @@ find_smp_config(); #endif +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE /* Make sure we have a correctly sized P->M table. */ if (max_pfn != xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem_low_pages( @@ -1547,7 +1552,7 @@ } HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - +#endif /* ! CONFIG_XEN_SHADOW_TRANSLATE_MODE */ /* * NOTE: at this point the bootmem allocator is fully available. diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c 2005-06-10 09:23:25.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c 2005-06-15 14:46:43.000000000 -0400 @@ -248,6 +248,7 @@ void xen_contig_memory(unsigned long vstart, unsigned int order) { +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE /* * Ensure multi-page extents are contiguous in machine memory. This code * could be cleaned up some, and the number of hypercalls reduced. 
@@ -293,6 +294,7 @@ flush_tlb_all(); balloon_unlock(flags); +#endif } #ifdef CONFIG_XEN_PHYSDEV_ACCESS diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 2005-06-01 14:06:28.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 2005-06-15 14:46:12.000000000 -0400 @@ -408,6 +408,7 @@ } #endif /* CONFIG_XEN_SHADOW_MODE */ +#ifndef CONFIG_XEN_SHADOW_MODE LIST_HEAD(mm_unpinned); DEFINE_SPINLOCK(mm_unpinned_lock); @@ -454,6 +455,7 @@ } } + void mm_pin(struct mm_struct *mm) { spin_lock(&mm->page_table_lock); @@ -521,3 +523,4 @@ if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) ) mm_unpin(mm); } +#endif /* CONFIG_XEN_SHADOW_MODE */ diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/Kconfig xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/Kconfig --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-05-16 13:05:03.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-15 14:46:12.000000000 -0400 @@ -124,11 +124,21 @@ space. Odds are that you want to say N here. config XEN_SHADOW_MODE - bool "Fake shadow mode" + bool "Build linux to use Xen's shadow mode support (EXPERIMENTAL)" + depends on EXPERIMENTAL default n - help - fakes out a shadow mode kernel + help + Builds a xenolinux that expects Xen's shadow mode support to be + enabled. +config XEN_SHADOW_TRANSLATE_MODE + bool "Build linux to use Xen's shadow translate mode support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on XEN_SHADOW_MODE + default n + help + Builds a xenolinux that expects Xen's shadow translate mode support + to be enabled. config XEN_SCRUB_PAGES bool "Scrub memory before freeing it to Xen" diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c 2005-06-04 18:07:26.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c 2005-06-15 14:46:12.000000000 -0400 @@ -93,7 +93,9 @@ extern void time_suspend(void); extern void time_resume(void); extern unsigned long max_pfn; +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE extern unsigned int *pfn_to_mfn_frame_list; +#endif suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL); if ( suspend_record == NULL ) @@ -139,6 +141,7 @@ memset(empty_zero_page, 0, PAGE_SIZE); +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) { pfn_to_mfn_frame_list[j] = @@ -146,7 +149,7 @@ } HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - +#endif /* ! 
CONFIG_XEN_SHADOW_TRANSLATE_MODE */ gnttab_resume(); irq_resume(); diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c xeno-ft-clean/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c 2005-04-13 05:44:49.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c 2005-06-15 14:46:12.000000000 -0400 @@ -197,12 +197,14 @@ BUG(); pfn = page - mem_map; +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY ) BUG(); /* Update P->M and M->P tables. */ phys_to_machine_mapping[pfn] = mfn_list[i]; xen_machphys_update(mfn_list[i], pfn); +#endif /* Link back into the page tables if it's not a highmem page. */ if ( pfn < max_low_pfn ) @@ -239,7 +241,11 @@ } pfn = page - mem_map; +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE mfn_list[i] = phys_to_machine_mapping[pfn]; +#else + mfn_list[i] = pfn; +#endif if ( !PageHighMem(page) ) { @@ -266,7 +272,9 @@ for ( i = 0; i < debt; i++ ) { pfn = mfn_to_pfn(mfn_list[i]); +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; +#endif balloon_append(pfn_to_page(pfn)); } diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h xeno-ft-clean/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h --- xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h 2005-06-01 14:06:28.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h 2005-06-15 14:46:12.000000000 -0400 @@ -41,9 +41,15 @@ : : "r" (0) ); } +#ifndef CONFIG_XEN_SHADOW_MODE extern void mm_pin(struct mm_struct *mm); extern void mm_unpin(struct mm_struct *mm); void mm_pin_all(void); +#else +#define mm_pin(_mm) ((void)0) +#define mm_unpin(_mm) ((void)0) +#define mm_pin_all() ((void)0) +#endif static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h xeno-ft-clean/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h --- xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h 2005-06-01 14:06:28.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h 2005-06-15 14:46:12.000000000 -0400 @@ -16,6 +16,7 @@ struct list_head unpinned; } mm_context_t; +#ifndef CONFIG_XEN_SHADOW_MODE extern struct list_head mm_unpinned; extern spinlock_t mm_unpinned_lock; @@ -23,4 +24,6 @@ extern void _arch_exit_mmap(struct mm_struct *mm); #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) +#endif /* CONFIG_XEN_SHADOW_MODE */ + #endif diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h xeno-ft-clean/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h --- xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h 2005-05-31 08:28:29.000000000 -0400 +++ xeno-ft-clean/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h 2005-06-15 14:46:12.000000000 -0400 @@ -58,9 +58,14 @@ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#ifdef CONFIG_XEN_SHADOW_MODE +# define pfn_to_mfn(_pfn) ((unsigned long)(_pfn)) +# define mfn_to_pfn(_mfn) ((unsigned long)(_mfn)) +#else extern unsigned int *phys_to_machine_mapping; 
-#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)])) -#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)])) +# define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)])) +# define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)])) +#endif static inline unsigned long phys_to_machine(unsigned long phys) { unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/Makefile xeno-ft-clean/tools/libxc/Makefile --- xen-unstable.latest/tools/libxc/Makefile 2005-06-06 08:50:14.000000000 -0400 +++ xeno-ft-clean/tools/libxc/Makefile 2005-06-15 14:46:13.000000000 -0400 @@ -22,6 +22,7 @@ SRCS += xc_load_bin.c SRCS += xc_load_elf.c SRCS += xc_linux_build.c +SRCS += xc_linuxtranslate_build.c SRCS += xc_plan9_build.c SRCS += xc_linux_restore.c SRCS += xc_linux_save.c diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/xc.h xeno-ft-clean/tools/libxc/xc.h --- xen-unstable.latest/tools/libxc/xc.h 2005-06-09 13:29:55.000000000 -0400 +++ xeno-ft-clean/tools/libxc/xc.h 2005-06-15 14:46:13.000000000 -0400 @@ -256,6 +256,17 @@ unsigned int store_evtchn, unsigned long *store_mfn); +int xc_linuxtranslate_build(int xc_handle, + u32 domid, + const char *image_name, + const char *ramdisk_name, + const char *cmdline, + unsigned int control_evtchn, + unsigned long flags, + unsigned int vcpus, + unsigned int store_evtchn, + unsigned long *store_mfn); + int xc_plan9_build (int xc_handle, u32 domid, diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/xc_linuxtranslate_build.c xeno-ft-clean/tools/libxc/xc_linuxtranslate_build.c --- xen-unstable.latest/tools/libxc/xc_linuxtranslate_build.c 1969-12-31 19:00:00.000000000 -0500 +++ xeno-ft-clean/tools/libxc/xc_linuxtranslate_build.c 2005-06-15 14:46:13.000000000 -0400 @@ -0,0 +1,631 @@ +/****************************************************************************** + * xc_linuxtranslate_build.c + * Derived from xc_linux_build.c + */ + +#include "xc_private.h" +#include +//#include "../../xen/include/public/grant_table.h" +#define ELFSIZE 32 +#include "xc_elf.h" +#include +#include + +#if !defined(__i386__) +# error Linux translate only available in i386 mode! +#endif + +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) + +#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) +#define round_pgdown(_p) ((_p)&PAGE_MASK) + +// **FIXME** Kludge to know the number of grant table frames. Needs something +// more consistent, but the information is currently not exported? +#define ORDER_GRANT_FRAMES 2 +#define NR_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES) +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) + +// **FIXME** Another shameless cut and paste job...
+static int +do_gnttab_op( int xc_handle, + unsigned long cmd, + gnttab_op_t *op, + unsigned long count ) +{ + int ret = -1; + privcmd_hypercall_t hypercall; + + hypercall.op = __HYPERVISOR_grant_table_op; + hypercall.arg[0] = cmd; + hypercall.arg[1] = (unsigned long)(op); + hypercall.arg[2] = count; + + if ( mlock(op, sizeof(*op)) != 0 ) + { + PERROR("Could not lock memory for Xen hypercall"); + goto out1; + } + + if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 ) + { + printf("do_gnttab_op: hypercall returned error %d\n", ret); + goto out2; + } + + out2: (void)munlock(op, sizeof(*op)); + out1: return ret; +} + +static int probeimageformat(char *image, + unsigned long image_size, + struct load_funcs *load_funcs) +{ + if ( probe_elf(image, image_size, load_funcs) && + probe_bin(image, image_size, load_funcs) ) + { + ERROR( "Unrecognized image format" ); + return -EINVAL; + } + + return 0; +} + +static int setup_guest(int xc_handle, + u32 dom, + char *image, unsigned long image_size, + gzFile initrd_gfd, unsigned long initrd_len, + unsigned long nr_pages, + unsigned long *pvsi, unsigned long *pvke, + unsigned long *pvss, vcpu_guest_context_t *ctxt, + const char *cmdline, + unsigned long shared_info_frame, + unsigned int control_evtchn, + unsigned long flags, + unsigned int vcpus, + unsigned int store_evtchn, unsigned long *store_mfn) +{ + l1_pgentry_t *vl1tab=NULL, *vl1e=NULL; + l2_pgentry_t *vl2tab=NULL, *vl2e=NULL; + unsigned long *page_array = NULL; + unsigned long ml2tab, pl2tab; + unsigned long ml1tab, pl1tab; + unsigned long mfn, pfn; + unsigned long count, i; + start_info_t *start_info; + shared_info_t *shared_info; + mmu_t *mmu = NULL; + int rc; + + unsigned long nr_pt_pages; + unsigned long ppt_alloc; + unsigned long *physmap, *physmap_e, physmap_pfn; + + struct load_funcs load_funcs; + struct domain_setup_info dsi; + unsigned long vinitrd_start; + unsigned long vinitrd_end; + unsigned long vphysmap_start; + unsigned long vphysmap_end; + unsigned long vstartinfo_start; + unsigned long vstartinfo_end; + unsigned long vstoreinfo_start; + unsigned long vstoreinfo_end; + unsigned long vstack_start; + unsigned long vstack_end; + unsigned long vpt_start; + unsigned long vpt_end; + unsigned long v_end; + unsigned long pshared_info; + unsigned long pshared_grant; // pfn of the shared grant + + rc = probeimageformat(image, image_size, &load_funcs); + if ( rc != 0 ) + goto error_out; + + memset(&dsi, 0, sizeof(struct domain_setup_info)); + + rc = (load_funcs.parseimage)(image, image_size, &dsi); + if ( rc != 0 ) + goto error_out; + + if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) + { + PERROR("Guest OS must load to a page boundary.\n"); + goto error_out; + } + + /* Untranslated guests are given the mfn of the shared_info frame, and + * they are expected to map it. We need to allocate a pfn to map the + * mfn into; so we steal a page from the end. We don't need to put + * it in a page table, we just need to have a p2m and m2p for it... */ + pshared_info = nr_pages; + /* The same for the grant table structure */ + pshared_grant = nr_pages+1; + + /* + * Why do we need this? The number of page-table frames depends on the + * size of the bootstrap address space. But the size of the address space + * depends on the number of page-table frames (since each one is mapped + * read-only). We have a pair of simultaneous equations in two unknowns, + * which we solve by exhaustive search. 
+ */ + vinitrd_start = round_pgup(dsi.v_end); + vinitrd_end = vinitrd_start + initrd_len; + vphysmap_start = round_pgup(vinitrd_end); + vphysmap_end = vphysmap_start + ((nr_pages+1+NR_GRANT_FRAMES) * sizeof(unsigned long)); + vpt_start = round_pgup(vphysmap_end); + for ( nr_pt_pages = 2; ; nr_pt_pages++ ) + { + vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); + vstartinfo_start = vpt_end; + vstartinfo_end = vstartinfo_start + PAGE_SIZE; + /* Place store shared page after startinfo */ + vstoreinfo_start = vstartinfo_end; + vstoreinfo_end = vstartinfo_end + PAGE_SIZE; + vstack_start = vstoreinfo_end; + vstack_end = vstack_start + PAGE_SIZE; + v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); + if ( (v_end - vstack_end) < (512UL << 10) ) + v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */ + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> + L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) + break; + } + +#define _p(a) ((void *) (a)) + + printf("VIRTUAL MEMORY ARRANGEMENT:\n" + " Loaded kernel: %p->%p\n" + " Init. ramdisk: %p->%p\n" + " Phys-Mach map: %p->%p\n" + " Page tables: %p->%p\n" + " Start info: %p->%p\n" + " Store page: %p->%p\n" + " Boot stack: %p->%p\n" + " TOTAL: %p->%p\n", + _p(dsi.v_kernstart), _p(dsi.v_kernend), + _p(vinitrd_start), _p(vinitrd_end), + _p(vphysmap_start), _p(vphysmap_end), + _p(vpt_start), _p(vpt_end), + _p(vstartinfo_start), _p(vstartinfo_end), + _p(vstoreinfo_start), _p(vstoreinfo_end), + _p(vstack_start), _p(vstack_end), + _p(dsi.v_start), _p(v_end)); + printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); + + if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) + { + printf("Initial guest OS requires too much space\n" + "(%luMB is greater than %luMB limit)\n", + (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20); + goto error_out; + } + + if ( (page_array = malloc((nr_pages+1+NR_GRANT_FRAMES) * sizeof(unsigned long))) == NULL ) + { + PERROR("Could not allocate memory"); + goto error_out; + } + + + if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages ) + { + PERROR("Could not get the page frame list"); + goto error_out; + } + + (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array, &dsi); + + /* Load the initial ramdisk image. */ + if ( initrd_len != 0 ) + { + for ( i = (vinitrd_start - dsi.v_start); + i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE ) + { + char page[PAGE_SIZE]; + if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 ) + { + PERROR("Error reading initrd image, could not"); + goto error_out; + } + xc_copy_to_domain_page(xc_handle, dom, + page_array[i>>PAGE_SHIFT], page); + } + } + + if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL ) + goto error_out; + + /* First allocate page for page dir. */ + ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT; + pl2tab = ppt_alloc++; + ml2tab = page_array[pl2tab]; + ctxt->pt_base = ml2tab << PAGE_SHIFT; + + /* Initialise the page tables.
*/ + if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, + ml2tab)) == NULL ) + goto error_out; + memset(vl2tab, 0, PAGE_SIZE); + vl2e = &vl2tab[l2_table_offset(dsi.v_start)]; + fprintf(stderr, " v_start %lx l2_table_offset %lx\n", + dsi.v_start, + l2_table_offset(dsi.v_start)); + for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) + { + if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 ) + { + pl1tab = ppt_alloc++; + ml1tab = page_array[pl1tab]; + fprintf(stderr, " allocating new l1 page; pfn %lx, mfn %lx\n", + pl1tab, ml1tab); + if ( vl1tab != NULL ) + munmap(vl1tab, PAGE_SIZE); + if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, + ml1tab)) == NULL ) + { + munmap(vl2tab, PAGE_SIZE); + goto error_out; + } + memset(vl1tab, 0, PAGE_SIZE); + vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<machine and machine->phys table entries. */ + physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT; + physmap = physmap_e = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[physmap_pfn++]); + for ( count = 0; count < nr_pages; count++ ) + { + pfn = count; + mfn = page_array[count]; + if ( add_mmu_update(xc_handle, mmu, + (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, + pfn) ) + { + munmap(physmap, PAGE_SIZE); + goto error_out; + } + *physmap_e++ = pfn; + if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 ) + { + munmap(physmap, PAGE_SIZE); + physmap = physmap_e = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[physmap_pfn++]); + } + } + + if(xc_shadow_control(xc_handle, + dom, + DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE, + NULL, + 0, + NULL)<0) { + PERROR("Could not enable full translate mode!"); + goto error_out; + } + + // Map in shared info page + mfn = shared_info_frame; + pfn = pshared_info; + printf("SHARE: Mapping mfn=%8lx <-> pfn=%8lx\n",mfn,pfn); + if( add_mmu_update(xc_handle, mmu, + (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, + pfn)) { + munmap(physmap, PAGE_SIZE); + goto error_out; + } + *physmap_e++ = pfn; + if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 ) { + munmap(physmap, PAGE_SIZE); + physmap = physmap_e = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + physmap_pfn++); + //page_array[physmap_pfn++]); + } + fprintf(stderr, "SHARE: setup_guest: finish_mmu_updates\n"); + /* Send the page update requests down to the hypervisor. */ + if ( finish_mmu_updates(xc_handle, mmu) ) + goto error_out; + + + // Map in grant tables + gnttab_op_t op; + + op.u.setup_table.dom = (domid_t)dom; + rc = do_gnttab_op(xc_handle, GNTTABOP_get_shared_mfn, &op, 1); + for (count=0; count < NR_GRANT_FRAMES ;count++) { + mfn = op.u.shared_mfn.mfn+count; + pfn = pshared_grant+count; + printf("GRANT: Mapping mfn=%8lx <-> pfn=%8lx\n",mfn,pfn); + if( add_mmu_update(xc_handle, mmu, + (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, + pfn)) { + munmap(physmap, PAGE_SIZE); + goto error_out; + } + *physmap_e++ = pfn; + if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 ) { + munmap(physmap, PAGE_SIZE); + physmap = physmap_e = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + //physmap_pfn++); + page_array[physmap_pfn++]); + } + } + munmap(physmap,PAGE_SIZE); + fprintf(stderr, "GRANT: setup_guest: finish_mmu_updates\n"); + /* Send the page update requests down to the hypervisor. 
*/ + if ( finish_mmu_updates(xc_handle, mmu) ) + goto error_out; + + + /* + * Pin down l2tab addr as page dir page - causes hypervisor to provide + * correct protection for the page + */ + if ( pin_table(xc_handle, + MMUEXT_PIN_L2_TABLE, + ml2tab, + dom) ) + goto error_out; + + start_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + (vstartinfo_start-dsi.v_start)>>PAGE_SHIFT); + //page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]); + + memset(start_info, 0, sizeof(*start_info)); + start_info->nr_pages = nr_pages; + start_info->shared_info = pshared_info << PAGE_SHIFT; + start_info->flags = flags; + start_info->pt_base = vpt_start; + start_info->nr_pt_frames = nr_pt_pages; + start_info->mfn_list = vphysmap_start; + start_info->domain_controller_evtchn = control_evtchn; + start_info->store_page = vstoreinfo_start; + start_info->store_evtchn = store_evtchn; + + if ( initrd_len != 0 ) + { + start_info->mod_start = vinitrd_start; + start_info->mod_len = initrd_len; + } + strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE); + start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0'; + munmap(start_info, PAGE_SIZE); + + /* Tell our caller where we told domain store page was. */ + pfn=((vstoreinfo_start-dsi.v_start)>>PAGE_SHIFT); + mfn=page_array[pfn]; + /* XXX -- for now, we give the gpfn; the hypervisor should translate */ + *store_mfn = pfn; + + + /* shared_info page starts its life empty. */ + shared_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, pshared_info);//shared_info_frame); + memset(shared_info, 0, sizeof(shared_info_t)); + /* Mask all upcalls... */ + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + + shared_info->n_vcpu = vcpus; + printf(" VCPUS: %d\n", shared_info->n_vcpu); + + munmap(shared_info, PAGE_SIZE); + + fprintf(stderr, "setup_guest: finish_mmu_updates\n"); + /* Send the page update requests down to the hypervisor. 
*/ + if ( finish_mmu_updates(xc_handle, mmu) ) + goto error_out; + + free(mmu); + free(page_array); + + *pvsi = vstartinfo_start; + *pvss = vstack_start; + *pvke = dsi.v_kernentry; + + fprintf(stderr, "setup_guest: done!"); + return 0; + + error_out: + if ( mmu != NULL ) + free(mmu); + if ( page_array != NULL ) + free(page_array); + return -1; +} + +int xc_linuxtranslate_build(int xc_handle, + u32 domid, + const char *image_name, + const char *ramdisk_name, + const char *cmdline, + unsigned int control_evtchn, + unsigned long flags, + unsigned int vcpus, + unsigned int store_evtchn, + unsigned long *store_mfn) +{ + dom0_op_t launch_op, op; + int initrd_fd = -1; + gzFile initrd_gfd = NULL; + int rc, i; + vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt; + unsigned long nr_pages; + char *image = NULL; + unsigned long image_size, initrd_size=0; + unsigned long vstartinfo_start, vkern_entry, vstack_start; + + if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 ) + { + PERROR("Could not find total pages for domain"); + goto error_out; + } + + if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL ) + goto error_out; + + if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) ) + { + if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 ) + { + PERROR("Could not open the initial ramdisk image"); + goto error_out; + } + + initrd_size = xc_get_filesz(initrd_fd); + + if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL ) + { + PERROR("Could not allocate decompression state for initrd"); + goto error_out; + } + } + + if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ) + { + PERROR("Unable to mlock ctxt"); + return 1; + } + + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + if ( (do_dom0_op(xc_handle, &op) < 0) || + ((u16)op.u.getdomaininfo.domain != domid) ) + { + PERROR("Could not get info on domain"); + goto error_out; + } + + if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ) + { + PERROR("Could not get vcpu context"); + goto error_out; + } + + if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || + (ctxt->pt_base != 0) ) + { + ERROR("Domain is already constructed"); + goto error_out; + } + + if ( setup_guest(xc_handle, domid, image, image_size, + initrd_gfd, initrd_size, nr_pages, + &vstartinfo_start, &vkern_entry, + &vstack_start, + ctxt, cmdline, + op.u.getdomaininfo.shared_info_frame, + control_evtchn, flags, vcpus, + store_evtchn, store_mfn) < 0 ) + { + ERROR("Error constructing guest OS"); + goto error_out; + } + + if ( initrd_fd >= 0 ) + close(initrd_fd); + if ( initrd_gfd ) + gzclose(initrd_gfd); + if ( image != NULL ) + free(image); + + ctxt->flags = 0; + + /* + * Initial register values: + * DS,ES,FS,GS = FLAT_KERNEL_DS + * CS:EIP = FLAT_KERNEL_CS:start_pc + * SS:ESP = FLAT_KERNEL_DS:start_stack + * ESI = start_info + * [EAX,EBX,ECX,EDX,EDI,EBP are zero] + * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1) + */ + ctxt->user_regs.ds = FLAT_KERNEL_DS; + ctxt->user_regs.es = FLAT_KERNEL_DS; + ctxt->user_regs.fs = FLAT_KERNEL_DS; + ctxt->user_regs.gs = FLAT_KERNEL_DS; + ctxt->user_regs.ss = FLAT_KERNEL_DS; + ctxt->user_regs.cs = FLAT_KERNEL_CS; + ctxt->user_regs.eip = vkern_entry; + ctxt->user_regs.esp = vstack_start + PAGE_SIZE; + ctxt->user_regs.esi = vstartinfo_start; + ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */ + + /* FPU is set up to default initial state. */ + memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); + + /* Virtual IDT is empty at start-of-day. 
*/ + for ( i = 0; i < 256; i++ ) + { + ctxt->trap_ctxt[i].vector = i; + ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS; + } + + /* No LDT. */ + ctxt->ldt_ents = 0; + + /* Use the default Xen-provided GDT. */ + ctxt->gdt_ents = 0; + + /* Ring 1 stack is the initial stack. */ + ctxt->kernel_ss = FLAT_KERNEL_DS; + ctxt->kernel_sp = vstack_start + PAGE_SIZE; + + /* No debugging. */ + memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg)); + + /* No callback handlers. */ +#if defined(__i386__) + ctxt->event_callback_cs = FLAT_KERNEL_CS; + ctxt->event_callback_eip = 0; + ctxt->failsafe_callback_cs = FLAT_KERNEL_CS; + ctxt->failsafe_callback_eip = 0; +#elif defined(__x86_64__) + ctxt->event_callback_eip = 0; + ctxt->failsafe_callback_eip = 0; + ctxt->syscall_callback_eip = 0; +#endif + + memset( &launch_op, 0, sizeof(launch_op) ); + + launch_op.u.setdomaininfo.domain = (domid_t)domid; + launch_op.u.setdomaininfo.vcpu = 0; + launch_op.u.setdomaininfo.ctxt = ctxt; + + launch_op.cmd = DOM0_SETDOMAININFO; + rc = do_dom0_op(xc_handle, &launch_op); + + return rc; + + error_out: + if ( initrd_gfd != NULL ) + gzclose(initrd_gfd); + else if ( initrd_fd >= 0 ) + close(initrd_fd); + if ( image != NULL ) + free(image); + + return -1; +} diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/python/xen/lowlevel/xc/xc.c xeno-ft-clean/tools/python/xen/lowlevel/xc/xc.c --- xen-unstable.latest/tools/python/xen/lowlevel/xc/xc.c 2005-06-09 13:29:55.000000000 -0400 +++ xeno-ft-clean/tools/python/xen/lowlevel/xc/xc.c 2005-06-15 14:46:13.000000000 -0400 @@ -284,6 +284,36 @@ return Py_BuildValue("{s:i}", "store_mfn", store_mfn); } +static PyObject *pyxc_linuxtranslate_build(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + + u32 dom; + char *image, *ramdisk = NULL, *cmdline = ""; + int flags = 0, vcpus = 1; + int control_evtchn, store_evtchn; + unsigned long store_mfn = 0; + + static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn", + "image", "ramdisk", "cmdline", "flags", + "vcpus", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssii", kwd_list, + &dom, &control_evtchn, &store_evtchn, + &image, &ramdisk, &cmdline, &flags, + &vcpus) ) + return NULL; + + if ( xc_linuxtranslate_build(xc->xc_handle, dom, image, + ramdisk, cmdline, control_evtchn, flags, vcpus, + store_evtchn, &store_mfn) != 0 ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:i}", "store_mfn", store_mfn); +} + static PyObject *pyxc_plan9_build(PyObject *self, PyObject *args, PyObject *kwds) @@ -929,6 +959,17 @@ " vcpus [int, 1]: Number of Virtual CPUS in domain.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "linuxtranslate_build", + (PyCFunction)pyxc_linuxtranslate_build, + METH_VARARGS | METH_KEYWORDS, "\n" + "Build a new shadow-translated Linux guest OS.\n" + " dom [int]: Identifier of domain to build into.\n" + " image [str]: Name of kernel image file. 
May be gzipped.\n" + " ramdisk [str, n/a]: Name of ramdisk file, if any.\n" + " cmdline [str, n/a]: Kernel parameters, if any.\n\n" + " vcpus [int, 1]: Number of Virtual CPUS in domain.\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + { "plan9_build", (PyCFunction)pyxc_plan9_build, METH_VARARGS | METH_KEYWORDS, "\n" diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/python/xen/xend/image.py xeno-ft-clean/tools/python/xen/xend/image.py --- xen-unstable.latest/tools/python/xen/xend/image.py 2005-06-09 13:29:55.000000000 -0400 +++ xeno-ft-clean/tools/python/xen/xend/image.py 2005-06-15 14:46:13.000000000 -0400 @@ -219,6 +219,28 @@ return 0 return ret +class LinuxtranslateImageHandler(ImageHandler): + + ostype = "linuxtranslate" + + def buildDomain(self): + if self.vm.store_channel: + store_evtchn = self.vm.store_channel.port2 + else: + store_evtchn = 0 + ret = xc.linuxtranslate_build(dom = self.vm.getDomain(), + image = self.kernel, + control_evtchn = self.vm.channel.getRemotePort(), + store_evtchn = store_evtchn, + cmdline = self.cmdline, + ramdisk = self.ramdisk, + flags = self.flags, + vcpus = self.vm.vcpus) + if isinstance(ret, dict): + self.vm.store_mfn = ret.get('store_mfn') + return 0 + return ret + class Plan9ImageHandler(ImageHandler): ostype = "plan9" diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/python/xen/xend/XendDomainInfo.py xeno-ft-clean/tools/python/xen/xend/XendDomainInfo.py --- xen-unstable.latest/tools/python/xen/xend/XendDomainInfo.py 2005-06-09 13:29:55.000000000 -0400 +++ xeno-ft-clean/tools/python/xen/xend/XendDomainInfo.py 2005-06-15 14:46:13.000000000 -0400 @@ -1001,10 +1001,12 @@ addImageHandlerClass, \ ImageHandler, \ LinuxImageHandler, \ + LinuxtranslateImageHandler, \ Plan9ImageHandler, \ VmxImageHandler addImageHandlerClass(LinuxImageHandler) +addImageHandlerClass(LinuxtranslateImageHandler) addImageHandlerClass(Plan9ImageHandler) addImageHandlerClass(VmxImageHandler) diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tree_push.sh xeno-ft-clean/tree_push.sh --- xen-unstable.latest/tree_push.sh 2005-06-15 13:56:32.000000000 -0400 +++ xeno-ft-clean/tree_push.sh 1969-12-31 19:00:00.000000000 -0500 @@ -1,2 +0,0 @@ -#!/bin/sh -FROM=. TO=root@covirt6:/home/xeno-unstable ./exclude_rsync.sh diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/arch/x86/mm.c xeno-ft-clean/xen/arch/x86/mm.c --- xen-unstable.latest/xen/arch/x86/mm.c 2005-06-13 08:30:54.000000000 -0400 +++ xeno-ft-clean/xen/arch/x86/mm.c 2005-06-15 14:48:47.000000000 -0400 @@ -1703,6 +1703,8 @@ break; case MMUEXT_NEW_BASEPTR: + if ( shadow_mode_translate(d) ) + op.mfn = __gpfn_to_mfn(d, op.mfn); okay = new_guest_cr3(op.mfn); percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; break; @@ -1815,6 +1817,9 @@ okay = 0; break; } + + if ( shadow_mode_translate(d) ) + BUG(); // not supported yet, need to think about this. 
e = percpu_info[cpu].foreign; if ( unlikely(e == NULL) ) @@ -2031,9 +2036,35 @@ page, type_info & (PGT_type_mask|PGT_va_mask))) ) { l1_pgentry_t l1e; + unsigned long val; + if (unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d))) { + unsigned long gpfn2=l1e_get_pfn(l1e_from_intpte(req.val)); + unsigned long mode2=req.val & (PAGE_SIZE-1); + unsigned long mfn2 = gpfn_to_mfn_foreign(FOREIGNDOM,gpfn2); + unsigned long mfn2p = __mfn_to_gpfn(FOREIGNDOM,gpfn2); + val = (mfn2< mfn(0x%8lx->0x%8lx->0x%8lx), fdom(%d)(%d) -> dom(%d)(%d)\n", + mfn,mfn2p,gpfn2,mfn2, + FOREIGNDOM->domain_id,shadow_mode_translate(FOREIGNDOM), + d->domain_id,shadow_mode_translate(d)); + } else if ((page_get_owner(&frame_table[mfn2])) && + foreigndom != page_get_owner(&frame_table[mfn2])->domain_id) { + val=req.val; + printk("Owner mismatch!!\n"); + printk("PGT_l1_page_table: pte(0x%8lx) -> mfn(0x%8lx->0x%8lx->0x%8lx), fdom(%d)(%d) -> dom(%d)(%d)\n", + mfn,mfn2p,gpfn2,mfn2, + FOREIGNDOM->domain_id,shadow_mode_translate(FOREIGNDOM), + d->domain_id,shadow_mode_translate(d)); + } + } else { + val = req.val; + } /* FIXME: doesn't work with PAE */ - l1e = l1e_from_intpte(req.val); + l1e = l1e_from_intpte(val); okay = mod_l1_entry(va, l1e); if ( okay && unlikely(shadow_mode_enabled(d)) ) shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache); @@ -2102,9 +2133,22 @@ shadow_mark_mfn_out_of_sync(v, gpfn, mfn); } } - - *(unsigned long *)va = req.val; - okay = 1; + if (unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d))) { + unsigned long gpfn2=req.val>>PAGE_SHIFT; + unsigned long mode2=req.val & (PAGE_SIZE-1); + unsigned long mfn2 =gpfn_to_mfn_foreign(FOREIGNDOM,gpfn2); + unsigned long val; + val = (mfn2< mfn(0x%8lx), fdom(%d)(%d) -> dom(%d)(%d)\n", + mfn,mfn2, + FOREIGNDOM->domain_id,shadow_mode_translate(FOREIGNDOM), + d->domain_id,shadow_mode_translate(d)); + } else { + *(unsigned long *)va = req.val; + okay = 1; + } if ( shadow_mode_enabled(d) ) shadow_unlock(d); @@ -2128,8 +2172,6 @@ if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) ) { shadow_lock(FOREIGNDOM); - printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx for dom%d\n", - d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id); set_machinetophys(mfn, gpfn); set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache); okay = 1; @@ -2440,6 +2482,7 @@ { int nr_pages = (entries + 511) / 512; unsigned long frames[16]; + struct domain* d = current->domain; long ret; if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) ) @@ -2447,6 +2490,18 @@ LOCK_BIGLOCK(current->domain); + if(shadow_mode_translate(d)) { + int i; + unsigned long mfn; + shadow_lock(d); + for(i=0; iarch.shadow_ht[i]; x != NULL; x = x->next ) - if ( MFN_PINNED(x->smfn) ) - count++; + + for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) { + /* Skip entries that are writable_pred) */ + switch(x->gpfn_and_flags & PGT_type_mask){ + case PGT_l1_shadow: + case PGT_l2_shadow: + case PGT_l3_shadow: + case PGT_l4_shadow: + case PGT_hl2_shadow: + if ( MFN_PINNED(x->smfn) ) + count++; + break; + case PGT_snapshot: + case PGT_writable_pred: + break; + default: + BUG(); + + } + } + if ( !count ) continue; mfn_list = xmalloc_array(unsigned long, count); count = 0; - for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) - if ( MFN_PINNED(x->smfn) ) - mfn_list[count++] = x->smfn; + for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) { + /* Skip entries that are writable_pred) */ + switch(x->gpfn_and_flags & PGT_type_mask){ + case PGT_l1_shadow: + case 
PGT_l2_shadow: + case PGT_l3_shadow: + case PGT_l4_shadow: + case PGT_hl2_shadow: + if ( MFN_PINNED(x->smfn) ) + mfn_list[count++] = x->smfn; + break; + case PGT_snapshot: + case PGT_writable_pred: + break; + default: + BUG(); + + } + } while ( count ) { @@ -773,6 +807,7 @@ unsigned long va = pfn << PAGE_SHIFT; ASSERT(tabpfn != 0); + ASSERT(shadow_lock_is_acquired(d)); l2 = map_domain_page_with_cache(tabpfn, l2cache); l2e = l2[l2_table_offset(va)]; @@ -1366,7 +1401,7 @@ case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE: free_shadow_pages(d); rc = __shadow_mode_enable( - d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate); + d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate|SHM_write_all); break; default: @@ -1676,6 +1711,7 @@ shadow_lock(d); __shadow_sync_va(v, va); + SH_VVLOG("shadow_invlpg va=%lx", va); // XXX mafetter: will need to think about 4MB pages... @@ -2052,7 +2088,11 @@ while ( count ) { count--; + /* Delete_shadow_status does a shadow_audit(), so we need to + * keep accurate count of writable_pte_predictions to keep it + * happy. */ delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred); + perfc_decr(writable_pte_predictions); } xfree(gpfn_list); @@ -2581,6 +2621,7 @@ * STEP 2. Check the guest PTE. */ __guest_get_l2e(v, va, &gpde); + SH_VVLOG("shadow_fault: gpde=%" PRIpte, l2e_get_intpte(gpde)); if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) ) { SH_VVLOG("shadow_fault - EXIT: L1 not present"); @@ -2608,19 +2649,11 @@ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) ) { - if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) ) - { - allow_writes = 1; - l1e_add_flags(gpte, _PAGE_RW); - } - else - { /* Write fault on a read-only mapping. */ SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", l1e_get_intpte(gpte)); perfc_incrc(shadow_fault_bail_ro_mapping); goto fail; - } } if ( !l1pte_write_fault(v, &gpte, &spte, va) ) @@ -2765,8 +2798,9 @@ // just everything involved in getting to this L1 (i.e. we need // linear_pg_table[l1_linear_offset(va)] to be in sync)... // + SH_VVLOG("shadow_do_update_va_maping va=%lx, val=%" PRIpte ")", + va, l1e_get_intpte(val)); __shadow_sync_va(v, va); - l1pte_propagate_from_guest(d, val, &spte); shadow_set_l1e(va, spte, 0); diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/common/dom0_ops.c xeno-ft-clean/xen/common/dom0_ops.c --- xen-unstable.latest/xen/common/dom0_ops.c 2005-06-07 09:41:55.000000000 -0400 +++ xeno-ft-clean/xen/common/dom0_ops.c 2005-06-15 14:51:17.000000000 -0400 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -359,8 +360,15 @@ op->u.getdomaininfo.tot_pages = d->tot_pages; op->u.getdomaininfo.max_pages = d->max_pages; - op->u.getdomaininfo.shared_info_frame = - __pa(d->shared_info) >> PAGE_SHIFT; + if (shadow_mode_translate(d)) { + /* If we are in translate mode, then return addresses in the + * pfn space. 
*/ + op->u.getdomaininfo.shared_info_frame = + __mfn_to_gpfn(d,__pa(d->shared_info) >> PAGE_SHIFT); + } else { + op->u.getdomaininfo.shared_info_frame = + __pa(d->shared_info) >> PAGE_SHIFT; + } if ( copy_to_user(u_dom0_op, op, sizeof(*op)) ) ret = -EINVAL; diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/common/event_channel.c xeno-ft-clean/xen/common/event_channel.c --- xen-unstable.latest/xen/common/event_channel.c 2005-06-06 08:50:14.000000000 -0400 +++ xeno-ft-clean/xen/common/event_channel.c 2005-06-15 14:46:14.000000000 -0400 @@ -65,6 +65,10 @@ struct domain *d = current->domain; int port = alloc->port; long rc = 0; + int dom = alloc->dom; + + if( dom == DOMID_SELF ) + dom = current->domain->domain_id; spin_lock(&d->evtchn_lock); @@ -83,7 +87,7 @@ { case ECS_FREE: chn->state = ECS_UNBOUND; - chn->u.unbound.remote_domid = alloc->dom; + chn->u.unbound.remote_domid = dom; break; case ECS_UNBOUND: @@ -102,6 +106,22 @@ return rc; } +/* Allocate a port in domain d, and return its number. */ +int evtchn_alloc_port(struct domain *d) +{ + struct evtchn *chn; + int r; + + spin_lock(&d->evtchn_lock); + r = get_free_port(d); + if (r >= 0) { + chn = evtchn_from_port(d, r); + chn->state = ECS_UNBOUND; + chn->u.unbound.remote_domid = d->domain_id; + } + spin_unlock(&d->evtchn_lock); + return r; +} static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) { diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/common/grant_table.c xeno-ft-clean/xen/common/grant_table.c --- xen-unstable.latest/xen/common/grant_table.c 2005-06-09 13:29:56.000000000 -0400 +++ xeno-ft-clean/xen/common/grant_table.c 2005-06-15 14:54:59.000000000 -0400 @@ -694,16 +694,22 @@ { ASSERT(d->grant_table != NULL); (void)put_user(GNTST_okay, &uop->status); - for ( i = 0; i < op.nr_frames; i++ ) - (void)put_user( - (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, - &uop->frame_list[i]); + for ( i = 0; i < op.nr_frames; i++ ) { + if (shadow_mode_translate(d)) { + (void)put_user(__mfn_to_gpfn(d, (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i), + &uop->frame_list[i]); + } else { + (void)put_user((virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, + &uop->frame_list[i]); + } + } } put_domain(d); return 0; } + #if GRANT_DEBUG static int gnttab_dump_table(gnttab_dump_table_t *uop) @@ -757,7 +763,8 @@ { DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) " "dom:(%hu) frame:(%lx)\n", - op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame); + op.dom, i, sha_copy.flags, sha_copy.domid, + (unsigned long)sha_copy.frame); } } @@ -797,6 +804,46 @@ } #endif +static long +gnttab_get_shared_mfn( + gnttab_get_shared_mfn_t *uop, unsigned int count) +{ + gnttab_get_shared_mfn_t op; + struct domain *d; + + if ( count != 1 ) + return -EINVAL; + + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + { + DPRINTK("Fault while reading gnttab_setup_table_t.\n"); + return -EFAULT; + } + + if ( op.dom == DOMID_SELF ) + { + op.dom = current->domain->domain_id; + } + else if ( unlikely(!IS_PRIV(current->domain)) ) + { + (void)put_user(GNTST_permission_denied, &uop->status); + return 0; + } + + if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) + { + DPRINTK("Bad domid %d.\n", op.dom); + (void)put_user(GNTST_bad_domain, &uop->status); + return 0; + } + + ASSERT(d->grant_table != NULL); + (void)put_user(virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT, &uop->mfn); + + put_domain(d); + return 0; +} + long do_grant_table_op( unsigned int cmd, void *uop, unsigned int count) @@ 
-808,6 +855,8 @@ LOCK_BIGLOCK(current->domain); + DPRINTK("do_grant_table_op(cmd=%u,uop=%p,count=%u)\n",cmd,uop,count); + rc = -EFAULT; switch ( cmd ) { @@ -831,6 +880,13 @@ rc = gnttab_dump_table((gnttab_dump_table_t *)uop); break; #endif + case GNTTABOP_get_shared_mfn: + if (IS_PRIV(current->domain)) { + rc = gnttab_get_shared_mfn((gnttab_get_shared_mfn_t *)uop, count); + } else { + rc = -ENOSYS; + } + break; default: rc = -ENOSYS; break; @@ -869,7 +925,7 @@ #if GRANT_DEBUG_VERBOSE if ( ld->domain_id != 0 ) { - DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n", + DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n", rd->domain_id, ld->domain_id, frame, readonly); } #endif @@ -1228,6 +1284,7 @@ void) { /* Nothing. */ + DPRINTK("Grant table init\n"); } /* diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/include/asm-x86/shadow.h xeno-ft-clean/xen/include/asm-x86/shadow.h --- xen-unstable.latest/xen/include/asm-x86/shadow.h 2005-06-07 09:41:55.000000000 -0400 +++ xeno-ft-clean/xen/include/asm-x86/shadow.h 2005-06-15 14:46:14.000000000 -0400 @@ -1036,6 +1036,21 @@ { int live = 0, free = 0, j = 0, abs; struct shadow_status *a; + int live_shadow_l1_pages, + live_shadow_l2_pages, + live_shadow_l3_pages, + live_shadow_l4_pages, + live_hl2_table_pages, + live_snapshot_pages, + live_writable_pte_predictions; + + live_shadow_l1_pages= + live_shadow_l2_pages= + live_shadow_l3_pages= + live_shadow_l4_pages= + live_hl2_table_pages= + live_snapshot_pages= + live_writable_pte_predictions=0; for ( j = 0; j < shadow_ht_buckets; j++ ) { @@ -1043,11 +1058,37 @@ if ( a->gpfn_and_flags ) { live++; + switch(a->gpfn_and_flags & PGT_type_mask) { + case PGT_l1_shadow: + live_shadow_l1_pages++; + break; + case PGT_l2_shadow: + live_shadow_l2_pages++; + break; + case PGT_l3_shadow: + live_shadow_l3_pages++; + break; + case PGT_l4_shadow: + live_shadow_l4_pages++; + break; + case PGT_hl2_shadow: + live_hl2_table_pages++; + break; + case PGT_snapshot: + live_snapshot_pages++; + break; + case PGT_writable_pred: + live_writable_pte_predictions++; + break; + default: + BUG(); + } ASSERT(a->smfn); } else ASSERT(!a->next); + a = a->next; while ( a && (live < 9999) ) { @@ -1058,6 +1099,31 @@ live, a->gpfn_and_flags, a->smfn, a->next); BUG(); } + switch(a->gpfn_and_flags & PGT_type_mask) { + case PGT_l1_shadow: + live_shadow_l1_pages++; + break; + case PGT_l2_shadow: + live_shadow_l2_pages++; + break; + case PGT_l3_shadow: + live_shadow_l3_pages++; + break; + case PGT_l4_shadow: + live_shadow_l4_pages++; + break; + case PGT_hl2_shadow: + live_hl2_table_pages++; + break; + case PGT_snapshot: + live_snapshot_pages++; + break; + case PGT_writable_pred: + live_writable_pte_predictions++; + break; + default: + BUG(); + } ASSERT(a->smfn); a = a->next; } @@ -1083,13 +1149,21 @@ #ifdef PERF_COUNTERS if ( (abs < -1) || (abs > 1) ) { - printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n", + printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_pred=%d\n", live, free, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages), perfc_value(hl2_table_pages), perfc_value(snapshot_pages), perfc_value(writable_pte_predictions)); + printk("counted: l1=%d l2=%d l3=%d l4=%d hl2=%d snapshot=%d writable_pred=%d\n", + live_shadow_l1_pages, + live_shadow_l2_pages, + live_shadow_l3_pages, + live_shadow_l4_pages, + live_hl2_table_pages, + live_snapshot_pages, + live_writable_pte_predictions); BUG(); } #endif @@ -1192,7 +1266,7 @@ #ifndef NDEBUG if ( ___shadow_status(d, gpfn, stype) != 
0 ) { - printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x " + printk("d->domain_id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x " "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n", d->domain_id, gpfn, gmfn, stype, frame_table[gmfn].count_info, @@ -1439,7 +1513,8 @@ if ( stype != PGT_writable_pred ) BUG(); // we should never replace entries into the hash table x->smfn = smfn; - put_page(pfn_to_page(gmfn)); // already had a ref... + if ( stype != PGT_writable_pred ) + put_page(pfn_to_page(gmfn)); // already had a ref... goto done; } diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/include/public/grant_table.h xeno-ft-clean/xen/include/public/grant_table.h --- xen-unstable.latest/xen/include/public/grant_table.h 2005-04-13 05:44:50.000000000 -0400 +++ xeno-ft-clean/xen/include/public/grant_table.h 2005-06-15 14:46:14.000000000 -0400 @@ -220,6 +220,20 @@ s16 status; /* 2: GNTST_* */ } PACKED gnttab_dump_table_t; /* 4 bytes */ +/* + * GNTTABOP_get_shared_mfn: Get the start of the shared mfns + * of the requested dom to allow the builder to map these pages + * into the domU's memory space for full translate mode. + */ +#define GNTTABOP_get_shared_mfn 4 +typedef struct { + /* IN parameters. */ + domid_t dom; /* 0 */ + /* OUT parameters. */ + s16 status; /* 2: GNTST_* */ + u32 mfn; /* 4 */ +} PACKED gnttab_get_shared_mfn_t; /* 8 bytes */ + /* * Bitfield values for update_pin_status.flags. @@ -273,6 +287,7 @@ gnttab_unmap_grant_ref_t unmap_grant_ref; gnttab_setup_table_t setup_table; gnttab_dump_table_t dump_table; + gnttab_get_shared_mfn_t shared_mfn; u8 __dummy[24]; } PACKED u; } PACKED gnttab_op_t; /* 32 bytes */ diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/include/xen/domain_page.h xeno-ft-clean/xen/include/xen/domain_page.h --- xen-unstable.latest/xen/include/xen/domain_page.h 2005-06-07 09:41:55.000000000 -0400 +++ xeno-ft-clean/xen/include/xen/domain_page.h 2005-06-15 14:46:14.000000000 -0400 @@ -81,7 +81,6 @@ cache->flags = 0; } } - #else /* !CONFIG_DOMAIN_PAGE */ #define map_domain_page(pfn) phys_to_virt((pfn)<
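[Not part of the patch] For reference, a rough usage sketch of the new builder as it is exposed through the Python binding added above. The keyword names follow pyxc_linuxtranslate_build()'s kwd_list and the handle is opened the same way xend's image.py does; the domain id, event-channel ports, kernel path and command line are placeholders, and the target domain is assumed to already exist, be paused and not yet constructed (the C builder checks DOMFLAGS_PAUSED and pt_base == 0).

    # Hypothetical driver script for the new shadow-translate builder.
    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.new()
    ret = xc.linuxtranslate_build(dom            = 5,      # placeholder domid
                                  control_evtchn = 1,      # placeholder ports
                                  store_evtchn   = 2,
                                  image          = "/boot/vmlinuz-2.6.11-xenU",
                                  ramdisk        = "",
                                  cmdline        = "root=/dev/sda1 ro",
                                  flags          = 0,
                                  vcpus          = 1)
    # On success the binding returns a dict; note that in translate mode the
    # 'store_mfn' value is really a guest pfn (see the XXX in setup_guest above).
    print ret['store_mfn']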