diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c	2005-05-16 13:05:03.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c	2005-05-16 13:55:06.000000000 -0400
@@ -554,7 +554,7 @@
 
 void __init cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
 {
-    unsigned long frames[gdt_descr->size >> PAGE_SHIFT];
+    unsigned long frames[(gdt_descr->size >> PAGE_SHIFT)+1];
     unsigned long va;
     int f;
 
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
--- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c	2005-06-04 18:14:40.000000000 -0400
@@ -109,11 +109,13 @@
 		retval = copy_ldt(&mm->context, &old_mm->context);
 		up(&old_mm->context.sem);
 	}
+#ifndef CONFIG_XEN_SHADOW_MODE
 	if (retval == 0) {
 		spin_lock(&mm_unpinned_lock);
 		list_add(&mm->context.unpinned, &mm_unpinned);
 		spin_unlock(&mm_unpinned_lock);
 	}
+#endif
 	return retval;
 }
 
@@ -134,11 +136,13 @@
 		kfree(mm->context.ldt);
 		mm->context.size = 0;
 	}
+#ifndef CONFIG_XEN_SHADOW_MODE
 	if (!mm->context.pinned) {
 		spin_lock(&mm_unpinned_lock);
 		list_del(&mm->context.unpinned);
 		spin_unlock(&mm_unpinned_lock);
 	}
+#endif
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c	2005-05-16 13:05:03.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c	2005-05-16 13:55:06.000000000 -0400
@@ -27,6 +27,7 @@
 
 static void xen_contig_memory(unsigned long vstart, unsigned int order)
 {
+#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE
 	/*
 	 * Ensure multi-page extents are contiguous in machine memory.
 	 * This code could be cleaned up some, and the number of
@@ -69,6 +70,7 @@
 	flush_tlb_all();
 
 	balloon_unlock(flags);
+#endif /* ! CONFIG_XEN_SHADOW_TRANSLATE_MODE */
 }
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
*/ if (max_pfn != xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem_low_pages( @@ -1545,7 +1550,7 @@ } HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - +#endif /* ! CONFIG_XEN_SHADOW_TRANSLATE_MODE */ /* * NOTE: at this point the bootmem allocator is fully available. diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 2005-06-01 14:06:28.000000000 -0400 +++ xeno-ft/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 2005-06-01 15:08:44.000000000 -0400 @@ -408,6 +408,7 @@ } #endif /* CONFIG_XEN_SHADOW_MODE */ +#ifndef CONFIG_XEN_SHADOW_MODE LIST_HEAD(mm_unpinned); DEFINE_SPINLOCK(mm_unpinned_lock); @@ -454,6 +455,7 @@ } } + void mm_pin(struct mm_struct *mm) { spin_lock(&mm->page_table_lock); @@ -521,3 +523,4 @@ if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) ) mm_unpin(mm); } +#endif /* CONFIG_XEN_SHADOW_MODE */ diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/Kconfig xeno-ft/linux-2.6.11-xen-sparse/arch/xen/Kconfig --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-05-16 13:05:03.000000000 -0400 +++ xeno-ft/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-05-16 13:55:06.000000000 -0400 @@ -124,11 +124,19 @@ space. Odds are that you want to say N here. config XEN_SHADOW_MODE - bool "Fake shadow mode" + bool "Build linux to use Xen's shadow mode support" default n - help - fakes out a shadow mode kernel + help + Builds a xenolinux that expects Xen's shadow mode support to be + enabled. +config XEN_SHADOW_TRANSLATE_MODE + bool "Build linux to use Xen's shadow translate mode support" + depends on XEN_SHADOW_MODE + default n + help + Builds a xenolinux that expects Xen's shadow translate mode support + to be enabled. config XEN_SCRUB_PAGES bool "Scrub memory before freeing it to Xen" diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c xeno-ft/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c --- xen-unstable.latest/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c 2005-06-04 18:07:26.000000000 -0400 +++ xeno-ft/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c 2005-06-04 18:14:40.000000000 -0400 @@ -93,7 +93,9 @@ extern void time_suspend(void); extern void time_resume(void); extern unsigned long max_pfn; +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE extern unsigned int *pfn_to_mfn_frame_list; +#endif suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL); if ( suspend_record == NULL ) @@ -139,6 +141,7 @@ memset(empty_zero_page, 0, PAGE_SIZE); +#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) { pfn_to_mfn_frame_list[j] = @@ -146,7 +149,7 @@ } HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - +#endif /* ! 
 
     gnttab_resume();
     irq_resume();
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c xeno-ft/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c
--- xen-unstable.latest/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c	2005-04-13 05:44:49.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c	2005-05-16 13:06:46.000000000 -0400
@@ -197,12 +197,14 @@
             BUG();
 
         pfn = page - mem_map;
+#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE
         if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
             BUG();
 
         /* Update P->M and M->P tables. */
         phys_to_machine_mapping[pfn] = mfn_list[i];
         xen_machphys_update(mfn_list[i], pfn);
+#endif
 
         /* Link back into the page tables if it's not a highmem page. */
         if ( pfn < max_low_pfn )
@@ -239,7 +241,11 @@
         }
 
         pfn = page - mem_map;
+#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE
         mfn_list[i] = phys_to_machine_mapping[pfn];
+#else
+        mfn_list[i] = pfn;
+#endif
 
         if ( !PageHighMem(page) )
         {
@@ -266,7 +272,9 @@
         for ( i = 0; i < debt; i++ )
         {
             pfn = mfn_to_pfn(mfn_list[i]);
+#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE
             phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+#endif
             balloon_append(pfn_to_page(pfn));
         }
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h xeno-ft/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h	2005-06-01 14:06:28.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h	2005-06-01 14:17:39.000000000 -0400
@@ -41,9 +41,15 @@
 		: : "r" (0) );
 }
 
+#ifndef CONFIG_XEN_SHADOW_MODE
 extern void mm_pin(struct mm_struct *mm);
 extern void mm_unpin(struct mm_struct *mm);
 void mm_pin_all(void);
+#else
+#define mm_pin(_mm) ((void)0)
+#define mm_unpin(_mm) ((void)0)
+#define mm_pin_all() ((void)0)
+#endif
 
 static inline void switch_mm(struct mm_struct *prev,
                              struct mm_struct *next,
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h xeno-ft/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h
--- xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h	2005-06-01 14:06:28.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h	2005-06-01 15:15:30.000000000 -0400
@@ -16,6 +16,7 @@
 	struct list_head unpinned;
 } mm_context_t;
 
+#ifndef CONFIG_XEN_SHADOW_MODE
 extern struct list_head mm_unpinned;
 extern spinlock_t mm_unpinned_lock;
 
@@ -23,4 +24,6 @@
 extern void _arch_exit_mmap(struct mm_struct *mm);
 #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
 
+#endif /* CONFIG_XEN_SHADOW_MODE */
+
 #endif
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h xeno-ft/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
--- xen-unstable.latest/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h	2005-05-31 08:28:29.000000000 -0400
+++ xeno-ft/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h	2005-05-31 08:43:13.000000000 -0400
@@ -58,9 +58,15 @@
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#ifndef CONFIG_XEN_SHADOW_TRANSLATE_MODE
 extern unsigned int *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
-#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
+#ifdef CONFIG_XEN_SHADOW_MODE
+# define pfn_to_mfn(_pfn) ((unsigned long)(_pfn))
+# define mfn_to_pfn(_mfn) ((unsigned long)(_mfn))
+#else
+# define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
+# define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
+#endif
 static inline unsigned long phys_to_machine(unsigned long phys)
 {
 	unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
@@ -73,6 +79,12 @@
 	phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
 	return phys;
 }
+#else /* CONFIG_XEN_SHADOW_TRANSLATE_MODE */
+#define pfn_to_mfn(_pfn) (_pfn)
+#define mfn_to_pfn(_mfn) (_mfn)
+#define phys_to_machine(_phys) (_phys)
+#define machine_to_phys(_mach) (_mach)
+#endif /* CONFIG_XEN_SHADOW_TRANSLATE_MODE */
 
 /*
  * These are used to make use of C type-checking..
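The page.h hunk just above is the heart of the guest-side interface change: with CONFIG_XEN_SHADOW_TRANSLATE_MODE the pfn<->mfn macros collapse to the identity, so the kernel never consults a translation table at all. A standalone sketch of the two behaviours (the table contents below are invented purely for illustration):

    #include <stdio.h>

    /* Illustrative model only; the real macros live in asm-i386/page.h. */
    static unsigned int fake_p2m[4] = { 7, 5, 12, 9 };  /* hypothetical table */

    static unsigned long pfn_to_mfn_table(unsigned long pfn)
    {
        return fake_p2m[pfn];    /* non-translate build: p2m lookup */
    }

    static unsigned long pfn_to_mfn_identity(unsigned long pfn)
    {
        return pfn;              /* CONFIG_XEN_SHADOW_TRANSLATE_MODE build */
    }

    int main(void)
    {
        printf("table:    pfn 2 -> mfn %lu\n", pfn_to_mfn_table(2));
        printf("identity: pfn 2 -> mfn %lu\n", pfn_to_mfn_identity(2));
        return 0;
    }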
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/Makefile xeno-ft/tools/libxc/Makefile
--- xen-unstable.latest/tools/libxc/Makefile	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/tools/libxc/Makefile	2005-06-04 18:14:41.000000000 -0400
@@ -20,6 +20,7 @@
 SRCS       += xc_evtchn.c
 SRCS       += xc_gnttab.c
 SRCS       += xc_linux_build.c
+SRCS       += xc_linuxtranslate_build.c
 SRCS       += xc_plan9_build.c
 SRCS       += xc_linux_restore.c
 SRCS       += xc_linux_save.c
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/xc_evtchn.c xeno-ft/tools/libxc/xc_evtchn.c
--- xen-unstable.latest/tools/libxc/xc_evtchn.c	2005-04-13 05:44:49.000000000 -0400
+++ xeno-ft/tools/libxc/xc_evtchn.c	2005-05-16 13:06:46.000000000 -0400
@@ -40,6 +40,7 @@
 
     op.cmd = EVTCHNOP_alloc_unbound;
     op.u.alloc_unbound.dom = (domid_t)dom;
+    op.u.alloc_unbound.port = *port;
 
     if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 )
     {
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/xc.h xeno-ft/tools/libxc/xc.h
--- xen-unstable.latest/tools/libxc/xc.h	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/tools/libxc/xc.h	2005-06-04 18:14:41.000000000 -0400
@@ -254,6 +254,15 @@
                    unsigned long flags,
                    unsigned int vcpus);
 
+int xc_linuxtranslate_build(int xc_handle,
+                            u32 domid,
+                            const char *image_name,
+                            const char *ramdisk_name,
+                            const char *cmdline,
+                            unsigned int control_evtchn,
+                            unsigned long flags,
+                            unsigned int vcpus);
+
 int xc_plan9_build (int xc_handle,
                     u32 domid,
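The new xc.h prototype mirrors xc_linux_build(), so a caller drives it the same way. A hedged sketch of such a caller follows; the domain id, event channel, and file paths are made-up placeholders, and linking against libxc is assumed:

    #include "xc.h"

    int build_translated_domain(int xc_handle)
    {
        u32 domid = 7;                    /* placeholder domain id */
        unsigned int control_evtchn = 1;  /* placeholder port */

        return xc_linuxtranslate_build(xc_handle, domid,
                                       "/boot/vmlinuz-2.6.11-xen0", /* example */
                                       NULL,                        /* no initrd */
                                       "root=/dev/sda1 ro",         /* example */
                                       control_evtchn,
                                       0 /* flags */, 1 /* vcpus */);
    }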
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/libxc/xc_linuxtranslate_build.c xeno-ft/tools/libxc/xc_linuxtranslate_build.c
--- xen-unstable.latest/tools/libxc/xc_linuxtranslate_build.c	1969-12-31 19:00:00.000000000 -0500
+++ xeno-ft/tools/libxc/xc_linuxtranslate_build.c	2005-06-04 18:05:18.000000000 -0400
@@ -0,0 +1,796 @@
+/******************************************************************************
+ * xc_linuxtranslate_build.c
+ * Derived from xc_linux_build.c
+ */
+
+#include "xc_private.h"
+#define ELFSIZE 32
+#include "xc_elf.h"
+#include <stdlib.h>
+#include <zlib.h>
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+
+struct domain_setup_info
+{
+    unsigned long v_start;
+    unsigned long v_end;
+    unsigned long v_kernstart;
+    unsigned long v_kernend;
+    unsigned long v_kernentry;
+
+    unsigned int  load_symtab;
+    unsigned long symtab_addr;
+    unsigned long symtab_len;
+};
+
+static int
+parseelfimage(
+    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
+static int
+loadelfimage(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    struct domain_setup_info *dsi);
+static int
+loadelfsymtab(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    struct domain_setup_info *dsi);
+
+static int setup_guest(int xc_handle,
+                       u32 dom,
+                       char *image, unsigned long image_size,
+                       gzFile initrd_gfd, unsigned long initrd_len,
+                       unsigned long nr_pages,
+                       unsigned long *pvsi, unsigned long *pvke,
+                       vcpu_guest_context_t *ctxt,
+                       const char *cmdline,
+                       unsigned long shared_info_frame,
+                       unsigned int control_evtchn,
+                       unsigned long flags,
+                       unsigned int vcpus)
+{
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
+    unsigned long *page_array = NULL;
+    unsigned long ml2tab, pl2tab;
+    unsigned long ml1tab, pl1tab;
+    unsigned long mfn, pfn;
+    unsigned long count, i;
+    start_info_t *start_info;
+    shared_info_t *shared_info;
+    mmu_t *mmu = NULL;
+    int rc;
+
+    unsigned long nr_pt_pages;
+    unsigned long ppt_alloc;
+    unsigned long *physmap, *physmap_e, physmap_pfn;
+
+    struct domain_setup_info dsi;
+    unsigned long vinitrd_start;
+    unsigned long vinitrd_end;
+    unsigned long vphysmap_start;
+    unsigned long vphysmap_end;
+    unsigned long vstartinfo_start;
+    unsigned long vstartinfo_end;
+    unsigned long vstack_start;
+    unsigned long vstack_end;
+    unsigned long vpt_start;
+    unsigned long vpt_end;
+    unsigned long v_end;
+    unsigned long pshared_info;
+
+    memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+    rc = parseelfimage(image, image_size, &dsi);
+    if ( rc != 0 )
+        goto error_out;
+
+    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
+    {
+        PERROR("Guest OS must load to a page boundary.\n");
+        goto error_out;
+    }
+
+    /* Untranslated guests are given the mfn of the shared_info frame, and
+     * they are expected to map it.  We need to allocate a pfn to map the
+     * mfn into; so we steal a page from the end.  We don't need to put
+     * it in a page table, we just need to have a p2m and m2p for it... */
+    pshared_info = nr_pages;
+
+    /*
+     * Why do we need this? The number of page-table frames depends on the
+     * size of the bootstrap address space. But the size of the address space
+     * depends on the number of page-table frames (since each one is mapped
+     * read-only). We have a pair of simultaneous equations in two unknowns,
+     * which we solve by exhaustive search.
+     */
+    vinitrd_start    = round_pgup(dsi.v_end);
+    vinitrd_end      = vinitrd_start + initrd_len;
+    vphysmap_start   = round_pgup(vinitrd_end);
+    vphysmap_end     = vphysmap_start + ((nr_pages+1) * sizeof(unsigned long));
+    vpt_start        = round_pgup(vphysmap_end);
+    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+    {
+        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
+        vstartinfo_start = vpt_end;
+        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
+        vstack_start     = vstartinfo_end;
+        vstack_end       = vstack_start + PAGE_SIZE;
+        v_end            = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
+        if ( (v_end - vstack_end) < (512 << 10) )
+            v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
+        if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
+               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+            break;
+    }
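The "simultaneous equations" comment above is worth unpacking: the page-table area itself consumes virtual address space, which can in turn demand another page table. A standalone model of the same exhaustive search (all sizes invented for illustration) shows the loop settling after a few iterations:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define L2_SPAN   (1UL << 22)            /* bytes mapped per l1 table */

    int main(void)
    {
        unsigned long fixed = 10UL << 20;    /* kernel+initrd+physmap, assumed */
        unsigned long nr_pt_pages, v_size;

        for ( nr_pt_pages = 2; ; nr_pt_pages++ )
        {
            /* start-info and stack pages follow the page tables */
            v_size = fixed + nr_pt_pages * PAGE_SIZE + 2 * PAGE_SIZE;
            v_size = (v_size + L2_SPAN - 1) & ~(L2_SPAN - 1); /* 4MB round-up */
            /* one l1 table per 4MB of address space, plus the l2 itself */
            if ( (v_size / L2_SPAN) + 1 <= nr_pt_pages )
                break;
        }
        printf("settled on %lu page-table pages\n", nr_pt_pages);
        return 0;
    }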
+
+    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
+           " Loaded kernel: %08lx->%08lx\n"
+           " Init. ramdisk: %08lx->%08lx\n"
+           " Phys-Mach map: %08lx->%08lx\n"
+           " Page tables:   %08lx->%08lx\n"
+           " Start info:    %08lx->%08lx\n"
+           " Boot stack:    %08lx->%08lx\n"
+           " TOTAL:         %08lx->%08lx\n",
+           dsi.v_kernstart, dsi.v_kernend,
+           vinitrd_start, vinitrd_end,
+           vphysmap_start, vphysmap_end,
+           vpt_start, vpt_end,
+           vstartinfo_start, vstartinfo_end,
+           vstack_start, vstack_end,
+           dsi.v_start, v_end);
+    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+
+    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+    {
+        printf("Initial guest OS requires too much space\n"
+               "(%luMB is greater than %luMB limit)\n",
+               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+        goto error_out;
+    }
+
+    if ( (page_array = malloc((nr_pages+1) * sizeof(unsigned long))) == NULL )
+    {
+        PERROR("Could not allocate memory");
+        goto error_out;
+    }
+
+
+    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
+    {
+        PERROR("Could not get the page frame list");
+        goto error_out;
+    }
+
+    fprintf(stderr, "setup_guest: loadelfimage\n");
+    loadelfimage(image, xc_handle, dom, page_array, &dsi);
+
+    /* Load the initial ramdisk image. */
+    if ( initrd_len != 0 )
+    {
+        fprintf(stderr, "setup_guest: xc_copy_to_domain_page (pre-for)\n");
+        for ( i = (vinitrd_start - dsi.v_start);
+              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
+        {
+            char page[PAGE_SIZE];
+            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
+            {
+                PERROR("Error reading initrd image, could not");
+                goto error_out;
+            }
+            xc_copy_to_domain_page(xc_handle, dom,
+                                   page_array[i>>PAGE_SHIFT], page);
+        }
+    }
+
+    fprintf(stderr, "setup_guest: init_mmu_updates\n");
+    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+        goto error_out;
+
+    /* First allocate page for page dir. */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+    //l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    pl2tab = ppt_alloc++;
+    ml2tab = page_array[pl2tab];
+    //ctxt->pt_base = l2tab;
+    ctxt->pt_base = ml2tab << PAGE_SHIFT;
+
+    /* Initialise the page tables. */
+    fprintf(stderr, "setup_guest: Initializing l2 table pfn %lx mfn %lx\n",
+            pl2tab, ml2tab);
+    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                        PROT_READ|PROT_WRITE,
+                                        ml2tab)) == NULL )
+        goto error_out;
+    memset(vl2tab, 0, PAGE_SIZE);
+    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
+    fprintf(stderr, "  v_start %lx l2_table_offset %lx\n",
+            dsi.v_start,
+            l2_table_offset(dsi.v_start));
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            //l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            pl1tab = ppt_alloc++;
+            ml1tab = page_array[pl1tab];
+            fprintf(stderr, "  allocating new l1 page; pfn %lx, mfn %lx\n",
+                    pl1tab, ml1tab);
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                ml1tab)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+            *vl2e++ = l2e_from_pfn(ml1tab, L2_PROT);
+        }
+
+        *vl1e = l1e_from_pfn(page_array[count], L1_PROT);
+        vl1e++;
+    }
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
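The bootstrap mapping loop just finished has a simple cadence: every 1024th PTE crosses into a new l1 page, at which point a fresh frame is taken from page_array and hooked into the l2. A toy version of that cadence (counts invented, frame management elided):

    #include <stdio.h>

    #define PTES_PER_L1 1024          /* 4-byte PTEs in one 4KB l1 page */

    int main(void)
    {
        unsigned long count, nr_mappings = 3000;  /* assumed mapping count */
        unsigned long l1_tables = 0;

        for ( count = 0; count < nr_mappings; count++ )
        {
            if ( (count % PTES_PER_L1) == 0 )
                l1_tables++;  /* patch: map a new l1, zero it, set l2 entry */
            /* patch: *vl1e++ = l1e_from_pfn(page_array[count], L1_PROT); */
        }
        printf("%lu mappings used %lu l1 tables\n", nr_mappings, l1_tables);
        return 0;
    }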
+
+    /* Write the phys->machine and machine->phys table entries. */
+    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
+    physmap = physmap_e = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[physmap_pfn++]);
+    for ( count = 0; count < nr_pages; count++ )
+    {
+        pfn = count;
+        mfn = page_array[count];
+        if ( add_mmu_update(xc_handle, mmu,
+                            (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+                            pfn) )
+        {
+            munmap(physmap, PAGE_SIZE);
+            goto error_out;
+        }
+        *physmap_e++ = pfn;
+        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
+        {
+            munmap(physmap, PAGE_SIZE);
+            physmap = physmap_e = xc_map_foreign_range(
+                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+                page_array[physmap_pfn++]);
+        }
+    }
+
+    if(xc_shadow_control(xc_handle,
+                         dom,
+                         DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE,
+                         NULL,
+                         0,
+                         NULL)<0) {
+        PERROR("Could not enable full translate mode!");
+        goto error_out;
+    }
+
+    /* Translate the shared_info page.  (we allocated enough for nr_pages+1.) */
+    mfn = shared_info_frame;
+    pfn = pshared_info;
+    if( add_mmu_update(xc_handle, mmu,
+                       (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
+        goto error_out;
+
+    start_info = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
+
+    memset(start_info, 0, sizeof(*start_info));
+    start_info->nr_pages     = nr_pages;
+    start_info->shared_info  = pshared_info << PAGE_SHIFT;
+    start_info->flags        = flags;
+    start_info->pt_base      = vpt_start;
+    start_info->nr_pt_frames = nr_pt_pages;
+    start_info->mfn_list     = vphysmap_start;
+    start_info->domain_controller_evtchn = control_evtchn;
+    if ( initrd_len != 0 )
+    {
+        start_info->mod_start = vinitrd_start;
+        start_info->mod_len   = initrd_len;
+    }
+    strncpy((char *)start_info->cmd_line, cmdline, MAX_CMDLINE);
+    start_info->cmd_line[MAX_CMDLINE-1] = '\0';
+    munmap(start_info, PAGE_SIZE);
+
+    /* shared_info page starts its life empty. */
+    shared_info = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
+    memset(shared_info, 0, sizeof(shared_info_t));
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+    shared_info->n_vcpu = vcpus;
+    printf(" VCPUS:         %d\n", shared_info->n_vcpu);
+
+    munmap(shared_info, PAGE_SIZE);
+
+    fprintf(stderr, "setup_guest: finish_mmu_updates\n");
+    /* Send the page update requests down to the hypervisor. */
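All of the machine-to-phys writes above go through libxc's batching helpers rather than issuing one hypercall per entry. Reduced to its skeleton (error paths trimmed; the helper signatures are taken from the call sites in this file, not quoted from xc_private.h):

    /* Sketch of the batching idiom used by setup_guest() above. */
    static int write_m2p_entries(int xc_handle, u32 dom,
                                 unsigned long *page_array,
                                 unsigned long nr_pages)
    {
        mmu_t *mmu;
        unsigned long pfn;

        if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
            return -1;

        for ( pfn = 0; pfn < nr_pages; pfn++ )
            if ( add_mmu_update(xc_handle, mmu,          /* queued, flushed */
                                (page_array[pfn] << PAGE_SHIFT) |
                                MMU_MACHPHYS_UPDATE, pfn) ) /* ...in batches */
                return -1;

        return finish_mmu_updates(xc_handle, mmu);  /* push any queued tail */
    }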
+    if ( finish_mmu_updates(xc_handle, mmu) )
+        goto error_out;
+
+    free(mmu);
+    free(page_array);
+
+    *pvsi = vstartinfo_start;
+    *pvke = dsi.v_kernentry;
+
+    fprintf(stderr, "setup_guest: done!");
+    return 0;
+
+ error_out:
+    if ( mmu != NULL )
+        free(mmu);
+    if ( page_array != NULL )
+        free(page_array);
+    return -1;
+}
+
+int xc_linuxtranslate_build(int xc_handle,
+                            u32 domid,
+                            const char *image_name,
+                            const char *ramdisk_name,
+                            const char *cmdline,
+                            unsigned int control_evtchn,
+                            unsigned long flags,
+                            unsigned int vcpus)
+{
+    dom0_op_t launch_op, op;
+    int initrd_fd = -1;
+    gzFile initrd_gfd = NULL;
+    int rc, i;
+    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
+    unsigned long nr_pages;
+    char         *image = NULL;
+    unsigned long image_size, initrd_size=0;
+    unsigned long vstartinfo_start, vkern_entry;
+
+    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
+    {
+        PERROR("Could not find total pages for domain");
+        goto error_out;
+    }
+
+    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
+        goto error_out;
+
+    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
+    {
+        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
+        {
+            PERROR("Could not open the initial ramdisk image");
+            goto error_out;
+        }
+
+        initrd_size = xc_get_filesz(initrd_fd);
+
+        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
+        {
+            PERROR("Could not allocate decompression state for initrd");
+            goto error_out;
+        }
+    }
+
+    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
+    {
+        PERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    op.cmd = DOM0_GETDOMAININFO;
+    op.u.getdomaininfo.domain = (domid_t)domid;
+    if ( (do_dom0_op(xc_handle, &op) < 0) ||
+         ((u16)op.u.getdomaininfo.domain != domid) )
+    {
+        PERROR("Could not get info on domain");
+        goto error_out;
+    }
+
+    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+    {
+        PERROR("Could not get vcpu context");
+        goto error_out;
+    }
+
+    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
+         (ctxt->pt_base != 0) )
+    {
+        ERROR("Domain is already constructed");
+        goto error_out;
+    }
+
+    if ( setup_guest(xc_handle, domid, image, image_size,
+                     initrd_gfd, initrd_size, nr_pages,
+                     &vstartinfo_start, &vkern_entry,
+                     ctxt, cmdline,
+                     op.u.getdomaininfo.shared_info_frame,
+                     control_evtchn, flags, vcpus) < 0 )
+    {
+        ERROR("Error constructing guest OS");
+        goto error_out;
+    }
+
+    if ( initrd_fd >= 0 )
+        close(initrd_fd);
+    if ( initrd_gfd )
+        gzclose(initrd_gfd);
+    if ( image != NULL )
+        free(image);
+
+    ctxt->flags = 0;
+
+    /*
+     * Initial register values:
+     *  DS,ES,FS,GS = FLAT_KERNEL_DS
+     *       CS:EIP = FLAT_KERNEL_CS:start_pc
+     *       SS:ESP = FLAT_KERNEL_DS:start_stack
+     *          ESI = start_info
+     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
+     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
+     */
+    ctxt->user_regs.ds = FLAT_KERNEL_DS;
+    ctxt->user_regs.es = FLAT_KERNEL_DS;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS;
+    ctxt->user_regs.ss = FLAT_KERNEL_DS;
+    ctxt->user_regs.cs = FLAT_KERNEL_CS;
+    ctxt->user_regs.eip = vkern_entry;
+    ctxt->user_regs.esp = vstartinfo_start + 2*PAGE_SIZE;
+    ctxt->user_regs.esi = vstartinfo_start;
+    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
+
+    /* FPU is set up to default initial state. */
+    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for ( i = 0; i < 256; i++ )
+    {
+        ctxt->trap_ctxt[i].vector = i;
+        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+    }
+
+    /* No LDT. */
+    ctxt->ldt_ents = 0;
+
+    /* Use the default Xen-provided GDT. */
+    ctxt->gdt_ents = 0;
+
+    /* Ring 1 stack is the initial stack. */
+    ctxt->kernel_ss = FLAT_KERNEL_DS;
+    ctxt->kernel_sp = vstartinfo_start + 2*PAGE_SIZE;
+
+    /* No debugging. */
+    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
+
+    /* No callback handlers. */
+#if defined(__i386__)
+    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
+    ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_eip = 0;
+    ctxt->syscall_callback_eip  = 0;
+#endif
+
+    memset( &launch_op, 0, sizeof(launch_op) );
+
+    launch_op.u.setdomaininfo.domain = (domid_t)domid;
+    launch_op.u.setdomaininfo.vcpu   = 0;
+    launch_op.u.setdomaininfo.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETDOMAININFO;
+    rc = do_dom0_op(xc_handle, &launch_op);
+
+    return rc;
+
+ error_out:
+    if ( initrd_gfd != NULL )
+        gzclose(initrd_gfd);
+    else if ( initrd_fd >= 0 )
+        close(initrd_fd);
+    if ( image != NULL )
+        free(image);
+
+    return -1;
+}
+
+static inline int is_loadable_phdr(Elf_Phdr *phdr)
+{
+    return ((phdr->p_type == PT_LOAD) &&
+            ((phdr->p_flags & (PF_W|PF_X)) != 0));
+}
+
+static int parseelfimage(char *elfbase,
+                         unsigned long elfsize,
+                         struct domain_setup_info *dsi)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+    Elf_Phdr *phdr;
+    Elf_Shdr *shdr;
+    unsigned long kernstart = ~0UL, kernend=0UL;
+    char *shstrtab, *guestinfo=NULL, *p;
+    int h;
+
+    if ( !IS_ELF(*ehdr) )
+    {
+        ERROR("Kernel image does not have an ELF header.");
+        return -EINVAL;
+    }
+
+    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
+    {
+        ERROR("ELF program headers extend beyond end of image.");
+        return -EINVAL;
+    }
+
+    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
+    {
+        ERROR("ELF section headers extend beyond end of image.");
+        return -EINVAL;
+    }
+
+    /* Find the section-header strings table. */
+    if ( ehdr->e_shstrndx == SHN_UNDEF )
+    {
+        ERROR("ELF image has no section-header strings table (shstrtab).");
+        return -EINVAL;
+    }
+    shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff +
+                        (ehdr->e_shstrndx*ehdr->e_shentsize));
+    shstrtab = elfbase + shdr->sh_offset;
+
+    /* Find the special '__xen_guest' section and check its contents. */
+    for ( h = 0; h < ehdr->e_shnum; h++ )
+    {
+        shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + (h*ehdr->e_shentsize));
+        if ( strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 )
+            continue;
+
+        guestinfo = elfbase + shdr->sh_offset;
+
+        if ( (strstr(guestinfo, "LOADER=generic") == NULL) &&
+             (strstr(guestinfo, "GUEST_OS=linux") == NULL) )
+        {
+            ERROR("Will only load images built for the generic loader "
+                  "or Linux images");
+            ERROR("Actually saw: '%s'", guestinfo);
+            return -EINVAL;
+        }
+
+        if ( (strstr(guestinfo, "XEN_VER=3.0") == NULL) )
+        {
+            ERROR("Will only load images built for Xen v3.0");
+            ERROR("Actually saw: '%s'", guestinfo);
+            return -EINVAL;
+        }
+
+        break;
+    }
+    if ( guestinfo == NULL )
+    {
+        ERROR("Not a Xen-ELF image: '__xen_guest' section not found.");
+        return -EINVAL;
+    }
+
+    for ( h = 0; h < ehdr->e_phnum; h++ )
+    {
+        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+        if ( !is_loadable_phdr(phdr) )
+            continue;
+        if ( phdr->p_paddr < kernstart )
+            kernstart = phdr->p_paddr;
+        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
+            kernend = phdr->p_paddr + phdr->p_memsz;
+    }
+
+    if ( (kernstart > kernend) ||
+         (ehdr->e_entry < kernstart) ||
+         (ehdr->e_entry > kernend) )
+    {
+        ERROR("Malformed ELF image.");
+        return -EINVAL;
+    }
+
+    dsi->v_start = kernstart;
+    if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
+        dsi->v_start = strtoul(p+10, &p, 0);
+
+    if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
+        dsi->load_symtab = 1;
+
+    dsi->v_kernstart = kernstart;
+    dsi->v_kernend   = kernend;
+    dsi->v_kernentry = ehdr->e_entry;
+    dsi->v_end       = dsi->v_kernend;
+
+    loadelfsymtab(elfbase, 0, 0, NULL, dsi);
+
+    return 0;
+}
+
+static int
+loadelfimage(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    struct domain_setup_info *dsi)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+    Elf_Phdr *phdr;
+    int h;
+
+    char         *va;
+    unsigned long pa, done, chunksz;
+
+    for ( h = 0; h < ehdr->e_phnum; h++ )
+    {
+        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+        if ( !is_loadable_phdr(phdr) )
+            continue;
+
+        for ( done = 0; done < phdr->p_filesz; done += chunksz )
+        {
+            pa = (phdr->p_paddr + done) - dsi->v_start;
+            va = xc_map_foreign_range(
+                xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+            chunksz = phdr->p_filesz - done;
+            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+            memcpy(va + (pa & (PAGE_SIZE-1)),
+                   elfbase + phdr->p_offset + done, chunksz);
+            munmap(va, PAGE_SIZE);
+        }
+
+        for ( ; done < phdr->p_memsz; done += chunksz )
+        {
+            pa = (phdr->p_paddr + done) - dsi->v_start;
+            va = xc_map_foreign_range(
+                xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+            chunksz = phdr->p_memsz - done;
+            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
+            munmap(va, PAGE_SIZE);
+        }
+    }
+
+    loadelfsymtab(elfbase, xch, dom, parray, dsi);
+
+    return 0;
+}
+
+#define ELFROUND (ELFSIZE / 8)
+
+static int
+loadelfsymtab(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    struct domain_setup_info *dsi)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr;
+    Elf_Shdr *shdr;
+    unsigned long maxva, symva;
+    char *p;
+    int h, i;
+
+    if ( !dsi->load_symtab )
+        return 0;
+
+    p = malloc(sizeof(int) + sizeof(Elf_Ehdr) +
+               ehdr->e_shnum * sizeof(Elf_Shdr));
+    if (p == NULL)
+        return 0;
+
+    maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
+    symva = maxva;
+    maxva += sizeof(int);
+    dsi->symtab_addr = maxva;
+    dsi->symtab_len = 0;
+    maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
+    maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+
+    shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
+    memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));
+
+    for ( h = 0; h < ehdr->e_shnum; h++ )
+    {
+        if ( shdr[h].sh_type == SHT_STRTAB )
+        {
+            /* Look for a strtab @i linked to symtab @h. */
+            for ( i = 0; i < ehdr->e_shnum; i++ )
+                if ( (shdr[i].sh_type == SHT_SYMTAB) &&
+                     (shdr[i].sh_link == h) )
+                    break;
+            /* Skip symtab @h if we found no corresponding strtab @i. */
+            if ( i == ehdr->e_shnum )
+            {
+                shdr[h].sh_offset = 0;
+                continue;
+            }
+        }
+
+        if ( (shdr[h].sh_type == SHT_STRTAB) ||
+             (shdr[h].sh_type == SHT_SYMTAB) )
+        {
+            if ( parray != NULL )
+                xc_map_memcpy(maxva, elfbase + shdr[h].sh_offset,
+                              shdr[h].sh_size,
+                              xch, dom, parray, dsi->v_start);
+
+            /* Mangled to be based on ELF header location. */
+            shdr[h].sh_offset = maxva - dsi->symtab_addr;
+
+            dsi->symtab_len += shdr[h].sh_size;
+            maxva += shdr[h].sh_size;
+            maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+        }
+
+        shdr[h].sh_name = 0;  /* Name is NULL. */
+    }
+
+    if ( dsi->symtab_len == 0 )
+    {
+        dsi->symtab_addr = 0;
+        goto out;
+    }
+
+    if ( parray != NULL )
+    {
+        *(int *)p = maxva - dsi->symtab_addr;
+        sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
+        memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
+        sym_ehdr->e_phoff = 0;
+        sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
+        sym_ehdr->e_phentsize = 0;
+        sym_ehdr->e_phnum = 0;
+        sym_ehdr->e_shstrndx = SHN_UNDEF;
+
+        /* Copy total length, crafted ELF header and section header table */
+        xc_map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) +
+                      ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray,
+                      dsi->v_start);
+    }
+
+    dsi->symtab_len = maxva - dsi->symtab_addr;
+    dsi->v_end = round_pgup(maxva);
+
+ out:
+    if ( p != NULL )
+        free(p);
+
+    return 0;
+}
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/python/xen/lowlevel/xc/xc.c xeno-ft/tools/python/xen/lowlevel/xc/xc.c
--- xen-unstable.latest/tools/python/xen/lowlevel/xc/xc.c	2005-05-31 08:28:29.000000000 -0400
+++ xeno-ft/tools/python/xen/lowlevel/xc/xc.c	2005-06-04 17:00:20.000000000 -0400
@@ -280,6 +280,33 @@
     return zero;
 }
 
+static PyObject *pyxc_linuxtranslate_build(PyObject *self,
+                                           PyObject *args,
+                                           PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+
+    u32   dom;
+    char *image, *ramdisk = NULL, *cmdline = "";
+    int   control_evtchn, flags = 0, vcpus = 1;
+
+    static char *kwd_list[] = { "dom", "control_evtchn",
+                                "image", "ramdisk", "cmdline", "flags", "vcpus",
+                                NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ssii", kwd_list,
+                                      &dom, &control_evtchn,
+                                      &image, &ramdisk, &cmdline, &flags,
+                                      &vcpus) )
+        return NULL;
+
+    if ( xc_linuxtranslate_build(xc->xc_handle, dom, image,
+                                 ramdisk, cmdline, control_evtchn,
+                                 flags, vcpus) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
 static PyObject *pyxc_plan9_build(PyObject *self,
                                   PyObject *args,
                                   PyObject *kwds)
@@ -922,6 +949,17 @@
       " vcpus   [int, 1]:   Number of Virtual CPUS in domain.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
+    { "linuxtranslate_build",
+      (PyCFunction)pyxc_linuxtranslate_build,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Build a new shadow-translated Linux guest OS.\n"
+      " dom     [int]:      Identifier of domain to build into.\n"
+      " image   [str]:      Name of kernel image file. May be gzipped.\n"
+      " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+      " vcpus   [int, 1]:   Number of Virtual CPUS in domain.\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
     { "plan9_build",
       (PyCFunction)pyxc_plan9_build,
       METH_VARARGS | METH_KEYWORDS, "\n"
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/tools/python/xen/xend/XendDomainInfo.py xeno-ft/tools/python/xen/xend/XendDomainInfo.py
--- xen-unstable.latest/tools/python/xen/xend/XendDomainInfo.py	2005-06-01 14:06:28.000000000 -0400
+++ xeno-ft/tools/python/xen/xend/XendDomainInfo.py	2005-06-04 17:09:24.000000000 -0400
@@ -1114,6 +1114,30 @@
     vm.create_domain("linux", kernel, ramdisk, cmdline)
     return vm
 
+def vm_image_linuxtranslate(vm, image):
+    """Create a VM for a shadow translate linux image.
+
+    @param vm: vm to build
+    @param image: image config
+    @return: vm
+    """
+    kernel = sxp.child_value(image, "kernel")
+    cmdline = ""
+    ip = sxp.child_value(image, "ip", None)
+    if ip:
+        cmdline += " ip=" + ip
+    root = sxp.child_value(image, "root")
+    if root:
+        cmdline += " root=" + root
+    args = sxp.child_value(image, "args")
+    if args:
+        cmdline += " " + args
+    ramdisk = sxp.child_value(image, "ramdisk", '')
+    log.debug("creating translated linux domain with cmdline: %s" %(cmdline,))
+    vm.create_domain("linuxtranslate", kernel, ramdisk, cmdline)
+    return vm
+
 def vm_image_plan9(vm, image):
     """Create a VM for a Plan 9 image.
 
@@ -1197,6 +1221,7 @@
 #============================================================================
 # Register image handlers.
 add_image_handler('linux',  vm_image_linux)
+add_image_handler('linuxtranslate',  vm_image_linuxtranslate)
 add_image_handler('plan9',  vm_image_plan9)
 add_image_handler('vmx',    vm_image_vmx)
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/arch/x86/mm.c xeno-ft/xen/arch/x86/mm.c
--- xen-unstable.latest/xen/arch/x86/mm.c	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/xen/arch/x86/mm.c	2005-06-04 18:14:41.000000000 -0400
@@ -446,7 +446,8 @@
 
     if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
     {
-        MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+        MEM_LOG("Bad L1 flags %x (%lx)\n",
+                l1e_get_flags(l1e) & L1_DISALLOW_MASK,
+                (unsigned long)l1e_get_intpte(l1e));
         return 0;
     }
 
@@ -1695,6 +1696,8 @@
         break;
 
     case MMUEXT_NEW_BASEPTR:
+        if ( shadow_mode_translate(d) )
+            op.mfn = __gpfn_to_mfn(d, op.mfn);
         okay = new_guest_cr3(op.mfn);
        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
         break;
@@ -1807,6 +1810,9 @@
             okay = 0;
             break;
         }
+
+        if ( shadow_mode_translate(d) )
+            BUG(); // not supported yet, need to think about this.
 
         e = percpu_info[cpu].foreign;
         if ( unlikely(e == NULL) )
@@ -2431,6 +2437,7 @@
 {
     int nr_pages = (entries + 511) / 512;
     unsigned long frames[16];
+    struct domain* d = current->domain;
     long ret;
 
     if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
@@ -2438,6 +2445,18 @@
 
     LOCK_BIGLOCK(current->domain);
 
+    if(shadow_mode_translate(d)) {
+        int i;
+        unsigned long mfn;
+
+        shadow_lock(d);
+        for(i=0; i<nr_pages; i++) {
+            mfn = __gpfn_to_mfn(d, frames[i]);
+            frames[i] = mfn;
+        }
+        shadow_unlock(d);
+    }
+
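Both mm.c paths above apply the same rule, which is the core of the hypervisor side of this patch: once a domain runs in translate mode, every frame number it hands to Xen is a guest pfn and must go through the p2m before being used as a machine frame. Reduced to a sketch (declarations abbreviated; shadow_mode_translate() and __gpfn_to_mfn() are the tree's own helpers):

    struct domain;
    extern int shadow_mode_translate(struct domain *d);
    extern unsigned long __gpfn_to_mfn(struct domain *d, unsigned long gpfn);

    /* Normalise a guest-supplied frame number before Xen uses it. */
    static unsigned long guest_frame_to_mfn(struct domain *d,
                                            unsigned long frame)
    {
        if ( shadow_mode_translate(d) )
            return __gpfn_to_mfn(d, frame);  /* guest passed a pfn */
        return frame;                        /* guest passed a raw mfn */
    }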
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/arch/x86/shadow.c xeno-ft/xen/arch/x86/shadow.c
--- xen-unstable.latest/xen/arch/x86/shadow.c
+++ xeno-ft/xen/arch/x86/shadow.c
@@ ... @@
-        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
-            if ( MFN_PINNED(x->smfn) )
-                count++;
+
+        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
+            /* Skip entries that are writable_pred. */
+            switch(x->gpfn_and_flags & PGT_type_mask){
+            case PGT_l1_shadow:
+            case PGT_l2_shadow:
+            case PGT_l3_shadow:
+            case PGT_l4_shadow:
+            case PGT_hl2_shadow:
+                if ( MFN_PINNED(x->smfn) )
+                    count++;
+                break;
+            case PGT_snapshot:
+            case PGT_writable_pred:
+                printk(" SKIPPING type %lx\n",
+                       x->gpfn_and_flags & PGT_type_mask);
+                break;
+            default:
+                BUG();
+
+            }
+        }
+
         if ( !count )
             continue;
 
         mfn_list = xmalloc_array(unsigned long, count);
         count = 0;
-        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
-            if ( MFN_PINNED(x->smfn) )
-                mfn_list[count++] = x->smfn;
+        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
+            /* Skip entries that are writable_pred. */
+            switch(x->gpfn_and_flags & PGT_type_mask){
+            case PGT_l1_shadow:
+            case PGT_l2_shadow:
+            case PGT_l3_shadow:
+            case PGT_l4_shadow:
+            case PGT_hl2_shadow:
+                if ( MFN_PINNED(x->smfn) )
+                    mfn_list[count++] = x->smfn;
+                break;
+            case PGT_snapshot:
+            case PGT_writable_pred:
+                printk(" SKIPPING type %lx\n",
+                       x->gpfn_and_flags & PGT_type_mask);
+                break;
+            default:
+                BUG();
+
+            }
+        }
 
         while ( count )
         {
@@ -773,6 +811,9 @@
     unsigned long va = pfn << PAGE_SHIFT;
 
     ASSERT( phystab );
+    ASSERT(shadow_lock_is_acquired(d));
+
+    l2 = map_domain_mem_with_cache(phystab, l2cache);
 
     l2 = map_domain_mem_with_cache(phystab, l2cache);
     l2e = l2[l2_table_offset(va)];
@@ -851,6 +892,9 @@
         list_ent = page->list.next;
     }
 
+    unmap_domain_mem_cache(&l2cache);
+    unmap_domain_mem_cache(&l1cache);
+
     destroy_map_domain_mem_cache(&l2cache);
     destroy_map_domain_mem_cache(&l1cache);
 
@@ -1366,7 +1410,7 @@
     case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
         free_shadow_pages(d);
         rc = __shadow_mode_enable(
-            d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
+            d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate|SHM_write_all);
         break;
 
     default:
@@ -1420,7 +1464,7 @@
     unmap_domain_mem(l1);
 
 #if 0
-    printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx phystab=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
+    printk("gpfn_to_mfn_foreign(d->domain_id=%d, gpfn=%lx) => %lx phystab=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
            d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT,
            phystab, l2e, l1tab, l1e);
 #endif
@@ -1677,6 +1721,7 @@
 
     shadow_lock(d);
     __shadow_sync_va(v, va);
+    SH_VVLOG("shadow_invlpg va=%lx", va);
 
     // XXX mafetter: will need to think about 4MB pages...
 
@@ -2053,7 +2098,11 @@
     while ( count )
     {
         count--;
+        /* Delete_shadow_status does a shadow_audit(), so we need to
+         * keep accurate count of writable_pte_predictions to keep it
+         * happy. */
         delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+        perfc_decr(writable_pte_predictions);
     }
 
     xfree(gpfn_list);
@@ -2580,6 +2629,8 @@
      * STEP 2. Check the guest PTE.
      */
     __guest_get_l2e(v, va, &gpde);
+    SH_VVLOG("shadow_fault: gpde (0x%lx | 0x%lx)",
+             (unsigned long)l2e_get_pfn(gpde),
+             (unsigned long)l2e_get_flags(gpde));
     if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
     {
         SH_VVLOG("shadow_fault - EXIT: L1 not present");
@@ -2595,7 +2646,7 @@
     if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
     {
         SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
-                 l1e_get_intpte(gpte));
+                 (unsigned long)l1e_get_intpte(gpte));
         perfc_incrc(shadow_fault_bail_pte_not_present);
         goto fail;
     }
@@ -2607,19 +2658,11 @@
 
         if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
         {
-            if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
-            {
-                allow_writes = 1;
-                l1e_add_flags(gpte, _PAGE_RW);
-            }
-            else
-            {
             /* Write fault on a read-only mapping. */
             SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
-                     l1e_get_intpte(gpte));
+                     (unsigned long)l1e_get_intpte(gpte));
             perfc_incrc(shadow_fault_bail_ro_mapping);
             goto fail;
-            }
         }
 
         if ( !l1pte_write_fault(v, &gpte, &spte, va) )
@@ -2693,7 +2736,7 @@
     if ( sl1mfn )
     {
         SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
-                 (void *)pa, l1e_get_intpte(gpte));
+                 (void *)pa, (unsigned long)l1e_get_intpte(gpte));
         l1pte_propagate_from_guest(current->domain, gpte, &spte);
 
         spl1e = map_domain_mem_with_cache(sl1mfn << PAGE_SHIFT, cache);
@@ -2718,7 +2761,7 @@
     if ( sl2mfn )
     {
         SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
-                 (void *)pa, l2e_get_intpte(gpde));
+                 (void *)pa, (unsigned long)l2e_get_intpte(gpde));
         spl2e = map_domain_mem_with_cache(sl2mfn << PAGE_SHIFT, cache);
         validate_pde_change(d, gpde,
                             &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
@@ -2764,8 +2807,9 @@
     // just everything involved in getting to this L1 (i.e. we need
     // linear_pg_table[l1_linear_offset(va)] to be in sync)...
     //
+    SH_VVLOG("shadow_do_update_va_mapping va=%lx, val=%lx",
+             va, (unsigned long)l1e_get_intpte(val));
     __shadow_sync_va(v, va);
-
     l1pte_propagate_from_guest(d, val, &spte);
     shadow_set_l1e(va, spte, 0);
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/common/event_channel.c xeno-ft/xen/common/event_channel.c
--- xen-unstable.latest/xen/common/event_channel.c	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/xen/common/event_channel.c	2005-06-04 18:29:10.000000000 -0400
@@ -61,15 +61,28 @@
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
-    int            port;
+    int            port = alloc->port;
+    int            dom = alloc->dom;
 
     spin_lock(&d->evtchn_lock);
 
-    if ( (port = get_free_port(d)) >= 0 )
-    {
+    if ( dom == DOMID_SELF )
+        dom = current->domain->domain_id;
+
+    if( port == 0 )
+        port = get_free_port(d);
+    else if( port > 0 && port < MAX_EVTCHNS) {
+        chn = evtchn_from_port(d, port);
+        if(chn->state != ECS_FREE)
+            port = -EBUSY;
+    } else {
+        port=-EINVAL;
+    }
+
+    if( port > 0 ) {
         chn = evtchn_from_port(d, port);
         chn->state = ECS_UNBOUND;
-        chn->u.unbound.remote_domid = alloc->dom;
+        chn->u.unbound.remote_domid = dom;
     }
 
     spin_unlock(&d->evtchn_lock);
@@ -81,6 +94,22 @@
     return 0;
 }
 
+/* Allocate a port in domain d, and return its number. */
+int evtchn_alloc_port(struct domain *d)
+{
+    struct evtchn *chn;
+    int r;
+
+    spin_lock(&d->evtchn_lock);
+    r = get_free_port(d);
+    if (r >= 0) {
+        chn = evtchn_from_port(d, r);
+        chn->state = ECS_UNBOUND;
+        chn->u.unbound.remote_domid = d->domain_id;
+    }
+    spin_unlock(&d->evtchn_lock);
+    return r;
+}
 
 static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
 {
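With this change EVTCHNOP_alloc_unbound becomes usable in two ways: port 0 keeps the old pick-any-free-port behaviour, while a nonzero in-range port requests that specific port and fails with -EBUSY if it is taken. From the tools side the round trip looks like this sketch, mirroring the xc_evtchn.c hunk earlier in this patch (error handling abbreviated):

    /* Sketch: request a specific unbound port, or any port if *port == 0. */
    static int alloc_unbound_port(int xc_handle, u32 dom, int *port)
    {
        evtchn_op_t op;

        op.cmd = EVTCHNOP_alloc_unbound;
        op.u.alloc_unbound.dom  = (domid_t)dom;
        op.u.alloc_unbound.port = *port;      /* 0 means "any free port" */

        if ( do_evtchn_op(xc_handle, &op) != 0 )
            return -1;

        *port = op.u.alloc_unbound.port;      /* actual port comes back */
        return 0;
    }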
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/include/asm-x86/shadow.h xeno-ft/xen/include/asm-x86/shadow.h
--- xen-unstable.latest/xen/include/asm-x86/shadow.h	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/xen/include/asm-x86/shadow.h	2005-06-04 18:44:27.000000000 -0400
@@ -399,7 +399,7 @@
     {
         perfc_incrc(shadow_get_page_fail);
         FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n",
-                __func__, l1e_get_intpte(l1e));
+                __func__, (unsigned long)l1e_get_intpte(l1e));
     }
 
     return res;
@@ -740,7 +740,7 @@
     spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
 
     SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
-             l1e_get_intpte(spte), l1e_get_intpte(gpte));
+             (unsigned long)l1e_get_intpte(spte),
+             (unsigned long)l1e_get_intpte(gpte));
 
     if ( shadow_mode_log_dirty(d) )
         __mark_dirty(d, gmfn);
@@ -779,7 +779,7 @@
     }
 
     SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
-             l1e_get_intpte(spte), l1e_get_intpte(gpte));
+             (unsigned long)l1e_get_intpte(spte),
+             (unsigned long)l1e_get_intpte(gpte));
 
     *gpte_p = gpte;
     *spte_p = spte;
@@ -811,7 +811,9 @@
 
     if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) )
         SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte,
-                  __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte));
+                  __func__,
+                  (unsigned long)l1e_get_intpte(gpte),
+                  (unsigned long)l1e_get_intpte(spte));
 
     *spte_p = spte;
 }
@@ -845,7 +847,7 @@
 
     if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) )
         SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__,
-                 l2e_get_intpte(gpde), l1e_get_intpte(hl2e));
+                 (unsigned long)l2e_get_intpte(gpde),
+                 (unsigned long)l1e_get_intpte(hl2e));
 
     *hl2e_p = hl2e;
 }
@@ -874,7 +876,7 @@
 
     if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
         SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
-                 l2e_get_intpte(gpde), l2e_get_intpte(spde));
+                 (unsigned long)l2e_get_intpte(gpde),
+                 (unsigned long)l2e_get_intpte(spde));
 
     *spde_p = spde;
 }
@@ -1038,6 +1040,21 @@
 {
     int live = 0, free = 0, j = 0, abs;
     struct shadow_status *a;
+    int live_shadow_l1_pages,
+        live_shadow_l2_pages,
+        live_shadow_l3_pages,
+        live_shadow_l4_pages,
+        live_hl2_table_pages,
+        live_snapshot_pages,
+        live_writable_pte_predictions;
+
+    live_shadow_l1_pages=
+        live_shadow_l2_pages=
+        live_shadow_l3_pages=
+        live_shadow_l4_pages=
+        live_hl2_table_pages=
+        live_snapshot_pages=
+        live_writable_pte_predictions=0;
 
     for ( j = 0; j < shadow_ht_buckets; j++ )
     {
@@ -1045,11 +1062,37 @@
         if ( a->gpfn_and_flags )
         {
             live++;
+            switch(a->gpfn_and_flags & PGT_type_mask) {
+            case PGT_l1_shadow:
+                live_shadow_l1_pages++;
+                break;
+            case PGT_l2_shadow:
+                live_shadow_l2_pages++;
+                break;
+            case PGT_l3_shadow:
+                live_shadow_l3_pages++;
+                break;
+            case PGT_l4_shadow:
+                live_shadow_l4_pages++;
+                break;
+            case PGT_hl2_shadow:
+                live_hl2_table_pages++;
+                break;
+            case PGT_snapshot:
+                live_snapshot_pages++;
+                break;
+            case PGT_writable_pred:
+                live_writable_pte_predictions++;
+                break;
+            default:
+                BUG();
+            }
             ASSERT(a->smfn);
         }
         else
             ASSERT(!a->next);
+
         a = a->next;
         while ( a && (live < 9999) )
         {
@@ -1060,6 +1103,31 @@
                        live, a->gpfn_and_flags, a->smfn, a->next);
                 BUG();
             }
+            switch(a->gpfn_and_flags & PGT_type_mask) {
+            case PGT_l1_shadow:
+                live_shadow_l1_pages++;
+                break;
+            case PGT_l2_shadow:
+                live_shadow_l2_pages++;
+                break;
+            case PGT_l3_shadow:
+                live_shadow_l3_pages++;
+                break;
+            case PGT_l4_shadow:
+                live_shadow_l4_pages++;
+                break;
+            case PGT_hl2_shadow:
+                live_hl2_table_pages++;
+                break;
+            case PGT_snapshot:
+                live_snapshot_pages++;
+                break;
+            case PGT_writable_pred:
+                live_writable_pte_predictions++;
+                break;
+            default:
+                BUG();
+            }
             ASSERT(a->smfn);
             a = a->next;
         }
@@ -1085,13 +1153,21 @@
 #ifdef PERF_COUNTERS
     if ( (abs < -1) || (abs > 1) )
     {
-        printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
+        printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_pred=%d\n",
                live, free,
                perfc_value(shadow_l1_pages),
                perfc_value(shadow_l2_pages),
                perfc_value(hl2_table_pages),
                perfc_value(snapshot_pages),
                perfc_value(writable_pte_predictions));
+        printk("counted: l1=%d l2=%d l3=%d l4=%d hl2=%d snapshot=%d writable_pred=%d\n",
+               live_shadow_l1_pages,
+               live_shadow_l2_pages,
+               live_shadow_l3_pages,
+               live_shadow_l4_pages,
+               live_hl2_table_pages,
+               live_snapshot_pages,
+               live_writable_pte_predictions);
         BUG();
     }
 #endif
@@ -1194,7 +1270,7 @@
 #ifndef NDEBUG
     if ( ___shadow_status(d, gpfn, stype) != 0 )
     {
-        printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
+        printk("d->domain_id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
                "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
                d->domain_id, gpfn, gmfn, stype,
                frame_table[gmfn].count_info,
@@ -1441,7 +1517,8 @@
         if ( stype != PGT_writable_pred )
             BUG(); // we should never replace entries into the hash table
         x->smfn = smfn;
-        put_page(pfn_to_page(gmfn)); // already had a ref...
+        if ( stype != PGT_writable_pred )
+            put_page(pfn_to_page(gmfn)); // already had a ref...
         goto done;
     }
 
diff -urN --exclude=SCCS --exclude=BitKeeper xen-unstable.latest/xen/include/asm-x86/x86_32/domain_page.h xeno-ft/xen/include/asm-x86/x86_32/domain_page.h
--- xen-unstable.latest/xen/include/asm-x86/x86_32/domain_page.h	2005-06-04 18:07:26.000000000 -0400
+++ xeno-ft/xen/include/asm-x86/x86_32/domain_page.h	2005-06-04 18:14:41.000000000 -0400
@@ -77,4 +77,16 @@
     }
 }
 
+#define MAP_DOM_MEM_CACHE_INIT { .pa = 0 }
+
+static inline void
+unmap_domain_mem_cache(struct map_dom_mem_cache *cache)
+{
+    if ( likely(cache != NULL) && likely(cache->pa) )
+    {
+        unmap_domain_mem(cache->va);
+        cache->pa = 0;
+    }
+}
+
 #endif /* __ASM_DOMAIN_PAGE_H__ */
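Lastly, the new unmap_domain_mem_cache() completes the map-cache lifecycle that the shadow.c hunks above rely on: init, repeated map_domain_mem_with_cache() hits, unmap, then destroy. A standalone model of the helper's contract, with stand-in types invented for illustration, shows the intended idempotence:

    #include <stdio.h>

    /* Stand-in for the real struct in asm-x86/x86_32/domain_page.h. */
    struct map_dom_mem_cache { unsigned long pa; void *va; };
    #define MAP_DOM_MEM_CACHE_INIT { .pa = 0 }

    static void unmap_domain_mem(void *va) { printf("unmapped %p\n", va); }

    /* Mirrors the patch's helper: drop the cached mapping, if any. */
    static void unmap_domain_mem_cache(struct map_dom_mem_cache *cache)
    {
        if ( cache != NULL && cache->pa )
        {
            unmap_domain_mem(cache->va);
            cache->pa = 0;
        }
    }

    int main(void)
    {
        static char frame[4096];
        struct map_dom_mem_cache c = MAP_DOM_MEM_CACHE_INIT;

        c.pa = 0x1000; c.va = frame;  /* pretend a frame is mapped+cached */
        unmap_domain_mem_cache(&c);   /* drops the mapping */
        unmap_domain_mem_cache(&c);   /* second call is a harmless no-op */
        return 0;
    }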