# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Node ID 796aadb735e04f2beadcf1bb5be599829099438c # Parent 66dac877aa4264c42188a3cee6176cef940fd344 make domain0 builder for dom0 vp model. PATCHNAME: dom0_construct_xen Signed-off-by: Isaku Yamahata Signed-off-by: Alex Williamson diff -r 66dac877aa42 -r 796aadb735e0 xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Mon Apr 24 22:28:00 2006 +0900 +++ b/xen/arch/ia64/xen/dom_fw.c Mon Apr 24 22:28:02 2006 +0900 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -48,12 +49,25 @@ dom_pa(unsigned long imva) return dom_fw_base_mpa + (imva - imva_fw_base); } +// allocate a page for fw +// build_physmap_table() which is called by new_thread() +// does for domU. +#define ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, mpaddr) \ + do { \ + if ((d) == dom0) { \ + assign_new_domain0_page((d), (mpaddr)); \ + } \ + } while (0) + // builds a hypercall bundle at domain physical address static void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall) { unsigned long *imva; +#ifndef CONFIG_XEN_IA64_DOM0_VP if (d == dom0) paddr += dom0_start; +#endif + ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, paddr); imva = (unsigned long *) domain_mpa_to_imva(d, paddr); build_hypercall_bundle(imva, d->arch.breakimm, hypercall, 1); } @@ -64,6 +78,7 @@ static void dom_fw_hypercall_patch(struc { unsigned long *imva; + ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, paddr); imva = (unsigned long *) domain_mpa_to_imva(d, paddr); build_hypercall_bundle(imva, d->arch.breakimm, hypercall, ret); } @@ -72,6 +87,7 @@ static void dom_fw_pal_hypercall_patch(s { unsigned long *imva; + ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, paddr); imva = (unsigned long *) domain_mpa_to_imva(d, paddr); build_pal_hypercall_bundles(imva, d->arch.breakimm, FW_HYPERCALL_PAL_CALL); } @@ -85,7 +101,10 @@ unsigned long dom_fw_setup(struct domain struct ia64_boot_param *bp; dom_fw_base_mpa = 0; +#ifndef CONFIG_XEN_IA64_DOM0_VP if (d == dom0) dom_fw_base_mpa += dom0_start; +#endif + ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, dom_fw_base_mpa); imva_fw_base = domain_mpa_to_imva(d, dom_fw_base_mpa); bp = dom_fw_init(d, args, arglen, (char *) imva_fw_base, PAGE_SIZE); return dom_pa((unsigned long) bp); @@ -645,7 +664,75 @@ dom_fw_fake_acpi(struct domain *d, struc } #define NUM_EFI_SYS_TABLES 6 -#define NUM_MEM_DESCS 5 +#define NUM_MEM_DESCS 64 //large enough + +struct dom0_passthrough_arg { +#ifdef CONFIG_XEN_IA64_DOM0_VP + struct domain* d; +#endif + efi_memory_desc_t *md; + int* i; +}; + +static int +dom_fw_dom0_passthrough(efi_memory_desc_t *md, void *arg__) +{ + struct dom0_passthrough_arg* arg = (struct dom0_passthrough_arg*)arg__; + unsigned long paddr; + +#ifdef CONFIG_XEN_IA64_DOM0_VP + struct domain* d = arg->d; + u64 start = md->phys_addr; + u64 end = start + (md->num_pages << EFI_PAGE_SHIFT); + + if (md->type == EFI_MEMORY_MAPPED_IO || + md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { + + //XXX some machine has large mmio area whose size is about several TB. + // It requires impractical memory to map such a huge region + // to a domain. + // For now we don't map it, but later we must fix this. + if (md->type == EFI_MEMORY_MAPPED_IO && + ((md->num_pages << EFI_PAGE_SHIFT) > 0x100000000UL)) + return 0; + + paddr = assign_domain_mmio_page(d, start, end - start); + } else + paddr = assign_domain_mach_page(d, start, end - start); +#else + paddr = md->phys_addr; +#endif + + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA && + md->type != EFI_ACPI_RECLAIM_MEMORY && + md->type != EFI_MEMORY_MAPPED_IO && + md->type != EFI_MEMORY_MAPPED_IO_PORT_SPACE); + + arg->md->type = md->type; + arg->md->pad = 0; + arg->md->phys_addr = paddr; + arg->md->virt_addr = 0; + arg->md->num_pages = md->num_pages; + arg->md->attribute = md->attribute; + print_md(arg->md); + + (*arg->i)++; + arg->md++; + return 0; +} + +static int +efi_mdt_cmp(const void *a, const void *b) +{ + const efi_memory_desc_t *x = a, *y = b; + + if (x->phys_addr > y->phys_addr) + return 1; + if (x->phys_addr < y->phys_addr) + return -1; + return 0; +} static struct ia64_boot_param * dom_fw_init (struct domain *d, const char *args, int arglen, char *fw_mem, int fw_mem_size) @@ -663,7 +750,11 @@ dom_fw_init (struct domain *d, const cha char *cp, *cmd_line, *fw_vendor; int i = 0; unsigned long maxmem = (d->max_pages - d->arch.sys_pgnr) * PAGE_SIZE; +#ifdef CONFIG_XEN_IA64_DOM0_VP + const unsigned long start_mpaddr = 0; +#else const unsigned long start_mpaddr = ((d==dom0)?dom0_start:0); +#endif # define MAKE_MD(typ, attr, start, end, abs) \ do { \ @@ -751,11 +842,17 @@ dom_fw_init (struct domain *d, const cha efi_tables[i].table = 0; } if (d == dom0) { +#ifdef CONFIG_XEN_IA64_DOM0_VP +# define ASSIGN_DOMAIN_MACH_PAGE(d, p) assign_domain_mach_page(d, p, PAGE_SIZE) +#else +# define ASSIGN_DOMAIN_MACH_PAGE(d, p) ({p;}) +#endif + printf("Domain0 EFI passthrough:"); i = 1; if (efi.mps) { efi_tables[i].guid = MPS_TABLE_GUID; - efi_tables[i].table = __pa(efi.mps); + efi_tables[i].table = ASSIGN_DOMAIN_MACH_PAGE(d, __pa(efi.mps)); printf(" MPS=0x%lx",efi_tables[i].table); i++; } @@ -764,25 +861,25 @@ dom_fw_init (struct domain *d, const cha if (efi.acpi20) { efi_tables[i].guid = ACPI_20_TABLE_GUID; - efi_tables[i].table = __pa(efi.acpi20); + efi_tables[i].table = ASSIGN_DOMAIN_MACH_PAGE(d, __pa(efi.acpi20)); printf(" ACPI 2.0=0x%lx",efi_tables[i].table); i++; } if (efi.acpi) { efi_tables[i].guid = ACPI_TABLE_GUID; - efi_tables[i].table = __pa(efi.acpi); + efi_tables[i].table = ASSIGN_DOMAIN_MACH_PAGE(d, __pa(efi.acpi)); printf(" ACPI=0x%lx",efi_tables[i].table); i++; } if (efi.smbios) { efi_tables[i].guid = SMBIOS_TABLE_GUID; - efi_tables[i].table = __pa(efi.smbios); + efi_tables[i].table = ASSIGN_DOMAIN_MACH_PAGE(d, __pa(efi.smbios)); printf(" SMBIOS=0x%lx",efi_tables[i].table); i++; } if (efi.hcdp) { efi_tables[i].guid = HCDP_TABLE_GUID; - efi_tables[i].table = __pa(efi.hcdp); + efi_tables[i].table = ASSIGN_DOMAIN_MACH_PAGE(d, __pa(efi.hcdp)); printf(" HCDP=0x%lx",efi_tables[i].table); i++; } @@ -835,6 +932,7 @@ dom_fw_init (struct domain *d, const cha i = 0; if (d == dom0) { +#ifndef CONFIG_XEN_IA64_DOM0_VP /* * This is a bad hack. Dom0 may share other domains' memory * through a dom0 physical address. Unfortunately, this @@ -849,31 +947,42 @@ dom_fw_init (struct domain *d, const cha unsigned long last_end = last_start + IA64_GRANULE_SIZE; /* simulate 1MB free memory at physical address zero */ - MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0); + MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX +#endif /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0); - MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE, 0); + MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE, 0);//XXX make sure this doesn't overlap on i/o, runtime area. +#ifndef CONFIG_XEN_IA64_DOM0_VP /* hack */ MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,last_start,last_end,1); +#endif /* pass through the I/O port space */ if (!running_on_sim) { - efi_memory_desc_t *efi_get_io_md(void); - efi_memory_desc_t *ia64_efi_io_md; - u32 type; - u64 iostart, ioend, ioattr; - - ia64_efi_io_md = efi_get_io_md(); - type = ia64_efi_io_md->type; - iostart = ia64_efi_io_md->phys_addr; - ioend = ia64_efi_io_md->phys_addr + - (ia64_efi_io_md->num_pages << 12); - ioattr = ia64_efi_io_md->attribute; - MAKE_MD(type,ioattr,iostart,ioend, 1); + struct dom0_passthrough_arg arg; +#ifdef CONFIG_XEN_IA64_DOM0_VP + arg.d = d; +#endif + arg.md = &efi_memmap[i]; + arg.i = &i; + //XXX Is this needed? + efi_memmap_walk_type(EFI_RUNTIME_SERVICES_CODE, + dom_fw_dom0_passthrough, &arg); + // for ACPI table. + efi_memmap_walk_type(EFI_RUNTIME_SERVICES_DATA, + dom_fw_dom0_passthrough, &arg); + efi_memmap_walk_type(EFI_ACPI_RECLAIM_MEMORY, + dom_fw_dom0_passthrough, &arg); + efi_memmap_walk_type(EFI_MEMORY_MAPPED_IO, + dom_fw_dom0_passthrough, &arg); + efi_memmap_walk_type(EFI_MEMORY_MAPPED_IO_PORT_SPACE, + dom_fw_dom0_passthrough, &arg); } else MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); } else { +#ifndef CONFIG_XEN_IA64_DOM0_VP MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 1); +#endif /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 1); MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1); @@ -884,9 +993,12 @@ dom_fw_init (struct domain *d, const cha MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); } + sort(efi_memmap, i, sizeof(efi_memory_desc_t), efi_mdt_cmp, NULL); + bp->efi_systab = dom_pa((unsigned long) fw_mem); bp->efi_memmap = dom_pa((unsigned long) efi_memmap); - bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + BUG_ON(i > NUM_MEM_DESCS); + bp->efi_memmap_size = i * sizeof(efi_memory_desc_t); bp->efi_memdesc_size = sizeof(efi_memory_desc_t); bp->efi_memdesc_version = 1; bp->command_line = dom_pa((unsigned long) cmd_line); @@ -896,6 +1008,8 @@ dom_fw_init (struct domain *d, const cha bp->console_info.orig_y = 24; bp->fpswa = 0; if (d == dom0) { + // XXX CONFIG_XEN_IA64_DOM0_VP + // initrd_start address is hard coded in start_kernel() bp->initrd_start = (dom0_start+dom0_size) - (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024); bp->initrd_size = ia64_boot_param->initrd_size; diff -r 66dac877aa42 -r 796aadb735e0 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Apr 24 22:28:00 2006 +0900 +++ b/xen/arch/ia64/xen/domain.c Mon Apr 24 22:28:02 2006 +0900 @@ -1073,10 +1073,10 @@ static void loaddomainelfimage(struct do void alloc_dom0(void) { + if (platform_is_hp_ski()) { + dom0_size = 128*1024*1024; //FIXME: Should be configurable + } #ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (platform_is_hp_ski()) { - dom0_size = 128*1024*1024; //FIXME: Should be configurable - } printf("alloc_dom0: starting (initializing %lu MB...)\n",dom0_size/(1024*1024)); /* FIXME: The first trunk (say 256M) should always be assigned to @@ -1093,6 +1093,8 @@ void alloc_dom0(void) } printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start); #else + // no need to allocate pages for now + // pages are allocated by map_new_domain_page() via loaddomainelfimage() dom0_start = 0; #endif @@ -1123,6 +1125,7 @@ int construct_dom0(struct domain *d, unsigned long alloc_start, alloc_end; start_info_t *si; struct vcpu *v = d->vcpu[0]; + unsigned long max_pages; struct domain_setup_info dsi; unsigned long p_start; @@ -1131,11 +1134,8 @@ int construct_dom0(struct domain *d, unsigned long pkern_end; unsigned long pinitrd_start = 0; unsigned long pstart_info; -#if 0 - char *dst; - unsigned long nr_pt_pages; - unsigned long count; -#endif + struct page_info *start_info_page; + #ifdef VALIDATE_VT unsigned long mfn; struct page_info *page = NULL; @@ -1154,7 +1154,13 @@ int construct_dom0(struct domain *d, alloc_start = dom0_start; alloc_end = dom0_start + dom0_size; - d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE; + max_pages = dom0_size / PAGE_SIZE; + d->max_pages = max_pages; +#ifndef CONFIG_XEN_IA64_DOM0_VP + d->tot_pages = d->max_pages; +#else + d->tot_pages = 0; +#endif dsi.image_addr = (unsigned long)image_start; dsi.image_len = image_len; rc = parseelfimage(&dsi); @@ -1191,15 +1197,28 @@ int construct_dom0(struct domain *d, return -EINVAL; } - if(initrd_start&&initrd_len){ - pinitrd_start=(dom0_start+dom0_size) - - (PAGE_ALIGN(initrd_len) + 4*1024*1024); - - memcpy(__va(pinitrd_start), (void *) initrd_start, initrd_len); - pstart_info = PAGE_ALIGN(pinitrd_start + initrd_len); - } else { - pstart_info = PAGE_ALIGN(pkern_end); + pstart_info = PAGE_ALIGN(pkern_end); + if(initrd_start && initrd_len){ + unsigned long offset; + + pinitrd_start= (dom0_start + dom0_size) - + (PAGE_ALIGN(initrd_len) + 4*1024*1024); + if (pinitrd_start <= pstart_info) + panic("%s:enough memory is not assigned to dom0", __func__); + + for (offset = 0; offset < initrd_len; offset += PAGE_SIZE) { + struct page_info *p; + p = assign_new_domain_page(d, pinitrd_start + offset); + if (p == NULL) + panic("%s: can't allocate page for initrd image", __func__); + if (initrd_len < offset + PAGE_SIZE) + memcpy(page_to_virt(p), (void*)(initrd_start + offset), + initrd_len - offset); + else + copy_page(page_to_virt(p), + (void*)(initrd_start + offset)); } + } printk("METAPHYSICAL MEMORY ARRANGEMENT:\n" " Kernel image: %lx->%lx\n" @@ -1209,12 +1228,12 @@ int construct_dom0(struct domain *d, pkern_start, pkern_end, pkern_entry, pinitrd_start, initrd_len, pstart_info, pstart_info + PAGE_SIZE); - if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) ) + if ( (pkern_end - pkern_start) > (max_pages * PAGE_SIZE) ) { printk("Initial guest OS requires too much space\n" "(%luMB is greater than %luMB limit)\n", - (pkern_end-pkern_start)>>20, - (unsigned long) (d->max_pages<>20); + (pkern_end-pkern_start)>>20, + (max_pages <>20); return -ENOMEM; } @@ -1222,13 +1241,6 @@ int construct_dom0(struct domain *d, // if pkern end is after end of metaphysical memory, error // (we should be able to deal with this... later) - - - // - -#if 0 - strcpy(d->name,"Domain0"); -#endif /* Mask all upcalls... */ for ( i = 1; i < MAX_VIRT_CPUS; i++ ) @@ -1246,7 +1258,7 @@ int construct_dom0(struct domain *d, if (alloc_vcpu(d, i, i) == NULL) printf ("Cannot allocate dom0 vcpu %d\n", i); -#ifdef VALIDATE_VT +#if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP) /* Construct a frame-allocation list for the initial domain, since these * pages are allocated by boot allocator and pfns are not set properly */ @@ -1261,9 +1273,8 @@ int construct_dom0(struct domain *d, list_add_tail(&page->list, &d->page_list); /* Construct 1:1 mapping */ - machine_to_phys_mapping[mfn] = mfn; - } - + set_gpfn_from_mfn(mfn, mfn); + } #endif /* Copy the OS image. */ @@ -1276,41 +1287,14 @@ int construct_dom0(struct domain *d, /* Set up start info area. */ d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT; - si = __va(pstart_info); + start_info_page = assign_new_domain_page(d, pstart_info); + if (start_info_page == NULL) + panic("can't allocate start info page"); + si = page_to_virt(start_info_page); memset(si, 0, PAGE_SIZE); sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION); - si->nr_pages = d->tot_pages; - -#if 0 - si->shared_info = virt_to_maddr(d->shared_info); - si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; - //si->pt_base = vpt_start; - //si->nr_pt_frames = nr_pt_pages; - //si->mfn_list = vphysmap_start; - - if ( initrd_len != 0 ) - { - //si->mod_start = vinitrd_start; - si->mod_len = initrd_len; - printk("Initrd len 0x%lx, start at 0x%08lx\n", - si->mod_len, si->mod_start); - } - - dst = si->cmd_line; - if ( cmdline != NULL ) - { - for ( i = 0; i < 255; i++ ) - { - if ( cmdline[i] == '\0' ) - break; - *dst++ = cmdline[i]; - } - } - *dst = '\0'; - - zap_low_mappings(); /* Do the same for the idle page tables. */ -#endif - + si->nr_pages = max_pages; + /* Give up the VGA console if DOM0 is configured to grab it. */ if (cmdline != NULL) console_endboot(strstr(cmdline, "tty0") != NULL); @@ -1327,6 +1311,14 @@ int construct_dom0(struct domain *d, new_thread(v, pkern_entry, 0, 0); physdev_init_dom0(d); + // dom0 doesn't need build_physmap_table() + // see arch_set_info_guest() + // instead we allocate pages manually. + for (i = 0; i < max_pages; i++) { + assign_new_domain0_page(d, i << PAGE_SHIFT); + } + d->arch.physmap_built = 1; + // FIXME: Hack for keyboard input //serial_input_init(); diff -r 66dac877aa42 -r 796aadb735e0 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Mon Apr 24 22:28:00 2006 +0900 +++ b/xen/arch/ia64/xen/xensetup.c Mon Apr 24 22:28:02 2006 +0900 @@ -294,11 +294,13 @@ void start_kernel(void) max_page = 0; efi_memmap_walk(find_max_pfn, &max_page); printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); +#ifndef CONFIG_XEN_IA64_DOM0_VP /* this is a bad hack. see dom_fw.c creation of EFI map for dom0 */ max_page = (GRANULEROUNDDOWN(max_page << PAGE_SHIFT) - IA64_GRANULE_SIZE) >> PAGE_SHIFT; printf("find_memory: last granule reserved for dom0; xen max_page=%lx\n", max_page); +#endif efi_print(); heap_start = memguard_init(ia64_imva(&_end));