x86: debugging code for testing 16Tb support on smaller memory systems DO NOT APPLY AS IS. Signed-off-by: Jan Beulich --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -66,8 +66,10 @@ void *map_domain_page(unsigned long mfn) struct mapcache_vcpu *vcache; struct vcpu_maphash_entry *hashent; +#ifdef NDEBUG if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return mfn_to_virt(mfn); +#endif v = mapcache_current_vcpu(); if ( !v || is_hvm_vcpu(v) ) @@ -139,6 +141,14 @@ void *map_domain_page(unsigned long mfn) if ( ++i == MAPHASH_ENTRIES ) i = 0; } while ( i != MAPHASH_HASHFN(mfn) ); +if(idx >= dcache->entries) {//temp + mapcache_domain_dump(v->domain); + for(i = 0; i < ARRAY_SIZE(vcache->hash); ++i) { + hashent = &vcache->hash[i]; + if(hashent->idx != MAPHASHENT_NOTINUSE) + printk("vc[%u]: ref=%u idx=%04x mfn=%08lx\n", i, hashent->refcnt, hashent->idx, hashent->mfn); + } +} } BUG_ON(idx >= dcache->entries); @@ -249,8 +259,10 @@ int mapcache_domain_init(struct domain * if ( is_hvm_domain(d) || is_idle_domain(d) ) return 0; +#ifdef NDEBUG if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return 0; +#endif dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1); d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf); @@ -418,8 +430,10 @@ void *map_domain_page_global(unsigned lo ASSERT(!in_irq() && local_irq_is_enabled()); +#ifdef NDEBUG if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return mfn_to_virt(mfn); +#endif spin_lock(&globalmap_lock); @@ -497,3 +511,26 @@ unsigned long domain_page_map_to_mfn(con return l1e_get_pfn(*pl1e); } + +void mapcache_domain_dump(struct domain *d) {//temp + unsigned i, n = 0; + const struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache; + const struct vcpu *v; + if(is_hvm_domain(d) || is_idle_domain(d)) + return; + for_each_vcpu(d, v) { + const struct mapcache_vcpu *vcache = &v->arch.pv_vcpu.mapcache; + for(i = 0; i < ARRAY_SIZE(vcache->hash); ++i) + 
n += (vcache->hash[i].idx != MAPHASHENT_NOTINUSE); + } + printk("Dom%d mc (#=%u v=%u) [%p]:\n", d->domain_id, n, d->max_vcpus, __builtin_return_address(0)); + for(i = 0; i < BITS_TO_LONGS(dcache->entries); ++i) + printk("dcu[%02x]: %016lx\n", i, dcache->inuse[i]); + for(i = 0; i < BITS_TO_LONGS(dcache->entries); ++i) + printk("dcg[%02x]: %016lx\n", i, dcache->garbage[i]); + for(i = 0; i < dcache->entries; ++i) { + l1_pgentry_t l1e = DCACHE_L1ENT(dcache, i); + if((test_bit(i, dcache->inuse) && !test_bit(i, dcache->garbage)) || (l1e_get_flags(l1e) & _PAGE_PRESENT)) + printk("dc[%04x]: %"PRIpte"\n", i, l1e_get_intpte(l1e)); + } +} --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -250,6 +250,14 @@ void __init init_frametable(void) init_spagetable(); } +#ifndef NDEBUG +static unsigned int __read_mostly root_pgt_pv_xen_slots + = ROOT_PAGETABLE_PV_XEN_SLOTS; +static l4_pgentry_t __read_mostly split_l4e; +#else +#define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS +#endif + void __init arch_init_memory(void) { unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn; @@ -344,6 +352,41 @@ void __init arch_init_memory(void) efi_init_memory(); mem_sharing_init(); + +#ifndef NDEBUG + if ( split_gb ) + { + paddr_t split_pa = split_gb * GB(1); + unsigned long split_va = (unsigned long)__va(split_pa); + + if ( split_va < HYPERVISOR_VIRT_END && + split_va - 1 == (unsigned long)__va(split_pa - 1) ) + { + root_pgt_pv_xen_slots = l4_table_offset(split_va) - + ROOT_PAGETABLE_FIRST_XEN_SLOT; + ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS); + if ( l4_table_offset(split_va) == l4_table_offset(split_va - 1) ) + { + l3_pgentry_t *l3tab = alloc_xen_pagetable(); + + if ( l3tab ) + { + const l3_pgentry_t *l3idle = + l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)]); + + for ( i = 0; i < l3_table_offset(split_va); ++i ) + l3tab[i] = l3idle[i]; + for ( ; i < L3_PAGETABLE_ENTRIES; ++i ) + l3tab[i] = l3e_empty(); + split_l4e = l4e_from_pfn(virt_to_mfn(l3tab), + 
__PAGE_HYPERVISOR); + } + else + ++root_pgt_pv_xen_slots; + } + } + } +#endif } int page_is_ram_type(unsigned long mfn, unsigned long mem_type) @@ -1320,7 +1363,12 @@ void init_guest_l4_table(l4_pgentry_t l4 /* Xen private mappings. */ memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT], &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], - ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t)); + root_pgt_pv_xen_slots * sizeof(l4_pgentry_t)); +#ifndef NDEBUG + if ( l4e_get_intpte(split_l4e) ) + l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots] = + split_l4e; +#endif l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setu s8 __read_mostly xen_cpuidle = -1; boolean_param("cpuidle", xen_cpuidle); +#ifndef NDEBUG +unsigned int __initdata split_gb; +integer_param("split-gb", split_gb); +#endif + cpumask_t __read_mostly cpu_present_map; unsigned long __read_mostly xen_phys_start; @@ -789,6 +794,11 @@ void __init __start_xen(unsigned long mb modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end); bootstrap_map(NULL); +#ifndef split_gb /* Don't allow split below 4Gb. */ + if ( split_gb < 4 ) + split_gb = 0; +#endif + for ( i = boot_e820.nr_map-1; i >= 0; i-- ) { uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1; @@ -917,6 +927,9 @@ void __init __start_xen(unsigned long mb /* Don't overlap with other modules. 
*/ end = consider_modules(s, e, size, mod, mbi->mods_count, j); + if ( split_gb && end > split_gb * GB(1) ) + continue; + if ( s < end && (headroom || ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) ) @@ -958,6 +971,8 @@ void __init __start_xen(unsigned long mb kexec_reserve_area(&boot_e820); setup_max_pdx(); + if ( split_gb ) + xenheap_max_mfn(split_gb << (30 - PAGE_SHIFT)); /* * Walk every RAM region and map it in its entirety (on x86/64, at least) @@ -1129,7 +1144,8 @@ void __init __start_xen(unsigned long mb unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); uint64_t mask = PAGE_SIZE - 1; - xenheap_max_mfn(limit); + if ( !split_gb ) + xenheap_max_mfn(limit); /* Pass the remaining memory to the allocator. */ for ( i = 0; i < boot_e820.nr_map; i++ ) --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -45,6 +45,7 @@ #include #ifdef CONFIG_X86 #include +#include /* for split_gb only */ #else #define p2m_pod_offline_or_broken_hit(pg) 0 #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg != NULL) @@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages( pg = (r->e - nr_pfns) & ~(pfn_align - 1); if ( pg < r->s ) continue; + +#if defined(CONFIG_X86) && !defined(NDEBUG) + /* + * Filtering pfn_align == 1 since the only allocations using a bigger + * alignment are the ones used for setting up the frame table chunks. + * Those allocations get remapped anyway, i.e. them not having 1:1 + * mappings always accessible is not a problem. 
+ */ + if ( split_gb && pfn_align == 1 && + r->e > (split_gb << (30 - PAGE_SHIFT)) ) + { + pg = r->s; + if ( pg + nr_pfns > (split_gb << (30 - PAGE_SHIFT)) ) + continue; + r->s = pg + nr_pfns; + return pg; + } +#endif + _e = r->e; r->e = pg; bootmem_region_add(pg + nr_pfns, _e); --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -72,6 +72,7 @@ struct mapcache_domain { int mapcache_domain_init(struct domain *); void mapcache_domain_exit(struct domain *); +void mapcache_domain_dump(struct domain *);//temp int mapcache_vcpu_init(struct vcpu *); void mapcache_override_current(struct vcpu *); --- a/xen/include/asm-x86/setup.h +++ b/xen/include/asm-x86/setup.h @@ -43,4 +43,10 @@ void microcode_grab_module( extern uint8_t kbd_shift_flags; +#ifdef NDEBUG +# define split_gb 0 +#else +extern unsigned int split_gb; +#endif + #endif