# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID a4ce0ba0f8ffc0a73fb8733ec0301d0d3d55abbb
# Parent a51fcb5de4700ffbd94e84759907c34fdfef6f35
# Parent da7873110bbb8b55d9adb9111d100e209fc49ee6
Merged.
diff -r a51fcb5de470 -r a4ce0ba0f8ff linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Tue Jan 10 14:23:56 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Tue Jan 10 14:24:12 2006
@@ -109,7 +109,7 @@
size_t len, loff_t *ppos)
{
struct xenbus_dev_data *u = filp->private_data;
- struct xenbus_dev_transaction *trans;
+ struct xenbus_dev_transaction *trans = NULL;
void *reply;
if ((len + u->len) > sizeof(u->u.buffer))
@@ -134,14 +134,19 @@
case XS_MKDIR:
case XS_RM:
case XS_SET_PERMS:
- reply = xenbus_dev_request_and_reply(&u->u.msg);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
if (u->u.msg.type == XS_TRANSACTION_START) {
trans = kmalloc(sizeof(*trans), GFP_KERNEL);
if (!trans)
return -ENOMEM;
+ }
+
+ reply = xenbus_dev_request_and_reply(&u->u.msg);
+ if (IS_ERR(reply)) {
+ kfree(trans);
+ return PTR_ERR(reply);
+ }
+
+ if (u->u.msg.type == XS_TRANSACTION_START) {
trans->handle = (struct xenbus_transaction *)
simple_strtoul(reply, NULL, 0);
list_add(&trans->list, &u->transactions);
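The xenbus_dev.c hunk above reorders work so the transaction struct is allocated before the xenstore request is issued: previously, if kmalloc() failed after xenbus_dev_request_and_reply() had already succeeded, a freshly started transaction was stranded with no record of it; and a failed request now frees the pre-allocated struct, which is safe because kfree(NULL) is a no-op. A minimal user-space sketch of the same allocate-before-commit pattern; do_request() and the int handle are hypothetical stand-ins, not the kernel API:

#include <stdlib.h>
#include <string.h>

struct transaction { int handle; };

/* Hypothetical request helper standing in for the xenstore call;
 * returns NULL on failure. */
static char *do_request(const char *msg)
{
    return strdup(msg);
}

int start_transaction_sketch(int is_transaction_start)
{
    struct transaction *trans = NULL;
    char *reply;

    /* Allocate *before* the side-effecting request, so an allocation
     * failure cannot strand work the far side already committed... */
    if (is_transaction_start) {
        trans = malloc(sizeof(*trans));
        if (!trans)
            return -1;              /* -ENOMEM in the kernel version */
    }

    reply = do_request("transaction_start");
    if (!reply) {
        free(trans);                /* free(NULL) is a safe no-op */
        return -1;
    }

    /* ...and the success path can commit without a failure branch. */
    if (is_transaction_start) {
        trans->handle = atoi(reply);
        free(trans);                /* sketch only; the kernel keeps it on a list */
    }
    free(reply);
    return 0;
}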
diff -r a51fcb5de470 -r a4ce0ba0f8ff tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
--- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py Tue Jan 10 14:23:56 2006
+++ b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py Tue Jan 10 14:24:12 2006
@@ -46,6 +46,8 @@
if not checkXmLongList(domain):
FAIL("xm long list does not show that hda1 was attached")
+time.sleep(2)
+
s, o = traceCommand("xm block-detach %s hda1" % domain.getName())
if s != 0:
FAIL("block-detach failed")
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/ia64/linux-xen/smpboot.c
--- a/xen/arch/ia64/linux-xen/smpboot.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/ia64/linux-xen/smpboot.c Tue Jan 10 14:24:12 2006
@@ -484,7 +484,6 @@
if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
panic("failed 'createdomain' for CPU %d", cpu);
- set_bit(_DOMF_idle_domain, &idle->domain_flags);
v = idle->vcpu[0];
printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v);
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/ia64/xen/idle0_task.c
--- a/xen/arch/ia64/xen/idle0_task.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/ia64/xen/idle0_task.c Tue Jan 10 14:24:12 2006
@@ -22,7 +22,6 @@
#define IDLE0_DOMAIN(_t) \
{ \
domain_id: IDLE_DOMAIN_ID, \
- domain_flags:DOMF_idle_domain, \
refcnt: ATOMIC_INIT(1) \
}
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/ia64/xen/xensetup.c Tue Jan 10 14:24:12 2006
@@ -26,7 +26,7 @@
char saved_command_line[COMMAND_LINE_SIZE];
-struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_vcpu[NR_CPUS] = { &idle0_vcpu };
cpumask_t cpu_present_map;
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/domain.c Tue Jan 10 14:24:12 2006
@@ -91,11 +91,9 @@
{
struct vcpu *v = current;
- ASSERT(is_idle_domain(v->domain));
- percpu_ctxt[smp_processor_id()].curr_vcpu = v;
+ ASSERT(is_idle_vcpu(v));
cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
- v->arch.schedule_tail = continue_idle_domain;
reset_stack_and_jump(idle_loop);
}
@@ -217,14 +215,20 @@
memset(v, 0, sizeof(*v));
- memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
+ memcpy(&v->arch, &idle_vcpu[0]->arch, sizeof(v->arch));
v->arch.flags = TF_kernel_mode;
+
+ if ( is_idle_domain(d) )
+ {
+ percpu_ctxt[vcpu_id].curr_vcpu = v;
+ v->arch.schedule_tail = continue_idle_domain;
+ }
if ( (v->vcpu_id = vcpu_id) != 0 )
{
v->arch.schedule_tail = d->vcpu[0]->arch.schedule_tail;
v->arch.perdomain_ptes =
- d->arch.mm_perdomain_pt + (vcpu_id << PDPT_VCPU_SHIFT);
+ d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT);
}
return v;
@@ -259,31 +263,11 @@
int i;
#endif
- if ( is_idle_domain(d) )
- return 0;
-
- d->arch.ioport_caps =
- rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
- if ( d->arch.ioport_caps == NULL )
- return -ENOMEM;
-
- if ( (d->shared_info = alloc_xenheap_page()) == NULL )
- return -ENOMEM;
-
- if ( (rc = ptwr_init(d)) != 0 )
- {
- free_xenheap_page(d->shared_info);
- return rc;
- }
-
- v->arch.schedule_tail = continue_nonidle_domain;
-
- memset(d->shared_info, 0, PAGE_SIZE);
- v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
- SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
-
pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
+ if ( d->arch.mm_perdomain_pt == NULL )
+ goto fail_nomem;
+
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
@@ -296,34 +280,75 @@
*/
gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
- d->arch.mm_perdomain_pt[
- (vcpuid << PDPT_VCPU_SHIFT) + FIRST_RESERVED_GDT_PAGE] = gdt_l1e;
+ d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
v->arch.guest_vtable = __linear_l2_table;
v->arch.shadow_vtable = __shadow_linear_l2_table;
-#ifdef __x86_64__
+#if defined(__i386__)
+
+ d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
+ (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+ spin_lock_init(&d->arch.mapcache.lock);
+
+#else /* __x86_64__ */
+
v->arch.guest_vl3table = __linear_l3_table;
v->arch.guest_vl4table = __linear_l4_table;
d->arch.mm_perdomain_l2 = alloc_xenheap_page();
+ d->arch.mm_perdomain_l3 = alloc_xenheap_page();
+ if ( (d->arch.mm_perdomain_l2 == NULL) ||
+ (d->arch.mm_perdomain_l3 == NULL) )
+ goto fail_nomem;
+
memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
for ( i = 0; i < (1 << pdpt_order); i++ )
d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
__PAGE_HYPERVISOR);
- d->arch.mm_perdomain_l3 = alloc_xenheap_page();
memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE);
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
__PAGE_HYPERVISOR);
-#endif
+
+#endif /* __x86_64__ */
shadow_lock_init(d);
INIT_LIST_HEAD(&d->arch.free_shadow_frames);
+ if ( !is_idle_domain(d) )
+ {
+ d->arch.ioport_caps =
+ rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ if ( d->arch.ioport_caps == NULL )
+ goto fail_nomem;
+
+ if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+ goto fail_nomem;
+
+ if ( (rc = ptwr_init(d)) != 0 )
+ goto fail_nomem;
+
+ memset(d->shared_info, 0, PAGE_SIZE);
+ v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
+
+ v->arch.schedule_tail = continue_nonidle_domain;
+ }
+
return 0;
+
+ fail_nomem:
+ free_xenheap_page(d->shared_info);
+#ifdef __x86_64__
+ free_xenheap_page(d->arch.mm_perdomain_l2);
+ free_xenheap_page(d->arch.mm_perdomain_l3);
+#endif
+ free_xenheap_pages(d->arch.mm_perdomain_pt, pdpt_order);
+ return -ENOMEM;
}
/* This is called by arch_final_setup_guest and do_boot_vcpu */
@@ -689,7 +714,10 @@
struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
struct vcpu *n = current;
- if ( !is_idle_domain(p->domain) )
+ ASSERT(p != n);
+ ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
+
+ if ( !is_idle_vcpu(p) )
{
memcpy(&p->arch.guest_context.user_regs,
stack_regs,
@@ -698,7 +726,7 @@
save_segments(p);
}
- if ( !is_idle_domain(n->domain) )
+ if ( !is_idle_vcpu(n) )
{
memcpy(stack_regs,
&n->arch.guest_context.user_regs,
@@ -748,24 +776,31 @@
void context_switch(struct vcpu *prev, struct vcpu *next)
{
unsigned int cpu = smp_processor_id();
+ cpumask_t dirty_mask = next->vcpu_dirty_cpumask;
ASSERT(local_irq_is_enabled());
+ /* Allow at most one CPU at a time to be dirty. */
+ ASSERT(cpus_weight(dirty_mask) <= 1);
+ if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) )
+ {
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+ flush_tlb_mask(dirty_mask);
+ }
+
+ local_irq_disable();
+
set_current(next);
- if ( (percpu_ctxt[cpu].curr_vcpu != next) &&
- !is_idle_domain(next->domain) )
- {
- /* This may happen if next has been migrated by the scheduler. */
- if ( unlikely(!cpus_empty(next->vcpu_dirty_cpumask)) )
- {
- ASSERT(!cpu_isset(cpu, next->vcpu_dirty_cpumask));
- sync_vcpu_execstate(next);
- ASSERT(cpus_empty(next->vcpu_dirty_cpumask));
- }
-
- local_irq_disable();
+ if ( (percpu_ctxt[cpu].curr_vcpu == next) || is_idle_vcpu(next) )
+ {
+ local_irq_enable();
+ }
+ else
+ {
__context_switch();
+
+ /* Re-enable interrupts before restoring state which may fault. */
local_irq_enable();
if ( VMX_DOMAIN(next) )
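The rewritten context_switch() above makes the lazy-switching invariant explicit: a vcpu's register state may be dirty on at most one CPU, and if that CPU is not the one about to run it, flush_tlb_mask() sends an IPI whose handler calls __sync_lazy_execstate() to pull the state back before the switch commits. A compacted sketch of just that check, assuming a single machine word stands in for cpumask_t and a print stands in for the IPI:

#include <assert.h>
#include <stdio.h>

typedef unsigned long cpumask_t;            /* sketch: one word of CPUs */
#define cpus_weight(m)   __builtin_popcountl(m)
#define cpu_isset(c, m)  (((m) >> (c)) & 1UL)
#define cpus_empty(m)    ((m) == 0UL)

/* Hypothetical stand-in: in the hunk this is flush_tlb_mask(), whose
 * IPI handler runs __sync_lazy_execstate() on the remote CPU. */
static void flush_remote(cpumask_t mask)
{
    printf("IPI to mask %#lx\n", mask);
}

void context_switch_sketch(unsigned int cpu, cpumask_t next_dirty)
{
    /* Invariant from the hunk: at most one CPU holds dirty state. */
    assert(cpus_weight(next_dirty) <= 1);

    /* Dirty somewhere else? Pull the state back before running. */
    if (!cpu_isset(cpu, next_dirty) && !cpus_empty(next_dirty))
        flush_remote(next_dirty);
}

int main(void)
{
    context_switch_sketch(0, 1UL << 2);     /* dirty on CPU2, running on CPU0 */
    return 0;
}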
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/domain_build.c Tue Jan 10 14:24:12 2006
@@ -366,26 +366,19 @@
l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
}
- {
- unsigned long va;
- for (va = PERDOMAIN_VIRT_START; va < PERDOMAIN_VIRT_END;
- va += (1 << L2_PAGETABLE_SHIFT)) {
- l2tab[va >> L2_PAGETABLE_SHIFT] =
- l2e_from_paddr(__pa(d->arch.mm_perdomain_pt) +
- (va-PERDOMAIN_VIRT_START),
- __PAGE_HYPERVISOR);
- }
- }
v->arch.guest_table = mk_pagetable((unsigned long)l3start);
#else
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
- l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
v->arch.guest_table = mk_pagetable((unsigned long)l2start);
#endif
+
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
l2tab += l2_linear_offset(dsi.v_start);
mfn = alloc_spfn;
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/mm.c Tue Jan 10 14:24:12 2006
@@ -841,10 +841,11 @@
L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
- pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l2e_from_page(
- virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
- __PAGE_HYPERVISOR);
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(
+ virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
#endif
unmap_domain_page(pl2e);
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/setup.c Tue Jan 10 14:24:12 2006
@@ -81,6 +81,10 @@
extern void initialize_keytable(void);
extern void early_cpu_init(void);
+struct tss_struct init_tss[NR_CPUS];
+
+struct vcpu *idle_vcpu[NR_CPUS];
+
extern unsigned long cpu0_stack[];
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -91,8 +95,6 @@
unsigned long mmu_cr4_features = X86_CR4_PSE;
#endif
EXPORT_SYMBOL(mmu_cr4_features);
-
-struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
int acpi_disabled;
@@ -144,8 +146,8 @@
void __init __start_xen(multiboot_info_t *mbi)
{
- unsigned long vgdt, gdt_pfn;
char *cmdline;
+ struct domain *idle_domain;
unsigned long _initrd_start = 0, _initrd_len = 0;
unsigned int initrdidx = 1;
module_t *mod = (module_t *)__va(mbi->mods_addr);
@@ -163,9 +165,8 @@
if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
cmdline_parse(__va(mbi->cmdline));
- /* Must do this early -- e.g., spinlocks rely on get_current(). */
- set_current(&idle0_vcpu);
- set_processor_id(0);
+ set_current((struct vcpu *)0xfffff000); /* debug sanity */
+ set_processor_id(0); /* needed early, for smp_processor_id() */
smp_prepare_boot_cpu();
@@ -382,6 +383,14 @@
early_cpu_init();
+ scheduler_init();
+
+ idle_domain = do_createdomain(IDLE_DOMAIN_ID, 0);
+ BUG_ON(idle_domain == NULL);
+
+ set_current(idle_domain->vcpu[0]);
+ idle_vcpu[0] = current;
+
paging_init();
/* Unmap the first page of CPU0's stack. */
@@ -394,21 +403,6 @@
sort_exception_tables();
- if ( arch_do_createdomain(current) != 0 )
- BUG();
-
- /*
- * Map default GDT into its final positions in the idle page table. As
- * noted in arch_do_createdomain(), we must map for every possible VCPU#.
- */
- vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
- gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- {
- map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
- vgdt += 1 << PDPT_VCPU_VA_SHIFT;
- }
-
find_smp_config();
smp_alloc_memory();
@@ -434,8 +428,6 @@
early_time_init();
arch_init_memory();
-
- scheduler_init();
identify_cpu(&boot_cpu_data);
if ( cpu_has_fxsr )
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/smpboot.c Tue Jan 10 14:24:12 2006
@@ -435,7 +435,7 @@
extern void percpu_traps_init(void);
- set_current(idle_domain[cpu]);
+ set_current(idle_vcpu[cpu]);
set_processor_id(cpu);
percpu_traps_init();
@@ -761,7 +761,6 @@
* Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
*/
{
- struct domain *idle;
struct vcpu *v;
unsigned long boot_error;
int timeout, cpu;
@@ -770,14 +769,10 @@
cpu = ++cpucount;
- if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
- panic("failed 'createdomain' for CPU %d", cpu);
-
- v = idle_domain[cpu] = idle->vcpu[0];
-
- set_bit(_DOMF_idle_domain, &idle->domain_flags);
-
- v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
+ BUG_ON(v == NULL);
+
+ v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/traps.c Tue Jan 10 14:24:12 2006
@@ -427,7 +427,7 @@
tb->flags |= TBF_INTERRUPT;
}
-static int handle_perdomain_mapping_fault(
+static int handle_gdt_ldt_mapping_fault(
unsigned long offset, struct cpu_user_regs *regs)
{
extern int map_ldt_shadow_page(unsigned int);
@@ -437,14 +437,14 @@
int ret;
/* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
- unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
- unsigned int vcpu_area = (offset >> PDPT_VCPU_VA_SHIFT);
+ unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
+ unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
/* Should never fault in another vcpu's area. */
BUG_ON(vcpu_area != current->vcpu_id);
/* Byte offset within the gdt/ldt sub-area. */
- offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;
+ offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
if ( likely(is_ldt_area) )
{
@@ -490,9 +490,9 @@
{
if ( shadow_mode_external(d) && GUEST_CONTEXT(v, regs) )
return shadow_fault(addr, regs);
- if ( (addr >= PERDOMAIN_VIRT_START) && (addr < PERDOMAIN_VIRT_END) )
- return handle_perdomain_mapping_fault(
- addr - PERDOMAIN_VIRT_START, regs);
+ if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
+ return handle_gdt_ldt_mapping_fault(
+ addr - GDT_LDT_VIRT_START, regs);
}
else if ( unlikely(shadow_mode_enabled(d)) )
{
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/x86_32/domain_page.c Tue Jan 10 14:24:12 2006
@@ -23,28 +23,24 @@
#define MAPCACHE_ORDER 10
#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
-l1_pgentry_t *mapcache;
-static unsigned int map_idx, epoch, shadow_epoch[NR_CPUS];
-static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
-
/* Use a spare PTE bit to mark entries ready for recycling. */
#define READY_FOR_TLB_FLUSH (1<<10)
static void flush_all_ready_maps(void)
{
- l1_pgentry_t *cache = mapcache;
+ struct mapcache *cache = &current->domain->arch.mapcache;
unsigned int i;
for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
- if ( (l1e_get_flags(cache[i]) & READY_FOR_TLB_FLUSH) )
- cache[i] = l1e_empty();
+ if ( (l1e_get_flags(cache->l1tab[i]) & READY_FOR_TLB_FLUSH) )
+ cache->l1tab[i] = l1e_empty();
}
void *map_domain_pages(unsigned long pfn, unsigned int order)
{
unsigned long va;
- unsigned int idx, i, flags, cpu = smp_processor_id();
- l1_pgentry_t *cache = mapcache;
+ unsigned int idx, i, flags, vcpu = current->vcpu_id;
+ struct mapcache *cache = &current->domain->arch.mapcache;
#ifndef NDEBUG
unsigned int flush_count = 0;
#endif
@@ -52,37 +48,41 @@
ASSERT(!in_irq());
perfc_incrc(map_domain_page_count);
- spin_lock(&map_lock);
+ /* If we are the idle domain, ensure that we run on our own page tables. */
+ if ( unlikely(is_idle_vcpu(current)) )
+ __sync_lazy_execstate();
+
+ spin_lock(&cache->lock);
/* Has some other CPU caused a wrap? We must flush if so. */
- if ( epoch != shadow_epoch[cpu] )
+ if ( cache->epoch != cache->shadow_epoch[vcpu] )
{
perfc_incrc(domain_page_tlb_flush);
local_flush_tlb();
- shadow_epoch[cpu] = epoch;
+ cache->shadow_epoch[vcpu] = cache->epoch;
}
do {
- idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
+ idx = cache->cursor = (cache->cursor + 1) & (MAPCACHE_ENTRIES - 1);
if ( unlikely(idx == 0) )
{
ASSERT(flush_count++ == 0);
flush_all_ready_maps();
perfc_incrc(domain_page_tlb_flush);
local_flush_tlb();
- shadow_epoch[cpu] = ++epoch;
+ cache->shadow_epoch[vcpu] = ++cache->epoch;
}
flags = 0;
for ( i = 0; i < (1U << order); i++ )
- flags |= l1e_get_flags(cache[idx+i]);
+ flags |= l1e_get_flags(cache->l1tab[idx+i]);
}
while ( flags & _PAGE_PRESENT );
for ( i = 0; i < (1U << order); i++ )
- cache[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
+ cache->l1tab[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
- spin_unlock(&map_lock);
+ spin_unlock(&cache->lock);
va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
return (void *)va;
@@ -91,9 +91,13 @@
void unmap_domain_pages(void *va, unsigned int order)
{
unsigned int idx, i;
+ struct mapcache *cache = &current->domain->arch.mapcache;
+
ASSERT((void *)MAPCACHE_VIRT_START <= va);
ASSERT(va < (void *)MAPCACHE_VIRT_END);
+
idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
+
for ( i = 0; i < (1U << order); i++ )
- l1e_add_flags(mapcache[idx+i], READY_FOR_TLB_FLUSH);
+ l1e_add_flags(cache->l1tab[idx+i], READY_FOR_TLB_FLUSH);
}
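The map_domain_page() cache above changes from one global array to a per-domain struct mapcache, but keeps its design: a cursor walks a ring of L1 entries, unmapping only marks slots READY_FOR_TLB_FLUSH so they are recycled in bulk, and an epoch counter lets each vcpu flush its TLB lazily, only once the cursor has wrapped since it last looked. A reduced user-space sketch of the cursor/epoch scheme, with an int flag word standing in for the PTE and an 8-entry ring standing in for the 1024-entry cache:

#include <stdio.h>

#define ENTRIES          8      /* sketch; the real cache has 1024 */
#define PRESENT          1
#define READY_FOR_FLUSH  2

struct mapcache_sketch {
    int slot[ENTRIES];          /* flag word standing in for an l1 PTE */
    unsigned int cursor, epoch, shadow_epoch; /* one shadow per vcpu in Xen */
};

static void flush_sketch(struct mapcache_sketch *c)
{
    /* Stands in for flush_all_ready_maps() plus local_flush_tlb(). */
    for (int i = 0; i < ENTRIES; i++)
        if (c->slot[i] & READY_FOR_FLUSH)
            c->slot[i] = 0;
}

int map_sketch(struct mapcache_sketch *c)
{
    unsigned int idx;

    /* Some other vcpu wrapped the cursor since we last flushed. */
    if (c->epoch != c->shadow_epoch) {
        flush_sketch(c);
        c->shadow_epoch = c->epoch;
    }

    do {
        idx = c->cursor = (c->cursor + 1) & (ENTRIES - 1);
        if (idx == 0) {         /* wrapped: recycle and bump the epoch */
            flush_sketch(c);
            c->shadow_epoch = ++c->epoch;
        }
    } while (c->slot[idx] & PRESENT);

    c->slot[idx] = PRESENT;
    return idx;                 /* the kernel returns a mapped VA instead */
}

void unmap_sketch(struct mapcache_sketch *c, int idx)
{
    c->slot[idx] |= READY_FOR_FLUSH;  /* recycled lazily at the next flush */
}

int main(void)
{
    struct mapcache_sketch c = { {0}, 0, 0, 0 };
    int a = map_sketch(&c), b = map_sketch(&c);
    unmap_sketch(&c, a);
    printf("slots %d and %d\n", a, b);
    return 0;
}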
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/x86_32/mm.c Tue Jan 10 14:24:12 2006
@@ -29,8 +29,6 @@
#include <asm/fixmap.h>
#include <public/memory.h>
-extern l1_pgentry_t *mapcache;
-
unsigned int PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
@@ -68,7 +66,7 @@
void *ioremap_pt;
unsigned long v;
struct pfn_info *pg;
- int i, mapcache_order;
+ int i;
#ifdef CONFIG_X86_PAE
printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
@@ -76,7 +74,7 @@
printk("PAE disabled.\n");
#endif
- idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
if ( cpu_has_pge )
{
@@ -121,14 +119,12 @@
l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
}
- /* Set up mapping cache for domain pages. */
- mapcache_order = get_order_from_bytes(
- MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
- mapcache = alloc_xenheap_pages(mapcache_order);
- memset(mapcache, 0, PAGE_SIZE << mapcache_order);
- for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- idle_pg_table_l2[l2_linear_offset(MAPCACHE_VIRT_START) + i] =
- l2e_from_page(virt_to_page(mapcache) + i, __PAGE_HYPERVISOR);
+ /* Install per-domain mappings for idle domain. */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
+ arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
}
void __init zap_low_mappings(l2_pgentry_t *base)
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Tue Jan 10 14:23:56 2006
+++ b/xen/arch/x86/x86_64/mm.c Tue Jan 10 14:24:12 2006
@@ -80,7 +80,7 @@
l2_pgentry_t *l2_ro_mpt;
struct pfn_info *pg;
- idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
/* Create user-accessible L2 directory to map the MPT for guests. */
l3_ro_mpt = alloc_xenheap_page();
@@ -119,6 +119,12 @@
/* Set up linear page table mapping. */
idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR);
+
+ /* Install per-domain mappings for idle domain. */
+ idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_page(
+ virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3),
+ __PAGE_HYPERVISOR);
}
void __init zap_low_mappings(void)
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/common/domain.c
--- a/xen/common/domain.c Tue Jan 10 14:23:56 2006
+++ b/xen/common/domain.c Tue Jan 10 14:24:12 2006
@@ -46,9 +46,7 @@
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
- if ( d->domain_id == IDLE_DOMAIN_ID )
- set_bit(_DOMF_idle_domain, &d->domain_flags);
- else
+ if ( !is_idle_domain(d) )
set_bit(_DOMF_ctrl_pause, &d->domain_flags);
if ( !is_idle_domain(d) &&
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c Tue Jan 10 14:23:56 2006
+++ b/xen/common/sched_bvt.c Tue Jan 10 14:24:12 2006
@@ -220,7 +220,7 @@
einf->vcpu = v;
- if ( is_idle_domain(v->domain) )
+ if ( is_idle_vcpu(v) )
{
einf->avt = einf->evt = ~0U;
BUG_ON(__task_on_runqueue(v));
@@ -268,7 +268,7 @@
((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
ctx_allow;
- if ( is_idle_domain(curr->domain) || (einf->evt <= curr_evt) )
+ if ( is_idle_vcpu(curr) || (einf->evt <= curr_evt) )
cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
else if ( schedule_data[cpu].s_timer.expires > r_time )
set_ac_timer(&schedule_data[cpu].s_timer, r_time);
@@ -399,7 +399,7 @@
ASSERT(prev_einf != NULL);
ASSERT(__task_on_runqueue(prev));
- if ( likely(!is_idle_domain(prev->domain)) )
+ if ( likely(!is_idle_vcpu(prev)) )
{
prev_einf->avt = calc_avt(prev, now);
prev_einf->evt = calc_evt(prev, prev_einf->avt);
@@ -490,13 +490,13 @@
}
/* work out time for next run through scheduler */
- if ( is_idle_domain(next->domain) )
+ if ( is_idle_vcpu(next) )
{
r_time = ctx_allow;
goto sched_done;
}
- if ( (next_prime == NULL) || is_idle_domain(next_prime->domain) )
+ if ( (next_prime == NULL) || is_idle_vcpu(next_prime) )
{
/* We have only one runnable task besides the idle task. */
r_time = 10 * ctx_allow; /* RN: random constant */
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Tue Jan 10 14:23:56 2006
+++ b/xen/common/sched_sedf.c Tue Jan 10 14:24:12 2006
@@ -396,7 +396,7 @@
INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
- if ( !is_idle_domain(d->domain) )
+ if ( !is_idle_vcpu(d) )
{
extraq_check(d);
}
@@ -777,7 +777,7 @@
struct task_slice ret;
/* idle tasks don't need any of the following stuff */
- if (is_idle_domain(current->domain))
+ if ( is_idle_vcpu(current) )
goto check_waitq;
/* create local state of the status of the domain, in order to avoid
@@ -874,7 +874,7 @@
PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
d->domain->domain_id, d->vcpu_id);
- if ( is_idle_domain(d->domain) )
+ if ( is_idle_vcpu(d) )
return;
EDOM_INFO(d)->status |= SEDF_ASLEEP;
@@ -1194,7 +1194,7 @@
static inline int get_run_type(struct vcpu* d)
{
struct sedf_vcpu_info* inf = EDOM_INFO(d);
- if (is_idle_domain(d->domain))
+ if (is_idle_vcpu(d))
return DOMAIN_IDLE;
if (inf->status & EXTRA_RUN_PEN)
return DOMAIN_EXTRA_PEN;
@@ -1258,7 +1258,7 @@
PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
d->vcpu_id);
- if ( unlikely(is_idle_domain(d->domain)) )
+ if ( unlikely(is_idle_vcpu(d)) )
return;
if ( unlikely(__task_on_queue(d)) )
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/common/schedule.c
--- a/xen/common/schedule.c Tue Jan 10 14:23:56 2006
+++ b/xen/common/schedule.c Tue Jan 10 14:24:12 2006
@@ -140,12 +140,10 @@
*/
void sched_add_domain(struct vcpu *v)
{
- struct domain *d = v->domain;
-
/* Initialise the per-domain timer. */
init_ac_timer(&v->timer, dom_timer_fn, v, v->processor);
- if ( is_idle_domain(d) )
+ if ( is_idle_vcpu(v) )
{
schedule_data[v->processor].curr = v;
schedule_data[v->processor].idle = v;
@@ -153,7 +151,7 @@
}
SCHED_OP(add_task, v);
- TRACE_2D(TRC_SCHED_DOM_ADD, d->domain_id, v->vcpu_id);
+ TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
}
void sched_rem_domain(struct vcpu *v)
@@ -435,7 +433,7 @@
prev->wokenup = now;
#if defined(WAKE_HISTO)
- if ( !is_idle_domain(next->domain) && next->wokenup )
+ if ( !is_idle_vcpu(next) && next->wokenup )
{
ulong diff = (ulong)(now - next->wokenup);
diff /= (ulong)MILLISECS(1);
@@ -445,7 +443,7 @@
next->wokenup = (s_time_t)0;
#elif defined(BLOCKTIME_HISTO)
prev->lastdeschd = now;
- if ( !is_idle_domain(next->domain) )
+ if ( !is_idle_vcpu(next) )
{
ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++;
@@ -462,7 +460,7 @@
prev->sleep_tick = schedule_data[cpu].tick;
/* Ensure that the domain has an up-to-date time base. */
- if ( !is_idle_domain(next->domain) )
+ if ( !is_idle_vcpu(next) )
{
update_dom_time(next);
if ( next->sleep_tick != schedule_data[cpu].tick )
@@ -499,7 +497,7 @@
schedule_data[cpu].tick++;
- if ( !is_idle_domain(v->domain) )
+ if ( !is_idle_vcpu(v) )
{
update_dom_time(v);
send_guest_virq(v, VIRQ_TIMER);
@@ -533,9 +531,6 @@
init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
}
- schedule_data[0].curr = idle_domain[0];
- schedule_data[0].idle = idle_domain[0];
-
for ( i = 0; schedulers[i] != NULL; i++ )
{
ops = *schedulers[i];
@@ -548,10 +543,16 @@
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- rc = SCHED_OP(alloc_task, idle_domain[0]);
- BUG_ON(rc < 0);
-
- sched_add_domain(idle_domain[0]);
+ if ( idle_vcpu[0] != NULL )
+ {
+ schedule_data[0].curr = idle_vcpu[0];
+ schedule_data[0].idle = idle_vcpu[0];
+
+ rc = SCHED_OP(alloc_task, idle_vcpu[0]);
+ BUG_ON(rc < 0);
+
+ sched_add_domain(idle_vcpu[0]);
+ }
}
/*
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Tue Jan 10 14:23:56 2006
+++ b/xen/include/asm-x86/config.h Tue Jan 10 14:24:12 2006
@@ -148,7 +148,8 @@
#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
/* Slot 260: per-domain mappings. */
#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
-#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + PML4_ENTRY_BYTES)
+#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20))
+#define PERDOMAIN_MBYTES ((unsigned long)GDT_LDT_MBYTES)
/* Slot 261: machine-to-phys conversion table (16GB). */
#define RDWR_MPT_VIRT_START (PML4_ADDR(261))
#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (16UL<<30))
@@ -195,8 +196,7 @@
* ------ ------
* I/O remapping area ( 4MB)
* Direct-map (1:1) area [Xen code/data/heap] (12MB)
- * map_domain_page cache ( 4MB)
- * Per-domain mappings ( 4MB)
+ * Per-domain mappings (inc. 4MB map_domain_page cache) ( 4MB)
* Shadow linear pagetable ( 4MB) ( 8MB)
* Guest linear pagetable ( 4MB) ( 8MB)
* Machine-to-physical translation table [writable] ( 4MB) (16MB)
@@ -209,7 +209,7 @@
#define IOREMAP_MBYTES 4
#define DIRECTMAP_MBYTES 12
#define MAPCACHE_MBYTES 4
-#define PERDOMAIN_MBYTES 4
+#define PERDOMAIN_MBYTES 8
#ifdef CONFIG_X86_PAE
# define LINEARPT_MBYTES 8
@@ -227,7 +227,7 @@
#define DIRECTMAP_VIRT_START (DIRECTMAP_VIRT_END - (DIRECTMAP_MBYTES<<20))
#define MAPCACHE_VIRT_END DIRECTMAP_VIRT_START
#define MAPCACHE_VIRT_START (MAPCACHE_VIRT_END - (MAPCACHE_MBYTES<<20))
-#define PERDOMAIN_VIRT_END MAPCACHE_VIRT_START
+#define PERDOMAIN_VIRT_END DIRECTMAP_VIRT_START
#define PERDOMAIN_VIRT_START (PERDOMAIN_VIRT_END - (PERDOMAIN_MBYTES<<20))
#define SH_LINEAR_PT_VIRT_END PERDOMAIN_VIRT_START
#define SH_LINEAR_PT_VIRT_START (SH_LINEAR_PT_VIRT_END - (LINEARPT_MBYTES<<20))
@@ -282,14 +282,21 @@
extern unsigned long xenheap_phys_end; /* user-configurable */
#endif
-#define GDT_VIRT_START(ed) \
- (PERDOMAIN_VIRT_START + ((ed)->vcpu_id << PDPT_VCPU_VA_SHIFT))
-#define LDT_VIRT_START(ed) \
- (GDT_VIRT_START(ed) + (64*1024))
-
-#define PDPT_VCPU_SHIFT 5
-#define PDPT_VCPU_VA_SHIFT (PDPT_VCPU_SHIFT + PAGE_SHIFT)
-#define PDPT_L1_ENTRIES (MAX_VIRT_CPUS << PDPT_VCPU_SHIFT)
+/* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
+#define GDT_LDT_VCPU_SHIFT 5
+#define GDT_LDT_VCPU_VA_SHIFT (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT)
+#define GDT_LDT_MBYTES (MAX_VIRT_CPUS >> (20-GDT_LDT_VCPU_VA_SHIFT))
+#define GDT_LDT_VIRT_START PERDOMAIN_VIRT_START
+#define GDT_LDT_VIRT_END (GDT_LDT_VIRT_START + (GDT_LDT_MBYTES << 20))
+
+/* The address of a particular VCPU's GDT or LDT. */
+#define GDT_VIRT_START(v) \
+ (PERDOMAIN_VIRT_START + ((v)->vcpu_id << GDT_LDT_VCPU_VA_SHIFT))
+#define LDT_VIRT_START(v) \
+ (GDT_VIRT_START(v) + (64*1024))
+
+#define PDPT_L1_ENTRIES \
+ ((PERDOMAIN_VIRT_END - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
#define PDPT_L2_ENTRIES \
((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
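The renamed GDT/LDT constants above also rework the per-domain area's size arithmetic. Assuming the usual build values (MAX_VIRT_CPUS of 32 and 4 KB pages; both are assumptions here, not visible in the hunk), GDT_LDT_VCPU_VA_SHIFT is 5 + 12 = 17, so each vcpu owns a 128 KB GDT/LDT slot and GDT_LDT_MBYTES comes to 32 >> (20 - 17) = 4 MB; the x86_32 PERDOMAIN_MBYTES bump from 4 to 8 earlier in this file is that 4 MB plus the 4 MB map_domain_page cache now folded into the per-domain area. A small check program:

#include <stdio.h>

/* Assumed config values; other builds scale accordingly. */
#define PAGE_SHIFT            12
#define MAX_VIRT_CPUS         32
#define GDT_LDT_VCPU_SHIFT    5   /* 32 pages of GDT+LDT shadow per vcpu */
#define GDT_LDT_VCPU_VA_SHIFT (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT)   /* 17 */
#define GDT_LDT_MBYTES        (MAX_VIRT_CPUS >> (20 - GDT_LDT_VCPU_VA_SHIFT))

int main(void)
{
    /* Each vcpu gets 1 << 17 bytes = 128 KB of GDT/LDT mapping space,
     * so 32 vcpus need 32 >> 3 = 4 MB in total. */
    printf("per-vcpu slot: %d KB\n", (1 << GDT_LDT_VCPU_VA_SHIFT) >> 10);
    printf("GDT_LDT_MBYTES: %d\n", GDT_LDT_MBYTES);
    return 0;
}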
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Tue Jan 10 14:23:56 2006
+++ b/xen/include/asm-x86/domain.h Tue Jan 10 14:24:12 2006
@@ -13,12 +13,24 @@
unsigned long eip;
};
+struct mapcache {
+ l1_pgentry_t *l1tab;
+ unsigned int cursor;
+ unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
+ spinlock_t lock;
+};
+
struct arch_domain
{
l1_pgentry_t *mm_perdomain_pt;
#ifdef CONFIG_X86_64
l2_pgentry_t *mm_perdomain_l2;
l3_pgentry_t *mm_perdomain_l3;
+#endif
+
+#ifdef CONFIG_X86_32
+ /* map_domain_page() mapping cache. */
+ struct mapcache mapcache;
#endif
/* Writable pagetables. */
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Tue Jan 10 14:23:56 2006
+++ b/xen/include/xen/sched.h Tue Jan 10 14:24:12 2006
@@ -172,12 +172,10 @@
char *xen_section_string;
};
-extern struct domain idle0_domain;
-extern struct vcpu idle0_vcpu;
-
-extern struct vcpu *idle_domain[NR_CPUS];
+extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID (0x7FFFU)
-#define is_idle_domain(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
+#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
+#define is_idle_vcpu(v) (is_idle_domain((v)->domain))
struct vcpu *alloc_vcpu(
struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
@@ -367,23 +365,20 @@
/*
* Per-domain flags (domain_flags).
*/
- /* Is this one of the per-CPU idle domains? */
-#define _DOMF_idle_domain 0
-#define DOMF_idle_domain (1UL<<_DOMF_idle_domain)
/* Is this domain privileged? */
-#define _DOMF_privileged 1
+#define _DOMF_privileged 0
#define DOMF_privileged (1UL<<_DOMF_privileged)
/* Guest shut itself down for some reason. */
-#define _DOMF_shutdown 2
+#define _DOMF_shutdown 1
#define DOMF_shutdown (1UL<<_DOMF_shutdown)
/* Death rattle. */
-#define _DOMF_dying 3
+#define _DOMF_dying 2
#define DOMF_dying (1UL<<_DOMF_dying)
/* Domain is paused by controller software. */
-#define _DOMF_ctrl_pause 4
+#define _DOMF_ctrl_pause 3
#define DOMF_ctrl_pause (1UL<<_DOMF_ctrl_pause)
/* Domain is being debugged by controller software. */
-#define _DOMF_debugging 5
+#define _DOMF_debugging 4
#define DOMF_debugging (1UL<<_DOMF_debugging)
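With _DOMF_idle_domain removed, idle-ness is now a property of the reserved domain ID rather than a flag bit, which is why every remaining DOMF_* constant shifts down by one. The new test, reduced to self-contained form with sketch struct types:

#include <stdio.h>

#define IDLE_DOMAIN_ID 0x7FFFU

struct domain_sketch { unsigned int domain_id; };
struct vcpu_sketch   { struct domain_sketch *domain; };

/* Identity by ID, not by flag bit (mirrors the macros in the hunk). */
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

int main(void)
{
    struct domain_sketch idle = { IDLE_DOMAIN_ID }, dom0 = { 0 };
    struct vcpu_sketch v0 = { &idle }, v1 = { &dom0 };
    printf("%d %d\n", is_idle_vcpu(&v0), is_idle_vcpu(&v1)); /* prints: 1 0 */
    return 0;
}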
diff -r a51fcb5de470 -r a4ce0ba0f8ff xen/arch/x86/idle0_task.c
--- a/xen/arch/x86/idle0_task.c Tue Jan 10 14:23:56 2006
+++ /dev/null Tue Jan 10 14:24:12 2006
@@ -1,28 +0,0 @@
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/desc.h>
-
-struct domain idle0_domain = {
- domain_id: IDLE_DOMAIN_ID,
- domain_flags:DOMF_idle_domain,
- refcnt: ATOMIC_INIT(1)
-};
-
-struct vcpu idle0_vcpu = {
- processor: 0,
- cpu_affinity:CPU_MASK_CPU0,
- domain: &idle0_domain
-};
-
-struct tss_struct init_tss[NR_CPUS];
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */