ChangeSet 1.1402.1.1, 2005/05/03 15:41:52+01:00, cl349@xxxxxxxxxxxxxxxxxxxx
Patch to allow vcpu hotplugging in domU
Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
From: Ryan Harper
Two config changes are required to use this change:
1) CONFIG_SMP=y
2) CONFIG_HOTPLUG_CPU=y
I've tested unplugging/plugging cpus in domU via the sysfs interface
that the patch provides.
hungerforce:~# grep processor /proc/cpuinfo
processor : 0
processor : 1
processor : 2
processor : 3
hungerforce:~# cd /sys/devices/system/cpu/
hungerforce:/sys/devices/system/cpu# echo 0 > cpu3/online
hungerforce:/sys/devices/system/cpu# grep processor /proc/cpuinfo
processor : 0
processor : 1
processor : 2
hungerforce:/sys/devices/system/cpu# echo 1 > cpu3/online
hungerforce:/sys/devices/system/cpu# grep processor /proc/cpuinfo
processor : 0
processor : 1
processor : 2
processor : 3
It seems that all processors besides cpu0 can be removed. I have not done
any investigation or optimization of the hotplug patch. I attempted to
unplug cpus in dom0, but this resulted in Xen rebooting.
Makefile, Kconfig, smpboot.c, smp.c, process.c, irq.c:
Fix hotplug cpu support.
Makefile:
Set up reach-over build of topology.o.
i386-cpu-hotplug-updated-for-mm.patch:
Add i386 hotplug patch from:
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.11-rc5/2.6.11-rc5-mm1/broken-out/i386-cpu-hotplug-updated-for-mm.patch
i386-cpu-hotplug-updated-for-mm.patch, Makefile:
new file
traps.c, smpboot.c, smp.c, process.c, irq.c, Kconfig:
Merge changes from hotplug patch.
linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig | 10
linux-2.6.11-xen-sparse/arch/xen/i386/Makefile | 1
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c | 65 +
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c | 34 +
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c | 24
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c | 101 ++-
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c | 8
linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile | 12
patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch | 398 ++++++++++++
9 files changed, 627 insertions(+), 26 deletions(-)
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig 2005-05-10 11:04:53
-04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig 2005-05-10 11:04:53
-04:00
@@ -668,6 +668,16 @@
depends on (X86_VISWS || SMP) && !X86_VOYAGER
default n
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+ depends on SMP && HOTPLUG && EXPERIMENTAL
+ ---help---
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu.
+
+ Say N.
+
+
if XEN_PHYSDEV_ACCESS
menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile 2005-05-10 11:04:53
-04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile 2005-05-10 11:04:53
-04:00
@@ -72,6 +72,7 @@
libs-y += arch/i386/lib/
core-y += arch/xen/i386/kernel/ \
arch/xen/i386/mm/ \
+ arch/xen/i386/mach-default/ \
arch/i386/crypto/
# \
# arch/xen/$(mcore-y)/
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c 2005-05-10
11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c 2005-05-10
11:04:53 -04:00
@@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
#ifndef CONFIG_X86_LOCAL_APIC
/*
@@ -207,9 +210,8 @@
if (i == 0) {
seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
+ for_each_cpu(j)
+ seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
@@ -222,9 +224,8 @@
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
@@ -237,16 +238,13 @@
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- irq_stat[j].apic_timer_irqs);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -256,3 +254,44 @@
}
return 0;
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
+ if (irq == 2)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+#if 0
+ barrier();
+ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+ [note the nop - the interrupt-enable boundary on x86 is two
+ instructions from sti] - to flush out pending hardirqs and
+ IPIs. After this point nothing is supposed to reach this CPU." */
+ __asm__ __volatile__("sti; nop; cli");
+ barrier();
+#else
+ /* That doesn't seem sufficient. Give it 1ms. */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+#endif
+}
+#endif
+
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c 2005-05-10
11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c 2005-05-10
11:04:53 -04:00
@@ -13,6 +13,7 @@
#include <stdarg.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -54,6 +55,9 @@
#include <linux/irq.h>
#include <linux/err.h>
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
int hlt_counter;
@@ -112,6 +116,33 @@
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /* We shouldn't have to disable interrupts while dead, but
+ * some interrupts just don't seem to go away, and this makes
+ * it "work" for testing purposes. */
+ /* Death loop */
+ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+ HYPERVISOR_yield();
+
+ local_irq_disable();
+ __flush_tlb_all();
+ cpu_set(smp_processor_id(), cpu_online_map);
+ local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
@@ -129,6 +160,9 @@
if (cpu_isset(cpu, cpu_idle_map))
cpu_clear(cpu, cpu_idle_map);
rmb();
+
+ if (cpu_is_offline(cpu))
+ play_dead();
irq_stat[cpu].idle_timestamp = jiffies;
xen_idle();
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c 2005-05-10
11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c 2005-05-10
11:04:53 -04:00
@@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/cpu.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
@@ -185,6 +186,7 @@
unsigned int cpu;
local_irq_save(flags);
+ WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
for (cpu = 0; cpu < NR_CPUS; ++cpu) {
if (cpu_isset(cpu, mask)) {
@@ -320,21 +322,21 @@
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
- cpumask_t tmp;
/*
* A couple of (to be removed) sanity checks:
*
- * - we do not send IPIs to not-yet booted CPUs.
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
-
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+ if (cpus_empty(cpumask))
+ return;
+
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
@@ -465,6 +467,7 @@
*/
void smp_send_reschedule(int cpu)
{
+ WARN_ON(cpu_is_offline(cpu));
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|