# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
# Date 1180947928 -3600
# Node ID a533be77c572209cd133734bab614a102a61ab75
# Parent 42f6970e18c9d339a78789e1a3e50b4cb948d99a
Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec
---
arch/i386/Kconfig | 85
arch/i386/Kconfig.cpu | 6
arch/i386/Kconfig.debug | 1
arch/i386/Makefile | 24
arch/i386/boot-xen/Makefile | 19
arch/i386/kernel/Makefile | 22
arch/i386/kernel/acpi/Makefile | 4
arch/i386/kernel/acpi/boot-xen.c | 1168 ++++++++
arch/i386/kernel/apic-xen.c | 155 +
arch/i386/kernel/asm-offsets.c | 7
arch/i386/kernel/cpu/Makefile | 5
arch/i386/kernel/cpu/common-xen.c | 743 +++++
arch/i386/kernel/cpu/mtrr/Makefile | 7
arch/i386/kernel/cpu/mtrr/main-xen.c | 197 +
arch/i386/kernel/crash.c | 4
arch/i386/kernel/early_printk-xen.c | 2
arch/i386/kernel/entry-xen.S | 1216 ++++++++
arch/i386/kernel/fixup.c | 88
arch/i386/kernel/head-xen.S | 207 +
arch/i386/kernel/init_task-xen.c | 51
arch/i386/kernel/io_apic-xen.c | 2777 ++++++++++++++++++++
arch/i386/kernel/ioport-xen.c | 122
arch/i386/kernel/irq-xen.c | 324 ++
arch/i386/kernel/ldt-xen.c | 270 +
arch/i386/kernel/machine_kexec.c | 40
arch/i386/kernel/microcode-xen.c | 144 +
arch/i386/kernel/mpparse-xen.c | 1185 ++++++++
arch/i386/kernel/pci-dma-xen.c | 378 ++
arch/i386/kernel/process-xen.c | 853 ++++++
arch/i386/kernel/quirks-xen.c | 47
arch/i386/kernel/setup-xen.c | 1898 +++++++++++++
arch/i386/kernel/smp-xen.c | 624 ++++
arch/i386/kernel/swiotlb.c | 726 +++++
arch/i386/kernel/sysenter.c | 18
arch/i386/kernel/time-xen.c | 1143 ++++++++
arch/i386/kernel/traps-xen.c | 1186 ++++++++
arch/i386/kernel/vm86.c | 12
arch/i386/kernel/vsyscall-note-xen.S | 32
arch/i386/mach-xen/Makefile | 5
arch/i386/mach-xen/setup.c | 147 +
arch/i386/mm/Makefile | 8
arch/i386/mm/fault-xen.c | 769 +++++
arch/i386/mm/highmem-xen.c | 136
arch/i386/mm/hypervisor.c | 432 +++
arch/i386/mm/init-xen.c | 850 ++++++
arch/i386/mm/ioremap-xen.c | 443 +++
arch/i386/mm/pgtable-xen.c | 727 +++++
arch/i386/oprofile/Makefile | 7
arch/i386/oprofile/xenoprof.c | 179 +
arch/i386/pci/Makefile | 9
arch/i386/pci/irq-xen.c | 1205 ++++++++
arch/i386/pci/pcifront.c | 55
arch/i386/power/Makefile | 4
arch/ia64/Kconfig | 57
arch/ia64/Makefile | 12
arch/ia64/hp/common/sba_iommu.c | 61
arch/ia64/kernel/acpi.c | 6
arch/ia64/kernel/asm-offsets.c | 25
arch/ia64/kernel/entry.S | 36
arch/ia64/kernel/fsys.S | 41
arch/ia64/kernel/gate.S | 103
arch/ia64/kernel/gate.lds.S | 14
arch/ia64/kernel/head.S | 6
arch/ia64/kernel/iosapic.c | 75
arch/ia64/kernel/irq_ia64.c | 356 ++
arch/ia64/kernel/pal.S | 5
arch/ia64/kernel/patch.c | 67
arch/ia64/kernel/perfmon.c | 89
arch/ia64/kernel/setup.c | 135
arch/ia64/kernel/time.c | 194 +
arch/ia64/mm/ioremap.c | 3
arch/ia64/oprofile/Makefile | 4
arch/ia64/oprofile/init.c | 14
arch/ia64/oprofile/oprofile_perfmon.h | 28
arch/ia64/oprofile/perfmon.c | 35
arch/ia64/oprofile/xenoprof.c | 142 +
arch/ia64/pci/pci.c | 16
arch/ia64/xen/Makefile | 9
arch/ia64/xen/hypercall.S | 170 +
arch/ia64/xen/hypervisor.c | 1264 +++++++++
arch/ia64/xen/machvec.c | 4
arch/ia64/xen/mem.c | 75
arch/ia64/xen/swiotlb.c | 882 ++++++
arch/ia64/xen/util.c | 105
arch/ia64/xen/xcom_hcall.c | 397 ++
arch/ia64/xen/xcom_mini.c | 469 +++
arch/ia64/xen/xcom_privcmd.c | 673 ++++
arch/ia64/xen/xen_dma.c | 145 +
arch/ia64/xen/xencomm.c | 263 +
arch/ia64/xen/xenentry.S | 931 ++++++
arch/ia64/xen/xenhpski.c | 19
arch/ia64/xen/xenivt.S | 2177 +++++++++++++++
arch/ia64/xen/xenminstate.h | 358 ++
arch/ia64/xen/xenpal.S | 85
arch/ia64/xen/xensetup.S | 52
arch/um/kernel/physmem.c | 4
arch/x86_64/Kconfig | 68
arch/x86_64/Makefile | 20
arch/x86_64/ia32/Makefile | 20
arch/x86_64/ia32/ia32entry-xen.S | 743 +++++
arch/x86_64/ia32/syscall32-xen.c | 128
arch/x86_64/ia32/syscall32_syscall-xen.S | 28
arch/x86_64/ia32/vsyscall-int80.S | 58
arch/x86_64/ia32/vsyscall-sigreturn.S | 2
arch/x86_64/kernel/Makefile | 19
arch/x86_64/kernel/acpi/Makefile | 1
arch/x86_64/kernel/apic-xen.c | 197 +
arch/x86_64/kernel/asm-offsets.c | 2
arch/x86_64/kernel/crash.c | 6
arch/x86_64/kernel/e820-xen.c | 774 +++++
arch/x86_64/kernel/early_printk-xen.c | 302 ++
arch/x86_64/kernel/entry-xen.S | 1325 +++++++++
arch/x86_64/kernel/genapic-xen.c | 143 +
arch/x86_64/kernel/genapic_xen.c | 161 +
arch/x86_64/kernel/head-xen.S | 203 +
arch/x86_64/kernel/head64-xen.c | 162 +
arch/x86_64/kernel/init_task.c | 3
arch/x86_64/kernel/io_apic-xen.c | 2269 ++++++++++++++++
arch/x86_64/kernel/ioport-xen.c | 99
arch/x86_64/kernel/irq-xen.c | 197 +
arch/x86_64/kernel/ldt-xen.c | 282 ++
arch/x86_64/kernel/machine_kexec.c | 118
arch/x86_64/kernel/mpparse-xen.c | 1011 +++++++
arch/x86_64/kernel/pci-swiotlb-xen.c | 55
arch/x86_64/kernel/process-xen.c | 829 +++++
arch/x86_64/kernel/setup-xen.c | 1677 ++++++++++++
arch/x86_64/kernel/setup64-xen.c | 361 ++
arch/x86_64/kernel/smp-xen.c | 600 ++++
arch/x86_64/kernel/traps-xen.c | 1175 ++++++++
arch/x86_64/kernel/vsyscall-xen.c | 239 +
arch/x86_64/kernel/xen_entry.S | 40
arch/x86_64/mm/Makefile | 10
arch/x86_64/mm/fault-xen.c | 724 +++++
arch/x86_64/mm/init-xen.c | 1238 ++++++++
arch/x86_64/mm/pageattr-xen.c | 433 +++
arch/x86_64/oprofile/Makefile | 10
arch/x86_64/pci/Makefile | 12
drivers/Makefile | 1
drivers/acpi/Kconfig | 3
drivers/char/mem.c | 6
drivers/char/tpm/Kconfig | 10
drivers/char/tpm/Makefile | 2
drivers/char/tpm/tpm.h | 15
drivers/char/tpm/tpm_vtpm.c | 542 +++
drivers/char/tpm/tpm_vtpm.h | 55
drivers/char/tpm/tpm_xen.c | 720 +++++
drivers/char/tty_io.c | 7
drivers/firmware/Kconfig | 1
drivers/pci/Kconfig | 1
drivers/serial/Kconfig | 1
drivers/video/console/Kconfig | 1
drivers/xen/Kconfig | 260 +
drivers/xen/Makefile | 20
drivers/xen/balloon/Makefile | 2
drivers/xen/balloon/balloon.c | 663 ++++
drivers/xen/balloon/common.h | 58
drivers/xen/balloon/sysfs.c | 170 +
drivers/xen/blkback/Makefile | 3
drivers/xen/blkback/blkback.c | 614 ++++
drivers/xen/blkback/common.h | 139 +
drivers/xen/blkback/interface.c | 181 +
drivers/xen/blkback/vbd.c | 118
drivers/xen/blkback/xenbus.c | 533 +++
drivers/xen/blkfront/Makefile | 5
drivers/xen/blkfront/blkfront.c | 902 ++++++
drivers/xen/blkfront/block.h | 142 +
drivers/xen/blkfront/vbd.c | 372 ++
drivers/xen/blktap/Makefile | 5
drivers/xen/blktap/blktap.c | 1641 +++++++++++
drivers/xen/blktap/common.h | 121
drivers/xen/blktap/interface.c | 174 +
drivers/xen/blktap/xenbus.c | 477 +++
drivers/xen/char/Makefile | 2
drivers/xen/char/mem.c | 203 +
drivers/xen/console/Makefile | 2
drivers/xen/console/console.c | 721 +++++
drivers/xen/console/xencons_ring.c | 143 +
drivers/xen/core/Makefile | 12
drivers/xen/core/cpu_hotplug.c | 172 +
drivers/xen/core/evtchn.c | 1015 +++++++
drivers/xen/core/features.c | 34
drivers/xen/core/gnttab.c | 753 +++++
drivers/xen/core/hypervisor_sysfs.c | 59
drivers/xen/core/machine_kexec.c | 189 +
drivers/xen/core/machine_reboot.c | 241 +
drivers/xen/core/reboot.c | 249 +
drivers/xen/core/smpboot.c | 452 +++
drivers/xen/core/xen_proc.c | 23
drivers/xen/core/xen_sysfs.c | 378 ++
drivers/xen/evtchn/Makefile | 2
drivers/xen/evtchn/evtchn.c | 469 +++
drivers/xen/fbfront/Makefile | 2
drivers/xen/fbfront/xenfb.c | 752 +++++
drivers/xen/fbfront/xenkbd.c | 333 ++
drivers/xen/gntdev/Makefile | 1
drivers/xen/gntdev/gntdev.c | 973 +++++++
drivers/xen/netback/Makefile | 5
drivers/xen/netback/common.h | 165 +
drivers/xen/netback/interface.c | 336 ++
drivers/xen/netback/loopback.c | 320 ++
drivers/xen/netback/netback.c | 1606 +++++++++++
drivers/xen/netback/xenbus.c | 453 +++
drivers/xen/netfront/Makefile | 4
drivers/xen/netfront/netfront.c | 2133 +++++++++++++++
drivers/xen/pciback/Makefile | 15
drivers/xen/pciback/conf_space.c | 426 +++
drivers/xen/pciback/conf_space.h | 126
drivers/xen/pciback/conf_space_capability.c | 71
drivers/xen/pciback/conf_space_capability.h | 23
drivers/xen/pciback/conf_space_capability_pm.c | 128
drivers/xen/pciback/conf_space_capability_vpd.c | 42
drivers/xen/pciback/conf_space_header.c | 309 ++
drivers/xen/pciback/conf_space_quirks.c | 126
drivers/xen/pciback/conf_space_quirks.h | 35
drivers/xen/pciback/passthrough.c | 157 +
drivers/xen/pciback/pci_stub.c | 929 ++++++
drivers/xen/pciback/pciback.h | 93
drivers/xen/pciback/pciback_ops.c | 95
drivers/xen/pciback/slot.c | 151 +
drivers/xen/pciback/vpci.c | 204 +
drivers/xen/pciback/xenbus.c | 454 +++
drivers/xen/pcifront/Makefile | 7
drivers/xen/pcifront/pci.c | 46
drivers/xen/pcifront/pci_op.c | 268 +
drivers/xen/pcifront/pcifront.h | 40
drivers/xen/pcifront/xenbus.c | 295 ++
drivers/xen/privcmd/Makefile | 2
drivers/xen/privcmd/privcmd.c | 284 ++
drivers/xen/tpmback/Makefile | 4
drivers/xen/tpmback/common.h | 85
drivers/xen/tpmback/interface.c | 167 +
drivers/xen/tpmback/tpmback.c | 944 ++++++
drivers/xen/tpmback/xenbus.c | 289 ++
drivers/xen/util.c | 70
drivers/xen/xenbus/Makefile | 9
drivers/xen/xenbus/xenbus_backend_client.c | 147 +
drivers/xen/xenbus/xenbus_client.c | 283 ++
drivers/xen/xenbus/xenbus_comms.c | 232 +
drivers/xen/xenbus/xenbus_comms.h | 46
drivers/xen/xenbus/xenbus_dev.c | 404 ++
drivers/xen/xenbus/xenbus_probe.c | 1087 +++++++
drivers/xen/xenbus/xenbus_probe.h | 75
drivers/xen/xenbus/xenbus_probe_backend.c | 287 ++
drivers/xen/xenbus/xenbus_xs.c | 880 ++++++
drivers/xen/xenoprof/xenoprofile.c | 500 +++
fs/Kconfig | 1
include/asm-i386/apic.h | 2
include/asm-i386/kexec.h | 14
include/asm-i386/mach-xen/asm/agp.h | 37
include/asm-i386/mach-xen/asm/desc.h | 164 +
include/asm-i386/mach-xen/asm/dma-mapping.h | 157 +
include/asm-i386/mach-xen/asm/fixmap.h | 155 +
include/asm-i386/mach-xen/asm/floppy.h | 147 +
include/asm-i386/mach-xen/asm/highmem.h | 80
include/asm-i386/mach-xen/asm/hw_irq.h | 72
include/asm-i386/mach-xen/asm/hypercall.h | 407 ++
include/asm-i386/mach-xen/asm/hypervisor.h | 258 +
include/asm-i386/mach-xen/asm/io.h | 390 ++
include/asm-i386/mach-xen/asm/irqflags.h | 127
include/asm-i386/mach-xen/asm/maddr.h | 193 +
include/asm-i386/mach-xen/asm/mmu.h | 29
include/asm-i386/mach-xen/asm/mmu_context.h | 108
include/asm-i386/mach-xen/asm/page.h | 235 +
include/asm-i386/mach-xen/asm/param.h | 23
include/asm-i386/mach-xen/asm/pci.h | 146 +
include/asm-i386/mach-xen/asm/pgalloc.h | 59
include/asm-i386/mach-xen/asm/pgtable-2level-defs.h | 20
include/asm-i386/mach-xen/asm/pgtable-2level.h | 120
include/asm-i386/mach-xen/asm/pgtable-3level-defs.h | 24
include/asm-i386/mach-xen/asm/pgtable-3level.h | 209 +
include/asm-i386/mach-xen/asm/pgtable.h | 531 +++
include/asm-i386/mach-xen/asm/processor.h | 741 +++++
include/asm-i386/mach-xen/asm/ptrace.h | 90
include/asm-i386/mach-xen/asm/scatterlist.h | 22
include/asm-i386/mach-xen/asm/segment.h | 117
include/asm-i386/mach-xen/asm/setup.h | 81
include/asm-i386/mach-xen/asm/smp.h | 103
include/asm-i386/mach-xen/asm/spinlock.h | 202 +
include/asm-i386/mach-xen/asm/swiotlb.h | 43
include/asm-i386/mach-xen/asm/synch_bitops.h | 145 +
include/asm-i386/mach-xen/asm/system.h | 488 +++
include/asm-i386/mach-xen/asm/tlbflush.h | 101
include/asm-i386/mach-xen/asm/vga.h | 20
include/asm-i386/mach-xen/asm/xenoprof.h | 48
include/asm-i386/mach-xen/irq_vectors.h | 125
include/asm-i386/mach-xen/mach_traps.h | 33
include/asm-i386/mach-xen/setup_arch.h | 5
include/asm-ia64/agp.h | 31
include/asm-ia64/dma-mapping.h | 23
include/asm-ia64/gcc_intrin.h | 68
include/asm-ia64/hw_irq.h | 10
include/asm-ia64/hypercall.h | 416 ++
include/asm-ia64/hypervisor.h | 226 +
include/asm-ia64/intel_intrin.h | 66
include/asm-ia64/io.h | 37
include/asm-ia64/iosapic.h | 2
include/asm-ia64/irq.h | 33
include/asm-ia64/machvec.h | 2
include/asm-ia64/machvec_xen.h | 43
include/asm-ia64/maddr.h | 107
include/asm-ia64/meminit.h | 5
include/asm-ia64/page.h | 23
include/asm-ia64/pal.h | 1
include/asm-ia64/pgalloc.h | 4
include/asm-ia64/privop.h | 60
include/asm-ia64/processor.h | 1
include/asm-ia64/sal.h | 21
include/asm-ia64/swiotlb.h | 41
include/asm-ia64/synch_bitops.h | 61
include/asm-ia64/system.h | 4
include/asm-ia64/uaccess.h | 20
include/asm-ia64/xen/privop.h | 310 ++
include/asm-ia64/xen/xcom_hcall.h | 88
include/asm-ia64/xen/xencomm.h | 60
include/asm-ia64/xenoprof.h | 48
include/asm-um/page.h | 2
include/asm-x86_64/apic.h | 2
include/asm-x86_64/kexec.h | 13
include/asm-x86_64/mach-xen/asm/agp.h | 35
include/asm-x86_64/mach-xen/asm/arch_hooks.h | 27
include/asm-x86_64/mach-xen/asm/bootsetup.h | 42
include/asm-x86_64/mach-xen/asm/desc.h | 263 +
include/asm-x86_64/mach-xen/asm/dma-mapping.h | 207 +
include/asm-x86_64/mach-xen/asm/e820.h | 66
include/asm-x86_64/mach-xen/asm/fixmap.h | 112
include/asm-x86_64/mach-xen/asm/floppy.h | 206 +
include/asm-x86_64/mach-xen/asm/hw_irq.h | 136
include/asm-x86_64/mach-xen/asm/hypercall.h | 406 ++
include/asm-x86_64/mach-xen/asm/hypervisor.h | 2
include/asm-x86_64/mach-xen/asm/io.h | 330 ++
include/asm-x86_64/mach-xen/asm/irq.h | 38
include/asm-x86_64/mach-xen/asm/irqflags.h | 139 +
include/asm-x86_64/mach-xen/asm/maddr.h | 161 +
include/asm-x86_64/mach-xen/asm/mmu.h | 38
include/asm-x86_64/mach-xen/asm/mmu_context.h | 136
include/asm-x86_64/mach-xen/asm/msr.h | 399 ++
include/asm-x86_64/mach-xen/asm/nmi.h | 93
include/asm-x86_64/mach-xen/asm/page.h | 217 +
include/asm-x86_64/mach-xen/asm/pci.h | 166 +
include/asm-x86_64/mach-xen/asm/pgalloc.h | 204 +
include/asm-x86_64/mach-xen/asm/pgtable.h | 575 ++++
include/asm-x86_64/mach-xen/asm/processor.h | 506 +++
include/asm-x86_64/mach-xen/asm/ptrace.h | 127
include/asm-x86_64/mach-xen/asm/smp.h | 150 +
include/asm-x86_64/mach-xen/asm/synch_bitops.h | 2
include/asm-x86_64/mach-xen/asm/system.h | 262 +
include/asm-x86_64/mach-xen/asm/timer.h | 67
include/asm-x86_64/mach-xen/asm/tlbflush.h | 103
include/asm-x86_64/mach-xen/asm/vga.h | 20
include/asm-x86_64/mach-xen/asm/xenoprof.h | 1
include/asm-x86_64/mach-xen/asm/xor.h | 328 ++
include/asm-x86_64/mach-xen/irq_vectors.h | 123
include/asm-x86_64/mach-xen/mach_time.h | 111
include/asm-x86_64/mach-xen/mach_timer.h | 50
include/asm-x86_64/mach-xen/setup_arch_post.h | 63
include/asm-x86_64/mach-xen/setup_arch_pre.h | 5
include/linux/gfp.h | 6
include/linux/highmem.h | 6
include/linux/interrupt.h | 6
include/linux/kexec.h | 13
include/linux/mm.h | 14
include/linux/page-flags.h | 14
include/linux/skbuff.h | 8
include/xen/balloon.h | 57
include/xen/blkif.h | 97
include/xen/cpu_hotplug.h | 44
include/xen/driver_util.h | 14
include/xen/evtchn.h | 126
include/xen/features.h | 20
include/xen/gnttab.h | 167 +
include/xen/hvm.h | 23
include/xen/hypercall.h | 24
include/xen/hypervisor_sysfs.h | 32
include/xen/interface/COPYING | 38
include/xen/interface/acm.h | 228 +
include/xen/interface/acm_ops.h | 159 +
include/xen/interface/arch-ia64.h | 628 ++++
include/xen/interface/arch-powerpc.h | 125
include/xen/interface/arch-x86/xen-x86_32.h | 168 +
include/xen/interface/arch-x86/xen-x86_64.h | 211 +
include/xen/interface/arch-x86/xen.h | 204 +
include/xen/interface/arch-x86_32.h | 27
include/xen/interface/arch-x86_64.h | 27
include/xen/interface/callback.h | 92
include/xen/interface/dom0_ops.h | 120
include/xen/interface/domctl.h | 478 +++
include/xen/interface/elfnote.h | 233 +
include/xen/interface/elfstructs.h | 527 +++
include/xen/interface/event_channel.h | 264 +
include/xen/interface/features.h | 71
include/xen/interface/foreign/Makefile | 37
include/xen/interface/foreign/mkchecker.py | 58
include/xen/interface/foreign/mkheader.py | 167 +
include/xen/interface/foreign/reference.size | 18
include/xen/interface/foreign/structs.py | 58
include/xen/interface/grant_table.h | 422 +++
include/xen/interface/hvm/e820.h | 47
include/xen/interface/hvm/hvm_info_table.h | 41
include/xen/interface/hvm/hvm_op.h | 73
include/xen/interface/hvm/ioreq.h | 122
include/xen/interface/hvm/params.h | 60
include/xen/interface/hvm/save.h | 462 +++
include/xen/interface/hvm/vmx_assist.h | 116
include/xen/interface/io/blkif.h | 128
include/xen/interface/io/console.h | 51
include/xen/interface/io/fbif.h | 138
include/xen/interface/io/kbdif.h | 130
include/xen/interface/io/netif.h | 184 +
include/xen/interface/io/pciif.h | 83
include/xen/interface/io/protocols.h | 21
include/xen/interface/io/ring.h | 299 ++
include/xen/interface/io/tpmif.h | 77
include/xen/interface/io/xenbus.h | 73
include/xen/interface/io/xs_wire.h | 117
include/xen/interface/kexec.h | 137
include/xen/interface/libelf.h | 241 +
include/xen/interface/memory.h | 281 ++
include/xen/interface/nmi.h | 78
include/xen/interface/physdev.h | 169 +
include/xen/interface/platform.h | 143 +
include/xen/interface/sched.h | 121
include/xen/interface/sysctl.h | 182 +
include/xen/interface/trace.h | 119
include/xen/interface/vcpu.h | 192 +
include/xen/interface/version.h | 91
include/xen/interface/xen-compat.h | 51
include/xen/interface/xen.h | 610 ++++
include/xen/interface/xencomm.h | 41
include/xen/interface/xenoprof.h | 132
include/xen/pcifront.h | 77
include/xen/public/evtchn.h | 88
include/xen/public/gntdev.h | 105
include/xen/public/privcmd.h | 79
include/xen/xen_proc.h | 12
include/xen/xenbus.h | 302 ++
include/xen/xencons.h | 19
include/xen/xenoprof.h | 42
kernel/Kconfig.preempt | 1
kernel/fork.c | 3
kernel/irq/spurious.c | 3
kernel/kexec.c | 73
lib/Makefile | 6
mm/Kconfig | 3
mm/highmem.c | 11
mm/memory.c | 129
mm/mmap.c | 4
mm/page_alloc.c | 24
net/core/dev.c | 66
net/core/skbuff.c | 5
scripts/Makefile.xen | 14
450 files changed, 100652 insertions(+), 189 deletions(-)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig
--- a/arch/i386/Kconfig Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig Mon Jun 04 10:05:28 2007 +0100
@@ -16,6 +16,7 @@ config X86_32
config GENERIC_TIME
bool
+ depends on !X86_XEN
default y
config LOCKDEP_SUPPORT
@@ -102,6 +103,15 @@ config X86_PC
bool "PC-compatible"
help
Choose this option if your computer is a standard PC or compatible.
+
+config X86_XEN
+ bool "Xen-compatible"
+ select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
+ select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
+ select SWIOTLB
+ help
+ Choose this option if you plan to run this kernel on top of the
+ Xen Hypervisor.
config X86_ELAN
bool "AMD Elan"
@@ -213,6 +223,7 @@ source "arch/i386/Kconfig.cpu"
config HPET_TIMER
bool "HPET Timer Support"
+ depends on !X86_XEN
help
This enables the use of the HPET for the kernel's internal timer.
HPET is the next generation timer replacing legacy 8254s.
@@ -263,7 +274,7 @@ source "kernel/Kconfig.preempt"
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
- depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+ depends on !SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
help
A local APIC (Advanced Programmable Interrupt Controller) is an
integrated interrupt controller in the CPU. If you have a single-CPU
@@ -288,12 +299,12 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
bool
- depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+ depends on X86_UP_APIC || ((X86_VISWS || SMP) && !(X86_VOYAGER ||
XEN_UNPRIVILEGED_GUEST))
default y
config X86_IO_APIC
bool
- depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+ depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER ||
XEN_UNPRIVILEGED_GUEST))
default y
config X86_VISWS_APIC
@@ -303,7 +314,7 @@ config X86_VISWS_APIC
config X86_MCE
bool "Machine Check Exception"
- depends on !X86_VOYAGER
+ depends on !(X86_VOYAGER || X86_XEN)
---help---
Machine Check Exception support allows the processor to notify the
kernel if it detects a problem (e.g. overheating, component failure).
@@ -402,6 +413,7 @@ config X86_REBOOTFIXUPS
config MICROCODE
tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+ depends on !XEN_UNPRIVILEGED_GUEST
---help---
If you say Y here and also to "/dev file system support" in the
'File systems' section, you will be able to update the microcode on
@@ -419,6 +431,7 @@ config MICROCODE
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
+ depends on !X86_XEN
help
This device gives privileged processes access to the x86
Model-Specific Registers (MSRs). It is a character device with
@@ -433,6 +446,10 @@ config X86_CPUID
be executed on a specific processor. It is a character device
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
/dev/cpu/31/cpuid.
+
+config SWIOTLB
+ bool
+ default n
source "drivers/firmware/Kconfig"
@@ -616,6 +633,7 @@ config HIGHPTE
config MATH_EMULATION
bool "Math emulation"
+ depends on !X86_XEN
---help---
Linux can emulate a math coprocessor (used for floating point
operations) if you don't have one. 486DX and Pentium processors have
@@ -641,6 +659,8 @@ config MATH_EMULATION
config MTRR
bool "MTRR (Memory Type Range Register) support"
+ depends on !XEN_UNPRIVILEGED_GUEST
+ default y if X86_XEN
---help---
On Intel P6 family processors (Pentium Pro, Pentium II and later)
the Memory Type Range Registers (MTRRs) may be used to control
@@ -675,7 +695,7 @@ config MTRR
config EFI
bool "Boot from EFI support"
- depends on ACPI
+ depends on ACPI && !X86_XEN
default n
---help---
This enables the the kernel to boot on EFI platforms using
@@ -693,7 +713,7 @@ config EFI
config IRQBALANCE
bool "Enable kernel irq balancing"
- depends on SMP && X86_IO_APIC
+ depends on SMP && X86_IO_APIC && !X86_XEN
default y
help
The default yes will allow the kernel to do irq load balancing.
@@ -741,7 +761,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
@@ -793,6 +813,7 @@ config HOTPLUG_CPU
config COMPAT_VDSO
bool "Compat VDSO support"
+ depends on !X86_XEN
default y
help
Map the VDSO to the predictable old-style address too.
@@ -810,18 +831,20 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
depends on HIGHMEM
menu "Power management options (ACPI, APM)"
- depends on !X86_VOYAGER
-
+ depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
+
+if !X86_XEN
source kernel/power/Kconfig
+endif
source "drivers/acpi/Kconfig"
menu "APM (Advanced Power Management) BIOS Support"
-depends on PM && !X86_VISWS
+depends on PM && !(X86_VISWS || X86_XEN)
config APM
tristate "APM (Advanced Power Management) BIOS support"
- depends on PM
+ depends on PM && PM_LEGACY
---help---
APM is a BIOS specification for saving power using several different
techniques. This is mostly useful for battery powered laptops with
@@ -1006,6 +1029,7 @@ choice
config PCI_GOBIOS
bool "BIOS"
+ depends on !X86_XEN
config PCI_GOMMCONFIG
bool "MMConfig"
@@ -1013,6 +1037,13 @@ config PCI_GODIRECT
config PCI_GODIRECT
bool "Direct"
+config PCI_GOXEN_FE
+ bool "Xen PCI Frontend"
+ depends on X86_XEN
+ help
+ The PCI device frontend driver allows the kernel to import arbitrary
+ PCI devices from a PCI backend to support PCI driver domains.
+
config PCI_GOANY
bool "Any"
@@ -1020,7 +1051,7 @@ endchoice
config PCI_BIOS
bool
- depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+ depends on !(X86_VISWS || X86_XEN) && PCI && (PCI_GOBIOS || PCI_GOANY)
default y
config PCI_DIRECT
@@ -1032,6 +1063,18 @@ config PCI_MMCONFIG
bool
depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
default y
+
+config XEN_PCIDEV_FRONTEND
+ bool
+ depends on PCI && X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)
+ default y
+
+config XEN_PCIDEV_FE_DEBUG
+ bool "Xen PCI Frontend Debugging"
+ depends on XEN_PCIDEV_FRONTEND
+ default n
+ help
+ Enables some debug statements within the PCI Frontend.
source "drivers/pci/pcie/Kconfig"
@@ -1043,7 +1086,7 @@ config ISA_DMA_API
config ISA
bool "ISA support"
- depends on !(X86_VOYAGER || X86_VISWS)
+ depends on !(X86_VOYAGER || X86_VISWS || X86_XEN)
help
Find out whether you have ISA slots on your motherboard. ISA is the
name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1070,7 +1113,7 @@ source "drivers/eisa/Kconfig"
source "drivers/eisa/Kconfig"
config MCA
- bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+ bool "MCA support" if !(X86_VISWS || X86_VOYAGER || X86_XEN)
default y if X86_VOYAGER
help
MicroChannel Architecture is found in some IBM PS/2 machines and
@@ -1146,6 +1189,8 @@ source "security/Kconfig"
source "crypto/Kconfig"
+source "drivers/xen/Kconfig"
+
source "lib/Kconfig"
#
@@ -1171,7 +1216,7 @@ config X86_SMP
config X86_HT
bool
- depends on SMP && !(X86_VISWS || X86_VOYAGER)
+ depends on SMP && !(X86_VISWS || X86_VOYAGER || X86_XEN)
default y
config X86_BIOS_REBOOT
@@ -1184,6 +1229,16 @@ config X86_TRAMPOLINE
depends on X86_SMP || (X86_VOYAGER && SMP)
default y
+config X86_NO_TSS
+ bool
+ depends on X86_XEN
+ default y
+
+config X86_NO_IDT
+ bool
+ depends on X86_XEN
+ default y
+
config KTIME_SCALAR
bool
default y
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig.cpu
--- a/arch/i386/Kconfig.cpu Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig.cpu Mon Jun 04 10:05:28 2007 +0100
@@ -251,7 +251,7 @@ config X86_PPRO_FENCE
config X86_F00F_BUG
bool
- depends on M586MMX || M586TSC || M586 || M486 || M386
+ depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT
default y
config X86_WP_WORKS_OK
@@ -311,5 +311,5 @@ config X86_OOSTORE
config X86_TSC
bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON ||
MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII
|| M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) &&
!X86_NUMAQ
- default y
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON ||
MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII
|| M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) &&
!X86_NUMAQ && !X86_XEN
+ default y
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig.debug
--- a/arch/i386/Kconfig.debug Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig.debug Mon Jun 04 10:05:28 2007 +0100
@@ -79,6 +79,7 @@ config DOUBLEFAULT
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EMBEDDED
+ depends on !X86_NO_TSS
help
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Makefile
--- a/arch/i386/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -48,6 +48,11 @@ CFLAGS += $(shell if [ $(call
cc-vers
CFLAGS += $(cflags-y)
+cppflags-$(CONFIG_XEN) += \
+ -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
+
+CPPFLAGS += $(cppflags-y)
+
# Default subarch .c files
mcore-y := mach-default
@@ -70,6 +75,10 @@ mcore-$(CONFIG_X86_BIGSMP) := mach-defau
#Summit subarch support
mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
mcore-$(CONFIG_X86_SUMMIT) := mach-default
+
+# Xen subarch support
+mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-i386/mach-xen
+mcore-$(CONFIG_X86_XEN) := mach-xen
# generic subarchitecture
mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
@@ -105,6 +114,19 @@ PHONY += zImage bzImage compressed zlilo
PHONY += zImage bzImage compressed zlilo bzlilo \
zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
+ifdef CONFIG_XEN
+CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o
+boot := arch/i386/boot-xen
+.PHONY: vmlinuz
+all: vmlinuz
+
+vmlinuz: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $@
+
+install:
+ $(Q)$(MAKE) $(build)=$(boot) $@
+else
all: bzImage
# KBUILD_IMAGE specify target image being built
@@ -127,6 +149,7 @@ fdimage fdimage144 fdimage288 isoimage:
install:
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+endif
archclean:
$(Q)$(MAKE) $(clean)=arch/i386/boot
@@ -145,3 +168,4 @@ CLEAN_FILES += arch/$(ARCH)/boot/fdimage
CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
arch/$(ARCH)/boot/image.iso \
arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += vmlinuz vmlinux-stripped
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/boot-xen/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/boot-xen/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,19 @@
+
+OBJCOPYFLAGS := -g --strip-unneeded
+
+vmlinuz: vmlinux-stripped FORCE
+ $(call if_changed,gzip)
+
+vmlinux-stripped: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+INSTALL_ROOT := $(patsubst %/boot,%,$(INSTALL_PATH))
+
+XINSTALL_NAME ?= $(KERNELRELEASE)
+install:
+ mkdir -p $(INSTALL_ROOT)/boot
+ install -m0644 vmlinuz
$(INSTALL_ROOT)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+ install -m0644 vmlinux
$(INSTALL_ROOT)/boot/vmlinux-syms-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+ install -m0664 .config
$(INSTALL_ROOT)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+ install -m0664 System.map
$(INSTALL_ROOT)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+ ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
$(INSTALL_ROOT)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(shell [ -e
$(objtree)/localversion-xen ] && cat
$(objtree)/localversion-xen)$(INSTALL_SUFFIX)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -44,6 +44,12 @@ EXTRA_AFLAGS := -traditional
obj-$(CONFIG_SCx200) += scx200.o
+ifdef CONFIG_XEN
+vsyscall_note := vsyscall-note-xen.o
+else
+vsyscall_note := vsyscall-note.o
+endif
+
# vsyscall.o contains the vsyscall DSO images as __initdata.
# We must build both images before we can assemble it.
# Note: kbuild does not track this dependency due to usage of .incbin
@@ -65,7 +71,7 @@ SYSCFLAGS_vsyscall-int80.so = $(vsyscall
$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
- $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
+ $(obj)/vsyscall-%.o $(obj)/$(vsyscall_note) FORCE
$(call if_changed,syscall)
# We also create a special relocatable object that should mirror the symbol
@@ -77,8 +83,20 @@ extra-y += vsyscall-syms.o
SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
- $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
+ $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
$(call if_changed,syscall)
k8-y += ../../x86_64/kernel/k8.o
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+
+obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+extra-y := $(call cherrypickxen, $(extra-y))
+%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/acpi/Makefile
--- a/arch/i386/kernel/acpi/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/acpi/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -6,3 +6,7 @@ obj-y += cstate.o processor.o
obj-y += cstate.o processor.o
endif
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/acpi/boot-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/acpi/boot-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1168 @@
+/*
+ * boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/irq.h>
+
+#include <asm/pgtable.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/mpspec.h>
+
+#ifdef CONFIG_X86_64
+
+extern void __init clustered_apic_check(void);
+
+extern int gsi_irq_sharing(int gsi);
+#include <asm/proto.h>
+
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) {
return 0; }
+
+
+#else /* X86 */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
+#endif /* X86 */
+
+#define BAD_MADT_ENTRY(entry, end) ( \
+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
+ ((acpi_table_entry_header *)entry)->length < sizeof(*entry))
+
+#define PREFIX "ACPI: "
+
+int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ
initialization */
+int acpi_ht __initdata = 1; /* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES 256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+ {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+ Boot-time Configuration
+ --------------------------------------------------------------------------
*/
+
+/*
+ * The default interrupt routing model is PIC (8259). This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+
+/* rely on all ACPI tables being in the direct mapping */
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+ if (!phys_addr || !size)
+ return NULL;
+
+ if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
+ return __va(phys_addr);
+
+ return NULL;
+}
+
+#else
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+ unsigned long base, offset, mapped_size;
+ int idx;
+
+#ifndef CONFIG_XEN
+ if (phys + size < 8 * 1024 * 1024)
+ return __va(phys);
+#endif
+
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_ACPI_END, phys);
+ base = fix_to_virt(FIX_ACPI_END);
+
+ /*
+ * Most cases can be covered by the below.
+ */
+ idx = FIX_ACPI_END;
+ while (mapped_size < size) {
+ if (--idx < FIX_ACPI_BEGIN)
+ return NULL; /* cannot handle this */
+ phys += PAGE_SIZE;
+ set_fixmap(idx, phys);
+ mapped_size += PAGE_SIZE;
+ }
+
+ return ((unsigned char *)base + offset);
+}
+#endif
+
+#ifdef CONFIG_PCI_MMCONFIG
+/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+ unsigned long i;
+ int config_size;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ /* how many config structures do we have */
+ pci_mmcfg_config_num = 0;
+ i = size - sizeof(struct acpi_table_mcfg);
+ while (i >= sizeof(struct acpi_table_mcfg_config)) {
+ ++pci_mmcfg_config_num;
+ i -= sizeof(struct acpi_table_mcfg_config);
+ };
+ if (pci_mmcfg_config_num == 0) {
+ printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
+ return -ENODEV;
+ }
+
+ config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+ pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+ if (!pci_mmcfg_config) {
+ printk(KERN_WARNING PREFIX
+ "No memory for MCFG config tables\n");
+ return -ENOMEM;
+ }
+
+ memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+ for (i = 0; i < pci_mmcfg_config_num; ++i) {
+ if (mcfg->config[i].base_reserved) {
+ printk(KERN_ERR PREFIX
+ "MMCONFIG not in low 4GB of memory\n");
+ kfree(pci_mmcfg_config);
+ pci_mmcfg_config_num = 0;
+ return -ENODEV;
+ }
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_madt *madt = NULL;
+
+ if (!phys_addr || !size || !cpu_has_apic)
+ return -EINVAL;
+
+ madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
+ if (!madt) {
+ printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+ return -ENODEV;
+ }
+
+ if (madt->lapic_address) {
+ acpi_lapic_addr = (u64) madt->lapic_address;
+
+ printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+ madt->lapic_address);
+ }
+
+ acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
+{
+ struct acpi_table_lapic *processor = NULL;
+
+ processor = (struct acpi_table_lapic *)header;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* Record local apic id only when enabled */
+ if (processor->flags.enabled)
+ x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+ /*
+ * We need to register disabled CPU as well to permit
+ * counting disabled CPUs. This allows us to size
+ * cpus_possible_map more accurately, to permit
+ * to not preallocating memory for all NR_CPUS
+ * when we use CPU hotplug.
+ */
+ mp_register_lapic(processor->id, /* APIC ID */
+ processor->flags.enabled); /* Enabled? */
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+ const unsigned long end)
+{
+ struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+ lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header;
+
+ if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+ return -EINVAL;
+
+ acpi_lapic_addr = lapic_addr_ovr->address;
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
+{
+ struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+ lapic_nmi = (struct acpi_table_lapic_nmi *)header;
+
+ if (BAD_MADT_ENTRY(lapic_nmi, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (lapic_nmi->lint != 1)
+ printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+ return 0;
+}
+
+#endif /*CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+
+static int __init
+acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
+{
+ struct acpi_table_ioapic *ioapic = NULL;
+
+ ioapic = (struct acpi_table_ioapic *)header;
+
+ if (BAD_MADT_ENTRY(ioapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ mp_register_ioapic(ioapic->id,
+ ioapic->address, ioapic->global_irq_base);
+
+ return 0;
+}
+
+/*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+{
+ if (trigger == 0) /* compatible SCI trigger is level */
+ trigger = 3;
+
+ if (polarity == 0) /* compatible SCI polarity is low */
+ polarity = 3;
+
+ /* Command-line over-ride via acpi_sci= */
+ if (acpi_sci_flags.trigger)
+ trigger = acpi_sci_flags.trigger;
+
+ if (acpi_sci_flags.polarity)
+ polarity = acpi_sci_flags.polarity;
+
+ /*
+ * mp_config_acpi_legacy_irqs() already setup IRQs < 16
+ * If GSI is < 16, this will update its flags,
+ * else it will create a new mp_irqs[] entry.
+ */
+ mp_override_legacy_irq(gsi, polarity, trigger, gsi);
+
+ /*
+ * stash over-ride to indicate we've been here
+ * and for later update of acpi_fadt
+ */
+ acpi_sci_override_gsi = gsi;
+ return;
+}
+
+static int __init
+acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+ const unsigned long end)
+{
+ struct acpi_table_int_src_ovr *intsrc = NULL;
+
+ intsrc = (struct acpi_table_int_src_ovr *)header;
+
+ if (BAD_MADT_ENTRY(intsrc, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (intsrc->bus_irq == acpi_fadt.sci_int) {
+ acpi_sci_ioapic_setup(intsrc->global_irq,
+ intsrc->flags.polarity,
+ intsrc->flags.trigger);
+ return 0;
+ }
+
+ if (acpi_skip_timer_override &&
+ intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
+ }
+
+ mp_override_legacy_irq(intsrc->bus_irq,
+ intsrc->flags.polarity,
+ intsrc->flags.trigger, intsrc->global_irq);
+
+ return 0;
+}
+
+static int __init
+acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
+{
+ struct acpi_table_nmi_src *nmi_src = NULL;
+
+ nmi_src = (struct acpi_table_nmi_src *)header;
+
+ if (BAD_MADT_ENTRY(nmi_src, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support nimsrc entries? */
+
+ return 0;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+/*
+ * acpi_pic_sci_set_trigger()
+ *
+ * use ELCR to set PIC-mode trigger type for SCI
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
+ * it may require Edge Trigger -- use "acpi_sci=edge"
+ *
+ * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
+ * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
+ * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
+ */
+
+void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
+{
+ unsigned int mask = 1 << irq;
+ unsigned int old, new;
+
+ /* Real old ELCR mask */
+ old = inb(0x4d0) | (inb(0x4d1) << 8);
+
+ /*
+ * If we use ACPI to set PCI irq's, then we should clear ELCR
+ * since we will set it correctly as we enable the PCI irq
+ * routing.
+ */
+ new = acpi_noirq ? old : 0;
+
+ /*
+ * Update SCI information in the ELCR, it isn't in the PCI
+ * routing tables..
+ */
+ switch (trigger) {
+ case 1: /* Edge - clear */
+ new &= ~mask;
+ break;
+ case 3: /* Level - set */
+ new |= mask;
+ break;
+ }
+
+ if (old == new)
+ return;
+
+ printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+ outb(new, 0x4d0);
+ outb(new >> 8, 0x4d1);
+}
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (use_pci_vector() && !platform_legacy_irq(gsi))
+ *irq = IO_APIC_VECTOR(gsi);
+ else
+#endif
+ *irq = gsi_irq_sharing(gsi);
+ return 0;
+}
+
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+{
+ unsigned int irq;
+ unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_PCI
+ /*
+ * Make sure all (legacy) PCI IRQs are set as level-triggered.
+ */
+ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+ extern void eisa_set_level_irq(unsigned int irq);
+
+ if (triggering == ACPI_LEVEL_SENSITIVE)
+ eisa_set_level_irq(gsi);
+ }
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+ if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+ plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+ }
+#endif
+ acpi_gsi_to_irq(plat_gsi, &irq);
+ return irq;
+}
+
+EXPORT_SYMBOL(acpi_register_gsi);
+
+/*
+ * ACPI based hotplug support for CPU
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+ /* TBD */
+ return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+int acpi_unmap_lsapic(int cpu)
+{
+ /* TBD */
+ return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+ /* TBD */
+ return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+ /* TBD */
+ return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
+static unsigned long __init
+acpi_scan_rsdp(unsigned long start, unsigned long length)
+{
+ unsigned long offset = 0;
+ unsigned long sig_len = sizeof("RSD PTR ") - 1;
+ unsigned long vstart = (unsigned long)isa_bus_to_virt(start);
+
+ /*
+ * Scan all 16-byte boundaries of the physical memory region for the
+ * RSDP signature.
+ */
+ for (offset = 0; offset < length; offset += 16) {
+ if (strncmp((char *)(vstart + offset), "RSD PTR ", sig_len))
+ continue;
+ return (start + offset);
+ }
+
+ return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_sbf *sb;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size);
+ if (!sb) {
+ printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+ return -ENODEV;
+ }
+
+ sbf_port = sb->sbf_cmos; /* Save CMOS port */
+
+ return 0;
+}
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+ struct acpi_table_hpet *hpet_tbl;
+
+ if (!phys || !size)
+ return -EINVAL;
+
+ hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size);
+ if (!hpet_tbl) {
+ printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+ return -ENODEV;
+ }
+
+ if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+ printk(KERN_WARNING PREFIX "HPET timers must be located in "
+ "memory.\n");
+ return -1;
+ }
+#ifdef CONFIG_X86_64
+ vxtime.hpet_address = hpet_tbl->addr.addrl |
+ ((long)hpet_tbl->addr.addrh << 32);
+
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, vxtime.hpet_address);
+#else /* X86 */
+ {
+ extern unsigned long hpet_address;
+
+ hpet_address = hpet_tbl->addr.addrl;
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
+ }
+#endif /* X86 */
+
+ return 0;
+}
+#else
+#define acpi_parse_hpet NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+ struct fadt_descriptor *fadt = NULL;
+
+ fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size);
+ if (!fadt) {
+ printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+ return 0;
+ }
+ /* initialize sci_int early for INT_SRC_OVR MADT parsing */
+ acpi_fadt.sci_int = fadt->sci_int;
+
+ /* initialize rev and apic_phys_dest_mode for x86_64 genapic */
+ acpi_fadt.revision = fadt->revision;
+ acpi_fadt.force_apic_physical_destination_mode =
+ fadt->force_apic_physical_destination_mode;
+
+#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
+ /* detect the location of the ACPI PM Timer */
+ if (fadt->revision >= FADT2_REVISION_ID) {
+ /* FADT rev. 2 */
+ if (fadt->xpm_tmr_blk.address_space_id !=
+ ACPI_ADR_SPACE_SYSTEM_IO)
+ return 0;
+
+ pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ /*
+ * "X" fields are optional extensions to the original V1.0
+ * fields, so we must selectively expand V1.0 fields if the
+ * corresponding X field is zero.
+ */
+ if (!pmtmr_ioport)
+ pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ } else {
+ /* FADT rev. 1 */
+ pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ }
+ if (pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
+ pmtmr_ioport);
+#endif
+ return 0;
+}
+
+unsigned long __init acpi_find_rsdp(void)
+{
+ unsigned long rsdp_phys = 0;
+
+ if (efi_enabled) {
+ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi20;
+ else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi;
+ }
+ /*
+ * Scan memory looking for the RSDP signature. First search EBDA (low
+ * memory) paragraphs and then search upper memory (E0000-FFFFF).
+ */
+ rsdp_phys = acpi_scan_rsdp(0, 0x400);
+ if (!rsdp_phys)
+ rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
+
+ return rsdp_phys;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init acpi_parse_madt_lapic_entries(void)
+{
+ int count;
+
+ if (!cpu_has_apic)
+ return -ENODEV;
+
+ /*
+ * Note that the LAPIC address is obtained from the MADT (32-bit value)
+ * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+ */
+
+ count =
+ acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR,
+ acpi_parse_lapic_addr_ovr, 0);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX
+ "Error parsing LAPIC address override entry\n");
+ return count;
+ }
+
+ mp_register_lapic_address(acpi_lapic_addr);
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+ MAX_APICS);
+ if (!count) {
+ printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return -ENODEV;
+ } else if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ count =
+ acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+ return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init acpi_parse_madt_ioapic_entries(void)
+{
+ int count;
+
+ /*
+ * ACPI interpreter is required to complete interrupt setup,
+ * so if it is off, don't enumerate the io-apics with ACPI.
+ * If MPS is present, it will handle them,
+ * otherwise the system will stay in PIC mode
+ */
+ if (acpi_disabled || acpi_noirq) {
+ return -ENODEV;
+ }
+
+ if (!cpu_has_apic)
+ return -ENODEV;
+
+ /*
+ * if "noapic" boot option, don't look for IO-APICs
+ */
+ if (skip_ioapic_setup) {
+ printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+ "due to 'noapic' option.\n");
+ return -ENODEV;
+ }
+
+ count =
+ acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic,
+ MAX_IO_APICS);
+ if (!count) {
+ printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+ return -ENODEV;
+ } else if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+ return count;
+ }
+
+ count =
+ acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
+ NR_IRQ_VECTORS);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX
+ "Error parsing interrupt source overrides entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ /*
+ * If BIOS did not supply an INT_SRC_OVR for the SCI
+ * pretend we got one so we can set the SCI flags.
+ */
+ if (!acpi_sci_override_gsi)
+ acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+ /* Fill in identity legacy mapings where no override */
+ mp_config_acpi_legacy_irqs();
+
+ count =
+ acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
+ NR_IRQ_VECTORS);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+ return -1;
+}
+#endif /* !CONFIG_X86_IO_APIC */
+
+static void __init acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ int count, error;
+
+ count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+ if (count >= 1) {
+
+ /*
+ * Parse MADT LAPIC entries
+ */
+ error = acpi_parse_madt_lapic_entries();
+ if (!error) {
+ acpi_lapic = 1;
+
+#ifdef CONFIG_X86_GENERICARCH
+ generic_bigsmp_probe();
+#endif
+ /*
+ * Parse MADT IO-APIC entries
+ */
+ error = acpi_parse_madt_ioapic_entries();
+ if (!error) {
+ acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+ acpi_irq_balance_set(NULL);
+ acpi_ioapic = 1;
+
+ smp_found_config = 1;
+ clustered_apic_check();
+ }
+ }
+ if (error == -EINVAL) {
+ /*
+ * Dell Precision Workstation 410, 610 come here.
+ */
+ printk(KERN_ERR PREFIX
+ "Invalid BIOS MADT, disabling ACPI\n");
+ disable_acpi();
+ }
+ }
+#endif
+ return;
+}
+
+extern int acpi_force;
+
+#ifdef __i386__
+
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+ d->ident);
+ acpi_noirq_set();
+ }
+ return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+ d->ident);
+ acpi_disable_pci();
+ }
+ return 0;
+}
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
+ disable_acpi();
+ } else {
+ printk(KERN_NOTICE
+ "Warning: DMI blacklist says broken, but acpi forced\n");
+ }
+ return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
+ d->ident);
+ disable_acpi();
+ acpi_ht = 1;
+ } else {
+ printk(KERN_NOTICE
+ "Warning: acpi=force overrules DMI blacklist:
acpi=ht\n");
+ }
+ return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@xxxxxxxxxxxxxxx
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+ /*
+ * Boxes that need ACPI disabled
+ */
+ {
+ .callback = dmi_disable_acpi,
+ .ident = "IBM Thinkpad",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+ },
+ },
+
+ /*
+ * Boxes that need acpi=ht
+ */
+ {
+ .callback = force_acpi_ht,
+ .ident = "FSC Primergy T850",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "DELL GX240",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "HP VISUALIZE NT Workstation",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "Compaq Workstation W8000",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P4B266",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P2B-DS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS CUR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ABIT i440BX-W83977",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+ DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM Bladecenter",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eServer xSeries 360",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 330",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 440",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+ },
+ },
+
+ /*
+ * Boxes that need ACPI PCI IRQ routing disabled
+ */
+ {
+ .callback = disable_acpi_irq,
+ .ident = "ASUS A7V",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+ DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+ /* newer BIOS, Revision 1011, does work */
+ DMI_MATCH(DMI_BIOS_VERSION,
+ "ASUS A7V ACPI BIOS Revision 1007"),
+ },
+ },
+
+ /*
+ * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+ */
+ { /* _BBN 0 bug */
+ .callback = disable_acpi_pci,
+ .ident = "ASUS PR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+ DMI_MATCH(DMI_BIOS_VERSION,
+ "ASUS PR-DLS ACPI BIOS Revision 1010"),
+ DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+ },
+ },
+ {
+ .callback = disable_acpi_pci,
+ .ident = "Acer TravelMate 36x Laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ },
+ },
+ {}
+};
+
+#endif /* __i386__ */
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ * called from setup_arch(), always.
+ * 1. checksums all tables
+ * 2. enumerates lapics
+ * 3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ * acpi_lapic = 1 if LAPIC found
+ * acpi_ioapic = 1 if IOAPIC found
+ * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ * if acpi_blacklisted() acpi_disabled = 1;
+ * acpi_irq_model=...
+ * ...
+ *
+ * return value: (currently ignored)
+ * 0: success
+ * !0: failure
+ */
+
+int __init acpi_boot_table_init(void)
+{
+ int error;
+
+#ifdef __i386__
+ dmi_check_system(acpi_dmi_table);
+#endif
+
+ /*
+ * If acpi_disabled, bail out
+ * One exception: acpi=ht continues far enough to enumerate LAPICs
+ */
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ /*
+ * Initialize the ACPI boot-time table parser.
+ */
+ error = acpi_table_init();
+ if (error) {
+ disable_acpi();
+ return error;
+ }
+
+ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+ /*
+ * blacklist may disable ACPI entirely
+ */
+ error = acpi_blacklisted();
+ if (error) {
+ if (acpi_force) {
+ printk(KERN_WARNING PREFIX "acpi=force override\n");
+ } else {
+ printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+ disable_acpi();
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+int __init acpi_boot_init(void)
+{
+ /*
+ * If acpi_disabled, bail out
+ * One exception: acpi=ht continues far enough to enumerate LAPICs
+ */
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+ /*
+ * set sci_int and PM timer address
+ */
+ acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+ /*
+ * Process the Multiple APIC Description Table (MADT), if present
+ */
+ acpi_process_madt();
+
+ acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+
+ return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/apic-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/apic-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,155 @@
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ * Maciej W. Rozycki : Various updates and fixes.
+ * Mikael Pettersson : Power Management for UP-APIC.
+ * Pavel Machek and
+ * Mikael Pettersson : PM converted to driver model.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+#include <asm/i8253.h>
+#include <asm/nmi.h>
+
+#include <mach_apic.h>
+#include <mach_apicdef.h>
+#include <mach_ipi.h>
+
+#include "io_ports.h"
+
+#ifndef CONFIG_XEN
+/*
+ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
+ * IPIs in place of local APIC timers
+ */
+static cpumask_t timer_bcast_ipi;
+#endif
+
+/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+
+#ifndef CONFIG_XEN
+static int modern_apic(void)
+{
+ unsigned int lvr, version;
+ /* AMD systems use old APIC versions, so check the CPU */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 >= 0xf)
+ return 1;
+ lvr = apic_read(APIC_LVR);
+ version = GET_APIC_VERSION(lvr);
+ return version >= 0x14;
+}
+#endif /* !CONFIG_XEN */
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ * But only ack when the APIC is enabled -AK
+ */
+ if (cpu_has_apic)
+ ack_APIC_irq();
+}
+
+int get_physical_broadcast(void)
+{
+ return 0xff;
+}
+
+#ifndef CONFIG_XEN
+#ifndef CONFIG_SMP
+static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+ smp_local_timer_interrupt(regs);
+}
+#endif
+
+void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+{
+ cpumask_t mask;
+
+ cpus_and(mask, cpu_online_map, timer_bcast_ipi);
+ if (!cpus_empty(mask)) {
+#ifdef CONFIG_SMP
+ send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#else
+ /*
+ * We can directly call the apic timer interrupt handler
+ * in UP case. Minus all irq related functions
+ */
+ up_apic_timer_interrupt_call(regs);
+#endif
+ }
+}
+#endif
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config)
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+
+ return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/asm-offsets.c Mon Jun 04 10:05:28 2007 +0100
@@ -66,9 +66,14 @@ void foo(void)
OFFSET(pbe_orig_address, pbe, orig_address);
OFFSET(pbe_next, pbe, next);
+#ifndef CONFIG_X86_NO_TSS
/* Offset from the sysenter stack to tss.esp0 */
- DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+ DEFINE(SYSENTER_stack_esp0, offsetof(struct tss_struct, esp0) -
sizeof(struct tss_struct));
+#else
+ /* sysenter stack points directly to esp0 */
+ DEFINE(SYSENTER_stack_esp0, 0);
+#endif
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
DEFINE(VDSO_PRELINK, VDSO_PRELINK);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/Makefile
--- a/arch/i386/kernel/cpu/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/cpu/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -17,3 +17,8 @@ obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/common-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/cpu/common-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,743 @@
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bootmem.h>
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#else
+#ifdef CONFIG_XEN
+#define phys_pkg_id(a,b) a
+#endif
+#endif
+#include <asm/hypervisor.h>
+
+#include "cpu.h"
+
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
+#ifndef CONFIG_XEN
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#endif
+
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
+
+struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+extern int disable_pse;
+
+static void default_init(struct cpuinfo_x86 * c)
+{
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+ if (c->cpuid_level == -1) {
+ /* No cpuid. It must be an ancient CPU */
+ if (c->x86 == 4)
+ strcpy(c->x86_model_id, "486");
+ else if (c->x86 == 3)
+ strcpy(c->x86_model_id, "386");
+ }
+}
+
+static struct cpu_dev default_cpu = {
+ .c_init = default_init,
+ .c_vendor = "Unknown",
+};
+static struct cpu_dev * this_cpu = &default_cpu;
+
+static int __init cachesize_setup(char *str)
+{
+ get_option (&str, &cachesize_override);
+ return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+{
+ unsigned int *v;
+ char *p, *q;
+
+ if (cpuid_eax(0x80000000) < 0x80000004)
+ return 0;
+
+ v = (unsigned int *) c->x86_model_id;
+ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+ c->x86_model_id[48] = 0;
+
+ /* Intel chips right-justify this string for some dumb reason;
+ undo that brain damage */
+ p = q = &c->x86_model_id[0];
+ while ( *p == ' ' )
+ p++;
+ if ( p != q ) {
+ while ( *p )
+ *q++ = *p++;
+ while ( q <= &c->x86_model_id[48] )
+ *q++ = '\0'; /* Zero-pad the rest */
+ }
+
+ return 1;
+}
+
+
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+{
+ unsigned int n, dummy, ecx, edx, l2size;
+
+ n = cpuid_eax(0x80000000);
+
+ if (n >= 0x80000005) {
+ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache
%dK (%d bytes/line)\n",
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+ c->x86_cache_size=(ecx>>24)+(edx>>24);
+ }
+
+ if (n < 0x80000006) /* Some chips just has a large L1. */
+ return;
+
+ ecx = cpuid_ecx(0x80000006);
+ l2size = ecx >> 16;
+
+ /* do processor-specific cache resizing */
+ if (this_cpu->c_size_cache)
+ l2size = this_cpu->c_size_cache(c,l2size);
+
+ /* Allow user to override all this if necessary. */
+ if (cachesize_override != -1)
+ l2size = cachesize_override;
+
+ if ( l2size == 0 )
+ return; /* Again, no L2 cache is possible */
+
+ c->x86_cache_size = l2size;
+
+ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+ l2size, ecx & 0xFF);
+}
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table only is used unless init_<vendor>() below doesn't set it; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
*/
+
+/* Look up CPU names by table lookup. */
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+{
+ struct cpu_model_info *info;
+
+ if ( c->x86_model >= 16 )
+ return NULL; /* Range check */
+
+ if (!this_cpu)
+ return NULL;
+
+ info = this_cpu->c_models;
+
+ while (info && info->family) {
+ if (info->family == c->x86)
+ return info->model_names[c->x86_model];
+ info++;
+ }
+ return NULL; /* Not found */
+}
+
+
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+{
+ char *v = c->x86_vendor_id;
+ int i;
+ static int printed;
+
+ for (i = 0; i < X86_VENDOR_NUM; i++) {
+ if (cpu_devs[i]) {
+ if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+ (cpu_devs[i]->c_ident[1] &&
+ !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+ c->x86_vendor = i;
+ if (!early)
+ this_cpu = cpu_devs[i];
+ return;
+ }
+ }
+ }
+ if (!printed) {
+ printed++;
+ printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+ printk(KERN_ERR "CPU: Your system may be unstable.\n");
+ }
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ this_cpu = &default_cpu;
+}
+
+
+static int __init x86_fxsr_setup(char * s)
+{
+ disable_x86_fxsr = 1;
+ return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+static int __init x86_sep_setup(char * s)
+{
+ disable_x86_sep = 1;
+ return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+ u32 f1, f2;
+
+ asm("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __cpuinit have_cpuid_p(void)
+{
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+/* Do minimum CPU detection early.
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache
alignment.
+ The others are not touched to avoid unwanted side effects.
+
+ WARNING: this function is only called on the BP. Don't add code here
+ that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ c->x86_cache_alignment = 32;
+
+ if (!have_cpuid_p())
+ return;
+
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c, 1);
+
+ c->x86 = 4;
+ if (c->cpuid_level >= 0x00000001) {
+ u32 junk, tfms, cap0, misc;
+ cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ if (c->x86 == 0xf)
+ c->x86 += (tfms >> 20) & 0xff;
+ if (c->x86 >= 0x6)
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+ c->x86_mask = tfms & 15;
+ if (cap0 & (1<<19))
+ c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+ }
+}
+
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+{
+ u32 tfms, xlvl;
+ int ebx;
+
+ if (have_cpuid_p()) {
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c, 0);
+ /* Initialize the standard set of capabilities */
+ /* Note that the vendor-specific code below might override */
+
+ /* Intel-defined flags: level 0x00000001 */
+ if ( c->cpuid_level >= 0x00000001 ) {
+ u32 capability, excap;
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+ c->x86_capability[0] = capability;
+ c->x86_capability[4] = excap;
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ if (c->x86 == 0xf)
+ c->x86 += (tfms >> 20) & 0xff;
+ if (c->x86 >= 0x6)
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+ c->x86_mask = tfms & 15;
+#ifdef CONFIG_X86_HT
+ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+ c->apicid = (ebx >> 24) & 0xFF;
+#endif
+ } else {
+ /* Have CPUID level 0 only - unheard of */
+ c->x86 = 4;
+ }
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+ if ( xlvl >= 0x80000001 ) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
+ }
+ if ( xlvl >= 0x80000004 )
+ get_model_name(c); /* Default name */
+ }
+ }
+
+ early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
+}
+
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+ /* Disable processor serial number */
+ unsigned long lo,hi;
+ rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+ lo |= 0x200000;
+ wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
+ clear_bit(X86_FEATURE_PN, c->x86_capability);
+
+ /* Disabling the serial number may affect the cpuid level */
+ c->cpuid_level = cpuid_eax(0);
+ }
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+ disable_x86_serial_nr = 0;
+ return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ c->loops_per_jiffy = loops_per_jiffy;
+ c->x86_cache_size = -1;
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ c->cpuid_level = -1; /* CPUID not detected */
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_vendor_id[0] = '\0'; /* Unset */
+ c->x86_model_id[0] = '\0'; /* Unset */
+ c->x86_max_cores = 1;
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ if (!have_cpuid_p()) {
+ /* First of all, decide if this is a 486 or higher */
+ /* It's a 486 if we can modify the AC flag */
+ if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+ c->x86 = 4;
+ else
+ c->x86 = 3;
+ }
+
+ generic_identify(c);
+
+ printk(KERN_DEBUG "CPU: After generic identify, caps:");
+ for (i = 0; i < NCAPINTS; i++)
+ printk(" %08lx", c->x86_capability[i]);
+ printk("\n");
+
+ if (this_cpu->c_identify) {
+ this_cpu->c_identify(c);
+
+ printk(KERN_DEBUG "CPU: After vendor identify, caps:");
+ for (i = 0; i < NCAPINTS; i++)
+ printk(" %08lx", c->x86_capability[i]);
+ printk("\n");
+ }
+
+ /*
+ * Vendor-specific initialization. In this section we
+ * canonicalize the feature flags, meaning if there are
+ * features a certain CPU supports which CPUID doesn't
+ * tell us, CPUID claiming incorrect flags, or other bugs,
+ * we handle them here.
+ *
+ * At the end of this section, c->x86_capability better
+ * indicate the features this CPU genuinely supports!
+ */
+ if (this_cpu->c_init)
+ this_cpu->c_init(c);
+
+ /* Disable the PN if appropriate */
+ squash_the_stupid_serial_number(c);
+
+ /*
+ * The vendor-specific functions might have changed features. Now
+ * we do "generic changes."
+ */
+
+ /* TSC disabled? */
+ if ( tsc_disable )
+ clear_bit(X86_FEATURE_TSC, c->x86_capability);
+
+ /* FXSR disabled? */
+ if (disable_x86_fxsr) {
+ clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+ clear_bit(X86_FEATURE_XMM, c->x86_capability);
+ }
+
+ /* SEP disabled? */
+ if (disable_x86_sep)
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+ if (disable_pse)
+ clear_bit(X86_FEATURE_PSE, c->x86_capability);
+
+ /* If the model name is still unset, do table lookup. */
+ if ( !c->x86_model_id[0] ) {
+ char *p;
+ p = table_lookup_model(c);
+ if ( p )
+ strcpy(c->x86_model_id, p);
+ else
+ /* Last resort... */
+ sprintf(c->x86_model_id, "%02x/%02x",
+ c->x86, c->x86_model);
+ }
+
+ /* Now the feature flags better reflect actual CPU features! */
+
+ printk(KERN_DEBUG "CPU: After all inits, caps:");
+ for (i = 0; i < NCAPINTS; i++)
+ printk(" %08lx", c->x86_capability[i]);
+ printk("\n");
+
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
+ * all CPUs; so make sure that we indicate which features are
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+ if ( c != &boot_cpu_data ) {
+ /* AND the already accumulated flags with these */
+ for ( i = 0 ; i < NCAPINTS ; i++ )
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+
+ /* Init Machine Check Exception if available. */
+ mcheck_init(c);
+
+ if (c == &boot_cpu_data)
+ sysenter_setup();
+ enable_sep_cpu();
+
+ if (c == &boot_cpu_data)
+ mtrr_bp_init();
+ else
+ mtrr_ap_init();
+}
+
+#ifdef CONFIG_X86_HT
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+{
+ u32 eax, ebx, ecx, edx;
+ int index_msb, core_bits;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+
+ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ return;
+
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+ if (smp_num_siblings == 1) {
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ } else if (smp_num_siblings > 1 ) {
+
+ if (smp_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of the "
+ "siblings %d", smp_num_siblings);
+ smp_num_siblings = 1;
+ return;
+ }
+
+ index_msb = get_count_order(smp_num_siblings);
+ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+ index_msb = get_count_order(smp_num_siblings) ;
+
+ core_bits = get_count_order(c->x86_max_cores);
+
+ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ ((1 << core_bits) - 1);
+
+ if (c->x86_max_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
+ }
+}
+#endif
+
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+{
+ char *vendor = NULL;
+
+ if (c->x86_vendor < X86_VENDOR_NUM)
+ vendor = this_cpu->c_vendor;
+ else if (c->cpuid_level >= 0)
+ vendor = c->x86_vendor_id;
+
+ if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+ printk("%s ", vendor);
+
+ if (!c->x86_model_id[0])
+ printk("%d86", c->x86);
+ else
+ printk("%s", c->x86_model_id);
+
+ if (c->x86_mask || c->cpuid_level >= 0)
+ printk(" stepping %02x\n", c->x86_mask);
+ else
+ printk("\n");
+}
+
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+
+/* This is hacky. :)
+ * We're emulating future behavior.
+ * In the future, the cpu-specific init functions will be called implicitly
+ * via the magic of initcalls.
+ * They will insert themselves into the cpu_devs structure.
+ * Then, when cpu_init() is called, we can just iterate over that array.
+ */
+
+extern int intel_cpu_init(void);
+extern int cyrix_init_cpu(void);
+extern int nsc_init_cpu(void);
+extern int amd_init_cpu(void);
+extern int centaur_init_cpu(void);
+extern int transmeta_init_cpu(void);
+extern int rise_init_cpu(void);
+extern int nexgen_init_cpu(void);
+extern int umc_init_cpu(void);
+
+void __init early_cpu_init(void)
+{
+ intel_cpu_init();
+ cyrix_init_cpu();
+ nsc_init_cpu();
+ amd_init_cpu();
+ centaur_init_cpu();
+ transmeta_init_cpu();
+ rise_init_cpu();
+ nexgen_init_cpu();
+ umc_init_cpu();
+ early_cpu_detect();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /* pse is not compatible with on-the-fly unmapping,
+ * disable it even if the cpus claim to support it.
+ */
+ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+ disable_pse = 1;
+#endif
+}
+
+void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
+{
+ unsigned long frames[16];
+ unsigned long va;
+ int f;
+
+ for (va = gdt_descr->address, f = 0;
+ va < gdt_descr->address + gdt_descr->size;
+ va += PAGE_SIZE, f++) {
+ frames[f] = virt_to_mfn(va);
+ make_lowmem_page_readonly(
+ (void *)va, XENFEAT_writable_descriptor_tables);
+ }
+ if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
+ BUG();
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+ int cpu = smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
+ struct tss_struct * t = &per_cpu(init_tss, cpu);
+#endif
+ struct thread_struct *thread = ¤t->thread;
+ struct desc_struct *gdt;
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+
+ if (cpu_test_and_set(cpu, cpu_initialized)) {
+ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+ for (;;) local_irq_enable();
+ }
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+ if (cpu_has_vme || cpu_has_de)
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ if (tsc_disable && cpu_has_tsc) {
+ printk(KERN_NOTICE "Disabling TSC...\n");
+ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+ set_in_cr4(X86_CR4_TSD);
+ }
+
+#ifndef CONFIG_XEN
+ /* The CPU hotplug case */
+ if (cpu_gdt_descr->address) {
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ memset(gdt, 0, PAGE_SIZE);
+ goto old_gdt;
+ }
+ /*
+ * This is a horrible hack to allocate the GDT. The problem
+ * is that cpu_init() is called really early for the boot CPU
+ * (and hence needs bootmem) but much later for the secondary
+ * CPUs, when bootmem will have gone away
+ */
+ if (NODE_DATA(0)->bdata->node_bootmem_map) {
+ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+ /* alloc_bootmem_pages panics on failure, so no check */
+ memset(gdt, 0, PAGE_SIZE);
+ } else {
+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+ if (unlikely(!gdt)) {
+ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+ for (;;)
+ local_irq_enable();
+ }
+ }
+old_gdt:
+ /*
+ * Initialize the per-CPU GDT with the boot GDT,
+ * and set up the GDT descriptor:
+ */
+ memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+
+ /* Set up GDT entry for 16bit stack */
+ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
+ ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
+ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
+ (CPU_16BIT_STACK_SIZE - 1);
+
+ cpu_gdt_descr->size = GDT_SIZE - 1;
+ cpu_gdt_descr->address = (unsigned long)gdt;
+#else
+ if (cpu == 0 && cpu_gdt_descr->address == 0) {
+ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+ /* alloc_bootmem_pages panics on failure, so no check */
+ memset(gdt, 0, PAGE_SIZE);
+
+ memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+
+ cpu_gdt_descr->size = GDT_SIZE;
+ cpu_gdt_descr->address = (unsigned long)gdt;
+ }
+#endif
+
+ cpu_gdt_init(cpu_gdt_descr);
+
+ /*
+ * Set up and load the per-CPU TSS and LDT
+ */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ if (current->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, current);
+
+ load_esp0(t, thread);
+
+ load_LDT(&init_mm.context);
+
+#ifdef CONFIG_DOUBLEFAULT
+ /* Set up doublefault TSS pointer in the GDT */
+ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+
+ /* Clear %fs and %gs. */
+ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+
+ /* Clear all 6 debug registers: */
+ set_debugreg(0, 0);
+ set_debugreg(0, 1);
+ set_debugreg(0, 2);
+ set_debugreg(0, 3);
+ set_debugreg(0, 6);
+ set_debugreg(0, 7);
+
+ /*
+ * Force FPU initialization:
+ */
+ current_thread_info()->status = 0;
+ clear_used_math();
+ mxcsr_feature_mask_init();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __cpuinit cpu_uninit(void)
+{
+ int cpu = raw_smp_processor_id();
+ cpu_clear(cpu, cpu_initialized);
+
+ /* lazy TLB state */
+ per_cpu(cpu_tlbstate, cpu).state = 0;
+ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/mtrr/Makefile
--- a/arch/i386/kernel/cpu/mtrr/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/cpu/mtrr/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -3,3 +3,10 @@ obj-y += cyrix.o
obj-y += cyrix.o
obj-y += centaur.o
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+n-obj-xen := generic.o state.o amd.o cyrix.o centaur.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/mtrr/main-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/cpu/mtrr/main-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,197 @@
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+
+#include <asm/mtrr.h>
+#include "mtrr.h"
+
+static DEFINE_MUTEX(mtrr_mutex);
+
+void generic_get_mtrr(unsigned int reg, unsigned long *base,
+ unsigned int *size, mtrr_type * type)
+{
+ struct xen_platform_op op;
+
+ op.cmd = XENPF_read_memtype;
+ op.u.read_memtype.reg = reg;
+ (void)HYPERVISOR_platform_op(&op);
+
+ *size = op.u.read_memtype.nr_mfns;
+ *base = op.u.read_memtype.mfn;
+ *type = op.u.read_memtype.type;
+}
+
+struct mtrr_ops generic_mtrr_ops = {
+ .use_intel_if = 1,
+ .get = generic_get_mtrr,
+};
+
+struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
+unsigned int num_var_ranges;
+unsigned int *usage_table;
+
+static void __init set_num_var_ranges(void)
+{
+ struct xen_platform_op op;
+
+ for (num_var_ranges = 0; ; num_var_ranges++) {
+ op.cmd = XENPF_read_memtype;
+ op.u.read_memtype.reg = num_var_ranges;
+ if (HYPERVISOR_platform_op(&op) != 0)
+ break;
+ }
+}
+
+static void __init init_table(void)
+{
+ int i, max;
+
+ max = num_var_ranges;
+ if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
+ == NULL) {
+ printk(KERN_ERR "mtrr: could not allocate\n");
+ return;
+ }
+ for (i = 0; i < max; i++)
+ usage_table[i] = 0;
+}
+
+int mtrr_add_page(unsigned long base, unsigned long size,
+ unsigned int type, char increment)
+{
+ int error;
+ struct xen_platform_op op;
+
+ mutex_lock(&mtrr_mutex);
+
+ op.cmd = XENPF_add_memtype;
+ op.u.add_memtype.mfn = base;
+ op.u.add_memtype.nr_mfns = size;
+ op.u.add_memtype.type = type;
+ error = HYPERVISOR_platform_op(&op);
+ if (error) {
+ mutex_unlock(&mtrr_mutex);
+ BUG_ON(error > 0);
+ return error;
+ }
+
+ if (increment)
+ ++usage_table[op.u.add_memtype.reg];
+
+ mutex_unlock(&mtrr_mutex);
+
+ return op.u.add_memtype.reg;
+}
+
+static int mtrr_check(unsigned long base, unsigned long size)
+{
+ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+ printk(KERN_WARNING
+ "mtrr: size and base must be multiples of 4 kiB\n");
+ printk(KERN_DEBUG
+ "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ dump_stack();
+ return -1;
+ }
+ return 0;
+}
+
+int
+mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+ char increment)
+{
+ if (mtrr_check(base, size))
+ return -EINVAL;
+ return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+ increment);
+}
+
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+ unsigned i;
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned int lsize;
+ int error = -EINVAL;
+ struct xen_platform_op op;
+
+ mutex_lock(&mtrr_mutex);
+
+ if (reg < 0) {
+ /* Search for existing MTRR */
+ for (i = 0; i < num_var_ranges; ++i) {
+ mtrr_if->get(i, &lbase, &lsize, <ype);
+ if (lbase == base && lsize == size) {
+ reg = i;
+ break;
+ }
+ }
+ if (reg < 0) {
+ printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000
found\n", base,
+ size);
+ goto out;
+ }
+ }
+ if (usage_table[reg] < 1) {
+ printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+ goto out;
+ }
+ if (--usage_table[reg] < 1) {
+ op.cmd = XENPF_del_memtype;
+ op.u.del_memtype.handle = 0;
+ op.u.del_memtype.reg = reg;
+ error = HYPERVISOR_platform_op(&op);
+ if (error) {
+ BUG_ON(error > 0);
+ goto out;
+ }
+ }
+ error = reg;
+ out:
+ mutex_unlock(&mtrr_mutex);
+ return error;
+}
+
+int
+mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+ if (mtrr_check(base, size))
+ return -EINVAL;
+ return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+void __init mtrr_bp_init(void)
+{
+}
+
+void mtrr_ap_init(void)
+{
+}
+
+static int __init mtrr_init(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (!is_initial_xendomain())
+ return -ENODEV;
+
+ if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+ (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+ (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+ (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+ return -ENODEV;
+
+ set_num_var_ranges();
+ init_table();
+
+ return 0;
+}
+
+subsys_initcall(mtrr_init);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/crash.c
--- a/arch/i386/kernel/crash.c Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/crash.c Mon Jun 04 10:05:28 2007 +0100
@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
crash_save_this_cpu(regs, cpu);
}
+#ifndef CONFIG_XEN
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static atomic_t waiting_for_crash_ipi;
@@ -154,6 +155,7 @@ static void nmi_shootdown_cpus(void)
/* There are no cpus to shootdown */
}
#endif
+#endif /* CONFIG_XEN */
void machine_crash_shutdown(struct pt_regs *regs)
{
@@ -170,10 +172,12 @@ void machine_crash_shutdown(struct pt_re
/* Make a note of crashing cpu. Will be used in NMI callback.*/
crashing_cpu = smp_processor_id();
+#ifndef CONFIG_XEN
nmi_shootdown_cpus();
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
#endif
+#endif /* CONFIG_XEN */
crash_save_self(regs);
}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/early_printk-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/early_printk-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,2 @@
+
+#include "../../x86_64/kernel/early_printk-xen.c"
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/entry-xen.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/entry-xen.S Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1216 @@
+/*
+ * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * ptrace needs to have all regs on the stack.
+ * if the order here is changed, it needs to be
+ * updated in fork.c:copy_process, signal.c:do_signal,
+ * ptrace.c and ptrace.h
+ *
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - orig_eax
+ * 28(%esp) - %eip
+ * 2C(%esp) - %cs
+ * 30(%esp) - %eflags
+ * 34(%esp) - %oldesp
+ * 38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/irqflags.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/desc.h>
+#include <asm/dwarf2.h>
+#include "irq_vectors.h"
+#include <xen/interface/xen.h>
+
+#define nr_syscalls ((syscall_table_size)/4)
+
+EBX = 0x00
+ECX = 0x04
+EDX = 0x08
+ESI = 0x0C
+EDI = 0x10
+EBP = 0x14
+EAX = 0x18
+DS = 0x1C
+ES = 0x20
+ORIG_EAX = 0x24
+EIP = 0x28
+CS = 0x2C
+EFLAGS = 0x30
+OLDESP = 0x34
+OLDSS = 0x38
+
+CF_MASK = 0x00000001
+TF_MASK = 0x00000100
+IF_MASK = 0x00000200
+DF_MASK = 0x00000400
+NT_MASK = 0x00004000
+VM_MASK = 0x00020000
+/* Pseudo-eflags. */
+NMI_MASK = 0x80000000
+
+#ifndef CONFIG_XEN
+#define DISABLE_INTERRUPTS cli
+#define ENABLE_INTERRUPTS sti
+#else
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending /* 0 */
+#define evtchn_upcall_mask 1
+
+#define sizeof_vcpu_shift 6
+
+#ifdef CONFIG_SMP
+#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
+ shl $sizeof_vcpu_shift,%esi ; \
+ addl HYPERVISOR_shared_info,%esi
+#else
+#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
+#endif
+
+#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
+#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
+#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
+ __DISABLE_INTERRUPTS
+#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
+ __ENABLE_INTERRUPTS
+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
+#endif
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop cli; TRACE_IRQS_OFF
+#else
+#define preempt_stop
+#define resume_kernel restore_nocheck
+#endif
+
+.macro TRACE_IRQS_IRET
+#ifdef CONFIG_TRACE_IRQFLAGS
+ testl $IF_MASK,EFLAGS(%esp) # interrupts off?
+ jz 1f
+ TRACE_IRQS_ON
+1:
+#endif
+.endm
+
+#ifdef CONFIG_VM86
+#define resume_userspace_sig check_userspace
+#else
+#define resume_userspace_sig resume_userspace
+#endif
+
+#define SAVE_ALL \
+ cld; \
+ pushl %es; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET es, 0;*/\
+ pushl %ds; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET ds, 0;*/\
+ pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET eax, 0;\
+ pushl %ebp; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebp, 0;\
+ pushl %edi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edi, 0;\
+ pushl %esi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET esi, 0;\
+ pushl %edx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edx, 0;\
+ pushl %ecx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ecx, 0;\
+ pushl %ebx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebx, 0;\
+ movl $(__USER_DS), %edx; \
+ movl %edx, %ds; \
+ movl %edx, %es;
+
+#define RESTORE_INT_REGS \
+ popl %ebx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebx;\
+ popl %ecx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ecx;\
+ popl %edx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edx;\
+ popl %esi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE esi;\
+ popl %edi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edi;\
+ popl %ebp; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebp;\
+ popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE eax
+
+#define RESTORE_REGS \
+ RESTORE_INT_REGS; \
+1: popl %ds; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE ds;*/\
+2: popl %es; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE es;*/\
+.section .fixup,"ax"; \
+3: movl $0,(%esp); \
+ jmp 1b; \
+4: movl $0,(%esp); \
+ jmp 2b; \
+.previous; \
+.section __ex_table,"a";\
+ .align 4; \
+ .long 1b,3b; \
+ .long 2b,4b; \
+.previous
+
+#define RING0_INT_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 3*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 4*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, OLDESP-EBX;\
+ /*CFI_OFFSET cs, CS-OLDESP;*/\
+ CFI_OFFSET eip, EIP-OLDESP;\
+ /*CFI_OFFSET es, ES-OLDESP;*/\
+ /*CFI_OFFSET ds, DS-OLDESP;*/\
+ CFI_OFFSET eax, EAX-OLDESP;\
+ CFI_OFFSET ebp, EBP-OLDESP;\
+ CFI_OFFSET edi, EDI-OLDESP;\
+ CFI_OFFSET esi, ESI-OLDESP;\
+ CFI_OFFSET edx, EDX-OLDESP;\
+ CFI_OFFSET ecx, ECX-OLDESP;\
+ CFI_OFFSET ebx, EBX-OLDESP
+
+ENTRY(ret_from_fork)
+ CFI_STARTPROC
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ pushl $0x0202 # Reset kernel eflags
+ CFI_ADJUST_CFA_OFFSET 4
+ popfl
+ CFI_ADJUST_CFA_OFFSET -4
+ jmp syscall_exit
+ CFI_ENDPROC
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+ # userspace resumption stub bypassing syscall exit tracing
+ ALIGN
+ RING0_PTREGS_FRAME
+ret_from_exception:
+ preempt_stop
+ret_from_intr:
+ GET_THREAD_INFO(%ebp)
+check_userspace:
+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb CS(%esp), %al
+ testl $(VM_MASK | 2), %eax
+ jz resume_kernel
+ENTRY(resume_userspace)
+ DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
+ # int/exception return?
+ jne work_pending
+ jmp restore_all
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+ cli
+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
+ jnz restore_nocheck
+need_resched:
+ movl TI_flags(%ebp), %ecx # need_resched set ?
+ testb $_TIF_NEED_RESCHED, %cl
+ jz restore_all
+ testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
+ jz restore_all
+ call preempt_schedule_irq
+ jmp need_resched
+#endif
+ CFI_ENDPROC
+
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+ the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
+
+ # sysenter call handler stub
+ENTRY(sysenter_entry)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA esp, 0
+ CFI_REGISTER esp, ebp
+ movl SYSENTER_stack_esp0(%esp),%esp
+sysenter_past_esp:
+ /*
+ * No need to follow this irqs on/off section: the syscall
+ * disabled irqs and here we enable it straight after entry:
+ */
+ sti
+ pushl $(__USER_DS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ss, 0*/
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esp, 0
+ pushfl
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $(__USER_CS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET cs, 0*/
+ /*
+ * Push current_thread_info()->sysenter_return to the stack.
+ * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+ * pushed above; +8 corresponds to copy_thread's esp0 setting.
+ */
+ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
+
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+ cmpl $__PAGE_OFFSET-3,%ebp
+ jae syscall_fault
+1: movl (%ebp),%ebp
+.section __ex_table,"a"
+ .align 4
+ .long 1b,syscall_fault
+.previous
+
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+
+ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not
testb */
+ testw
$(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ cmpl $(nr_syscalls), %eax
+ jae syscall_badsys
+ call *sys_call_table(,%eax,4)
+ movl %eax,EAX(%esp)
+ DISABLE_INTERRUPTS
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx
+ jne syscall_exit_work
+/* if something modifies registers it must also disable sysexit */
+ movl EIP(%esp), %edx
+ movl OLDESP(%esp), %ecx
+ xorl %ebp,%ebp
+#ifdef CONFIG_XEN
+ TRACE_IRQS_ON
+ __ENABLE_INTERRUPTS
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
+ __TEST_PENDING
+ jnz 14f # process more events if necessary...
+ movl ESI(%esp), %esi
+ sysexit
+14: __DISABLE_INTERRUPTS
+ TRACE_IRQS_OFF
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
+ push %esp
+ call evtchn_do_upcall
+ add $4,%esp
+ jmp ret_from_intr
+#else
+ TRACE_IRQS_ON
+ sti
+ sysexit
+#endif /* !CONFIG_XEN */
+ CFI_ENDPROC
+
+
+ # system call handler stub
+ENTRY(system_call)
+ RING0_INT_FRAME # can't unwind into user space anyway
+ pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ testl $TF_MASK,EFLAGS(%esp)
+ jz no_singlestep
+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
+ # system call tracing in operation /
emulation
+ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not
testb */
+ testw
$(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ cmpl $(nr_syscalls), %eax
+ jae syscall_badsys
+syscall_call:
+ call *sys_call_table(,%eax,4)
+ movl %eax,EAX(%esp) # store the return value
+syscall_exit:
+ DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx # current->work
+ jne syscall_exit_work
+
+restore_all:
+#ifndef CONFIG_XEN
+ movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
+ # Warning: OLDSS(%esp) contains the wrong/random values if we
+ # are returning to the kernel.
+ # See comments in process.c:copy_thread() for details.
+ movb OLDSS(%esp), %ah
+ movb CS(%esp), %al
+ andl $(VM_MASK | (4 << 8) | 3), %eax
+ cmpl $((4 << 8) | 3), %eax
+ CFI_REMEMBER_STATE
+ je ldt_ss # returning to user-space with LDT SS
+restore_nocheck:
+#else
+restore_nocheck:
+ movl EFLAGS(%esp), %eax
+ testl $(VM_MASK|NMI_MASK), %eax
+ CFI_REMEMBER_STATE
+ jnz hypervisor_iret
+ shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
+ GET_VCPU_INFO
+ andb evtchn_upcall_mask(%esi),%al
+ andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
+ CFI_REMEMBER_STATE
+ jnz restore_all_enable_events # != 0 => enable event delivery
+#endif
+ TRACE_IRQS_IRET
+restore_nocheck_notrace:
+ RESTORE_REGS
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+1: iret
+.section .fixup,"ax"
+iret_exc:
+#ifndef CONFIG_XEN
+ TRACE_IRQS_ON
+ sti
+#endif
+ pushl $0 # no error code
+ pushl $do_iret_error
+ jmp error_code
+.previous
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+
+ CFI_RESTORE_STATE
+#ifndef CONFIG_XEN
+ldt_ss:
+ larl OLDSS(%esp), %eax
+ jnz restore_nocheck
+ testl $0x00400000, %eax # returning to 32bit stack?
+ jnz restore_nocheck # allright, normal return
+ /* If returning to userspace with 16bit stack,
+ * try to fix the higher word of ESP, as the CPU
+ * won't restore it.
+ * This is an "official" bug of all the x86-compatible
+ * CPUs, which we can try to work around to make
+ * dosemu and wine happy. */
+ subl $8, %esp # reserve space for switch16 pointer
+ CFI_ADJUST_CFA_OFFSET 8
+ cli
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ /* Set up the 16bit stack frame with switch32 pointer on top,
+ * and a switch16 pointer on top of the current frame. */
+ call setup_x86_bogus_stack
+ CFI_ADJUST_CFA_OFFSET -8 # frame has moved
+ TRACE_IRQS_IRET
+ RESTORE_REGS
+ lss 20+4(%esp), %esp # switch to 16bit stack
+1: iret
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+#else
+ ALIGN
+restore_all_enable_events:
+ TRACE_IRQS_ON
+ __ENABLE_INTERRUPTS
+scrit: /**** START OF CRITICAL REGION ****/
+ __TEST_PENDING
+ jnz 14f # process more events if necessary...
+ RESTORE_REGS
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+1: iret
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+14: __DISABLE_INTERRUPTS
+ TRACE_IRQS_OFF
+ jmp 11f
+ecrit: /**** END OF CRITICAL REGION ****/
+
+ CFI_RESTORE_STATE
+hypervisor_iret:
+ andl $~NMI_MASK, EFLAGS(%esp)
+ RESTORE_REGS
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+ jmp hypercall_page + (__HYPERVISOR_iret * 32)
+#endif
+ CFI_ENDPROC
+
+ # perform work that needs to be done immediately before resumption
+ ALIGN
+ RING0_PTREGS_FRAME # can't unwind into user space anyway
+work_pending:
+ testb $_TIF_NEED_RESCHED, %cl
+ jz work_notifysig
+work_resched:
+ call schedule
+ DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
+ # than syscall tracing?
+ jz restore_all
+ testb $_TIF_NEED_RESCHED, %cl
+ jnz work_resched
+
+work_notifysig: # deal with pending signals and
+ # notify-resume requests
+ testl $VM_MASK, EFLAGS(%esp)
+ movl %esp, %eax
+ jne work_notifysig_v86 # returning to kernel-space or
+ # vm86-space
+ xorl %edx, %edx
+ call do_notify_resume
+ jmp resume_userspace_sig
+
+ ALIGN
+work_notifysig_v86:
+#ifdef CONFIG_VM86
+ pushl %ecx # save ti_flags for do_notify_resume
+ CFI_ADJUST_CFA_OFFSET 4
+ call save_v86_state # %eax contains pt_regs pointer
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ movl %eax, %esp
+ xorl %edx, %edx
+ call do_notify_resume
+ jmp resume_userspace_sig
+#endif
+
+ # perform syscall exit tracing
+ ALIGN
+syscall_trace_entry:
+ movl $-ENOSYS,EAX(%esp)
+ movl %esp, %eax
+ xorl %edx,%edx
+ call do_syscall_trace
+ cmpl $0, %eax
+ jne resume_userspace # ret != 0 -> running under
PTRACE_SYSEMU,
+ # so must skip actual syscall
+ movl ORIG_EAX(%esp), %eax
+ cmpl $(nr_syscalls), %eax
+ jnae syscall_call
+ jmp syscall_exit
+
+ # perform syscall exit tracing
+ ALIGN
+syscall_exit_work:
+ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+ jz work_pending
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS # could let do_syscall_trace() call
+ # schedule() instead
+ movl %esp, %eax
+ movl $1, %edx
+ call do_syscall_trace
+ jmp resume_userspace
+ CFI_ENDPROC
+
+ RING0_INT_FRAME # can't unwind into user space anyway
+syscall_fault:
+ pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ movl $-EFAULT,EAX(%esp)
+ jmp resume_userspace
+
+syscall_badsys:
+ movl $-ENOSYS,EAX(%esp)
+ jmp resume_userspace
+ CFI_ENDPROC
+
+#ifndef CONFIG_XEN
+#define FIXUP_ESPFIX_STACK \
+ movl %esp, %eax; \
+ /* switch to 32bit stack using the pointer on top of 16bit stack */ \
+ lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+ /* copy data from 16bit stack to 32bit stack */ \
+ call fixup_x86_bogus_stack; \
+ /* put ESP to the proper location */ \
+ movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+ pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ movl %ss, %eax; \
+ /* see if on 16bit stack */ \
+ cmpw $__ESPFIX_SS, %ax; \
+ je 28f; \
+27: popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28: movl $__KERNEL_DS, %eax; \
+ movl %eax, %ds; \
+ movl %eax, %es; \
+ /* switch to 32bit stack */ \
+ FIXUP_ESPFIX_STACK; \
+ jmp 27b; \
+.previous
+
+/*
+ * Build the entry stubs and pointer table with
+ * some assembler magic.
+ */
+.data
+ENTRY(interrupt)
+.text
+
+vector=0
+ENTRY(irq_entries_start)
+ RING0_INT_FRAME
+.rept NR_IRQS
+ ALIGN
+ .if vector
+ CFI_ADJUST_CFA_OFFSET -4
+ .endif
+1: pushl $~(vector)
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp common_interrupt
+.data
+ .long 1b
+.text
+vector=vector+1
+.endr
+
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
+ ALIGN
+common_interrupt:
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ movl %esp,%eax
+ call do_IRQ
+ jmp ret_from_intr
+ CFI_ENDPROC
+
+#define BUILD_INTERRUPT(name, nr) \
+ENTRY(name) \
+ RING0_INT_FRAME; \
+ pushl $~(nr); \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ SAVE_ALL; \
+ TRACE_IRQS_OFF \
+ movl %esp,%eax; \
+ call smp_/**/name; \
+ jmp ret_from_intr; \
+ CFI_ENDPROC
+
+/* The include is where all of the SMP etc. interrupts come from */
+#include "entry_arch.h"
+#else
+#define UNWIND_ESPFIX_STACK
+#endif
+
+ENTRY(divide_error)
+ RING0_INT_FRAME
+ pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_divide_error
+ CFI_ADJUST_CFA_OFFSET 4
+ ALIGN
+error_code:
+ pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0*/
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
+ xorl %eax, %eax
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
+ pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
+ pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
+ decl %eax # eax = -1
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
+ cld
+ pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
+ UNWIND_ESPFIX_STACK
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_REGISTER es, ecx*/
+ movl ES(%esp), %edi # get the function address
+ movl ORIG_EAX(%esp), %edx # get the error code
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, ES(%esp)
+ /*CFI_REL_OFFSET es, ES*/
+ movl $(__USER_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+ movl %esp,%eax # pt_regs pointer
+ call *%edi
+ jmp ret_from_exception
+ CFI_ENDPROC
+
+#ifdef CONFIG_XEN
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+#
+# The sysexit critical region is slightly different. sysexit
+# atomically removes the entire stack frame. If we interrupt in the
+# critical region we know that the entire frame is present and correct
+# so we can simply throw away the new one.
+ENTRY(hypervisor_callback)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ movl EIP(%esp),%eax
+ cmpl $scrit,%eax
+ jb 11f
+ cmpl $ecrit,%eax
+ jb critical_region_fixup
+ cmpl $sysexit_scrit,%eax
+ jb 11f
+ cmpl $sysexit_ecrit,%eax
+ ja 11f
+ addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
+11: push %esp
+ CFI_ADJUST_CFA_OFFSET 4
+ call evtchn_do_upcall
+ add $4,%esp
+ CFI_ADJUST_CFA_OFFSET -4
+ jmp ret_from_intr
+ CFI_ENDPROC
+
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+ movzbl critical_fixup_table-scrit(%eax),%ecx # %eax contains num bytes
popped
+ cmpb $0xff,%cl # 0xff => vcpu_info critical region
+ jne 15f
+ xorl %ecx,%ecx
+15: leal (%esp,%ecx),%esi # %esi points at end of src region
+ leal OLDESP(%esp),%edi # %edi points at end of dst region
+ shrl $2,%ecx # convert words to bytes
+ je 17f # skip loop if nothing to copy
+16: subl $4,%esi # pre-decrementing copy loop
+ subl $4,%edi
+ movl (%esi),%eax
+ movl %eax,(%edi)
+ loop 16b
+17: movl %edi,%esp # final %edi is top of merged stack
+ jmp 11b
+
+.section .rodata,"a"
+critical_fixup_table:
+ .byte 0xff,0xff,0xff # testb $0xff,(%esi) = __TEST_PENDING
+ .byte 0xff,0xff # jnz 14f
+ .byte 0x00 # pop %ebx
+ .byte 0x04 # pop %ecx
+ .byte 0x08 # pop %edx
+ .byte 0x0c # pop %esi
+ .byte 0x10 # pop %edi
+ .byte 0x14 # pop %ebp
+ .byte 0x18 # pop %eax
+ .byte 0x1c # pop %ds
+ .byte 0x20 # pop %es
+ .byte 0x24,0x24,0x24 # add $4,%esp
+ .byte 0x28 # iret
+ .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
+ .byte 0x00,0x00 # jmp 11b
+.previous
+
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+# 1. Fault while reloading DS, ES, FS or GS
+# 2. Fault while executing IRET
+# Category 1 we fix up by reattempting the load, and zeroing the segment
+# register if the load fails.
+# Category 2 we fix up by jumping to do_iret_error. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by maintaining a status value in EAX.
+ENTRY(failsafe_callback)
+ pushl %eax
+ movl $1,%eax
+1: mov 4(%esp),%ds
+2: mov 8(%esp),%es
+3: mov 12(%esp),%fs
+4: mov 16(%esp),%gs
+ testl %eax,%eax
+ popl %eax
+ jz 5f
+ addl $16,%esp # EAX != 0 => Category 2 (Bad IRET)
+ jmp iret_exc
+5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment)
+ RING0_INT_FRAME
+ pushl $0
+ SAVE_ALL
+ jmp ret_from_exception
+.section .fixup,"ax"; \
+6: xorl %eax,%eax; \
+ movl %eax,4(%esp); \
+ jmp 1b; \
+7: xorl %eax,%eax; \
+ movl %eax,8(%esp); \
+ jmp 2b; \
+8: xorl %eax,%eax; \
+ movl %eax,12(%esp); \
+ jmp 3b; \
+9: xorl %eax,%eax; \
+ movl %eax,16(%esp); \
+ jmp 4b; \
+.previous; \
+.section __ex_table,"a"; \
+ .align 4; \
+ .long 1b,6b; \
+ .long 2b,7b; \
+ .long 3b,8b; \
+ .long 4b,9b; \
+.previous
+#endif
+ CFI_ENDPROC
+
+ENTRY(coprocessor_error)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(simd_coprocessor_error)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_simd_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(device_not_available)
+ RING0_INT_FRAME
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+#ifndef CONFIG_XEN
+ movl %cr0, %eax
+ testl $0x4, %eax # EM (math emulation bit)
+ je device_available_emulate
+ pushl $0 # temporary storage for ORIG_EIP
+ CFI_ADJUST_CFA_OFFSET 4
+ call math_emulate
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+ jmp ret_from_exception
+device_available_emulate:
+#endif
+ preempt_stop
+ call math_state_restore
+ jmp ret_from_exception
+ CFI_ENDPROC
+
+#ifndef CONFIG_XEN
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label) \
+ cmpw $__KERNEL_CS,4(%esp); \
+ jne ok; \
+label: \
+ movl SYSENTER_stack_esp0+offset(%esp),%esp; \
+ pushfl; \
+ pushl $__KERNEL_CS; \
+ pushl $sysenter_past_esp
+#endif /* CONFIG_XEN */
+
+KPROBE_ENTRY(debug)
+ RING0_INT_FRAME
+#ifndef CONFIG_XEN
+ cmpl $sysenter_entry,(%esp)
+ jne debug_stack_correct
+ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+#endif /* !CONFIG_XEN */
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ xorl %edx,%edx # error code 0
+ movl %esp,%eax # pt_regs pointer
+ call do_debug
+ jmp ret_from_exception
+ CFI_ENDPROC
+ .previous .text
+#ifndef CONFIG_XEN
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl %ss, %eax
+ cmpw $__ESPFIX_SS, %ax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ je nmi_16bit_stack
+ cmpl $sysenter_entry,(%esp)
+ je nmi_stack_fixup
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl %esp,%eax
+ /* Do not access memory above the end of our stack page,
+ * it might not exist.
+ */
+ andl $(THREAD_SIZE-1),%eax
+ cmpl $(THREAD_SIZE-20),%eax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ jae nmi_stack_correct
+ cmpl $sysenter_entry,12(%esp)
+ je nmi_debug_stack_check
+nmi_stack_correct:
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ jmp restore_nocheck_notrace
+ CFI_ENDPROC
+
+nmi_stack_fixup:
+ FIX_STACK(12,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
+nmi_debug_stack_check:
+ cmpw $__KERNEL_CS,16(%esp)
+ jne nmi_stack_correct
+ cmpl $debug,(%esp)
+ jb nmi_stack_correct
+ cmpl $debug_esp_fix_insn,(%esp)
+ ja nmi_stack_correct
+ FIX_STACK(24,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
+
+nmi_16bit_stack:
+ RING0_INT_FRAME
+ /* create the pointer to lss back */
+ pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %esp
+ CFI_ADJUST_CFA_OFFSET 4
+ movzwl %sp, %esp
+ addw $4, (%esp)
+ /* copy the iret frame of 12 bytes */
+ .rept 3
+ pushl 16(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
+ .endr
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ FIXUP_ESPFIX_STACK # %eax == %esp
+ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
+ xorl %edx,%edx # zero error code
+ call do_nmi
+ RESTORE_REGS
+ lss 12+4(%esp), %esp # back to 16bit stack
+1: iret
+ CFI_ENDPROC
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+#else
+ENTRY(nmi)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ orl $NMI_MASK, EFLAGS(%esp)
+ jmp restore_all
+ CFI_ENDPROC
+#endif
+
+KPROBE_ENTRY(int3)
+ RING0_INT_FRAME
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_int3
+ jmp ret_from_exception
+ CFI_ENDPROC
+ .previous .text
+
+ENTRY(overflow)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_overflow
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(bounds)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_bounds
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(invalid_op)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_invalid_op
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(coprocessor_segment_overrun)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_coprocessor_segment_overrun
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(invalid_TSS)
+ RING0_EC_FRAME
+ pushl $do_invalid_TSS
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(segment_not_present)
+ RING0_EC_FRAME
+ pushl $do_segment_not_present
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+ENTRY(stack_segment)
+ RING0_EC_FRAME
+ pushl $do_stack_segment
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+KPROBE_ENTRY(general_protection)
+ RING0_EC_FRAME
+ pushl $do_general_protection
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+ .previous .text
+
+ENTRY(alignment_check)
+ RING0_EC_FRAME
+ pushl $do_alignment_check
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+ .previous .text
+
+#ifdef CONFIG_X86_MCE
+ENTRY(machine_check)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl machine_check_vector
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+#endif
+
+#ifndef CONFIG_XEN
+ENTRY(spurious_interrupt_bug)
+ RING0_INT_FRAME
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_spurious_interrupt_bug
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+#endif /* !CONFIG_XEN */
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movl 4(%esp), %edx
+ movl (%esp), %ecx
+ leal 4(%esp), %eax
+ movl %ebx, EBX(%edx)
+ xorl %ebx, %ebx
+ movl %ebx, ECX(%edx)
+ movl %ebx, EDX(%edx)
+ movl %esi, ESI(%edx)
+ movl %edi, EDI(%edx)
+ movl %ebp, EBP(%edx)
+ movl %ebx, EAX(%edx)
+ movl $__USER_DS, DS(%edx)
+ movl $__USER_DS, ES(%edx)
+ movl %ebx, ORIG_EAX(%edx)
+ movl %ecx, EIP(%edx)
+ movl 12(%esp), %ecx
+ movl $__KERNEL_CS, CS(%edx)
+ movl %ebx, EFLAGS(%edx)
+ movl %eax, OLDESP(%edx)
+ movl 8(%esp), %eax
+ movl %ecx, 8(%esp)
+ movl EBX(%edx), %ebx
+ movl $__KERNEL_DS, OLDSS(%edx)
+ jmpl *%eax
+ CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
+
+ENTRY(fixup_4gb_segment)
+ RING0_EC_FRAME
+ pushl $do_fixup_4gb_segment
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+
+.section .rodata,"a"
+.align 4
+#include "syscall_table.S"
+
+syscall_table_size=(.-sys_call_table)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/fixup.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/fixup.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * fixup.c
+ *
+ * Binary-rewriting of certain IA32 instructions, on notification by Xen.
+ * Used to avoid repeated slow emulation of common instructions used by the
+ * user-space TLS (Thread-Local Storage) libraries.
+ *
+ * **** NOTE ****
+ * Issues with the binary rewriting have caused it to be removed. Instead
+ * we rely on Xen's emulator to boot the kernel, and then print a banner
+ * message recommending that the user disables /lib/tls.
+ *
+ * Copyright (c) 2004, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+
+#define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args )
+
+fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
+{
+ static unsigned long printed = 0;
+ char info[100];
+ int i;
+
+ /* Ignore statically-linked init. */
+ if (current->tgid == 1)
+ return;
+
+ HYPERVISOR_vm_assist(
+ VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
+
+ if (test_and_set_bit(0, &printed))
+ return;
+
+ sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
+
+ DP("");
+ DP("***************************************************************");
+ DP("***************************************************************");
+ DP("** WARNING: Currently emulating unsupported memory accesses **");
+ DP("** in /lib/tls glibc libraries. The emulation is **");
+ DP("** slow. To ensure full performance you should **");
+ DP("** install a 'xen-friendly' (nosegneg) version of **");
+ DP("** the library, or disable tls support by executing **");
+ DP("** the following as root: **");
+ DP("** mv /lib/tls /lib/tls.disabled **");
+ DP("** Offending process: %-38.38s **", info);
+ DP("***************************************************************");
+ DP("***************************************************************");
+ DP("");
+
+ for (i = 5; i > 0; i--) {
+ touch_softlockup_watchdog();
+ printk("Pausing... %d", i);
+ mdelay(1000);
+ printk("\b\b\b\b\b\b\b\b\b\b\b\b");
+ }
+
+ printk("Continuing...\n\n");
+}
+
+static int __init fixup_init(void)
+{
+ HYPERVISOR_vm_assist(
+ VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
+ return 0;
+}
+__initcall(fixup_init);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/head-xen.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/head-xen.S Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,207 @@
+
+
+.text
+#include <linux/elfnote.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf2.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/elfnote.h>
+
+/*
+ * References to members of the new_cpu_data structure.
+ */
+
+#define X86 new_cpu_data+CPUINFO_x86
+#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
+#define X86_MODEL new_cpu_data+CPUINFO_x86_model
+#define X86_MASK new_cpu_data+CPUINFO_x86_mask
+#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
+#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
+#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
+#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
+
+#define VIRT_ENTRY_OFFSET 0x0
+.org VIRT_ENTRY_OFFSET
+ENTRY(startup_32)
+ movl %esi,xen_start_info
+ cld
+
+ /* Set up the stack pointer */
+ movl $(init_thread_union+THREAD_SIZE),%esp
+
+ /* get vendor info */
+ xorl %eax,%eax # call CPUID with 0 -> return vendor ID
+ XEN_CPUID
+ movl %eax,X86_CPUID # save CPUID level
+ movl %ebx,X86_VENDOR_ID # lo 4 chars
+ movl %edx,X86_VENDOR_ID+4 # next 4 chars
+ movl %ecx,X86_VENDOR_ID+8 # last 4 chars
+
+ movl $1,%eax # Use the CPUID instruction to get CPU type
+ XEN_CPUID
+ movb %al,%cl # save reg for future use
+ andb $0x0f,%ah # mask processor family
+ movb %ah,X86
+ andb $0xf0,%al # mask model
+ shrb $4,%al
+ movb %al,X86_MODEL
+ andb $0x0f,%cl # mask mask revision
+ movb %cl,X86_MASK
+ movl %edx,X86_CAPABILITY
+
+ movb $1,X86_HARD_MATH
+
+ xorl %eax,%eax # Clear FS/GS and LDT
+ movl %eax,%fs
+ movl %eax,%gs
+ cld # gcc2 wants the direction flag cleared at all
times
+
+ pushl %eax # fake return address
+ jmp start_kernel
+
+#define HYPERCALL_PAGE_OFFSET 0x1000
+.org HYPERCALL_PAGE_OFFSET
+ENTRY(hypercall_page)
+ CFI_STARTPROC
+.skip 0x1000
+ CFI_ENDPROC
+
+/*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
+
+/*
+ * BSS section
+ */
+.section ".bss.page_aligned","w"
+ENTRY(empty_zero_page)
+ .fill 4096,1,0
+
+/*
+ * This starts the data section.
+ */
+.data
+
+/*
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
+ */
+ .align L1_CACHE_BYTES
+ENTRY(cpu_gdt_table)
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0000000000000000 /* 0x0b reserved */
+ .quad 0x0000000000000000 /* 0x13 reserved */
+ .quad 0x0000000000000000 /* 0x1b reserved */
+ .quad 0x0000000000000000 /* 0x20 unused */
+ .quad 0x0000000000000000 /* 0x28 unused */
+ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
+ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
+ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
+ .quad 0x0000000000000000 /* 0x4b reserved */
+ .quad 0x0000000000000000 /* 0x53 reserved */
+ .quad 0x0000000000000000 /* 0x5b reserved */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
+
+ .quad 0x0000000000000000 /* 0x80 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x88 LDT descriptor */
+
+ /*
+ * Segments used for calling PnP BIOS have byte granularity.
+ * They code segments and data segments have fixed 64k limits,
+ * the transfer segment sizes are set at run time.
+ */
+ .quad 0x0000000000000000 /* 0x90 32-bit code */
+ .quad 0x0000000000000000 /* 0x98 16-bit code */
+ .quad 0x0000000000000000 /* 0xa0 16-bit data */
+ .quad 0x0000000000000000 /* 0xa8 16-bit data */
+ .quad 0x0000000000000000 /* 0xb0 16-bit data */
+
+ /*
+ * The APM segments have byte granularity and their bases
+ * are set at run time. All have 64k limits.
+ */
+ .quad 0x0000000000000000 /* 0xb8 APM CS code */
+ .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
+ .quad 0x0000000000000000 /* 0xc8 APM DS data */
+
+ .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */
+ .quad 0x0000000000000000 /* 0xd8 - unused */
+ .quad 0x0000000000000000 /* 0xe0 - unused */
+ .quad 0x0000000000000000 /* 0xe8 - unused */
+ .quad 0x0000000000000000 /* 0xf0 - unused */
+ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault
TSS */
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+/*
+ * __xen_guest information
+ */
+.macro utoa value
+ .if (\value) < 0 || (\value) >= 0x10
+ utoa (((\value)>>4)&0x0fffffff)
+ .endif
+ .if ((\value) & 0xf) < 10
+ .byte '0' + ((\value) & 0xf)
+ .else
+ .byte 'A' + ((\value) & 0xf) - 10
+ .endif
+.endm
+
+.section __xen_guest
+ .ascii "GUEST_OS=linux,GUEST_VER=2.6"
+ .ascii ",XEN_VER=xen-3.0"
+ .ascii ",VIRT_BASE=0x"
+ utoa __PAGE_OFFSET
+ .ascii ",ELF_PADDR_OFFSET=0x"
+ utoa __PAGE_OFFSET
+ .ascii ",VIRT_ENTRY=0x"
+ utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
+ .ascii ",HYPERCALL_PAGE=0x"
+ utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
+ .ascii ",FEATURES=writable_page_tables"
+ .ascii "|writable_descriptor_tables"
+ .ascii "|auto_translated_physmap"
+ .ascii "|pae_pgdir_above_4gb"
+ .ascii "|supervisor_mode_kernel"
+#ifdef CONFIG_X86_PAE
+ .ascii ",PAE=yes[extended-cr3]"
+#else
+ .ascii ",PAE=no"
+#endif
+ .ascii ",LOADER=generic"
+ .byte 0
+#endif /* CONFIG_XEN_COMPAT <= 0x030002 */
+
+
+ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux")
+ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6")
+ ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
+ ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET)
+#if CONFIG_XEN_COMPAT <= 0x030002
+ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET)
+#else
+ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0)
+#endif
+ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32)
+ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
+ ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START)
+ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz,
"writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
+#ifdef CONFIG_X86_PAE
+ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
+ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad,
_PAGE_PRESENT,_PAGE_PRESENT)
+#else
+ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
+ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long,
_PAGE_PRESENT,_PAGE_PRESENT)
+#endif
+ ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
+ ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/init_task-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/init_task-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,51 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+#define swapper_pg_dir ((pgd_t *)NULL)
+struct mm_struct init_mm = INIT_MM(init_mm);
+#undef swapper_pg_dir
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+#ifndef CONFIG_X86_NO_TSS
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's.
+ */
+DEFINE_PER_CPU(struct tss_struct, init_tss)
____cacheline_internodealigned_in_smp = INIT_TSS;
+#endif
+
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/io_apic-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/io_apic-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,2777 @@
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@xxxxxxxxxxxxxxxxxxxx> and
+ * Hidemi Kishimoto <kisimoto@xxxxxxxxxxxxxxxxxxxx>,
+ * further tested and cleaned up by Zach Brown <zab@xxxxxxxxxx>
+ * and Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ * Paul Diefenbaugh : Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/nmi.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+#ifdef CONFIG_XEN
+
+#include <xen/interface/xen.h>
+#include <xen/interface/physdev.h>
+
+/* Fake i8259 */
+#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
+#define disable_8259A_irq(_irq) ((void)0)
+#define i8259A_irq_pending(_irq) (0)
+
+unsigned long io_apic_irqs;
+
+static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int
reg)
+{
+ struct physdev_apic apic_op;
+ int ret;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+ if (ret)
+ return ret;
+ return apic_op.value;
+}
+
+static inline void xen_io_apic_write(unsigned int apic, unsigned int reg,
unsigned int value)
+{
+ struct physdev_apic apic_op;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ apic_op.value = value;
+ HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
+}
+
+#define io_apic_read(a,r) xen_io_apic_read(a,r)
+#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+
+#endif /* CONFIG_XEN */
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
+
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int timer_over_8254 __initdata = 1;
+
+/*
+ * Is the SiS APIC rmw bug present ?
+ * -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+int disable_timer_pin_1 __initdata;
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+ int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
+#define vector_to_irq(vector) \
+ (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector) (vector)
+#endif
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
+
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+}
+
+#ifdef CONFIG_XEN
+#define clear_IO_APIC() ((void)0)
+#else
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable,
unsigned long disable)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+ unsigned int pin, reg;
+
+ for (;;) {
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ reg &= ~disable;
+ reg |= enable;
+ io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00010000, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0, 0x00010000);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
+}
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
+{
+ unsigned long flags;
+ int pin;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+ unsigned int apicid_value;
+ cpumask_t tmp;
+
+ cpus_and(tmp, cpumask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
+ apicid_value = cpu_mask_to_apicid(cpumask);
+ /* Prepare to do the io_apic_write */
+ apicid_value = apicid_value << 24;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ for (;;) {
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ set_irq_info(irq, cpumask);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if defined(CONFIG_IRQBALANCE)
+# include <asm/processor.h> /* kernel_thread() */
+# include <linux/kernel_stat.h> /* kstat */
+# include <linux/slab.h> /* kmalloc() */
+# include <linux/timer.h> /* time_after() */
+
+#ifdef CONFIG_BALANCED_IRQ_DEBUG
+# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__,
__LINE__); printk(x); } while (0)
+# define Dprintk(x...) do { TDprintk(x); } while (0)
+# else
+# define TDprintk(x...)
+# define Dprintk(x...)
+# endif
+
+#define IRQBALANCE_CHECK_ARCH -999
+#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA (HZ)
+
+static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
+static int physical_balance __read_mostly;
+static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
+
+static struct irq_cpu_info {
+ unsigned long * last_irq;
+ unsigned long * irq_delta;
+ unsigned long irq;
+} irq_cpu_data[NR_CPUS];
+
+#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
+#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq])
+#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
+
+#define IDLE_ENOUGH(cpu,now) \
+ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
+
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+
+static cpumask_t balance_irq_affinity[NR_IRQS] = {
+ [0 ... NR_IRQS-1] = CPU_MASK_ALL
+};
+
+void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ balance_irq_affinity[irq] = mask;
+}
+
+static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
+ unsigned long now, int direction)
+{
+ int search_idle = 1;
+ int cpu = curr_cpu;
+
+ goto inside;
+
+ do {
+ if (unlikely(cpu == curr_cpu))
+ search_idle = 0;
+inside:
+ if (direction == 1) {
+ cpu++;
+ if (cpu >= NR_CPUS)
+ cpu = 0;
+ } else {
+ cpu--;
+ if (cpu == -1)
+ cpu = NR_CPUS-1;
+ }
+ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
+ (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+ return cpu;
+}
+
+static inline void balance_irq(int cpu, int irq)
+{
+ unsigned long now = jiffies;
+ cpumask_t allowed_mask;
+ unsigned int new_cpu;
+
+ if (irqbalance_disabled)
+ return;
+
+ cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
+ new_cpu = move(cpu, allowed_mask, now, 1);
+ if (cpu != new_cpu) {
+ set_pending_irq(irq, cpumask_of_cpu(new_cpu));
+ }
+}
+
+static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
+{
+ int i, j;
+ Dprintk("Rotating IRQs among CPUs.\n");
+ for_each_online_cpu(i) {
+ for (j = 0; j < NR_IRQS; j++) {
+ if (!irq_desc[j].action)
+ continue;
+ /* Is it a significant load ? */
+ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+ useful_load_threshold)
+ continue;
+ balance_irq(i, j);
+ }
+ }
+ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
+ return;
+}
+
+static void do_irq_balance(void)
+{
+ int i, j;
+ unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
+ unsigned long move_this_load = 0;
+ int max_loaded = 0, min_loaded = 0;
+ int load;
+ unsigned long useful_load_threshold = balanced_irq_interval + 10;
+ int selected_irq;
+ int tmp_loaded, first_attempt = 1;
+ unsigned long tmp_cpu_irq;
+ unsigned long imbalance = 0;
+ cpumask_t allowed_mask, target_cpu_mask, tmp;
+
+ for_each_possible_cpu(i) {
+ int package_index;
+ CPU_IRQ(i) = 0;
+ if (!cpu_online(i))
+ continue;
+ package_index = CPU_TO_PACKAGEINDEX(i);
+ for (j = 0; j < NR_IRQS; j++) {
+ unsigned long value_now, delta;
+ /* Is this an active IRQ? */
+ if (!irq_desc[j].action)
+ continue;
+ if ( package_index == i )
+ IRQ_DELTA(package_index,j) = 0;
+ /* Determine the total count per processor per IRQ */
+ value_now = (unsigned long) kstat_cpu(i).irqs[j];
+
+ /* Determine the activity per processor per IRQ */
+ delta = value_now - LAST_CPU_IRQ(i,j);
+
+ /* Update last_cpu_irq[][] for the next time */
+ LAST_CPU_IRQ(i,j) = value_now;
+
+ /* Ignore IRQs whose rate is less than the clock */
+ if (delta < useful_load_threshold)
+ continue;
+ /* update the load for the processor or package total */
+ IRQ_DELTA(package_index,j) += delta;
+
+ /* Keep track of the higher numbered sibling as well */
+ if (i != package_index)
+ CPU_IRQ(i) += delta;
+ /*
+ * We have sibling A and sibling B in the package
+ *
+ * cpu_irq[A] = load for cpu A + load for cpu B
+ * cpu_irq[B] = load for cpu B
+ */
+ CPU_IRQ(package_index) += delta;
+ }
+ }
+ /* Find the least loaded processor package */
+ for_each_online_cpu(i) {
+ if (i != CPU_TO_PACKAGEINDEX(i))
+ continue;
+ if (min_cpu_irq > CPU_IRQ(i)) {
+ min_cpu_irq = CPU_IRQ(i);
+ min_loaded = i;
+ }
+ }
+ max_cpu_irq = ULONG_MAX;
+
+tryanothercpu:
+ /* Look for heaviest loaded processor.
+ * We may come back to get the next heaviest loaded processor.
+ * Skip processors with trivial loads.
+ */
+ tmp_cpu_irq = 0;
+ tmp_loaded = -1;
+ for_each_online_cpu(i) {
+ if (i != CPU_TO_PACKAGEINDEX(i))
+ continue;
+ if (max_cpu_irq <= CPU_IRQ(i))
+ continue;
+ if (tmp_cpu_irq < CPU_IRQ(i)) {
+ tmp_cpu_irq = CPU_IRQ(i);
+ tmp_loaded = i;
+ }
+ }
+
+ if (tmp_loaded == -1) {
+ /* In the case of small number of heavy interrupt sources,
+ * loading some of the cpus too much. We use Ingo's original
+ * approach to rotate them around.
+ */
+ if (!first_attempt && imbalance >= useful_load_threshold) {
+ rotate_irqs_among_cpus(useful_load_threshold);
+ return;
+ }
+ goto not_worth_the_effort;
+ }
+
+ first_attempt = 0; /* heaviest search */
+ max_cpu_irq = tmp_cpu_irq; /* load */
+ max_loaded = tmp_loaded; /* processor */
+ imbalance = (max_cpu_irq - min_cpu_irq) / 2;
+
+ Dprintk("max_loaded cpu = %d\n", max_loaded);
+ Dprintk("min_loaded cpu = %d\n", min_loaded);
+ Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
+ Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
+ Dprintk("load imbalance = %lu\n", imbalance);
+
+ /* if imbalance is less than approx 10% of max load, then
+ * observe diminishing returns action. - quit
+ */
+ if (imbalance < (max_cpu_irq >> 3)) {
+ Dprintk("Imbalance too trivial\n");
+ goto not_worth_the_effort;
+ }
+
+tryanotherirq:
+ /* if we select an IRQ to move that can't go where we want, then
+ * see if there is another one to try.
+ */
+ move_this_load = 0;
+ selected_irq = -1;
+ for (j = 0; j < NR_IRQS; j++) {
+ /* Is this an active IRQ? */
+ if (!irq_desc[j].action)
+ continue;
+ if (imbalance <= IRQ_DELTA(max_loaded,j))
+ continue;
+ /* Try to find the IRQ that is closest to the imbalance
+ * without going over.
+ */
+ if (move_this_load < IRQ_DELTA(max_loaded,j)) {
+ move_this_load = IRQ_DELTA(max_loaded,j);
+ selected_irq = j;
+ }
+ }
+ if (selected_irq == -1) {
+ goto tryanothercpu;
+ }
+
+ imbalance = move_this_load;
+
+ /* For physical_balance case, we accumlated both load
+ * values in the one of the siblings cpu_irq[],
+ * to use the same code for physical and logical processors
+ * as much as possible.
+ *
+ * NOTE: the cpu_irq[] array holds the sum of the load for
+ * sibling A and sibling B in the slot for the lowest numbered
+ * sibling (A), _AND_ the load for sibling B in the slot for
+ * the higher numbered sibling.
+ *
+ * We seek the least loaded sibling by making the comparison
+ * (A+B)/2 vs B
+ */
+ load = CPU_IRQ(min_loaded) >> 1;
+ for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+ if (load > CPU_IRQ(j)) {
+ /* This won't change cpu_sibling_map[min_loaded] */
+ load = CPU_IRQ(j);
+ min_loaded = j;
+ }
+ }
+
+ cpus_and(allowed_mask,
+ cpu_online_map,
+ balance_irq_affinity[selected_irq]);
+ target_cpu_mask = cpumask_of_cpu(min_loaded);
+ cpus_and(tmp, target_cpu_mask, allowed_mask);
+
+ if (!cpus_empty(tmp)) {
+
+ Dprintk("irq = %d moved to cpu = %d\n",
+ selected_irq, min_loaded);
+ /* mark for change destination */
+ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
+
+ /* Since we made a change, come back sooner to
+ * check for more variation.
+ */
+ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
+ return;
+ }
+ goto tryanotherirq;
+
+not_worth_the_effort:
+ /*
+ * if we did not find an IRQ to move, then adjust the time interval
+ * upward
+ */
+ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
+ balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
+ Dprintk("IRQ worth rotating not found\n");
+ return;
+}
+
+static int balanced_irq(void *unused)
+{
+ int i;
+ unsigned long prev_balance_time = jiffies;
+ long time_remaining = balanced_irq_interval;
+
+ daemonize("kirqd");
+
+ /* push everything to CPU 0 to give us a starting point. */
+ for (i = 0 ; i < NR_IRQS ; i++) {
+ irq_desc[i].pending_mask = cpumask_of_cpu(0);
+ set_pending_irq(i, cpumask_of_cpu(0));
+ }
+
+ for ( ; ; ) {
+ time_remaining = schedule_timeout_interruptible(time_remaining);
+ try_to_freeze();
+ if (time_after(jiffies,
+ prev_balance_time+balanced_irq_interval)) {
+ preempt_disable();
+ do_irq_balance();
+ prev_balance_time = jiffies;
+ time_remaining = balanced_irq_interval;
+ preempt_enable();
+ }
+ }
+ return 0;
+}
+
+static int __init balanced_irq_init(void)
+{
+ int i;
+ struct cpuinfo_x86 *c;
+ cpumask_t tmp;
+
+ cpus_shift_right(tmp, cpu_online_map, 2);
+ c = &boot_cpu_data;
+ /* When not overwritten by the command line ask subarchitecture. */
+ if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
+ irqbalance_disabled = NO_BALANCE_IRQ;
+ if (irqbalance_disabled)
+ return 0;
+
+ /* disable irqbalance completely if there is only one processor online
*/
+ if (num_online_cpus() < 2) {
+ irqbalance_disabled = 1;
+ return 0;
+ }
+ /*
+ * Enable physical balance only if more than 1 physical processor
+ * is present
+ */
+ if (smp_num_siblings > 1 && !cpus_empty(tmp))
+ physical_balance = 1;
+
+ for_each_online_cpu(i) {
+ irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) *
NR_IRQS, GFP_KERNEL);
+ irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) *
NR_IRQS, GFP_KERNEL);
+ if (irq_cpu_data[i].irq_delta == NULL ||
irq_cpu_data[i].last_irq == NULL) {
+ printk(KERN_ERR "balanced_irq_init: out of memory");
+ goto failed;
+ }
+ memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) *
NR_IRQS);
+ memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) *
NR_IRQS);
+ }
+
+ printk(KERN_INFO "Starting balanced_irq\n");
+ if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
+ return 0;
+ else
+ printk(KERN_ERR "balanced_irq_init: failed to spawn
balanced_irq");
+failed:
+ for_each_possible_cpu(i) {
+ kfree(irq_cpu_data[i].irq_delta);
+ irq_cpu_data[i].irq_delta = NULL;
+ kfree(irq_cpu_data[i].last_irq);
+ irq_cpu_data[i].last_irq = NULL;
+ }
+ return 0;
+}
+
+int __init irqbalance_disable(char *str)
+{
+ irqbalance_disabled = 1;
+ return 1;
+}
+
+__setup("noirqbalance", irqbalance_disable);
+
+late_initcall(balanced_irq_init);
+#endif /* CONFIG_IRQBALANCE */
+#endif /* CONFIG_SMP */
+#endif
+
+#ifndef CONFIG_SMP
+void fastcall send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+#endif
+}
+#endif /* !CONFIG_SMP */
+
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+int skip_ioapic_setup;
+
+static int __init ioapic_setup(char *str)
+{
+ skip_ioapic_setup = 1;
+ return 1;
+}
+
+__setup("noapic", ioapic_setup);
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_irqtype == type &&
+ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mpc_dstirq == pin)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+ ) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+ return mp_irqs[i].mpc_dstirq;
+ }
+ return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+ ) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+ break;
+ }
+ if (i < mp_irq_entries) {
+ int apic;
+ for(apic = 0; apic < nr_ioapics; apic++) {
+ if (mp_ioapics[apic].mpc_apicid ==
mp_irqs[i].mpc_dstapic)
+ return apic;
+ }
+ }
+
+ return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+ int apic, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
+ "slot:%d, pin:%d.\n", bus, slot, pin);
+ if (mp_bus_id_to_pci_bus[bus] == -1) {
+ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus
%d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].mpc_apicid ==
mp_irqs[i].mpc_dstapic ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ return irq;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0)
+ best_guess = irq;
+ }
+ }
+ return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+/*
+ * This function currently is only a helper for the i386 smp boot process
where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come
online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+void __init setup_ioapic_dest(void)
+{
+ int pin, ioapic, irq, irq_entry;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
+ }
+
+ }
+}
+#endif /* !CONFIG_XEN */
+#endif
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx) (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) (0)
+
+/* NEC98 interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_NEC98_trigger(idx) (0)
+#define default_NEC98_polarity(idx) (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ polarity = default_ISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ polarity = default_EISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ polarity = default_PCI_polarity(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ polarity = default_MCA_polarity(idx);
+ break;
+ }
+ case MP_BUS_NEC98: /* NEC 98 pin */
+ {
+ polarity = default_NEC98_polarity(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+}
+
+static int MPBIOS_trigger(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ trigger = default_ISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ trigger = default_PCI_trigger(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ case MP_BUS_NEC98: /* NEC 98 pin */
+ {
+ trigger = default_NEC98_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+ return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+ return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+ int irq, i;
+ int bus = mp_irqs[idx].mpc_srcbus;
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ case MP_BUS_EISA:
+ case MP_BUS_MCA:
+ case MP_BUS_NEC98:
+ {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+
+ /*
+ * For MPS mode, so far only needed by ES7000 platform
+ */
+ if (ioapic_renumber_irq)
+ irq = ioapic_renumber_irq(apic, irq);
+
+ break;
+ }
+ default:
+ {
+ printk(KERN_ERR "unknown bus type %d.\n",bus);
+ irq = 0;
+ break;
+ }
+ }
+
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+ return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 };
*/
+
+int assign_irq_vector(int irq)
+{
+ unsigned long flags;
+ int vector;
+ struct physdev_irq irq_op;
+
+ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+
+ spin_lock_irqsave(&vector_lock, flags);
+
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+ spin_unlock_irqrestore(&vector_lock, flags);
+ return IO_APIC_VECTOR(irq);
+ }
+
+ irq_op.irq = irq;
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+ spin_unlock_irqrestore(&vector_lock, flags);
+ return -ENOSPC;
+ }
+
+ vector = irq_op.vector;
+ vector_irq[vector] = irq;
+ if (irq != AUTO_ASSIGN)
+ IO_APIC_VECTOR(irq) = vector;
+
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return vector;
+}
+
+#ifndef CONFIG_XEN
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
+
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
+static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+ unsigned idx;
+
+ idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[idx].chip = &ioapic_level_type;
+ else
+ irq_desc[idx].chip = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[idx]);
+}
+#else
+#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+#endif
+
+static void __init setup_IO_APIC_irqs(void)
+{
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+ unsigned long flags;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest =
+ cpu_mask_to_apicid(TARGET_CPUS);
+
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if (idx == -1) {
+ if (first_notcon) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ " IO-APIC (apicid-pin) %d-%d",
+ mp_ioapics[apic].mpc_apicid,
+ pin);
+ first_notcon = 0;
+ } else
+ apic_printk(APIC_VERBOSE, ", %d-%d",
+ mp_ioapics[apic].mpc_apicid, pin);
+ continue;
+ }
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
+
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ /*
+ * skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum
+ */
+ if (multi_timer_check(apic, irq))
+ continue;
+ else
+ add_pin_to_irq(irq, apic, pin);
+
+ if (/*!apic &&*/ !IO_APIC_IRQ(irq))
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+ vector = assign_irq_vector(irq);
+ entry.vector = vector;
+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+
+ if (!first_notcon)
+ apic_printk(APIC_VERBOSE, " not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin:
+ */
+#ifndef CONFIG_XEN
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin,
int vector)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+ disable_8259A_irq(0);
+
+ /* mask LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* unmask IRQ now */
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+ irq_desc[0].chip = &ioapic_edge_type;
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ enable_8259A_irq(0);
+}
+
+static inline void UNEXPECTED_IO_APIC(void)
+{
+}
+
+void __init print_IO_APIC(void)
+{
+ int apic, i;
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+ union IO_APIC_reg_03 reg_03;
+ unsigned long flags;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ reg_01.raw = io_apic_read(apic, 1);
+ if (reg_01.bits.version >= 0x10)
+ reg_02.raw = io_apic_read(apic, 2);
+ if (reg_01.bits.version >= 0x20)
+ reg_03.raw = io_apic_read(apic, 3);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n",
reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n",
reg_00.bits.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
+ if (reg_00.bits.ID >= get_physical_broadcast())
+ UNEXPECTED_IO_APIC();
+ if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n",
reg_01.bits.entries);
+ if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+ (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+ (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+ (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+ (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+ (reg_01.bits.entries != 0x2E) &&
+ (reg_01.bits.entries != 0x3F)
+ )
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n",
reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n",
reg_01.bits.version);
+ if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+ (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+ (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
+ )
+ UNEXPECTED_IO_APIC();
+ if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+ * but the value of reg_02 is read as the previous read register
+ * value, so ignore it if reg_02 == reg_01.
+ */
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n",
reg_02.bits.arbitration);
+ if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+ }
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+ * or reg_03, but the value of reg_0[23] is read as the previous read
+ * register value, so ignore it if reg_03 == reg_0[12].
+ */
+ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+ reg_03.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n",
reg_03.bits.boot_DT);
+ if (reg_03.bits.__reserved_1)
+ UNEXPECTED_IO_APIC();
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+ " Stat Dest Deli Vect: \n");
+
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+ entry.dest.logical.logical_dest,
+ entry.dest.physical.physical_dest
+ );
+
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+ if (use_pci_vector())
+ printk(KERN_INFO "Using vector-based indexing\n");
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+ if (use_pci_vector() && !platform_legacy_irq(i))
+ printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+ else
+ printk(KERN_DEBUG "IRQ%d ", i);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ printk("\n");
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+
+ return;
+}
+
+#if 0
+
+static void print_APIC_bitfield (int base)
+{
+ unsigned int v;
+ int i, j;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+ for (i = 0; i < 8; i++) {
+ v = apic_read(base + i*0x10);
+ for (j = 0; j < 32; j++) {
+ if (v & (1<<j))
+ printk("1");
+ else
+ printk("0");
+ }
+ printk("\n");
+ }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+ unsigned int v, ver, maxlvt;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v &
APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ v = apic_read(APIC_EOI);
+ printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_bitfield(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_bitfield(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_bitfield(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_ICR);
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+ v = apic_read(APIC_ICR2);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+ printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+ on_each_cpu(print_local_APIC, NULL, 1, 1);
+}
+
+void /*__init*/ print_PIC(void)
+{
+ unsigned int v;
+ unsigned long flags;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+#endif /* 0 */
+
+#else
+void __init print_IO_APIC(void) { }
+#endif /* !CONFIG_XEN */
+
+static void __init enable_IO_APIC(void)
+{
+ union IO_APIC_reg_01 reg_01;
+ int i8259_apic, i8259_pin;
+ int i, apic;
+ unsigned long flags;
+
+ for (i = 0; i < PIN_MAP_SIZE; i++) {
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(apic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+ for(apic = 0; apic < nr_ioapics; apic++) {
+ int pin;
+ /* See if any of the pins is in ExtINT mode */
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 *
pin);
+ *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 *
pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+ */
+ if ((entry.mask == 0) && (entry.delivery_mode ==
dest_ExtINT)) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ goto found_i8259;
+ }
+ }
+ }
+ found_i8259:
+ /* Look to see what if the MP table has reported the ExtINT */
+ /* If we could not find the appropriate pin by looking at the ioapic
+ * the i8259 probably is not connected the ioapic but give the
+ * mptable a chance anyway.
+ */
+ i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
+ i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+ /* Trust the MP table if nothing is setup in the hardware */
+ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+ printk(KERN_WARNING "ExtINT not setup in hardware but reported
by MP table\n");
+ ioapic_i8259.pin = i8259_pin;
+ ioapic_i8259.apic = i8259_apic;
+ }
+ /* Complain if the MP table and the hardware disagree */
+ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin !=
i8259_pin)) &&
+ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+ {
+ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+#ifndef CONFIG_XEN
+ /*
+ * If the i8259 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrupts can be delivered.
+ */
+ if (ioapic_i8259.pin != -1) {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
+ entry.vector = 0;
+ entry.dest.physical.physical_dest =
+ GET_APIC_ID(apic_read(APIC_ID));
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+ *(((int *)&entry)+1));
+ io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+ *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+#endif
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@xxxxxxxx> Tue Dec 21 12:25:05 CST 1999
+ */
+
+#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+ union IO_APIC_reg_00 reg_00;
+ physid_mask_t phys_id_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ /*
+ * This is broken; anything with a real cpu count has to
+ * circumvent this idiocy regardless.
+ */
+ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+ if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the
MPC table!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw
vendor)\n",
+ reg_00.bits.ID);
+ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(phys_id_present_map,
+ mp_ioapics[apic].mpc_apicid)) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already
used!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ for (i = 0; i < get_physical_broadcast(); i++)
+ if (!physid_isset(i, phys_id_present_map))
+ break;
+ if (i >= get_physical_broadcast())
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw
vendor)\n",
+ i);
+ physid_set(i, phys_id_present_map);
+ mp_ioapics[apic].mpc_apicid = i;
+ } else {
+ physid_mask_t tmp;
+ tmp =
apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
+ "phys_id_present_map\n",
+ mp_ioapics[apic].mpc_apicid);
+ physids_or(phys_id_present_map, phys_id_present_map,
tmp);
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, reg_00.raw);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE, " ok.\n");
+ }
+}
+#else
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
+
+#ifndef CONFIG_XEN
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned long t1 = jiffies;
+
+ local_irq_enable();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+ if (jiffies - t1 > 4)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+ move_irq(irq);
+ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+
+ return 0; /* don't check for pending */
+}
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+ unsigned long v;
+ int i;
+
+ move_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
+ i = IO_APIC_VECTOR(irq);
+
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+ ack_APIC_irq();
+
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+ spin_lock(&ioapic_lock);
+ __mask_and_edge_IO_APIC_irq(irq);
+ __unmask_and_level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
+}
+
+#ifdef CONFIG_PCI_MSI
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ return startup_edge_ioapic_irq(irq);
+}
+
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ move_native_irq(vector);
+ ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ return startup_level_ioapic_irq (irq);
+}
+
+static void end_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ move_native_irq(vector);
+ end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ unmask_IO_APIC_irq(irq);
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_vector (unsigned int vector,
+ cpumask_t cpu_mask)
+{
+ int irq = vector_to_irq(vector);
+
+ set_native_irq_info(vector, cpu_mask);
+ set_ioapic_affinity_irq(irq, cpu_mask);
+}
+#endif
+#endif
+
+static int ioapic_retrigger(unsigned int irq)
+{
+ send_IPI_self(IO_APIC_VECTOR(irq));
+
+ return 1;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+ .typename = "IO-APIC-edge",
+ .startup = startup_edge_ioapic,
+ .shutdown = shutdown_edge_ioapic,
+ .enable = enable_edge_ioapic,
+ .disable = disable_edge_ioapic,
+ .ack = ack_edge_ioapic,
+ .end = end_edge_ioapic,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ioapic_affinity,
+#endif
+ .retrigger = ioapic_retrigger,
+};
+
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+ .typename = "IO-APIC-level",
+ .startup = startup_level_ioapic,
+ .shutdown = shutdown_level_ioapic,
+ .enable = enable_level_ioapic,
+ .disable = disable_level_ioapic,
+ .ack = mask_and_ack_level_ioapic,
+ .end = end_level_ioapic,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ioapic_affinity,
+#endif
+ .retrigger = ioapic_retrigger,
+};
+#endif /* !CONFIG_XEN */
+
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ int tmp = irq;
+ if (use_pci_vector()) {
+ if (!platform_legacy_irq(tmp))
+ if ((tmp = vector_to_irq(tmp)) == -1)
+ continue;
+ }
+ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < 16)
+ make_8259A_irq(irq);
+#ifndef CONFIG_XEN
+ else
+ /* Strange. Oh, well.. */
+ irq_desc[irq].chip = &no_irq_type;
+#endif
+ }
+ }
+}
+
+#ifndef CONFIG_XEN
+static void enable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
+ .typename = "local-APIC-edge",
+ .startup = NULL, /* startup_irq() not used for IRQ0 */
+ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
+ .enable = enable_lapic_irq,
+ .disable = disable_lapic_irq,
+ .ack = ack_lapic_irq,
+ .end = end_lapic_irq
+};
+
+static void setup_nmi (void)
+{
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+ on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+
+ apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+ int apic, pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ apic = find_isa_irq_apic(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(apic, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(apic, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+int timer_uses_ioapic_pin_0;
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+ int apic1, pin1, apic2, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+ set_intr_gate(vector, interrupt[0]);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ timer_ack = 1;
+ if (timer_over_8254 > 0)
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ apic1 = find_isa_irq_apic(0, mp_INT);
+ pin2 = ioapic_i8259.pin;
+ apic2 = ioapic_i8259.apic;
+
+ if (pin1 == 0)
+ timer_uses_ioapic_pin_0 = 1;
+
+ printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d
pin2=%d\n",
+ vector, apic1, pin1, apic2, pin2);
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works()) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ }
+ if (disable_timer_pin_1 > 0)
+ clear_IO_APIC_pin(0, pin1);
+ return;
+ }
+ clear_IO_APIC_pin(apic1, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+ "IO-APIC\n");
+ }
+
+ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A
... ");
+ if (pin2 != -1) {
+ printk("\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+ else
+ add_pin_to_irq(0, apic2, pin2);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ }
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(apic2, pin2);
+ }
+ printk(" failed.\n");
+
+ if (nmi_watchdog == NMI_IO_APIC) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC -
disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
+
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+ irq_desc[0].chip = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ printk(" failed.\n");
+
+ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+ timer_ack = 0;
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ printk(" failed :(.\n");
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
+ "report. Then try booting with the 'noapic' option");
+}
+#else
+int timer_uses_ioapic_pin_0 = 0;
+#define check_timer() ((void)0)
+#endif
+
+/*
+ *
+ * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
+ * Linux doesn't really care, as it's not actually used
+ * for any interrupt handling anyway.
+ */
+#define PIC_IRQS (1 << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+ enable_IO_APIC();
+
+ if (acpi_ioapic)
+ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
+ else
+ io_apic_irqs = ~PIC_IRQS;
+
+ printk("ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+#ifndef CONFIG_XEN
+ sync_Arb_IDs();
+#endif
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ if (!acpi_ioapic)
+ print_IO_APIC();
+}
+
+static int __init setup_disable_8254_timer(char *s)
+{
+ timer_over_8254 = -1;
+ return 1;
+}
+static int __init setup_enable_8254_timer(char *s)
+{
+ timer_over_8254 = 2;
+ return 1;
+}
+
+__setup("disable_8254_timer", setup_disable_8254_timer);
+__setup("enable_8254_timer", setup_enable_8254_timer);
+
+/*
+ * Called after all the initialization is done. If we didnt find any
+ * APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+ if(sis_apic_bug == -1)
+ sis_apic_bug = 0;
+ if (is_initial_xendomain()) {
+ struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
+ op.u.platform_quirk.quirk_id = sis_apic_bug ?
+ QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
+ HYPERVISOR_platform_op(&op);
+ }
+ return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+struct sysfs_ioapic_data {
+ struct sys_device dev;
+ struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+ *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+ *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ union IO_APIC_reg_00 reg_00;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(dev->id, 0);
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ io_apic_write(dev->id, 0, reg_00.raw);
+ }
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+ io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+ io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+ set_kset_name("ioapic"),
+ .suspend = ioapic_suspend,
+ .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+ struct sys_device * dev;
+ int i, size, error = 0;
+
+ error = sysdev_class_register(&ioapic_sysdev_class);
+ if (error)
+ return error;
+
+ for (i = 0; i < nr_ioapics; i++ ) {
+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+ * sizeof(struct IO_APIC_route_entry);
+ mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ if (!mp_ioapic_data[i]) {
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ memset(mp_ioapic_data[i], 0, size);
+ dev = &mp_ioapic_data[i]->dev;
+ dev->id = i;
+ dev->cls = &ioapic_sysdev_class;
+ error = sysdev_register(dev);
+ if (error) {
+ kfree(mp_ioapic_data[i]);
+ mp_ioapic_data[i] = NULL;
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ --------------------------------------------------------------------------
*/
+
+#ifdef CONFIG_ACPI
+
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+#ifndef CONFIG_XEN
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+ physid_mask_t tmp;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * supports up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (physids_empty(apic_id_map))
+ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= get_physical_broadcast()) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(apic_id_map, apic_id)) {
+
+ for (i = 0; i < get_physical_broadcast(); i++) {
+ if (!check_apicid_used(apic_id_map, i))
+ break;
+ }
+
+ if (i == get_physical_broadcast())
+ panic("Max apic_id exceeded!\n");
+
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ tmp = apicid_to_cpu_present(apic_id);
+ physids_or(apic_id_map, apic_id_map, tmp);
+
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id) {
+ printk("IOAPIC[%d]: Unable to change apic_id!\n",
ioapic);
+ return -1;
+ }
+ }
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+#endif /* !CONFIG_XEN */
+
+ return apic_id;
+}
+
+
+int __init io_apic_get_version (int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.version;
+}
+
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int
active_high_low)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ if (!IO_APIC_IRQ(irq)) {
+ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ /*
+ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+ * Note that we mask (disable) IRQs now -- these get enabled when the
+ * corresponding device driver registers for this IRQ.
+ */
+
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.trigger = edge_level;
+ entry.polarity = active_high_low;
+ entry.mask = 1;
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= 16)
+ add_pin_to_irq(irq, ioapic, pin);
+
+ entry.vector = assign_irq_vector(irq);
+
+ apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+ edge_level, active_high_low);
+
+ ioapic_register_intr(irq, entry.vector, edge_level);
+
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+#endif /* CONFIG_ACPI */
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/ioport-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/ioport-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,122 @@
+/*
+ * linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <xen/interface/physdev.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int
extent, int new_value)
+{
+ unsigned long mask;
+ unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
+ unsigned int low_index = base & (BITS_PER_LONG-1);
+ int length = low_index + extent;
+
+ if (low_index != 0) {
+ mask = (~0UL << low_index);
+ if (length < BITS_PER_LONG)
+ mask &= ~(~0UL << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ length -= BITS_PER_LONG;
+ }
+
+ mask = (new_value ? ~0UL : 0UL);
+ while (length >= BITS_PER_LONG) {
+ *bitmap_base++ = mask;
+ length -= BITS_PER_LONG;
+ }
+
+ if (length > 0) {
+ mask = ~(~0UL << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ }
+}
+
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ struct thread_struct * t = ¤t->thread;
+ unsigned long *bitmap;
+ struct physdev_set_iobitmap set_iobitmap;
+
+ if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
+ return -EINVAL;
+ if (turn_on && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ /*
+ * If it's the first ioperm() call in this thread's lifetime, set the
+ * IO bitmap up. ioperm() is much less timing critical than clone(),
+ * this is why we delay this operation until now:
+ */
+ if (!t->io_bitmap_ptr) {
+ bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ if (!bitmap)
+ return -ENOMEM;
+
+ memset(bitmap, 0xff, IO_BITMAP_BYTES);
+ t->io_bitmap_ptr = bitmap;
+ set_thread_flag(TIF_IO_BITMAP);
+
+ set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
+ set_iobitmap.nr_ports = IO_BITMAP_BITS;
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap);
+ }
+
+ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+ return 0;
+}
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+
+asmlinkage long sys_iopl(unsigned long unused)
+{
+ volatile struct pt_regs * regs = (struct pt_regs *) &unused;
+ unsigned int level = regs->ebx;
+ struct thread_struct *t = ¤t->thread;
+ unsigned int old = (t->iopl >> 12) & 3;
+
+ if (level > 3)
+ return -EINVAL;
+ /* Trying to gain more privileges? */
+ if (level > old) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ }
+ t->iopl = level << 12;
+ set_iopl_mask(t->iopl);
+ return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/irq-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/irq-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,324 @@
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+ struct thread_info tinfo;
+ u32 stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{
+ /* high bit used in ret_from_ code */
+ int irq = ~regs->orig_eax;
+#ifdef CONFIG_4KSTACKS
+ union irq_ctx *curctx, *irqctx;
+ u32 *isp;
+#endif
+
+ if (unlikely((unsigned)irq >= NR_IRQS)) {
+ printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
+ __FUNCTION__, irq);
+ BUG();
+ }
+
+ irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+ {
+ long esp;
+
+ __asm__ __volatile__("andl %%esp,%0" :
+ "=r" (esp) : "0" (THREAD_SIZE - 1));
+ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ esp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+ curctx = (union irq_ctx *) current_thread_info();
+ irqctx = hardirq_ctx[smp_processor_id()];
+
+ /*
+ * this is where we switch to the IRQ stack. However, if we are
+ * already using the IRQ stack (because we interrupted a hardirq
+ * handler) we can't do that and just have to keep using the
+ * current stack (which is the irq stack already after all)
+ */
+ if (curctx != irqctx) {
+ int arg1, arg2, ebx;
+
+ /* build the stack frame on the IRQ stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+ irqctx->tinfo.task = curctx->tinfo.task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
+
+ /*
+ * Copy the softirq bits in preempt_count so that the
+ * softirq checks work in the hardirq context.
+ */
+ irqctx->tinfo.preempt_count =
+ (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+ (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+ asm volatile(
+ " xchgl %%ebx,%%esp \n"
+ " call __do_IRQ \n"
+ " movl %%ebx,%%esp \n"
+ : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+ : "0" (irq), "1" (regs), "2" (isp)
+ : "memory", "cc", "ecx"
+ );
+ } else
+#endif
+ __do_IRQ(irq, regs);
+
+ irq_exit();
+
+ return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc's 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+ __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+ __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+ union irq_ctx *irqctx;
+
+ if (hardirq_ctx[cpu])
+ return;
+
+ irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+ irqctx->tinfo.task = NULL;
+ irqctx->tinfo.exec_domain = NULL;
+ irqctx->tinfo.cpu = cpu;
+ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
+
+ hardirq_ctx[cpu] = irqctx;
+
+ irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+ irqctx->tinfo.task = NULL;
+ irqctx->tinfo.exec_domain = NULL;
+ irqctx->tinfo.cpu = cpu;
+ irqctx->tinfo.preempt_count = 0;
+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
+
+ softirq_ctx[cpu] = irqctx;
+
+ printk("CPU %u irqstacks, hard=%p soft=%p\n",
+ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+void irq_ctx_exit(int cpu)
+{
+ hardirq_ctx[cpu] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curctx;
+ union irq_ctx *irqctx;
+ u32 *isp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curctx = current_thread_info();
+ irqctx = softirq_ctx[smp_processor_id()];
+ irqctx->tinfo.task = curctx->task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
+
+ /* build the stack frame on the softirq stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+ asm volatile(
+ " xchgl %%ebx,%%esp \n"
+ " call __do_softirq \n"
+ " movl %%ebx,%%esp \n"
+ : "=b"(isp)
+ : "0"(isp)
+ : "memory", "cc", "edx", "ecx", "eax"
+ );
+ /*
+ * Shouldnt happen, we returned above if in_interrupt():
+ */
+ WARN_ON_ONCE(softirq_count());
+ }
+
+ local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *) v, j;
+ struct irqaction * action;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_printf(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%-8d",j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+ goto skip;
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %14s", irq_desc[i].chip->typename);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", nmi_count(j));
+ seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).apic_timer_irqs);
+ seq_putc(p, '\n');
+#endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
+ if (irq == 2)
+ continue;
+
+ cpus_and(mask, irq_desc[irq].affinity, map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ /*printk("Breaking affinity for irq %i\n", irq);*/
+ mask = map;
+ }
+ if (irq_desc[irq].chip->set_affinity)
+ irq_desc[irq].chip->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+#if 0
+ barrier();
+ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+ [note the nop - the interrupt-enable boundary on x86 is two
+ instructions from sti] - to flush out pending hardirqs and
+ IPIs. After this point nothing is supposed to reach this CPU." */
+ __asm__ __volatile__("sti; nop; cli");
+ barrier();
+#else
+ /* That doesn't seem sufficient. Give it 1ms. */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+#endif
+}
+#endif
+
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/ldt-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/ldt-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,270 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+static void flush_ldt(void *null)
+{
+ if (current->active_mm)
+ load_LDT(¤t->active_mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+ void *oldldt;
+ void *newldt;
+ int oldsize;
+
+ if (mincount <= pc->size)
+ return 0;
+ oldsize = pc->size;
+ mincount = (mincount+511)&(~511);
+ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+ newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+ else
+ newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+ if (!newldt)
+ return -ENOMEM;
+
+ if (oldsize)
+ memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+ oldldt = pc->ldt;
+ memset(newldt+oldsize*LDT_ENTRY_SIZE, 0,
(mincount-oldsize)*LDT_ENTRY_SIZE);
+ pc->ldt = newldt;
+ wmb();
+ pc->size = mincount;
+ wmb();
+
+ if (reload) {
+#ifdef CONFIG_SMP
+ cpumask_t mask;
+ preempt_disable();
+#endif
+ make_pages_readonly(
+ pc->ldt,
+ (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+ XENFEAT_writable_descriptor_tables);
+ load_LDT(pc);
+#ifdef CONFIG_SMP
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+ smp_call_function(flush_ldt, NULL, 1, 1);
+ preempt_enable();
+#endif
+ }
+ if (oldsize) {
+ make_pages_writable(
+ oldldt,
+ (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE,
+ XENFEAT_writable_descriptor_tables);
+ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(oldldt);
+ else
+ kfree(oldldt);
+ }
+ return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+ int err = alloc_ldt(new, old->size, 0);
+ if (err < 0)
+ return err;
+ memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+ make_pages_readonly(
+ new->ldt,
+ (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+ XENFEAT_writable_descriptor_tables);
+ return 0;
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct mm_struct * old_mm;
+ int retval = 0;
+
+ init_MUTEX(&mm->context.sem);
+ mm->context.size = 0;
+ mm->context.has_foreign_mappings = 0;
+ old_mm = current->mm;
+ if (old_mm && old_mm->context.size > 0) {
+ down(&old_mm->context.sem);
+ retval = copy_ldt(&mm->context, &old_mm->context);
+ up(&old_mm->context.sem);
+ }
+ return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void destroy_context(struct mm_struct *mm)
+{
+ if (mm->context.size) {
+ if (mm == current->active_mm)
+ clear_LDT();
+ make_pages_writable(
+ mm->context.ldt,
+ (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+ XENFEAT_writable_descriptor_tables);
+ if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(mm->context.ldt);
+ else
+ kfree(mm->context.ldt);
+ mm->context.size = 0;
+ }
+}
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+ int err;
+ unsigned long size;
+ struct mm_struct * mm = current->mm;
+
+ if (!mm->context.size)
+ return 0;
+ if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+ bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+ down(&mm->context.sem);
+ size = mm->context.size*LDT_ENTRY_SIZE;
+ if (size > bytecount)
+ size = bytecount;
+
+ err = 0;
+ if (copy_to_user(ptr, mm->context.ldt, size))
+ err = -EFAULT;
+ up(&mm->context.sem);
+ if (err < 0)
+ goto error_return;
+ if (size != bytecount) {
+ /* zero-fill the rest */
+ if (clear_user(ptr+size, bytecount-size) != 0) {
+ err = -EFAULT;
+ goto error_return;
+ }
+ }
+ return bytecount;
+error_return:
+ return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+ int err;
+ unsigned long size;
+ void *address;
+
+ err = 0;
+ address = &default_ldt[0];
+ size = 5*sizeof(struct desc_struct);
+ if (size > bytecount)
+ size = bytecount;
+
+ err = size;
+ if (copy_to_user(ptr, address, size))
+ err = -EFAULT;
+
+ return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+{
+ struct mm_struct * mm = current->mm;
+ __u32 entry_1, entry_2;
+ int error;
+ struct user_desc ldt_info;
+
+ error = -EINVAL;
+ if (bytecount != sizeof(ldt_info))
+ goto out;
+ error = -EFAULT;
+ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+ goto out;
+
+ error = -EINVAL;
+ if (ldt_info.entry_number >= LDT_ENTRIES)
+ goto out;
+ if (ldt_info.contents == 3) {
+ if (oldmode)
+ goto out;
+ if (ldt_info.seg_not_present == 0)
+ goto out;
+ }
+
+ down(&mm->context.sem);
+ if (ldt_info.entry_number >= mm->context.size) {
+ error = alloc_ldt(¤t->mm->context,
ldt_info.entry_number+1, 1);
+ if (error < 0)
+ goto out_unlock;
+ }
+
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ if (oldmode || LDT_empty(&ldt_info)) {
+ entry_1 = 0;
+ entry_2 = 0;
+ goto install;
+ }
+ }
+
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
+
+ /* Install the new entry ... */
+install:
+ error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number,
+ entry_1, entry_2);
+
+out_unlock:
+ up(&mm->context.sem);
+out:
+ return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long
bytecount)
+{
+ int ret = -ENOSYS;
+
+ switch (func) {
+ case 0:
+ ret = read_ldt(ptr, bytecount);
+ break;
+ case 1:
+ ret = write_ldt(ptr, bytecount, 1);
+ break;
+ case 2:
+ ret = read_default_ldt(ptr, bytecount);
+ break;
+ case 0x11:
+ ret = write_ldt(ptr, bytecount, 0);
+ break;
+ }
+ return ret;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/machine_kexec.c
--- a/arch/i386/kernel/machine_kexec.c Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/machine_kexec.c Mon Jun 04 10:05:28 2007 +0100
@@ -19,6 +19,10 @@
#include <asm/desc.h>
#include <asm/system.h>
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
#ifdef CONFIG_X86_PAE
@@ -27,6 +31,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
#endif
static u32 kexec_pte0[1024] PAGE_ALIGNED;
static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+#ifdef CONFIG_XEN
+
+#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
+
+#if PAGES_NR > KEXEC_XEN_NO_PAGES
+#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
+#endif
+
+#if PA_CONTROL_PAGE != 0
+#error PA_CONTROL_PAGE is non zero - Xen support will break
+#endif
+
+void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
+{
+ void *control_page;
+
+ memset(xki->page_list, 0, sizeof(xki->page_list));
+
+ control_page = page_address(image->control_code_page);
+ memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
+ xki->page_list[PA_PGD] = __ma(kexec_pgd);
+#ifdef CONFIG_X86_PAE
+ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
+ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
+#endif
+ xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
+ xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
+
+}
+
+#endif /* CONFIG_XEN */
/*
* A architecture hook called to validate the
@@ -54,6 +92,7 @@ void machine_kexec_cleanup(struct kimage
{
}
+#ifndef CONFIG_XEN
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
@@ -87,3 +126,4 @@ NORET_TYPE void machine_kexec(struct kim
relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
image->start, cpu_has_pae);
}
+#endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/microcode-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/microcode-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,144 @@
+/*
+ * Intel CPU Microcode Update Driver for Linux
+ *
+ * Copyright (C) 2000-2004 Tigran Aivazian
+ *
+ * This driver allows to upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual,
+ * Order Number 245472 or free download from:
+ *
+ * http://developer.intel.com/design/pentium4/manuals/245472.htm
+ *
+ * For more information, go to http://www.urbanmyth.org/microcode
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+//#define DEBUG /* pr_debug */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/syscalls.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@xxxxxxxxxxx>");
+MODULE_LICENSE("GPL");
+
+static int verbose;
+module_param(verbose, int, 0644);
+
+#define MICROCODE_VERSION "1.14a-xen"
+
+#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
+#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /*
2048 bytes */
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+static DEFINE_MUTEX(microcode_mutex);
+
+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+
+static int do_microcode_update (const void __user *ubuf, size_t len)
+{
+ int err;
+ void *kbuf;
+
+ kbuf = vmalloc(len);
+ if (!kbuf)
+ return -ENOMEM;
+
+ if (copy_from_user(kbuf, ubuf, len) == 0) {
+ struct xen_platform_op op;
+
+ op.cmd = XENPF_microcode_update;
+ set_xen_guest_handle(op.u.microcode.data, kbuf);
+ op.u.microcode.length = len;
+ err = HYPERVISOR_platform_op(&op);
+ } else
+ err = -EFAULT;
+
+ vfree(kbuf);
+
+ return err;
+}
+
+static ssize_t microcode_write (struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
+{
+ ssize_t ret;
+
+ if (len < MC_HEADER_SIZE) {
+ printk(KERN_ERR "microcode: not enough data\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(µcode_mutex);
+
+ ret = do_microcode_update(buf, len);
+ if (!ret)
+ ret = (ssize_t)len;
+
+ mutex_unlock(µcode_mutex);
+
+ return ret;
+}
+
+static struct file_operations microcode_fops = {
+ .owner = THIS_MODULE,
+ .write = microcode_write,
+ .open = microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+ .minor = MICROCODE_MINOR,
+ .name = "microcode",
+ .fops = µcode_fops,
+};
+
+static int __init microcode_init (void)
+{
+ int error;
+
+ error = misc_register(µcode_dev);
+ if (error) {
+ printk(KERN_ERR
+ "microcode: can't misc_register on minor=%d\n",
+ MICROCODE_MINOR);
+ return error;
+ }
+
+ printk(KERN_INFO
+ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION "
<tigran@xxxxxxxxxxx>\n");
+ return 0;
+}
+
+static void __exit microcode_exit (void)
+{
+ misc_deregister(µcode_dev);
+}
+
+module_init(microcode_init)
+module_exit(microcode_exit)
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/mpparse-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/mpparse-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1185 @@
+/*
+ * Intel Multiprocessor Specification 1.1 and 1.4
+ * compliant MP-table parsing routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ * Fixes
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
+ * Maciej W. Rozycki: Bits for default MP configurations
+ * Paul Diefenbaugh: Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bitops.h>
+
+#include <asm/smp.h>
+#include <asm/acpi.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+static int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+unsigned int def_to_bigsmp = 0;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+/* Internal processor count */
+static unsigned int __devinitdata num_processors;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
__initdata;
+
+#ifndef CONFIG_XEN
+static void __devinit MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver, apicid;
+ physid_mask_t phys_cpu;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+ if (m->mpc_featureflag&(1<<0))
+ Dprintk(" Floating point unit present.\n");
+ if (m->mpc_featureflag&(1<<7))
+ Dprintk(" Machine Exception supported.\n");
+ if (m->mpc_featureflag&(1<<8))
+ Dprintk(" 64 bit compare & exchange supported.\n");
+ if (m->mpc_featureflag&(1<<9))
+ Dprintk(" Internal APIC present.\n");
+ if (m->mpc_featureflag&(1<<11))
+ Dprintk(" SEP present.\n");
+ if (m->mpc_featureflag&(1<<12))
+ Dprintk(" MTRR present.\n");
+ if (m->mpc_featureflag&(1<<13))
+ Dprintk(" PGE present.\n");
+ if (m->mpc_featureflag&(1<<14))
+ Dprintk(" MCA present.\n");
+ if (m->mpc_featureflag&(1<<15))
+ Dprintk(" CMOV present.\n");
+ if (m->mpc_featureflag&(1<<16))
+ Dprintk(" PAT present.\n");
+ if (m->mpc_featureflag&(1<<17))
+ Dprintk(" PSE present.\n");
+ if (m->mpc_featureflag&(1<<18))
+ Dprintk(" PSN present.\n");
+ if (m->mpc_featureflag&(1<<19))
+ Dprintk(" Cache Line Flush Instruction present.\n");
+ /* 20 Reserved */
+ if (m->mpc_featureflag&(1<<21))
+ Dprintk(" Debug Trace and EMON Store present.\n");
+ if (m->mpc_featureflag&(1<<22))
+ Dprintk(" ACPI Thermal Throttle Registers present.\n");
+ if (m->mpc_featureflag&(1<<23))
+ Dprintk(" MMX present.\n");
+ if (m->mpc_featureflag&(1<<24))
+ Dprintk(" FXSR present.\n");
+ if (m->mpc_featureflag&(1<<25))
+ Dprintk(" XMM present.\n");
+ if (m->mpc_featureflag&(1<<26))
+ Dprintk(" Willamette New Instructions present.\n");
+ if (m->mpc_featureflag&(1<<27))
+ Dprintk(" Self Snoop present.\n");
+ if (m->mpc_featureflag&(1<<28))
+ Dprintk(" HT present.\n");
+ if (m->mpc_featureflag&(1<<29))
+ Dprintk(" Thermal Monitor present.\n");
+ /* 30, 31 Reserved */
+
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_physical_apicid = m->mpc_apicid;
+ }
+
+ ver = m->mpc_apicver;
+
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+
+ phys_cpu = apicid_to_cpu_present(apicid);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
+ return;
+ }
+
+ cpu_set(num_processors, cpu_possible_map);
+ num_processors++;
+
+ /*
+ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+ * but we need to work other dependencies like SMP_SUSPEND etc
+ * before this can be done without some confusion.
+ * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+ * - Ashok Raj <ashok.raj@xxxxxxxxx>
+ */
+ if (num_processors > 8) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (!APIC_XAPIC(ver)) {
+ def_to_bigsmp = 0;
+ break;
+ }
+ /* If P4 and above fall through */
+ case X86_VENDOR_AMD:
+ def_to_bigsmp = 1;
+ }
+ }
+ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+#else
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ num_processors++;
+}
+#endif /* CONFIG_XEN */
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+ char str[7];
+
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
+
+ mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
+ if (m->mpc_busid >= MAX_MP_BUSSES) {
+ printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+ " is too large, max. supported is %d\n",
+ m->mpc_busid, str, MAX_MP_BUSSES - 1);
+ return;
+ }
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mpc_oem_pci_bus(m, translation_table[mpc_record]);
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
+ } else {
+ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+ }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found
%d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumptions is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
+}
+
+#ifdef CONFIG_X86_NUMAQ
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+ printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d,
local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global,
m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+ node_set_online(m->trans_quad);
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+ unsigned short oemsize)
+{
+ int count = sizeof (*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+
+ mpc_record = 0;
+ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
oemtable);
+ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+ {
+ printk(KERN_WARNING "SMP mpc oemtable: bad signature
[%c%c%c%c]!\n",
+ oemtable->oem_signature[0],
+ oemtable->oem_signature[1],
+ oemtable->oem_signature[2],
+ oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+ {
+ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m=
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "Unrecognised OEM table
entry type! - %d\n", (int) *oemptr);
+ return;
+ }
+ }
+ }
+}
+
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (strncmp(oem, "IBM NUMA", 8))
+ printk("Warning! May not be a NUMA-Q system!\n");
+ if (mpc->mpc_oemptr)
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+ mpc->mpc_oemsize);
+}
+#endif /* CONFIG_X86_NUMAQ */
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+ char str[16];
+ char oem[10];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+ *(u32 *)mpc->mpc_signature);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ printk(KERN_ERR "SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(oem,mpc->mpc_oem,8);
+ oem[8]=0;
+ printk(KERN_INFO "OEM ID: %s ",oem);
+
+ memcpy(str,mpc->mpc_productid,12);
+ str[12]=0;
+ printk("Product ID: %s ",str);
+
+ mps_oem_check(mpc, oem, str);
+
+ printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+ /*
+ * Save the local APIC address (it might be non-default) -- but only
+ * if we're not using ACPI.
+ */
+ if (!acpi_lapic)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ /*
+ * Now process the configuration blocks.
+ */
+ mpc_record = 0;
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+ /* ACPI may have already provided this data */
+ if (!acpi_lapic)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ default:
+ {
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ ++mpc_record;
+ }
+ clustered_apic_check();
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+ unsigned int port;
+
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk(KERN_INFO "ISA/PCI bus type with no IRQ information...
falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
ELCR_trigger(13))
+ printk(KERN_WARNING "ELCR contains invalid data... not
using ELCR\n");
+ else {
+ printk(KERN_INFO "Using ELCR to identify PCI
interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt,
we
+ * copy that information over to the MP table in the
+ * irqflag field (level sensitive, active high
polarity).
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk("???\n");
+ printk(KERN_ERR "Unknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+ struct intel_mp_floating *mpf = mpf_found;
+
+ /*
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * processors, where MPS only supports physical.
+ */
+ if (acpi_lapic && acpi_ioapic) {
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration
information\n");
+ return;
+ }
+ else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC)
configuration information\n");
+
+ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk(KERN_INFO "Default MP configuration #%d\n",
mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors
detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your
hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk(KERN_ERR "BIOS bug, no explicit IRQ entries,
using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk(KERN_INFO "Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+ unsigned long *bp = isa_bus_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ if (sizeof(*mpf) != 16)
+ printk("Error: MPF size\n");
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+#ifndef CONFIG_XEN
+ printk(KERN_INFO "found SMP MP-table at %08lx\n",
+ virt_to_phys(mpf));
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr) {
+ /*
+ * We cannot access to MPC table to compute
+ * table size yet, as only few megabytes from
+ * the bottom is mapped now.
+ * PC-9800's MPC table places on the very last
+ * of physical memory; so that simply reserving
+ * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+ * in reserve_bootmem.
+ */
+ unsigned long size = PAGE_SIZE;
+ unsigned long end = max_low_pfn * PAGE_SIZE;
+ if (mpf->mpf_physptr + size > end)
+ size = end - mpf->mpf_physptr;
+ reserve_bootmem(mpf->mpf_physptr, size);
+ }
+#else
+ printk(KERN_INFO "found SMP MP-table at %08lx\n",
+ ((unsigned long)bp - (unsigned
long)isa_bus_to_virt(base)) + base);
+#endif
+
+ mpf_found = mpf;
+ return 1;
+ }
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
+}
+
+void __init find_smp_config (void)
+{
+#ifndef CONFIG_XEN
+ unsigned int address;
+#endif
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended bios data area.
+ *
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E, calculate and scan it here.
+ *
+ * NOTE! There are Linux loaders that will corrupt the EBDA
+ * area, and as such this kind of SMP config may be less
+ * trustworthy, simply because the SMP table may have been
+ * stomped on during early boot. These loaders are buggy and
+ * should be fixed.
+ *
+ * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+ */
+
+#ifndef CONFIG_XEN
+ address = get_bios_ebda();
+ if (address)
+ smp_scan_config(address, 0x400);
+#endif
+}
+
+int es7000_plat;
+
+/* --------------------------------------------------------------------------
+ ACPI-based MP Configuration
+ --------------------------------------------------------------------------
*/
+
+#ifdef CONFIG_ACPI
+
+void __init mp_register_lapic_address (
+ u64 address)
+{
+#ifndef CONFIG_XEN
+ mp_lapic_addr = (unsigned long) address;
+
+ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
+}
+
+
+void __devinit mp_register_lapic (
+ u8 id,
+ u8 enabled)
+{
+ struct mpc_config_processor processor;
+ int boot_cpu = 0;
+
+ if (MAX_APICS - id <= 0) {
+ printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+ id, MAX_APICS);
+ return;
+ }
+
+ if (id == boot_cpu_physical_apicid)
+ boot_cpu = 1;
+
+#ifndef CONFIG_XEN
+ processor.mpc_type = MP_PROCESSOR;
+ processor.mpc_apicid = id;
+ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+#endif
+
+ MP_processor_info(&processor);
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define MP_ISA_BUS 0
+#define MP_MAX_IOAPIC_PIN 127
+
+static struct mp_ioapic_routing {
+ int apic_id;
+ int gsi_base;
+ int gsi_end;
+ u32 pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int mp_find_ioapic (
+ int gsi)
+{
+ int i = 0;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+ if ((gsi >= mp_ioapic_routing[i].gsi_base)
+ && (gsi <= mp_ioapic_routing[i].gsi_end))
+ return i;
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+ return -1;
+}
+
+
+void __init mp_register_ioapic (
+ u8 id,
+ u32 address,
+ u32 gsi_base)
+{
+ int idx = 0;
+ int tmpid;
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+ }
+ if (!address) {
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+ " found in MADT table, skipping!\n");
+ return;
+ }
+
+ idx = nr_ioapics++;
+
+ mp_ioapics[idx].mpc_type = MP_IOAPIC;
+ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+ mp_ioapics[idx].mpc_apicaddr = address;
+
+#ifndef CONFIG_XEN
+ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+#endif
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ tmpid = io_apic_get_unique_id(idx, id);
+ else
+ tmpid = id;
+ if (tmpid == -1) {
+ nr_ioapics--;
+ return;
+ }
+ mp_ioapics[idx].mpc_apicid = tmpid;
+ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+
+ /*
+ * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+ */
+ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+ mp_ioapic_routing[idx].gsi_base = gsi_base;
+ mp_ioapic_routing[idx].gsi_end = gsi_base +
+ io_apic_get_redir_entries(idx);
+
+ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].gsi_base,
+ mp_ioapic_routing[idx].gsi_end);
+
+ return;
+}
+
+
+void __init mp_override_legacy_irq (
+ u8 bus_irq,
+ u8 polarity,
+ u8 trigger,
+ u32 gsi)
+{
+ struct mpc_config_intsrc intsrc;
+ int ioapic = -1;
+ int pin = -1;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return;
+ pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+ /*
+ * TBD: This check is for faulty timer entries, where the override
+ * erroneously sets the trigger to level, resulting in a HUGE
+ * increase of timer interrupts!
+ */
+ if ((bus_irq == 0) && (trigger == 3))
+ trigger = 1;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_irqflag = (trigger << 2) | polarity;
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+ intsrc.mpc_dstirq = pin; /* INTIN# */
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+
+ return;
+}
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+ struct mpc_config_intsrc intsrc;
+ int i = 0;
+ int ioapic = -1;
+
+ /*
+ * Fabricate the legacy ISA bus (bus #31).
+ */
+ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+ /*
+ * Older generations of ES7000 have no legacy identity mappings
+ */
+ if (es7000_plat == 1)
+ return;
+
+ /*
+ * Locate the IOAPIC that manages the ISA IRQs (0-15).
+ */
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ return;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* Conforming */
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+ /*
+ * Use the default configuration for the IRQs 0-15. Unless
+ * overriden by (MADT) interrupt source override entries.
+ */
+ for (i = 0; i < 16; i++) {
+ int idx;
+
+ for (idx = 0; idx < mp_irq_entries; idx++) {
+ struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+ /* Do we already have a mapping for this ISA IRQ? */
+ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq
== i)
+ break;
+
+ /* Do we already have a mapping for this IOAPIC pin */
+ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+ (irq->mpc_dstirq == i))
+ break;
+ }
+
+ if (idx != mp_irq_entries) {
+ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+ continue; /* IRQ already used */
+ }
+
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_srcbusirq = i; /* Identity mapped */
+ intsrc.mpc_dstirq = i;
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
+ intsrc.mpc_dstirq);
+
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+ }
+}
+
+#define MAX_GSI_NUM 4096
+
+int mp_register_gsi (u32 gsi, int triggering, int polarity)
+{
+ int ioapic = -1;
+ int ioapic_pin = 0;
+ int idx, bit = 0;
+ static int pci_irq = 16;
+ /*
+ * Mapping between Global System Interrups, which
+ * represent all possible interrupts, and IRQs
+ * assigned to actual devices.
+ */
+ static int gsi_to_irq[MAX_GSI_NUM];
+
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_fadt.sci_int == gsi)
+ return gsi;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+ return gsi;
+ }
+
+ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+ if (ioapic_renumber_irq)
+ gsi = ioapic_renumber_irq(ioapic, gsi);
+
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ bit = ioapic_pin % 32;
+ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+ if (idx > 3) {
+ printk(KERN_ERR "Invalid reference to IOAPIC pin "
+ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ ioapic_pin);
+ return gsi;
+ }
+ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+ return gsi_to_irq[gsi];
+ }
+
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+ if (triggering == ACPI_LEVEL_SENSITIVE) {
+ /*
+ * For PCI devices assign IRQs in order, avoiding gaps
+ * due to unused I/O APIC pins.
+ */
+ int irq = gsi;
+ if (gsi < MAX_GSI_NUM) {
+ /*
+ * Retain the VIA chipset work-around (gsi > 15), but
+ * avoid a problem where the 8254 timer (IRQ0) is setup
+ * via an override (so it's not on pin 0 of the ioapic),
+ * and at the same time, the pin 0 interrupt is a PCI
+ * type. The gsi > 15 test could cause these two pins
+ * to be shared as IRQ0, and they are not shareable.
+ * So test for this condition, and if necessary, avoid
+ * the pin collision.
+ */
+ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+ gsi = pci_irq++;
+ /*
+ * Don't assign IRQ used by ACPI SCI
+ */
+ if (gsi == acpi_fadt.sci_int)
+ gsi = pci_irq++;
+ gsi_to_irq[irq] = gsi;
+ } else {
+ printk(KERN_ERR "GSI %u is too high\n", gsi);
+ return gsi;
+ }
+ }
+
+ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+ return gsi;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+#endif /* CONFIG_ACPI */
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/pci-dma-xen.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/pci-dma-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,378 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/io.h>
+#include <xen/balloon.h>
+#include <xen/gnttab.h>
+#include <asm/swiotlb.h>
+#include <asm/tlbflush.h>
+#include <asm-i386/mach-xen/asm/swiotlb.h>
+#include <asm/bug.h>
+
+#ifdef __x86_64__
+#include <asm/proto.h>
+
+int iommu_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_merge);
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+/* This tells the BIO block layer to assume merging. Default to off
+ because we cannot guarantee merging later. */
+int iommu_bio_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_bio_merge);
+
+int force_iommu __read_mostly= 0;
+
+__init int iommu_setup(char *p)
+{
+ return 1;
+}
+
+void __init pci_iommu_alloc(void)
+{
+#ifdef CONFIG_SWIOTLB
+ pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+ no_iommu_init();
+ return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+#endif
+
+struct dma_coherent_mem {
+ void *virt_base;
+ u32 device_base;
+ int size;
+ int flags;
+ unsigned long *bitmap;
+};
+
+#define IOMMU_BUG_ON(test) \
+do { \
+ if (unlikely(test)) { \
+ printk(KERN_ALERT "Fatal DMA error! " \
+ "Please use 'swiotlb=force'\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+int
+dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i, rc;
+
+ if (direction == DMA_NONE)
+ BUG();
+ WARN_ON(nents == 0 || sg[0].length == 0);
+
+ if (swiotlb) {
+ rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+ } else {
+ for (i = 0; i < nents; i++ ) {
+ sg[i].dma_address =
+ gnttab_dma_map_page(sg[i].page) + sg[i].offset;
+ sg[i].dma_length = sg[i].length;
+ BUG_ON(!sg[i].page);
+ IOMMU_BUG_ON(address_needs_mapping(
+ hwdev, sg[i].dma_address));
+ }
+ rc = nents;
+ }
+
+ flush_write_buffers();
+ return rc;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i;
+
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_sg(hwdev, sg, nents, direction);
+ else {
+ for (i = 0; i < nents; i++ )
+ gnttab_dma_unmap_page(sg[i].dma_address);
+ }
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+#ifdef CONFIG_HIGHMEM
+dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_addr_t dma_addr;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (swiotlb) {
+ dma_addr = swiotlb_map_page(
+ dev, page, offset, size, direction);
+ } else {
+ dma_addr = gnttab_dma_map_page(page) + offset;
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
+ }
+
+ return dma_addr;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_page(dev, dma_address, size, direction);
+ else
+ gnttab_dma_unmap_page(dma_address);
+}
+EXPORT_SYMBOL(dma_unmap_page);
+#endif /* CONFIG_HIGHMEM */
+
+int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+ if (swiotlb)
+ return swiotlb_dma_mapping_error(dma_addr);
+ return 0;
+}
+EXPORT_SYMBOL(dma_mapping_error);
+
+int
+dma_supported(struct device *dev, u64 mask)
+{
+ if (swiotlb)
+ return swiotlb_dma_supported(dev, mask);
+ /*
+ * By default we'll BUG when an infeasible DMA is requested, and
+ * request swiotlb=force (see IOMMU_BUG_ON).
+ */
+ return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ void *ret;
+ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+ unsigned int order = get_order(size);
+ unsigned long vstart;
+ u64 mask;
+
+ /* ignore region specifiers */
+ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+ if (mem) {
+ int page = bitmap_find_free_region(mem->bitmap, mem->size,
+ order);
+ if (page >= 0) {
+ *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+ ret = mem->virt_base + (page << PAGE_SHIFT);
+ memset(ret, 0, size);
+ return ret;
+ }
+ if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+ return NULL;
+ }
+
+ if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+ gfp |= GFP_DMA;
+
+ vstart = __get_free_pages(gfp, order);
+ ret = (void *)vstart;
+
+ if (dev != NULL && dev->coherent_dma_mask)
+ mask = dev->coherent_dma_mask;
+ else
+ mask = 0xffffffff;
+
+ if (ret != NULL) {
+ if (xen_create_contiguous_region(vstart, order,
+ fls64(mask)) != 0) {
+ free_pages(vstart, order);
+ return NULL;
+ }
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|