# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1278413680 -3600
# Node ID db6234d3eafbd2d7b1469d8b98a13d6ab0b89973
# Parent ce278fdaced3ff898651657fda848c2b4daee648
# Parent 9d965ac1b0dbcb2f1fd4845e30753251d68d064f
Merge
---
xen/arch/x86/cpu/amd.h | 103 -----------
xen/arch/x86/hvm/vmx/vpmu.c | 119 -------------
tools/blktap2/drivers/tapdisk-vbd.c | 2
xen/Rules.mk | 2
xen/arch/x86/Makefile | 2
xen/arch/x86/apic.c | 198 +++++++++++++++++++---
xen/arch/x86/cpu/amd.c | 50 +++++
xen/arch/x86/domctl.c | 9 -
xen/arch/x86/genapic/x2apic.c | 19 ++
xen/arch/x86/hvm/mtrr.c | 2
xen/arch/x86/hvm/svm/asid.c | 4
xen/arch/x86/hvm/svm/svm.c | 7
xen/arch/x86/hvm/vmx/vmcs.c | 4
xen/arch/x86/hvm/vmx/vmx.c | 5
xen/arch/x86/i8259.c | 20 ++
xen/arch/x86/io_apic.c | 120 +++++++++++++
xen/arch/x86/mm/hap/p2m-ept.c | 297 ++++++++++++++++++---------------
xen/arch/x86/setup.c | 9 -
xen/common/memory.c | 2
xen/common/page_alloc.c | 8
xen/common/trace.c | 216 +++++++++++++++---------
xen/drivers/passthrough/vtd/dmar.c | 20 +-
xen/drivers/passthrough/vtd/dmar.h | 1
xen/drivers/passthrough/vtd/extern.h | 3
xen/drivers/passthrough/vtd/intremap.c | 125 ++++++++++++-
xen/drivers/passthrough/vtd/iommu.c | 54 ++----
xen/drivers/passthrough/vtd/qinval.c | 19 +-
xen/drivers/passthrough/vtd/vtd.h | 3
xen/include/asm-x86/amd.h | 138 +++++++++++++++
xen/include/asm-x86/apic.h | 2
xen/include/asm-x86/debugger.h | 2
xen/include/asm-x86/domain.h | 2
xen/include/asm-x86/genapic.h | 1
xen/include/asm-x86/io_apic.h | 6
xen/include/asm-x86/irq.h | 2
xen/include/asm-x86/msr-index.h | 4
xen/include/asm-x86/mtrr.h | 2
xen/include/public/io/ring.h | 15 +
xen/include/public/memory.h | 3
xen/include/xen/iommu.h | 2
xen/include/xen/mm.h | 2
xen/include/xen/trace.h | 14 -
42 files changed, 1073 insertions(+), 545 deletions(-)
diff -r ce278fdaced3 -r db6234d3eafb tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c Fri Jul 02 18:04:54 2010 +0100
+++ b/tools/blktap2/drivers/tapdisk-vbd.c Tue Jul 06 11:54:40 2010 +0100
@@ -1684,7 +1684,7 @@ tapdisk_vbd_check_ring_message(td_vbd_t
if (!vbd->ring.sring)
return -EINVAL;
- switch (vbd->ring.sring->pad[0]) {
+ switch (vbd->ring.sring->private.tapif_user.msg) {
case 0:
return 0;
diff -r ce278fdaced3 -r db6234d3eafb xen/Rules.mk
--- a/xen/Rules.mk Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/Rules.mk Tue Jul 06 11:54:40 2010 +0100
@@ -8,7 +8,6 @@ perfc_arrays ?= n
perfc_arrays ?= n
lock_profile ?= n
crash_debug ?= n
-gdbsx ?= n
frame_pointer ?= n
XEN_ROOT=$(BASEDIR)/..
@@ -53,7 +52,6 @@ CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS
CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS
CFLAGS-$(lock_profile) += -DLOCK_PROFILE
CFLAGS-$(frame_pointer) += -fno-omit-frame-pointer -DCONFIG_FRAME_POINTER
-CFLAGS-$(gdbsx) += -DXEN_GDBSX_CONFIG
ifneq ($(max_phys_cpus),)
CFLAGS-y += -DMAX_PHYS_CPUS=$(max_phys_cpus)
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/Makefile Tue Jul 06 11:54:40 2010 +0100
@@ -13,6 +13,7 @@ obj-y += clear_page.o
obj-y += clear_page.o
obj-y += copy_page.o
obj-y += compat.o
+obj-y += debug.o
obj-y += delay.o
obj-y += dmi_scan.o
obj-y += domctl.o
@@ -57,7 +58,6 @@ obj-y += bzimage.o
obj-y += bzimage.o
obj-$(crash_debug) += gdbstub.o
-obj-$(gdbsx) += debug.o
x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/apic.c Tue Jul 06 11:54:40 2010 +0100
@@ -70,6 +70,9 @@ int x2apic_enabled __read_mostly = 0;
int x2apic_enabled __read_mostly = 0;
int directed_eoi_enabled __read_mostly = 0;
+/* x2APIC is enabled in BIOS */
+static int x2apic_preenabled;
+
/*
* The following vectors are part of the Linux architecture, there
* is no hardware IRQ pin equivalent for them, they are triggered
@@ -487,6 +490,47 @@ static void apic_pm_activate(void)
static void apic_pm_activate(void)
{
apic_pm_state.active = 1;
+}
+
+static void resume_x2apic(void)
+{
+ uint64_t msr_content;
+ struct IO_APIC_route_entry **ioapic_entries = NULL;
+
+ ASSERT(x2apic_enabled);
+
+ ioapic_entries = alloc_ioapic_entries();
+ if ( !ioapic_entries )
+ {
+ printk("Allocation of ioapic_entries failed\n");
+ goto out;
+ }
+
+ if ( save_IO_APIC_setup(ioapic_entries) )
+ {
+ printk("Saving IO-APIC state failed\n");
+ goto out;
+ }
+
+ mask_8259A();
+ mask_IO_APIC_setup(ioapic_entries);
+
+ iommu_enable_IR();
+
+ rdmsrl(MSR_IA32_APICBASE, msr_content);
+ if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
+ {
+ msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
+ msr_content = (uint32_t)msr_content;
+ wrmsrl(MSR_IA32_APICBASE, msr_content);
+ }
+
+ restore_IO_APIC_setup(ioapic_entries);
+ unmask_8259A();
+
+out:
+ if ( ioapic_entries )
+ free_ioapic_entries(ioapic_entries);
}
void __devinit setup_local_APIC(void)
@@ -727,7 +771,7 @@ int lapic_resume(void)
msr_content | MSR_IA32_APICBASE_ENABLE | mp_lapic_addr);
}
else
- enable_x2apic();
+ resume_x2apic();
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -894,35 +938,138 @@ no_apic:
return -1;
}
-void enable_x2apic(void)
+void check_x2apic_preenabled(void)
{
uint64_t msr_content;
- if ( smp_processor_id() == 0 )
+ if ( !x2apic_is_available() )
+ return;
+
+ rdmsrl(MSR_IA32_APICBASE, msr_content);
+ if ( msr_content & MSR_IA32_APICBASE_EXTD )
{
- if ( !iommu_supports_eim() )
+ printk("x2APIC mode is already enabled by BIOS.\n");
+ x2apic_preenabled = 1;
+ x2apic_enabled = 1;
+ }
+}
+
+static void enable_bsp_x2apic(void)
+{
+ struct IO_APIC_route_entry **ioapic_entries = NULL;
+ const struct genapic *x2apic_genapic = NULL;
+
+ ASSERT(smp_processor_id() == 0);
+
+ if ( x2apic_preenabled )
+ {
+ /*
+ * Interrupt remapping should also be enabled by BIOS when
+ * x2APIC is already enabled by BIOS, otherwise it's a BIOS
+ * bug
+ */
+ if ( !intremap_enabled() )
+ panic("Interrupt remapping is not enabled by BIOS while "
+ "x2APIC is already enabled by BIOS!\n");
+ }
+
+ x2apic_genapic = apic_x2apic_probe();
+ if ( x2apic_genapic )
+ genapic = x2apic_genapic;
+ else
+ {
+ if ( x2apic_cmdline_disable() )
{
- printk("x2APIC would not be enabled without EIM.\n");
- return;
+ if ( x2apic_preenabled )
+ {
+ /* Ignore x2apic=0, and set default x2apic mode */
+ genapic = &apic_x2apic_cluster;
+ printk("x2APIC: already enabled by BIOS, ignoring x2apic=0.\n");
+ }
+ else
+ {
+ printk("Not enabling x2APIC because x2apic=0 is set.\n");
+ return;
+ }
}
-
- if ( apic_x2apic_phys.probe() )
- genapic = &apic_x2apic_phys;
- else if ( apic_x2apic_cluster.probe() )
- genapic = &apic_x2apic_cluster;
else
{
- printk("x2APIC would not be enabled due to x2apic=off.\n");
- return;
+ if ( !iommu_enabled || !iommu_intremap || !iommu_qinval )
+ panic("Cannot enable x2APIC: the IOMMU, interrupt "
+ "remapping, or queued invalidation is disabled "
+ "on the command line!\n");
+ else
+ {
+ if ( x2apic_preenabled )
+ panic("x2APIC: already enabled by BIOS, but "
+ "iommu_supports_eim() failed\n");
+ else
+ {
+ printk("Not enabling x2APIC because "
+ "iommu_supports_eim() failed!\n");
+ return;
+ }
+ }
}
-
- x2apic_enabled = 1;
- printk("Switched to APIC driver %s.\n", genapic->name);
- }
- else
+ }
+
+ ioapic_entries = alloc_ioapic_entries();
+ if ( !ioapic_entries )
{
- BUG_ON(!x2apic_enabled); /* APs only enable x2apic when BSP did so. */
- }
+ printk("Allocation of ioapic_entries failed\n");
+ goto out;
+ }
+
+ if ( save_IO_APIC_setup(ioapic_entries) )
+ {
+ printk("Saving IO-APIC state failed\n");
+ goto out;
+ }
+
+ mask_8259A();
+ mask_IO_APIC_setup(ioapic_entries);
+
+ if ( iommu_enable_IR() )
+ {
+ printk("Not enabling x2APIC because interrupt remapping "
+ "could not be enabled.\n");
+ goto restore_out;
+ }
+
+ x2apic_enabled = 1;
+ printk("Switched to APIC driver %s.\n", genapic->name);
+
+ if ( !x2apic_preenabled )
+ {
+ uint64_t msr_content;
+ rdmsrl(MSR_IA32_APICBASE, msr_content);
+ if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
+ {
+ msr_content |= MSR_IA32_APICBASE_ENABLE |
+ MSR_IA32_APICBASE_EXTD;
+ msr_content = (uint32_t)msr_content;
+ wrmsrl(MSR_IA32_APICBASE, msr_content);
+ printk("x2APIC mode enabled.\n");
+ }
+ }
+
+restore_out:
+ restore_IO_APIC_setup(ioapic_entries);
+ unmask_8259A();
+
+out:
+ if ( ioapic_entries )
+ free_ioapic_entries(ioapic_entries);
+}
+
+static void enable_ap_x2apic(void)
+{
+ uint64_t msr_content;
+
+ ASSERT(smp_processor_id() != 0);
+
+ /* APs only enable x2apic when BSP did so. */
+ BUG_ON(!x2apic_enabled);
rdmsrl(MSR_IA32_APICBASE, msr_content);
if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
@@ -930,10 +1077,15 @@ void enable_x2apic(void)
msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
msr_content = (uint32_t)msr_content;
wrmsrl(MSR_IA32_APICBASE, msr_content);
- printk("x2APIC mode enabled.\n");
- }
+ }
+}
+
+void enable_x2apic(void)
+{
+ if ( smp_processor_id() == 0 )
+ enable_bsp_x2apic();
else
- printk("x2APIC mode enabled by BIOS.\n");
+ enable_ap_x2apic();
}
void __init init_apic_mappings(void)
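
Both the BSP and AP paths above funnel through the same IA32_APICBASE step:
read the MSR, OR in the ENABLE and EXTD bits, and write back only the low
32 bits. Below is a minimal standalone sketch of just that step; the MSR
index and bit positions are the Intel-architectural values, quoted here for
illustration, and read_msr()/write_msr() are hypothetical stand-ins for the
rdmsrl()/wrmsrl() used in the hunks:

    #include <stdint.h>

    #define MSR_IA32_APICBASE        0x1b
    #define MSR_IA32_APICBASE_ENABLE (1ULL << 11)  /* xAPIC global enable */
    #define MSR_IA32_APICBASE_EXTD   (1ULL << 10)  /* x2APIC mode */

    extern uint64_t read_msr(uint32_t msr);             /* stand-in */
    extern void write_msr(uint32_t msr, uint64_t val);  /* stand-in */

    /* Switch the local APIC into x2APIC mode unless it is there already. */
    static void set_x2apic_mode(void)
    {
        uint64_t msr_content = read_msr(MSR_IA32_APICBASE);

        if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
        {
            msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
            /* Only the low 32 bits are written back, as in the hunks above. */
            write_msr(MSR_IA32_APICBASE, (uint32_t)msr_content);
        }
    }
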
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/cpu/amd.c Tue Jul 06 11:54:40 2010 +0100
@@ -7,12 +7,12 @@
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/processor.h>
+#include <asm/amd.h>
#include <asm/hvm/support.h>
#include <asm/setup.h> /* amd_init_cpu */
#include <asm/acpi.h>
#include "cpu.h"
-#include "amd.h"
/*
* Pre-canned values for overriding the CPUID features
@@ -148,6 +148,54 @@ static void __devinit set_cpuidmask(cons
}
/*
+ * Check for the presence of an AMD erratum. Arguments are defined in amd.h
+ * for each known erratum. Return 1 if erratum is found.
+ */
+int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw, ...)
+{
+ va_list ap;
+ u32 range;
+ u32 ms;
+
+ if (cpu->x86_vendor != X86_VENDOR_AMD)
+ return 0;
+
+ va_start(ap, osvw);
+
+ if (osvw) {
+ u16 osvw_id = va_arg(ap, int);
+
+ if (cpu_has(cpu, X86_FEATURE_OSVW)) {
+ u64 osvw_len;
+ rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len);
+
+ if (osvw_id < osvw_len) {
+ u64 osvw_bits;
+ rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6),
+ osvw_bits);
+
+ va_end(ap);
+ return (osvw_bits >> (osvw_id & 0x3f)) & 0x01;
+ }
+ }
+ }
+
+ /* OSVW unavailable or ID unknown, match family-model-stepping range */
+ ms = (cpu->x86_model << 8) | cpu->x86_mask;
+ while ((range = va_arg(ap, int))) {
+ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+ (ms >= AMD_MODEL_RANGE_START(range)) &&
+ (ms <= AMD_MODEL_RANGE_END(range))) {
+ va_end(ap);
+ return 1;
+ }
+ }
+
+ va_end(ap);
+ return 0;
+}
+
+/*
* amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush
* filter on AMD 64-bit processors.
*/
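
The erratum descriptors consumed by cpu_has_amd_erratum() live in the new
xen/include/asm-x86/amd.h (the 138-line addition in the diffstat, whose diff
is not shown in this section). The function's varargs protocol, an 'osvw'
flag, an optional OSVW id, then a zero-terminated list of packed
family/model/stepping ranges, implies descriptor macros along the following
lines. This is a hedged sketch of that convention only; the actual range
packing is defined by AMD_MODEL_RANGE() in amd.h and is not reproduced here:

    /* Sketch of the descriptor convention implied by cpu_has_amd_erratum():
     * legacy errata match by family/model/stepping only, OSVW errata carry
     * an OSVW id first; the range list is terminated by 0. */
    #define AMD_LEGACY_ERRATUM(...)        0 /* no OSVW id */, __VA_ARGS__, 0
    #define AMD_OSVW_ERRATUM(osvw_id, ...) 1, osvw_id, __VA_ARGS__, 0

    /* Usage, as seen later in svm/asid.c and svm/svm.c:
     *     if ( cpu_has_amd_erratum(c, AMD_ERRATUM_170) )
     *         ... leave ASIDs disabled ...
     */
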
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.h
--- a/xen/arch/x86/cpu/amd.h Fri Jul 02 18:04:54 2010 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-/*
- * amd.h - AMD processor specific definitions
- */
-
-#ifndef __AMD_H__
-#define __AMD_H__
-
-#include <asm/cpufeature.h>
-
-/* CPUID masked for use by AMD-V Extended Migration */
-
-#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32)
-#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_))
-
-/* Family 0Fh, Revision C */
-#define AMD_FEATURES_K8_REV_C_ECX 0
-#define AMD_FEATURES_K8_REV_C_EDX ( \
- __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \
- __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \
- __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \
- __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \
- __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \
- __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \
- __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \
- __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \
- __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \
- __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \
- __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2))
-#define AMD_EXTFEATURES_K8_REV_C_ECX 0
-#define AMD_EXTFEATURES_K8_REV_C_EDX ( \
- __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \
- __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \
- __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \
- __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \
- __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \
- __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \
- __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \
- __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \
- __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \
- __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \
- __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \
- __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW))
-
-/* Family 0Fh, Revision D */
-#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX
-#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX
-#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\
- __bit(X86_FEATURE_LAHF_LM))
-#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\
- __bit(X86_FEATURE_FFXSR))
-
-/* Family 0Fh, Revision E */
-#define AMD_FEATURES_K8_REV_E_ECX (AMD_FEATURES_K8_REV_D_ECX | \
- __bit(X86_FEATURE_XMM3))
-#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \
- __bit(X86_FEATURE_HT))
-#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\
- __bit(X86_FEATURE_CMP_LEGACY))
-#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX
-
-/* Family 0Fh, Revision F */
-#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \
- __bit(X86_FEATURE_CX16))
-#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX
-#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\
- __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \
- __bit(X86_FEATURE_ALTMOVCR))
-#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\
- __bit(X86_FEATURE_RDTSCP))
-
-/* Family 0Fh, Revision G */
-#define AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX
-#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX
-#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
- __bit(X86_FEATURE_3DNOWPF))
-#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX
-
-/* Family 10h, Revision B */
-#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \
- __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT))
-#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX
-#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
- __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \
- __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \
- __bit(X86_FEATURE_IBS))
-#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\
- __bit(X86_FEATURE_PAGE1GB))
-
-/* Family 10h, Revision C */
-#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX
-#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX
-#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\
- __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT))
-#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX
-
-/* Family 11h, Revision B */
-#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX
-#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX
-#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\
- __bit(X86_FEATURE_SKINIT))
-#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX
-
-#endif /* __AMD_H__ */
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/domctl.c Tue Jul 06 11:54:40 2010 +0100
@@ -34,7 +34,6 @@
#include <public/mem_event.h>
#include <asm/mem_sharing.h>
-#ifdef XEN_GDBSX_CONFIG
#ifdef XEN_KDB_CONFIG
#include "../kdb/include/kdbdefs.h"
#include "../kdb/include/kdbproto.h"
@@ -43,8 +42,9 @@ typedef unsigned char kdbbyt_t;
typedef unsigned char kdbbyt_t;
extern int dbg_rw_mem(kdbva_t, kdbbyt_t *, int, domid_t, int, uint64_t);
#endif
-static int
-gdbsx_guest_mem_io(domid_t domid, struct xen_domctl_gdbsx_memio *iop)
+
+static int gdbsx_guest_mem_io(
+ domid_t domid, struct xen_domctl_gdbsx_memio *iop)
{
ulong l_uva = (ulong)iop->uva;
iop->remain = dbg_rw_mem(
@@ -52,7 +52,6 @@ gdbsx_guest_mem_io(domid_t domid, struct
iop->gwr, iop->pgd3val);
return (iop->remain ? -EFAULT : 0);
}
-#endif /* XEN_GDBSX_CONFIG */
long arch_do_domctl(
struct xen_domctl *domctl,
@@ -1309,7 +1308,6 @@ long arch_do_domctl(
}
break;
-#ifdef XEN_GDBSX_CONFIG
case XEN_DOMCTL_gdbsx_guestmemio:
{
struct domain *d;
@@ -1418,7 +1416,6 @@ long arch_do_domctl(
rcu_unlock_domain(d);
}
break;
-#endif /* XEN_GDBSX_CONFIG */
#ifdef __x86_64__
case XEN_DOMCTL_mem_event_op:
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/genapic/x2apic.c
--- a/xen/arch/x86/genapic/x2apic.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/genapic/x2apic.c Tue Jul 06 11:54:40 2010 +0100
@@ -33,6 +33,11 @@ static int x2apic_phys; /* By default w
static int x2apic_phys; /* By default we use logical cluster mode. */
boolean_param("x2apic_phys", x2apic_phys);
+int x2apic_cmdline_disable(void)
+{
+ return (x2apic == 0);
+}
+
static int probe_x2apic_phys(void)
{
return x2apic && x2apic_phys && x2apic_is_available() &&
@@ -54,6 +59,20 @@ const struct genapic apic_x2apic_cluster
APIC_INIT("x2apic_cluster", probe_x2apic_cluster),
GENAPIC_X2APIC_CLUSTER
};
+
+const struct genapic *apic_x2apic_probe(void)
+{
+ if ( !x2apic || !x2apic_is_available() )
+ return NULL;
+
+ if ( !iommu_supports_eim() )
+ return NULL;
+
+ if ( x2apic_phys )
+ return &apic_x2apic_phys;
+ else
+ return &apic_x2apic_cluster;
+}
void init_apic_ldr_x2apic_phys(void)
{
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/mtrr.c Tue Jul 06 11:54:40 2010 +0100
@@ -707,7 +707,7 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
1, HVMSR_PER_VCPU);
uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
- uint8_t *ipat, int direct_mmio)
+ uint8_t *ipat, bool_t direct_mmio)
{
uint8_t gmtrr_mtype, hmtrr_mtype;
uint32_t type;
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/svm/asid.c Tue Jul 06 11:54:40 2010 +0100
@@ -21,14 +21,14 @@
#include <xen/lib.h>
#include <xen/perfc.h>
#include <asm/hvm/svm/asid.h>
+#include <asm/amd.h>
void svm_asid_init(struct cpuinfo_x86 *c)
{
int nasids = 0;
/* Check for erratum #170, and leave ASIDs disabled if it's present. */
- if ( (c->x86 == 0x10) ||
- ((c->x86 == 0xf) && (c->x86_model >= 0x68) && (c->x86_mask >= 1)) )
+ if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_170) )
nasids = cpuid_ebx(0x8000000A);
hvm_asid_init(nasids);
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c Tue Jul 06 11:54:40 2010 +0100
@@ -34,6 +34,7 @@
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
+#include <asm/amd.h>
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
@@ -846,8 +847,8 @@ static void svm_init_erratum_383(struct
{
uint64_t msr_content;
- /* only family 10h is affected */
- if ( c->x86 != 0x10 )
+ /* check whether CPU is affected */
+ if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_383) )
return;
/* use safe methods to be compatible with nested virtualization */
@@ -1492,9 +1493,7 @@ asmlinkage void svm_vmexit_handler(struc
if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
break;
__update_guest_eip(regs, inst_len);
-#ifdef XEN_GDBSX_CONFIG
current->arch.gdbsx_vcpu_event = TRAP_int3;
-#endif
domain_pause_for_debugger();
break;
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Jul 06 11:54:40 2010 +0100
@@ -1064,8 +1064,10 @@ void vmx_do_resume(struct vcpu *v)
* 1: flushing cache (wbinvd) when the guest is scheduled out if
* there is no wbinvd exit, or
* 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
+ * If VT-d engine can force snooping, we don't need to do these.
*/
- if ( has_arch_pdevs(v->domain) && !cpu_has_wbinvd_exiting )
+ if ( has_arch_pdevs(v->domain) && !iommu_snoop
+ && !cpu_has_wbinvd_exiting )
{
int cpu = v->arch.hvm_vmx.active_cpu;
if ( cpu != -1 )
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Jul 06 11:54:40 2010 +0100
@@ -2089,6 +2089,9 @@ static void vmx_wbinvd_intercept(void)
if ( !has_arch_mmios(current->domain) )
return;
+ if ( iommu_snoop )
+ return;
+
if ( cpu_has_wbinvd_exiting )
on_each_cpu(wbinvd_ipi, NULL, 1);
else
@@ -2406,9 +2409,7 @@ asmlinkage void vmx_vmexit_handler(struc
goto exit_and_crash;
inst_len = __get_instruction_length(); /* Safe: INT3 */
__update_guest_eip(inst_len);
-#ifdef XEN_GDBSX_CONFIG
current->arch.gdbsx_vcpu_event = TRAP_int3;
-#endif
domain_pause_for_debugger();
break;
case TRAP_no_device:
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vpmu.c
--- a/xen/arch/x86/hvm/vmx/vpmu.c Fri Jul 02 18:04:54 2010 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@xxxxxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/msr.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <public/sched.h>
-#include <public/hvm/save.h>
-#include <asm/hvm/vmx/vpmu.h>
-
-static int __read_mostly opt_vpmu_enabled;
-boolean_param("vpmu", opt_vpmu_enabled);
-
-int vpmu_do_wrmsr(struct cpu_user_regs *regs)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
- if ( vpmu->arch_vpmu_ops )
- return vpmu->arch_vpmu_ops->do_wrmsr(regs);
- return 0;
-}
-
-int vpmu_do_rdmsr(struct cpu_user_regs *regs)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
- if ( vpmu->arch_vpmu_ops )
- return vpmu->arch_vpmu_ops->do_rdmsr(regs);
- return 0;
-}
-
-int vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
- if ( vpmu->arch_vpmu_ops )
- return vpmu->arch_vpmu_ops->do_interrupt(regs);
- return 0;
-}
-
-void vpmu_save(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( vpmu->arch_vpmu_ops )
- vpmu->arch_vpmu_ops->arch_vpmu_save(v);
-}
-
-void vpmu_load(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( vpmu->arch_vpmu_ops )
- vpmu->arch_vpmu_ops->arch_vpmu_load(v);
-}
-
-extern struct arch_vpmu_ops core2_vpmu_ops;
-void vpmu_initialise(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( !opt_vpmu_enabled )
- return;
-
- if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
- vpmu_destroy(v);
-
- if ( current_cpu_data.x86 == 6 )
- {
- switch ( current_cpu_data.x86_model )
- {
- case 15:
- case 23:
- case 26:
- case 29:
- vpmu->arch_vpmu_ops = &core2_vpmu_ops;
- break;
- }
- }
-
- if ( vpmu->arch_vpmu_ops != NULL )
- {
- vpmu->flags = 0;
- vpmu->context = NULL;
- vpmu->arch_vpmu_ops->arch_vpmu_initialise(v);
- }
-}
-
-void vpmu_destroy(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( vpmu->arch_vpmu_ops )
- vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
-}
-
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/i8259.c Tue Jul 06 11:54:40 2010 +0100
@@ -173,6 +173,26 @@ int i8259A_irq_pending(unsigned int irq)
spin_unlock_irqrestore(&i8259A_lock, flags);
return ret;
+}
+
+void mask_8259A(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ outb(0xff, 0xA1);
+ outb(0xff, 0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ outb(cached_A1, 0xA1);
+ outb(cached_21, 0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
}
/*
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/io_apic.c Tue Jul 06 11:54:40 2010 +0100
@@ -134,6 +134,126 @@ static void __init replace_pin_at_irq(un
break;
entry = irq_2_pin + entry->next;
}
+}
+
+struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+ int apic;
+ struct IO_APIC_route_entry **ioapic_entries;
+
+ ioapic_entries = xmalloc_array(struct IO_APIC_route_entry *, nr_ioapics);
+ if (!ioapic_entries)
+ return 0;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ ioapic_entries[apic] =
+ xmalloc_array(struct IO_APIC_route_entry,
+ nr_ioapic_registers[apic]);
+ if (!ioapic_entries[apic])
+ goto nomem;
+ }
+
+ return ioapic_entries;
+
+nomem:
+ while (--apic >= 0)
+ xfree(ioapic_entries[apic]);
+ xfree(ioapic_entries);
+
+ return 0;
+}
+
+/*
+ * Saves all the IO-APIC RTE's
+ */
+int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+ int apic, pin;
+
+ if (!ioapic_entries)
+ return -ENOMEM;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!ioapic_entries[apic])
+ return -ENOMEM;
+
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ *(((int *)&ioapic_entries[apic][pin])+0) =
+ __io_apic_read(apic, 0x10+pin*2);
+ *(((int *)&ioapic_entries[apic][pin])+1) =
+ __io_apic_read(apic, 0x11+pin*2);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+ int apic, pin;
+
+ if (!ioapic_entries)
+ return;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!ioapic_entries[apic])
+ break;
+
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ entry = ioapic_entries[apic][pin];
+ if (!entry.mask) {
+ entry.mask = 1;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+ }
+}
+
+/*
+ * Restore IO APIC entries which were saved in ioapic_entries.
+ */
+int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+ int apic, pin;
+ unsigned long flags;
+ struct IO_APIC_route_entry entry;
+
+ if (!ioapic_entries)
+ return -ENOMEM;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!ioapic_entries[apic])
+ return -ENOMEM;
+
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ entry = ioapic_entries[apic][pin];
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+
+ return 0;
+}
+
+void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
+{
+ int apic;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ xfree(ioapic_entries[apic]);
+
+ xfree(ioapic_entries);
}
static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable,
unsigned long disable)
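
Together these helpers form a save/mask/work/restore bracket around
operations that must run with every interrupt source quiesced;
resume_x2apic() and enable_bsp_x2apic() in the apic.c hunks above use
exactly this sequence. A condensed restatement of the pattern follows:
the helpers are the ones introduced in this hunk, 'fn' is a hypothetical
stand-in for a call such as iommu_enable_IR(), and error handling is
trimmed to the essentials:

    /* Sketch: run 'fn' with the 8259A PICs and all IO-APIC RTEs masked. */
    static int with_irqs_quiesced(int (*fn)(void))
    {
        struct IO_APIC_route_entry **entries = alloc_ioapic_entries();
        int rc = -ENOMEM;

        if ( !entries )
            return rc;

        if ( (rc = save_IO_APIC_setup(entries)) == 0 )
        {
            mask_8259A();                 /* mask the legacy PICs */
            mask_IO_APIC_setup(entries);  /* mask every saved RTE */

            rc = fn();

            restore_IO_APIC_setup(entries);
            unmask_8259A();
        }

        free_ioapic_entries(entries);
        return rc;
    }
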
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Tue Jul 06 11:54:40 2010 +0100
@@ -118,6 +118,74 @@ static int ept_set_middle_entry(struct d
return 1;
}
+/* free ept sub tree behind an entry */
+void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level)
+{
+ /* End if the entry is a leaf entry. */
+ if ( level == 0 || !is_epte_present(ept_entry) ||
+ is_epte_superpage(ept_entry) )
+ return;
+
+ if ( level > 1 )
+ {
+ ept_entry_t *epte = map_domain_page(ept_entry->mfn);
+ for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+ ept_free_entry(d, epte + i, level - 1);
+ unmap_domain_page(epte);
+ }
+
+ d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn));
+}
+
+static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry,
+ int level, int target)
+{
+ ept_entry_t new_ept, *table;
+ uint64_t trunk;
+ int rv = 1;
+
+ /* End if the entry is a leaf entry or reaches the target level. */
+ if ( level == 0 || level == target )
+ return rv;
+
+ ASSERT(is_epte_superpage(ept_entry));
+
+ if ( !ept_set_middle_entry(d, &new_ept) )
+ return 0;
+
+ table = map_domain_page(new_ept.mfn);
+ trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
+
+ for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+ {
+ ept_entry_t *epte = table + i;
+
+ epte->emt = ept_entry->emt;
+ epte->ipat = ept_entry->ipat;
+ epte->sp = (level > 1) ? 1 : 0;
+ epte->avail1 = ept_entry->avail1;
+ epte->avail2 = 0;
+ epte->mfn = ept_entry->mfn + i * trunk;
+
+ ept_p2m_type_to_flags(epte, epte->avail1);
+
+ if ( (level - 1) == target )
+ continue;
+
+ ASSERT(is_epte_superpage(epte));
+
+ if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) )
+ break;
+ }
+
+ unmap_domain_page(table);
+
+ /* Even on failure we should install the newly allocated ept page. */
+ *ept_entry = new_ept;
+
+ return rv;
+}
+
/* Take the currently mapped table, find the corresponding gfn entry,
* and map the next table, if available. If the entry is empty
* and read_only is set,
@@ -134,13 +202,18 @@ static int ept_set_middle_entry(struct d
*/
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
- u32 shift)
-{
+ int next_level)
+{
+ unsigned long mfn;
ept_entry_t *ept_entry;
- ept_entry_t *next;
- u32 index;
+ u32 shift, index;
+
+ shift = next_level * EPT_TABLE_ORDER;
index = *gfn_remainder >> shift;
+
+ /* index must fall within the page */
+ ASSERT(index < EPT_PAGETABLE_ENTRIES);
ept_entry = (*table) + index;
@@ -159,69 +232,12 @@ static int ept_next_level(struct domain
/* The only time sp would be set here is if we had hit a superpage */
if ( is_epte_superpage(ept_entry) )
return GUEST_TABLE_SUPER_PAGE;
- else
- {
- *gfn_remainder &= (1UL << shift) - 1;
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
- return GUEST_TABLE_NORMAL_PAGE;
- }
-}
-
-/* It's super page before and we should break down it now. */
-static int ept_split_large_page(struct domain *d,
- ept_entry_t **table, u32 *index,
- unsigned long gfn, int level)
-{
- ept_entry_t *prev_table = *table;
- ept_entry_t *split_table = NULL;
- ept_entry_t *split_entry = NULL;
- ept_entry_t *ept_entry = (*table) + (*index);
- ept_entry_t temp_ept_entry;
- unsigned long s_gfn, s_mfn;
- unsigned long offset, trunk;
- int i;
-
- /* alloc new page for new ept middle level entry which is
- * before a leaf super entry
- */
-
- if ( !ept_set_middle_entry(d, &temp_ept_entry) )
- return 0;
-
- /* split the super page to small next level pages */
- split_table = map_domain_page(temp_ept_entry.mfn);
- offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
- trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
-
- for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
- {
- s_gfn = gfn - offset + i * trunk;
- s_mfn = ept_entry->mfn + i * trunk;
-
- split_entry = split_table + i;
- split_entry->emt = ept_entry->emt;
- split_entry->ipat = ept_entry->ipat;
-
- split_entry->sp = (level > 1) ? 1 : 0;
-
- split_entry->mfn = s_mfn;
-
- split_entry->avail1 = ept_entry->avail1;
- split_entry->avail2 = 0;
- /* last step */
- split_entry->r = split_entry->w = split_entry->x = 1;
- ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
- }
-
- *ept_entry = temp_ept_entry;
-
- *index = offset / trunk;
- *table = split_table;
- unmap_domain_page(prev_table);
-
- return 1;
+
+ mfn = ept_entry->mfn;
+ unmap_domain_page(*table);
+ *table = map_domain_page(mfn);
+ *gfn_remainder &= (1UL << shift) - 1;
+ return GUEST_TABLE_NORMAL_PAGE;
}
/*
@@ -229,56 +245,64 @@ static int ept_split_large_page(struct d
* by observing whether any gfn->mfn translations are modified.
*/
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
unsigned int order, p2m_type_t p2mt)
{
- ept_entry_t *table = NULL;
+ ept_entry_t *table, *ept_entry;
unsigned long gfn_remainder = gfn;
unsigned long offset = 0;
- ept_entry_t *ept_entry = NULL;
u32 index;
- int i;
+ int i, target = order / EPT_TABLE_ORDER;
int rv = 0;
int ret = 0;
- int split_level = 0;
- int walk_level = order / EPT_TABLE_ORDER;
- int direct_mmio = (p2mt == p2m_mmio_direct);
+ bool_t direct_mmio = (p2mt == p2m_mmio_direct);
uint8_t ipat = 0;
int need_modify_vtd_table = 1;
int needs_sync = 1;
- if ( order != 0 )
- if ( (gfn & ((1UL << order) - 1)) )
- return 1;
+ /*
+ * the caller must make sure:
+ * 1. passing valid gfn and mfn at order boundary.
+ * 2. gfn not exceeding guest physical address width.
+ * 3. passing a valid order.
+ */
+ if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
+ (gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) ||
+ (order % EPT_TABLE_ORDER) )
+ return 0;
+
+ ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+ (target == 1 && hvm_hap_has_2mb(d)) ||
+ (target == 0));
table = map_domain_page(ept_get_asr(d));
ASSERT(table != NULL);
- for ( i = ept_get_wl(d); i > walk_level; i-- )
- {
- ret = ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER);
+ for ( i = ept_get_wl(d); i > target; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder, i);
if ( !ret )
goto out;
else if ( ret != GUEST_TABLE_NORMAL_PAGE )
break;
}
- /* If order == 0, we should only get POD if we have a POD superpage.
- * If i > walk_level, we need to split the page; otherwise,
- * just behave as normal. */
- ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level);
-
- index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
- offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
-
- split_level = i;
+ ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
+
+ index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+ offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
ept_entry = table + index;
- if ( i == walk_level )
- {
- /* We reached the level we're looking for */
+ /*
+ * When we are here, we must be on a leaf ept entry
+ * with i == target or i > target.
+ */
+
+ if ( i == target )
+ {
+ /* We reached the target level. */
/* No need to flush if the old entry wasn't valid */
if ( !is_epte_present(ept_entry) )
@@ -291,15 +315,14 @@ ept_set_entry(struct domain *d, unsigned
direct_mmio);
ept_entry->ipat = ipat;
ept_entry->sp = order ? 1 : 0;
+ ept_entry->avail1 = p2mt;
+ ept_entry->avail2 = 0;
if ( ept_entry->mfn == mfn_x(mfn) )
need_modify_vtd_table = 0;
else
ept_entry->mfn = mfn_x(mfn);
- ept_entry->avail1 = p2mt;
- ept_entry->avail2 = 0;
-
ept_p2m_type_to_flags(ept_entry, p2mt);
}
else
@@ -307,32 +330,51 @@ ept_set_entry(struct domain *d, unsigned
}
else
{
- int level;
- ept_entry_t *split_ept_entry;
-
- for ( level = split_level; level > walk_level ; level-- )
- {
- rv = ept_split_large_page(d, &table, &index, gfn, level);
- if ( !rv )
- goto out;
- }
-
- split_ept_entry = table + index;
- split_ept_entry->avail1 = p2mt;
- ept_p2m_type_to_flags(split_ept_entry, p2mt);
- split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
- direct_mmio);
- split_ept_entry->ipat = ipat;
-
- if ( split_ept_entry->mfn == mfn_x(mfn) )
- need_modify_vtd_table = 0;
- else
- split_ept_entry->mfn = mfn_x(mfn);
+ /* We need to split the original page. */
+ ept_entry_t split_ept_entry;
+
+ ASSERT(is_epte_superpage(ept_entry));
+
+ split_ept_entry = *ept_entry;
+
+ if ( !ept_split_super_page(d, &split_ept_entry, i, target) )
+ {
+ ept_free_entry(d, &split_ept_entry, i);
+ goto out;
+ }
+
+ /* now install the newly split ept sub-tree */
+ /* NB: make sure the domain is paused and there is no in-flight VT-d DMA. */
+ *ept_entry = split_ept_entry;
+
+ /* then move to the level we want to make real changes */
+ for ( ; i > target; i-- )
+ ept_next_level(d, 0, &table, &gfn_remainder, i);
+
+ ASSERT(i == target);
+
+ index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+ offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
+
+ ept_entry = table + index;
+
+ ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
+ ept_entry->ipat = ipat;
+ ept_entry->sp = i ? 1 : 0;
+ ept_entry->avail1 = p2mt;
+ ept_entry->avail2 = 0;
+
+ if ( ept_entry->mfn == mfn_x(mfn) )
+ need_modify_vtd_table = 0;
+ else /* the caller should take care of the previous page */
+ ept_entry->mfn = mfn_x(mfn);
+
+ ept_p2m_type_to_flags(ept_entry, p2mt);
}
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn_x(mfn))
- && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+ if ( mfn_valid(mfn_x(mfn)) &&
+ (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
/* Success */
@@ -354,11 +396,11 @@ out:
for ( i = 0; i < (1 << order); i++ )
iommu_map_page(
d, gfn - offset + i, mfn_x(mfn) - offset + i,
- IOMMUF_readable|IOMMUF_writable);
+ IOMMUF_readable | IOMMUF_writable);
}
else if ( !order )
iommu_map_page(
- d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable);
+ d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable);
}
else
{
@@ -398,8 +440,7 @@ static mfn_t ept_get_entry(struct domain
for ( i = ept_get_wl(d); i > 0; i-- )
{
retry:
- ret = ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
if ( !ret )
goto out;
else if ( ret == GUEST_TABLE_POD_PAGE )
@@ -486,8 +527,7 @@ static ept_entry_t ept_get_entry_content
for ( i = ept_get_wl(d); i > 0; i-- )
{
- ret = ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
if ( !ret || ret == GUEST_TABLE_POD_PAGE )
goto out;
else if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -559,7 +599,7 @@ static mfn_t ept_get_entry_current(unsig
return ept_get_entry(current->domain, gfn, t, q);
}
-/*
+/*
* To test if the new emt type is the same with old,
* return 1 to not to reset ept entry.
*/
@@ -569,14 +609,14 @@ static int need_modify_ept_entry(struct
{
uint8_t ipat;
uint8_t emt;
- int direct_mmio = (p2mt == p2m_mmio_direct);
+ bool_t direct_mmio = (p2mt == p2m_mmio_direct);
emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
if ( (emt == o_emt) && (ipat == o_ipat) )
return 0;
- return 1;
+ return 1;
}
void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
@@ -710,8 +750,7 @@ static void ept_dump_p2m_table(unsigned
for ( i = ept_get_wl(d); i > 0; i-- )
{
- ret = ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
if ( ret != GUEST_TABLE_NORMAL_PAGE )
break;
}
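
The level arithmetic driving ept_set_entry() and ept_split_super_page() is
compact enough to verify by hand: with EPT_TABLE_ORDER being 9 (512 entries
per table, the value Xen's EPT code uses), a mapping of order 0, 9 or 18
lands on target level 0, 1 or 2, and each child produced by a split covers
a 'trunk' of 1 << ((level-1)*9) frames. A small self-contained check of
that arithmetic; everything beyond the constant is plain C:

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9  /* 512 entries per EPT table level */

    int main(void)
    {
        unsigned int orders[] = { 0, 9, 18 };  /* 4KiB, 2MiB, 1GiB */

        for ( unsigned int i = 0; i < 3; i++ )
        {
            unsigned int target = orders[i] / EPT_TABLE_ORDER;
            /* Frames covered by each of the 512 children of a split
             * superpage at this level, as in ept_split_super_page(). */
            unsigned long trunk =
                target ? 1UL << ((target - 1) * EPT_TABLE_ORDER) : 0;

            printf("order %2u -> level %u, child stride %lu frames\n",
                   orders[i], target, trunk);
        }
        return 0;
    }
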
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/setup.c Tue Jul 06 11:54:40 2010 +0100
@@ -909,6 +909,9 @@ void __init __start_xen(unsigned long mb
tboot_probe();
+ /* Check if x2APIC is already enabled in BIOS */
+ check_x2apic_preenabled();
+
/* Unmap the first page of CPU0's stack. */
memguard_guard_stack(cpu0_stack);
@@ -926,9 +929,6 @@ void __init __start_xen(unsigned long mb
generic_apic_probe();
acpi_boot_init();
-
- if ( x2apic_is_available() )
- enable_x2apic();
init_cpu_to_node();
@@ -941,6 +941,9 @@ void __init __start_xen(unsigned long mb
#endif
init_apic_mappings();
+
+ if ( x2apic_is_available() )
+ enable_x2apic();
init_IRQ();
diff -r ce278fdaced3 -r db6234d3eafb xen/common/memory.c
--- a/xen/common/memory.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/memory.c Tue Jul 06 11:54:40 2010 +0100
@@ -545,6 +545,8 @@ long do_memory_op(unsigned long cmd, XEN
}
args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
+ if (reservation.mem_flags & XENMEMF_exact_node_request)
+ args.memflags |= MEMF_exact_node;
if ( op == XENMEM_populate_physmap
&& (reservation.mem_flags & XENMEMF_populate_on_demand) )
diff -r ce278fdaced3 -r db6234d3eafb xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/page_alloc.c Tue Jul 06 11:54:40 2010 +0100
@@ -300,11 +300,15 @@ static struct page_info *alloc_heap_page
unsigned int i, j, zone = 0;
unsigned int num_nodes = num_online_nodes();
unsigned long request = 1UL << order;
+ bool_t exact_node_request = !!(memflags & MEMF_exact_node);
cpumask_t extra_cpus_mask, mask;
struct page_info *pg;
if ( node == NUMA_NO_NODE )
+ {
node = cpu_to_node(smp_processor_id());
+ exact_node_request = 0;
+ }
ASSERT(node >= 0);
ASSERT(zone_lo <= zone_hi);
@@ -345,6 +349,9 @@ static struct page_info *alloc_heap_page
goto found;
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
+ if ( exact_node_request )
+ goto not_found;
+
/* Pick next node, wrapping around if needed. */
node = next_node(node, node_online_map);
if (node == MAX_NUMNODES)
@@ -360,6 +367,7 @@ static struct page_info *alloc_heap_page
return pg;
}
+ not_found:
/* No suitable memory blocks. Fail the request. */
spin_unlock(&heap_lock);
return NULL;
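
These two hunks thread a strict-locality request from the hypercall
interface down to the heap: XENMEMF_exact_node_request (the three-line
public/memory.h addition in the diffstat) becomes MEMF_exact_node, and the
heap allocator now fails the request rather than falling back to another
node, unless no node was named at all. A hypothetical caller-side sketch of
encoding such a request; XENMEMF_node() and the reservation structure are
the pre-existing definitions from public/memory.h:

    /* Ask for 1024 order-0 pages strictly from NUMA node 1; with the
     * change above, the allocation fails instead of spilling elsewhere. */
    struct xen_memory_reservation res = {
        .nr_extents   = 1024,
        .extent_order = 0,
        .mem_flags    = XENMEMF_node(1) | XENMEMF_exact_node_request,
        .domid        = DOMID_SELF,
    };
    /* ...then issue XENMEM_populate_physmap with &res via memory_op. */
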
diff -r ce278fdaced3 -r db6234d3eafb xen/common/trace.c
--- a/xen/common/trace.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/trace.c Tue Jul 06 11:54:40 2010 +0100
@@ -50,16 +50,15 @@ static struct t_info *t_info;
static struct t_info *t_info;
#define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */
#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
-/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
-#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t))
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
+static u32 t_info_first_offset __read_mostly;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -75,13 +74,37 @@ static cpumask_t tb_cpu_mask = CPU_MASK_
/* which tracing events are enabled */
static u32 tb_event_mask = TRC_ALL;
+/* Return the number of elements _type necessary to store at least _x bytes of data
+ * i.e., sizeof(_type) * ans >= _x. */
+#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))
+
+static void calc_tinfo_first_offset(void)
+{
+ int offset_in_bytes;
+
+ offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
+
+ t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+
+ gdprintk(XENLOG_INFO, "%s: NR_CPUs %d, offset_in_bytes %d, t_info_first_offset %u\n",
+ __func__, NR_CPUS, offset_in_bytes, (unsigned)t_info_first_offset);
+}
+
/**
* check_tbuf_size - check to make sure that the proposed size will fit
- * in the currently sized struct t_info.
- */
-static inline int check_tbuf_size(int size)
-{
- return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t));
+ * in the currently sized struct t_info and allows prod and cons to
+ * reach double the value without overflow.
+ */
+static int check_tbuf_size(u32 pages)
+{
+ struct t_buf dummy;
+ typeof(dummy.prod) size;
+
+ size = ((typeof(dummy.prod))pages) * PAGE_SIZE;
+
+ return (size / PAGE_SIZE != pages)
+ || (size + size < size)
+ || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t));
}
/**
@@ -100,7 +123,7 @@ static int alloc_trace_bufs(void)
unsigned long nr_pages;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list = (uint32_t *)t_info;
- int offset = T_INFO_FIRST_OFFSET;
+ int offset = t_info_first_offset;
BUG_ON(check_tbuf_size(opt_tbuf_size));
@@ -115,7 +138,7 @@ static int alloc_trace_bufs(void)
}
t_info->tbuf_size = opt_tbuf_size;
- printk("tbuf_size %d\n", t_info->tbuf_size);
+ printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
nr_pages = opt_tbuf_size;
order = get_order_from_pages(nr_pages);
@@ -140,7 +163,7 @@ static int alloc_trace_bufs(void)
spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
- buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+ per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
buf->cons = buf->prod = 0;
per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
@@ -172,7 +195,7 @@ static int alloc_trace_bufs(void)
/* Write list first, then write per-cpu offset. */
wmb();
t_info->mfn_offset[cpu]=offset;
- printk("p%d mfn %"PRIx32" offset %d\n",
+ printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
cpu, mfn, offset);
offset+=i;
}
@@ -191,6 +214,7 @@ out_dealloc:
spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
{
+ per_cpu(t_bufs, cpu) = NULL;
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
free_xenheap_pages(rawbuf, order);
}
@@ -293,6 +317,10 @@ void __init init_trace_bufs(void)
void __init init_trace_bufs(void)
{
int i;
+
+ /* Calculate offset in u32 of first mfn */
+ calc_tinfo_first_offset();
+
 /* t_info size fixed at 2 pages for now. That should be big enough / small enough
* until it's worth making it dynamic. */
t_info = alloc_xenheap_pages(1, 0);
@@ -405,19 +433,39 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
return rc;
}
-static inline int calc_rec_size(int cycles, int extra)
-{
- int rec_size;
- rec_size = 4;
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
+{
+ unsigned int rec_size = 4;
+
if ( cycles )
rec_size += 8;
rec_size += extra;
return rec_size;
}
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
-{
- int x = buf->prod - buf->cons;
+static inline bool_t bogus(u32 prod, u32 cons)
+{
+ if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+ unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+ {
+ tb_init_done = 0;
+ printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
+ smp_processor_id(), prod, cons);
+ return 1;
+ }
+ return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const struct t_buf *buf)
+{
+ u32 prod = buf->prod, cons = buf->cons;
+ s32 x;
+
+ barrier(); /* must read buf->prod and buf->cons only once */
+ if ( bogus(prod, cons) )
+ return data_size;
+
+ x = prod - cons;
if ( x < 0 )
x += 2*data_size;
@@ -427,9 +475,16 @@ static inline int calc_unconsumed_bytes(
return x;
}
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
-{
- int x = data_size - buf->prod;
+static inline u32 calc_bytes_to_wrap(const struct t_buf *buf)
+{
+ u32 prod = buf->prod, cons = buf->cons;
+ s32 x;
+
+ barrier(); /* must read buf->prod and buf->cons only once */
+ if ( bogus(prod, cons) )
+ return 0;
+
+ x = data_size - prod;
if ( x <= 0 )
x += data_size;
@@ -439,54 +494,60 @@ static inline int calc_bytes_to_wrap(str
return x;
}
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const struct t_buf *buf)
{
return data_size - calc_unconsumed_bytes(buf);
}
-static inline struct t_rec *
-next_record(struct t_buf *buf)
-{
- int x = buf->prod;
+static inline struct t_rec *next_record(const struct t_buf *buf,
+ uint32_t *next)
+{
+ u32 x = buf->prod, cons = buf->cons;
+
+ barrier(); /* must read buf->prod and buf->cons only once */
+ *next = x;
+ if ( !tb_init_done || bogus(x, cons) )
+ return NULL;
+
if ( x >= data_size )
x -= data_size;
- ASSERT(x >= 0);
ASSERT(x < data_size);
return (struct t_rec *)&this_cpu(t_data)[x];
}
-static inline int __insert_record(struct t_buf *buf,
- unsigned long event,
- int extra,
- int cycles,
- int rec_size,
- unsigned char *extra_data)
+static inline void __insert_record(struct t_buf *buf,
+ unsigned long event,
+ unsigned int extra,
+ bool_t cycles,
+ unsigned int rec_size,
+ const void *extra_data)
{
struct t_rec *rec;
unsigned char *dst;
- unsigned long extra_word = extra/sizeof(u32);
- int local_rec_size = calc_rec_size(cycles, extra);
+ unsigned int extra_word = extra / sizeof(u32);
+ unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
BUG_ON(local_rec_size != rec_size);
BUG_ON(extra & 3);
+ rec = next_record(buf, &next);
+ if ( !rec )
+ return;
/* Double-check once more that we have enough space.
* Don't bugcheck here, in case the userland tool is doing
* something stupid. */
- if ( calc_bytes_avail(buf) < rec_size )
- {
- printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
- __func__,
- calc_bytes_avail(buf),
- data_size, buf->prod, buf->cons, data_size, rec_size);
- return 0;
- }
- rmb();
-
- rec = next_record(buf);
+ if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+ {
+ if ( printk_ratelimit() )
+ printk(XENLOG_WARNING
+ "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
+ __func__, data_size, next, buf->cons, rec_size);
+ return;
+ }
+
rec->event = event;
rec->extra_u32 = extra_word;
dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -503,21 +564,19 @@ static inline int __insert_record(struct
wmb();
- next = buf->prod + rec_size;
+ next += rec_size;
if ( next >= 2*data_size )
next -= 2*data_size;
- ASSERT(next >= 0);
ASSERT(next < 2*data_size);
buf->prod = next;
-
- return rec_size;
-}
-
-static inline int insert_wrap_record(struct t_buf *buf, int size)
-{
- int space_left = calc_bytes_to_wrap(buf);
- unsigned long extra_space = space_left - sizeof(u32);
- int cycles = 0;
+}
+
+static inline void insert_wrap_record(struct t_buf *buf,
+ unsigned int size)
+{
+ u32 space_left = calc_bytes_to_wrap(buf);
+ unsigned int extra_space = space_left - sizeof(u32);
+ bool_t cycles = 0;
BUG_ON(space_left > size);
@@ -529,17 +588,13 @@ static inline int insert_wrap_record(str
ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
}
- return __insert_record(buf,
- TRC_TRACE_WRAP_BUFFER,
- extra_space,
- cycles,
- space_left,
- NULL);
+ __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+ space_left, NULL);
}
#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(struct t_buf *buf)
{
struct {
u32 lost_records;
@@ -554,12 +609,8 @@ static inline int insert_lost_records(st
this_cpu(lost_records) = 0;
- return __insert_record(buf,
- TRC_LOST_RECORDS,
- sizeof(ed),
- 1 /* cycles */,
- LOST_REC_SIZE,
- (unsigned char *)&ed);
+ __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+ LOST_REC_SIZE, &ed);
}
/*
@@ -581,13 +632,15 @@ static DECLARE_TASKLET(trace_notify_dom0
* failure, otherwise 0. Failure occurs only if the trace buffers are not yet
* initialised.
*/
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+ const void *extra_data)
{
struct t_buf *buf;
- unsigned long flags, bytes_to_tail, bytes_to_wrap;
- int rec_size, total_size;
- int extra_word;
- int started_below_highwater = 0;
+ unsigned long flags;
+ u32 bytes_to_tail, bytes_to_wrap;
+ unsigned int rec_size, total_size;
+ unsigned int extra_word;
+ bool_t started_below_highwater;
if( !tb_init_done )
return;
@@ -626,7 +679,11 @@ void __trace_var(u32 event, int cycles,
buf = this_cpu(t_bufs);
if ( unlikely(!buf) )
+ {
+ /* Make gcc happy */
+ started_below_highwater = 0;
goto unlock;
+ }
started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
@@ -707,8 +764,9 @@ unlock:
spin_unlock_irqrestore(&this_cpu(t_lock), flags);
/* Notify trace buffer consumer that we've crossed the high water mark. */
- if ( started_below_highwater &&
- (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
+ if ( likely(buf!=NULL)
+ && started_below_highwater
+ && (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
tasklet_schedule(&trace_notify_dom0_tasklet);
}
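
A load-bearing detail in these trace.c hunks: prod and cons run modulo
2*data_size, not data_size. That is the classic trick that keeps a
completely full buffer (prod - cons == data_size) distinguishable from an
empty one, and it is exactly the invariant the new bogus() check enforces
against a misbehaving consumer. A minimal standalone illustration of the
arithmetic:

    #include <assert.h>
    #include <stdint.h>

    /* Indices live in [0, 2*size): full and empty stay distinguishable. */
    static uint32_t unconsumed(uint32_t prod, uint32_t cons, uint32_t size)
    {
        int32_t x = prod - cons;   /* may wrap "negative" */
        if ( x < 0 )
            x += 2 * size;
        return x;                  /* 0 = empty ... size = full */
    }

    int main(void)
    {
        const uint32_t size = 4096;

        assert(unconsumed(0, 0, size) == 0);                   /* empty */
        assert(unconsumed(size, 0, size) == size);             /* full */
        assert(unconsumed(100, 2 * size - 100, size) == 200);  /* wrapped */
        return 0;
    }
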
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/dmar.c Tue Jul 06 11:54:40 2010 +0100
@@ -32,6 +32,7 @@
#include "dmar.h"
#include "iommu.h"
#include "extern.h"
+#include "vtd.h"
#undef PREFIX
#define PREFIX VTDPREFIX "ACPI DMAR:"
@@ -378,7 +379,6 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header;
void *dev_scope_start, *dev_scope_end;
struct acpi_drhd_unit *dmaru;
- void *addr;
int ret;
static int include_all = 0;
@@ -397,8 +397,9 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
dprintk(VTDPREFIX, " dmaru->address = %"PRIx64"\n",
dmaru->address);
- addr = map_to_nocache_virt(0, drhd->address);
- dmaru->ecap = dmar_readq(addr, DMAR_ECAP_REG);
+ ret = iommu_alloc(dmaru);
+ if ( ret )
+ goto out;
dev_scope_start = (void *)(drhd + 1);
dev_scope_end = ((void *)drhd) + header->length;
@@ -420,7 +421,7 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
}
if ( ret )
- xfree(dmaru);
+ goto out;
else if ( force_iommu || dmaru->include_all )
acpi_register_drhd_unit(dmaru);
else
@@ -451,14 +452,15 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
if ( invalid_cnt )
{
- xfree(dmaru);
-
if ( iommu_workaround_bios_bug &&
invalid_cnt == dmaru->scope.devices_cnt )
{
dprintk(XENLOG_WARNING VTDPREFIX,
" Workaround BIOS bug: ignore the DRHD due to all "
"devices under its scope are not PCI discoverable!\n");
+
+ iommu_free(dmaru);
+ xfree(dmaru);
}
else
{
@@ -474,6 +476,12 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
acpi_register_drhd_unit(dmaru);
}
+out:
+ if ( ret )
+ {
+ iommu_free(dmaru);
+ xfree(dmaru);
+ }
return ret;
}
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.h
--- a/xen/drivers/passthrough/vtd/dmar.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/dmar.h Tue Jul 06 11:54:40 2010 +0100
@@ -50,7 +50,6 @@ struct acpi_drhd_unit {
struct dmar_scope scope; /* must be first member of struct */
struct list_head list;
u64 address; /* register base address of the unit */
- u64 ecap;
u8 include_all:1;
struct iommu *iommu;
struct list_head ioapic_list;
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/extern.h
--- a/xen/drivers/passthrough/vtd/extern.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/extern.h Tue Jul 06 11:54:40 2010 +0100
@@ -33,7 +33,7 @@ extern struct keyhandler dump_iommu_info
int enable_qinval(struct iommu *iommu);
void disable_qinval(struct iommu *iommu);
-int enable_intremap(struct iommu *iommu);
+int enable_intremap(struct iommu *iommu, int eim);
void disable_intremap(struct iommu *iommu);
int queue_invalidate_context(struct iommu *iommu,
u16 did, u16 source_id, u8 function_mask, u8 granu);
@@ -44,6 +44,7 @@ int invalidate_sync(struct iommu *iommu)
int invalidate_sync(struct iommu *iommu);
int iommu_flush_iec_global(struct iommu *iommu);
int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+void clear_fault_bits(struct iommu *iommu);
struct iommu * ioapic_to_iommu(unsigned int apic_id);
struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu);
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c Tue Jul 06 11:54:40 2010 +0100
@@ -134,18 +134,26 @@ int iommu_supports_eim(void)
if ( !iommu_enabled || !iommu_qinval || !iommu_intremap )
return 0;
+ if ( list_empty(&acpi_drhd_units) )
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX, "VT-d is not supported\n");
+ return 0;
+ }
+
/* We MUST have a DRHD unit for each IOAPIC. */
for ( apic = 0; apic < nr_ioapics; apic++ )
if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "There is no DRHD for IOAPIC 0x%x (id: 0x%x)!\n",
+ apic, IO_APIC_ID(apic));
return 0;
-
- if ( list_empty(&acpi_drhd_units) )
- return 0;
+ }
for_each_drhd_unit ( drhd )
- if ( !ecap_queued_inval(drhd->ecap) ||
- !ecap_intr_remap(drhd->ecap) ||
- !ecap_eim(drhd->ecap) )
+ if ( !ecap_queued_inval(drhd->iommu->ecap) ||
+ !ecap_intr_remap(drhd->iommu->ecap) ||
+ !ecap_eim(drhd->iommu->ecap) )
return 0;
return 1;
@@ -706,7 +714,7 @@ void msi_msg_write_remap_rte(
}
#endif
-int enable_intremap(struct iommu *iommu)
+int enable_intremap(struct iommu *iommu, int eim)
{
struct acpi_drhd_unit *drhd;
struct ir_ctrl *ir_ctrl;
@@ -716,10 +724,25 @@ int enable_intremap(struct iommu *iommu)
ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
ir_ctrl = iommu_ir_ctrl(iommu);
+ sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+
+ /* Return if already enabled by Xen */
+ if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr )
+ return 0;
+
+ sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if ( !(sts & DMA_GSTS_QIES) )
+ {
+ dprintk(XENLOG_ERR VTDPREFIX,
+                "Queued invalidation is not enabled; refusing to enable "
+                "interrupt remapping\n");
+ return -EINVAL;
+ }
+
if ( ir_ctrl->iremap_maddr == 0 )
{
drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR
-            );
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
@@ -732,7 +755,7 @@ int enable_intremap(struct iommu *iommu)
#ifdef CONFIG_X86
/* set extended interrupt mode bit */
ir_ctrl->iremap_maddr |=
- x2apic_enabled ? (1 << IRTA_REG_EIME_SHIFT) : 0;
+ eim ? (1 << IRTA_REG_EIME_SHIFT) : 0;
#endif
spin_lock_irqsave(&iommu->register_lock, flags);
@@ -769,13 +792,95 @@ void disable_intremap(struct iommu *iomm
u32 sts;
unsigned long flags;
- ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+ if ( !ecap_intr_remap(iommu->ecap) )
+ return;
spin_lock_irqsave(&iommu->register_lock, flags);
sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if ( !(sts & DMA_GSTS_IRES) )
+ goto out;
+
dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
!(sts & DMA_GSTS_IRES), sts);
+out:
spin_unlock_irqrestore(&iommu->register_lock, flags);
}
+
+/*
+ * This function is used to enable interrupt remapping when
+ * enabling x2apic.
+ */
+int iommu_enable_IR(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+
+ if ( !iommu_supports_eim() )
+ return -1;
+
+ for_each_drhd_unit ( drhd )
+ {
+ struct qi_ctrl *qi_ctrl = NULL;
+
+ iommu = drhd->iommu;
+ qi_ctrl = iommu_qi_ctrl(iommu);
+
+ /* Clear previous faults */
+ clear_fault_bits(iommu);
+
+ /*
+ * Disable interrupt remapping and queued invalidation if
+ * already enabled by BIOS
+ */
+ disable_intremap(iommu);
+ disable_qinval(iommu);
+ }
+
+    /* Enable queued invalidation */
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( enable_qinval(iommu) != 0 )
+ {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "Failed to enable Queued Invalidation!\n");
+ return -1;
+ }
+ }
+
+ /* Enable interrupt remapping */
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( enable_intremap(iommu, 1) )
+ {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "Failed to enable Interrupt Remapping!\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Check whether interrupt remapping is enabled on all IOMMUs.
+ * Returns 1 if enabled, 0 otherwise.
+ */
+int intremap_enabled(void)
+{
+ struct acpi_drhd_unit *drhd;
+ u32 sts;
+
+ for_each_drhd_unit ( drhd )
+ {
+ sts = dmar_readl(drhd->iommu->reg, DMAR_GSTS_REG);
+ if ( !(sts & DMA_GSTS_IRES) )
+ return 0;
+ }
+
+ return 1;
+}
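
For context, the expected consumer of iommu_enable_IR()/intremap_enabled() is the x2apic bring-up path; a minimal caller sketch, with the surrounding function name assumed rather than taken from this changeset:

    /* Hypothetical bring-up order: interrupt remapping must be working
     * before the local APICs may be switched into x2apic mode. */
    static int __init try_enable_x2apic_sketch(void)
    {
        if ( iommu_enable_IR() != 0 )   /* qinval + IR on every IOMMU */
            return -1;                  /* stay in xapic mode */

        if ( !intremap_enabled() )      /* verify IRES on every unit */
            return -1;

        enable_x2apic();                /* declared in asm-x86/apic.h */
        return 0;
    }
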
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c Tue Jul 06 11:54:40 2010 +0100
@@ -144,14 +144,17 @@ struct iommu_flush *iommu_get_flush(stru
return iommu ? &iommu->intel->flush : NULL;
}
-static unsigned int clflush_size;
static int iommus_incoherent;
static void __iommu_flush_cache(void *addr, unsigned int size)
{
int i;
+ static unsigned int clflush_size = 0;
if ( !iommus_incoherent )
return;
+
+ if ( clflush_size == 0 )
+ clflush_size = get_cache_line_size();
for ( i = 0; i < size; i += clflush_size )
cacheline_flush((char *)addr + i);
@@ -1037,7 +1040,7 @@ static int iommu_set_interrupt(struct io
return irq;
}
-static int __init iommu_alloc(struct acpi_drhd_unit *drhd)
+int __init iommu_alloc(struct acpi_drhd_unit *drhd)
{
struct iommu *iommu;
unsigned long sagaw, nr_dom;
@@ -1131,7 +1134,7 @@ static int __init iommu_alloc(struct acp
return 0;
}
-static void __init iommu_free(struct acpi_drhd_unit *drhd)
+void __init iommu_free(struct acpi_drhd_unit *drhd)
{
struct iommu *iommu = drhd->iommu;
@@ -1787,7 +1790,7 @@ static void setup_dom0_devices(struct do
spin_unlock(&pcidevs_lock);
}
-static void clear_fault_bits(struct iommu *iommu)
+void clear_fault_bits(struct iommu *iommu)
{
u64 val;
unsigned long flags;
@@ -1831,24 +1834,20 @@ static int init_vtd_hw(void)
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
spin_unlock_irqrestore(&iommu->register_lock, flags);
-
- /* initialize flush functions */
- flush = iommu_get_flush(iommu);
- flush->context = flush_context_reg;
- flush->iotlb = flush_iotlb_reg;
- }
-
- if ( iommu_qinval )
- {
- for_each_drhd_unit ( drhd )
- {
- iommu = drhd->iommu;
- if ( enable_qinval(iommu) != 0 )
- {
- dprintk(XENLOG_INFO VTDPREFIX,
- "Failed to enable Queued Invalidation!\n");
- break;
- }
+ }
+
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+        /*
+         * If queued invalidation cannot be enabled, fall back to
+         * register-based invalidation.
+         */
+ if ( enable_qinval(iommu) != 0 )
+ {
+ flush = iommu_get_flush(iommu);
+ flush->context = flush_context_reg;
+ flush->iotlb = flush_iotlb_reg;
}
}
@@ -1874,9 +1873,9 @@ static int init_vtd_hw(void)
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
- if ( enable_intremap(iommu) != 0 )
+ if ( enable_intremap(iommu, 0) != 0 )
{
- dprintk(XENLOG_INFO VTDPREFIX,
+ dprintk(XENLOG_WARNING VTDPREFIX,
"Failed to enable Interrupt Remapping!\n");
break;
}
@@ -1943,8 +1942,6 @@ int __init intel_vtd_setup(void)
platform_quirks();
- clflush_size = get_cache_line_size();
-
irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
BUG_ON(!irq_to_iommu);
memset(irq_to_iommu, 0, nr_irqs * sizeof(struct iommu*));
@@ -1958,9 +1955,6 @@ int __init intel_vtd_setup(void)
*/
for_each_drhd_unit ( drhd )
{
- if ( iommu_alloc(drhd) != 0 )
- goto error;
-
iommu = drhd->iommu;
if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
@@ -2000,8 +1994,6 @@ int __init intel_vtd_setup(void)
return 0;
error:
- for_each_drhd_unit ( drhd )
- iommu_free(drhd);
iommu_enabled = 0;
iommu_snoop = 0;
iommu_passthrough = 0;
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c Tue Jul 06 11:54:40 2010 +0100
@@ -437,10 +437,16 @@ int enable_qinval(struct iommu *iommu)
u32 sts;
unsigned long flags;
+ if ( !ecap_queued_inval(iommu->ecap) || !iommu_qinval )
+ return -ENOENT;
+
qi_ctrl = iommu_qi_ctrl(iommu);
flush = iommu_get_flush(iommu);
- ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+ /* Return if already enabled by Xen */
+ sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if ( (sts & DMA_GSTS_QIES) && qi_ctrl->qinval_maddr )
+ return 0;
if ( qi_ctrl->qinval_maddr == 0 )
{
@@ -488,14 +494,19 @@ void disable_qinval(struct iommu *iommu)
u32 sts;
unsigned long flags;
- ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+ if ( !ecap_queued_inval(iommu->ecap) )
+ return;
spin_lock_irqsave(&iommu->register_lock, flags);
sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if ( !(sts & DMA_GSTS_QIES) )
+ goto out;
+
dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_QIE));
    /* Make sure the hardware has completed it */
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
!(sts & DMA_GSTS_QIES), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
-}
+out:
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/vtd.h Tue Jul 06 11:54:40 2010 +0100
@@ -108,4 +108,7 @@ void iommu_flush_cache_entry(void *addr,
void iommu_flush_cache_entry(void *addr, unsigned int size);
void iommu_flush_cache_page(void *addr, unsigned long npages);
+int iommu_alloc(struct acpi_drhd_unit *drhd);
+void iommu_free(struct acpi_drhd_unit *drhd);
+
#endif // _VTD_H_
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/amd.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/amd.h Tue Jul 06 11:54:40 2010 +0100
@@ -0,0 +1,138 @@
+/*
+ * amd.h - AMD processor specific definitions
+ */
+
+#ifndef __AMD_H__
+#define __AMD_H__
+
+#include <asm/cpufeature.h>
+
+/* CPUID masked for use by AMD-V Extended Migration */
+
+#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32)
+#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_))
+
+/* Family 0Fh, Revision C */
+#define AMD_FEATURES_K8_REV_C_ECX 0
+#define AMD_FEATURES_K8_REV_C_EDX ( \
+ __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \
+ __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \
+ __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \
+ __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \
+ __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \
+ __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \
+ __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \
+ __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \
+ __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \
+ __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \
+ __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2))
+#define AMD_EXTFEATURES_K8_REV_C_ECX 0
+#define AMD_EXTFEATURES_K8_REV_C_EDX (                                 \
+ __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \
+ __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \
+ __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \
+ __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \
+ __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \
+ __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \
+ __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \
+ __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \
+ __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \
+ __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \
+ __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \
+ __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW))
+
+/* Family 0Fh, Revision D */
+#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX
+#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX
+#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\
+ __bit(X86_FEATURE_LAHF_LM))
+#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\
+ __bit(X86_FEATURE_FFXSR))
+
+/* Family 0Fh, Revision E */
+#define AMD_FEATURES_K8_REV_E_ECX (AMD_FEATURES_K8_REV_D_ECX | \
+ __bit(X86_FEATURE_XMM3))
+#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \
+ __bit(X86_FEATURE_HT))
+#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\
+ __bit(X86_FEATURE_CMP_LEGACY))
+#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX
+
+/* Family 0Fh, Revision F */
+#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \
+ __bit(X86_FEATURE_CX16))
+#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX
+#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\
+ __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \
+ __bit(X86_FEATURE_ALTMOVCR))
+#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\
+ __bit(X86_FEATURE_RDTSCP))
+
+/* Family 0Fh, Revision G */
+#define AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX
+#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX
+#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
+ __bit(X86_FEATURE_3DNOWPF))
+#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX
+
+/* Family 10h, Revision B */
+#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \
+ __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT))
+#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX
+#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
+ __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \
+ __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \
+ __bit(X86_FEATURE_IBS))
+#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\
+ __bit(X86_FEATURE_PAGE1GB))
+
+/* Family 10h, Revision C */
+#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX
+#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX
+#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\
+ __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT))
+#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX
+
+/* Family 11h, Revision B */
+#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX
+#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX
+#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\
+ __bit(X86_FEATURE_SKINIT))
+#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX
+
+/* AMD errata checking
+ *
+ * Errata are defined using the AMD_LEGACY_ERRATUM() or AMD_OSVW_ERRATUM()
+ * macros. The latter is intended for newer errata that have an OSVW id
+ * assigned, which it takes as first argument. Both take a variable number
+ * of family-specific model-stepping ranges created by AMD_MODEL_RANGE().
+ *
+ * Example 1:
+ * #define AMD_ERRATUM_319 \
+ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), \
+ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), \
+ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0))
+ * Example 2:
+ * #define AMD_ERRATUM_400 \
+ * AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \
+ * AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf))
+ *
+ */
+
+#define AMD_LEGACY_ERRATUM(...) 0 /* legacy */, __VA_ARGS__, 0
+#define AMD_OSVW_ERRATUM(osvw_id, ...) 1 /* osvw */, osvw_id, __VA_ARGS__, 0
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+#define AMD_ERRATUM_170 \
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf))
+
+#define AMD_ERRATUM_383 \
+ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0x1, 0x0))
+
+int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
+#endif /* __AMD_H__ */
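
A short usage sketch for the erratum interface declared above; the calling site and the response to a hit are illustrative only:

    /* Illustrative only: the varargs consumed by cpu_has_amd_erratum() are
     * exactly the AMD_OSVW_ERRATUM()/AMD_MODEL_RANGE() expansion above. */
    #include <asm/amd.h>

    static bool_t __init amd_383_affected(void)
    {
        return cpu_has_amd_erratum(&boot_cpu_data, AMD_ERRATUM_383) != 0;
    }
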
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/apic.h
--- a/xen/include/asm-x86/apic.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/apic.h Tue Jul 06 11:54:40 2010 +0100
@@ -25,6 +25,8 @@ extern int x2apic_enabled;
extern int x2apic_enabled;
extern int directed_eoi_enabled;
+extern void check_x2apic_preenabled(void);
+extern int x2apic_cmdline_disable(void);
extern void enable_x2apic(void);
static __inline int x2apic_is_available(void)
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/debugger.h
--- a/xen/include/asm-x86/debugger.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/debugger.h Tue Jul 06 11:54:40 2010 +0100
@@ -68,10 +68,8 @@ static inline int debugger_trap_entry(
if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached &&
((vector == TRAP_int3) || (vector == TRAP_debug)) )
{
-#ifdef XEN_GDBSX_CONFIG
if ( vector != TRAP_debug ) /* domain pause is good enough */
current->arch.gdbsx_vcpu_event = vector;
-#endif
domain_pause_for_debugger();
return 1;
}
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/domain.h Tue Jul 06 11:54:40 2010 +0100
@@ -415,9 +415,7 @@ struct arch_vcpu
struct mapcache_vcpu mapcache;
#endif
-#if XEN_GDBSX_CONFIG
uint32_t gdbsx_vcpu_event;
-#endif
/* A secondary copy of the vcpu time info. */
XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/genapic.h
--- a/xen/include/asm-x86/genapic.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/genapic.h Tue Jul 06 11:54:40 2010 +0100
@@ -70,6 +70,7 @@ cpumask_t vector_allocation_domain_flat(
.send_IPI_mask = send_IPI_mask_flat, \
.send_IPI_self = send_IPI_self_flat
+const struct genapic *apic_x2apic_probe(void);
void init_apic_ldr_x2apic_phys(void);
void init_apic_ldr_x2apic_cluster(void);
void clustered_apic_check_x2apic(void);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/io_apic.h Tue Jul 06 11:54:40 2010 +0100
@@ -199,6 +199,12 @@ extern void ioapic_suspend(void);
extern void ioapic_suspend(void);
extern void ioapic_resume(void);
+extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
+extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
+extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+
#else /* !CONFIG_X86_IO_APIC */
static inline void init_ioapic_mappings(void) {}
static inline void ioapic_suspend(void) {}
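
The five new declarations form a save/mask/restore bracket around any operation that must run with IO-APIC routing quiesced; the assumed calling pattern (error handling trimmed, not lifted verbatim from this patch) is:

    /* Assumed pattern: quiesce IO-APIC RTEs and the 8259A, flip the mode,
     * then restore.  mask_8259A()/unmask_8259A() are the irq.h additions
     * shown below. */
    struct IO_APIC_route_entry **entries = alloc_ioapic_entries();

    if ( entries && save_IO_APIC_setup(entries) == 0 )
    {
        mask_IO_APIC_setup(entries);
        mask_8259A();

        /* ... enable interrupt remapping and/or x2apic here ... */

        restore_IO_APIC_setup(entries);
        unmask_8259A();
    }
    free_ioapic_entries(entries);
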
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/irq.h Tue Jul 06 11:54:40 2010 +0100
@@ -91,6 +91,8 @@ void disable_8259A_irq(unsigned int irq)
void disable_8259A_irq(unsigned int irq);
void enable_8259A_irq(unsigned int irq);
int i8259A_irq_pending(unsigned int irq);
+void mask_8259A(void);
+void unmask_8259A(void);
void init_8259A(int aeoi);
int i8259A_suspend(void);
int i8259A_resume(void);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h Tue Jul 06 11:54:40 2010 +0100
@@ -251,6 +251,10 @@
/* AMD Microcode MSRs */
#define MSR_AMD_PATCHLEVEL 0x0000008b
#define MSR_AMD_PATCHLOADER 0xc0010020
+
+/* AMD OS Visible Workaround MSRs */
+#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140
+#define MSR_AMD_OSVW_STATUS 0xc0010141
/* K6 MSRs */
#define MSR_K6_EFER 0xc0000080
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/mtrr.h Tue Jul 06 11:54:40 2010 +0100
@@ -65,7 +65,7 @@ extern u32 get_pat_flags(struct vcpu *v,
extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
paddr_t spaddr, uint8_t gmtrr_mtype);
extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn,
- mfn_t mfn, uint8_t *ipat, int direct_mmio);
+                                  mfn_t mfn, uint8_t *ipat, bool_t direct_mmio);
extern void ept_change_entry_emt_with_range(
struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/public/io/ring.h Tue Jul 06 11:54:40 2010 +0100
@@ -103,8 +103,16 @@ struct __name##_sring {
struct __name##_sring { \
RING_IDX req_prod, req_event; \
RING_IDX rsp_prod, rsp_event; \
- uint8_t netfront_smartpoll_active; \
- uint8_t pad[47]; \
+ union { \
+ struct { \
+ uint8_t smartpoll_active; \
+ } netif; \
+ struct { \
+ uint8_t msg; \
+ } tapif_user; \
+ uint8_t pvt_pad[4]; \
+ } private; \
+ uint8_t __pad[44]; \
union __name##_sring_entry ring[1]; /* variable-length */ \
}; \
\
@@ -148,7 +156,8 @@ typedef struct __name##_back_ring __name
#define SHARED_RING_INIT(_s) do { \
(_s)->req_prod = (_s)->rsp_prod = 0; \
(_s)->req_event = (_s)->rsp_event = 1; \
- (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \
+ (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \
+ (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \
} while(0)
#define FRONT_RING_INIT(_r, _s, __size) do { \
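
With the union in place, each ring user addresses its private byte through a named member instead of indexing the old pad[] array; a minimal sketch of a netfront-style producer (the sring type comes from the usual DEFINE_RING_TYPES() instantiation):

    /* Sketch: advertise smart-poll support in the shared ring.  Before
     * this change the same byte was sring->netfront_smartpoll_active. */
    static void advertise_smartpoll(netif_tx_sring_t *sring)
    {
        sring->private.netif.smartpoll_active = 1;
    }
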
diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/memory.h
--- a/xen/include/public/memory.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/public/memory.h Tue Jul 06 11:54:40 2010 +0100
@@ -52,6 +52,9 @@
#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
/* Flag to populate physmap with populate-on-demand entries */
#define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
#endif
struct xen_memory_reservation {
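
A caller asking for memory strictly from one node would compose the new flag as below (sketch; extent_start setup elided):

    /* Sketch: request 256 order-0 extents from NUMA node 2, failing
     * rather than falling back to another node. */
    struct xen_memory_reservation res = {
        .nr_extents   = 256,
        .extent_order = 0,
        .mem_flags    = XENMEMF_exact_node(2),
        .domid        = DOMID_SELF,
    };
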
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/iommu.h Tue Jul 06 11:54:40 2010 +0100
@@ -58,6 +58,8 @@ struct iommu {
int iommu_setup(void);
int iommu_supports_eim(void);
+int iommu_enable_IR(void);
+int intremap_enabled(void);
int iommu_add_device(struct pci_dev *pdev);
int iommu_remove_device(struct pci_dev *pdev);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/mm.h Tue Jul 06 11:54:40 2010 +0100
@@ -82,6 +82,8 @@ int assign_pages(
#define MEMF_tmem (1U<<_MEMF_tmem)
#define _MEMF_no_dma 3
#define MEMF_no_dma (1U<<_MEMF_no_dma)
+#define _MEMF_exact_node 4
+#define MEMF_exact_node (1U<<_MEMF_exact_node)
#define _MEMF_node 8
#define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node)
#define _MEMF_bits 24
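
Inside the hypervisor the companion MEMF_exact_node bit would be honoured on the allocation path roughly as follows (schematic; the helper names are hypothetical and this is not the actual page_alloc.c hunk):

    /* Schematic: with MEMF_exact_node set, a miss on the requested node
     * fails the allocation instead of falling back to the next node. */
    if ( !node_has_free_pages(node, order) )        /* hypothetical helper */
    {
        if ( memflags & MEMF_exact_node )
            return NULL;
        node = next_allocation_node(node);          /* hypothetical helper */
    }
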
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/trace.h
--- a/xen/include/xen/trace.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/trace.h Tue Jul 06 11:54:40 2010 +0100
@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
int trace_will_trace_event(u32 event);
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
static inline void trace_var(u32 event, int cycles, int extra,
unsigned char *extra_data)
@@ -57,7 +57,7 @@ static inline void trace_var(u32 event,
{ \
u32 _d[1]; \
_d[0] = d1; \
- __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -68,7 +68,7 @@ static inline void trace_var(u32 event,
u32 _d[2]; \
_d[0] = d1; \
_d[1] = d2; \
- __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -80,7 +80,7 @@ static inline void trace_var(u32 event,
_d[0] = d1; \
_d[1] = d2; \
_d[2] = d3; \
- __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -93,7 +93,7 @@ static inline void trace_var(u32 event,
_d[1] = d2; \
_d[2] = d3; \
_d[3] = d4; \
- __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -107,7 +107,7 @@ static inline void trace_var(u32 event,
_d[2] = d3; \
_d[3] = d4; \
_d[4] = d5; \
- __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -122,7 +122,7 @@ static inline void trace_var(u32 event,
_d[3] = d4; \
_d[4] = d5; \
_d[5] = d6; \
- __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
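
A typical call site is unchanged by this cleanup; only the expansion tightens up, e.g. for the two-word variant:

    /* Sketch: emit a two-word trace record with an existing event code. */
    TRACE_2D(TRC_SCHED_SLEEP, d->domain_id, v->vcpu_id);

    /* Expands (when tracing is active) to roughly:
     *     u32 _d[2];
     *     _d[0] = d->domain_id;
     *     _d[1] = v->vcpu_id;
     *     __trace_var(TRC_SCHED_SLEEP, 1, sizeof(_d), _d);
     * where sizeof(_d) == 8, the same value the old sizeof(*_d)*2 produced. */
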