Intel new CPU supports SMEP (Supervisor Mode Execution Protection). SMEP
prevents software operating with CPL < 3 (supervisor mode) from fetching
instructions from any linear address with a valid translation for which the U/S
flag (bit 2) is 1 in every paging-structure entry controlling the translation
for the linear address.
This patch enables SMEP in Xen to protect Xen hypervisor from executing pv guest
instructions, whose translation paging-structure entries' U/S flags are all set.
It crashes a pv guest triggering SMEP fault.
Signed-off-by: Yang Wei <wei.y.yang@xxxxxxxxx>
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Signed-off-by: Li Xin <xin.li@xxxxxxxxx>
diff -r 0791661a32d8 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/arch/x86/cpu/common.c Fri Jun 03 19:59:43 2011 +0800
@@ -222,6 +222,22 @@ static void __init early_cpu_detect(void
c->x86_capability[4] = cap4;
}
+/* nosmep: if true, Intel SMEP is disabled. */
+static bool_t disable_smep __cpuinitdata;
+boolean_param("nosmep", disable_smep);
+
+static void __cpuinit setup_smep(struct cpuinfo_x86 *c)
+{
+ if ( cpu_has(c, X86_FEATURE_SMEP) ) {
+ if ( unlikely(disable_smep) )
+ setup_clear_cpu_cap(X86_FEATURE_SMEP);
+ else {
+ set_in_cr4(X86_CR4_SMEP);
+ mmu_cr4_features |= X86_CR4_SMEP;
+ }
+ }
+}
+
void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl, capability, excap, ebx;
@@ -268,6 +284,8 @@ void __cpuinit generic_identify(struct c
c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
}
+ setup_smep(c);
+
early_intel_workaround(c);
#ifdef CONFIG_X86_HT
diff -r 0791661a32d8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/arch/x86/traps.c Fri Jun 03 19:59:43 2011 +0800
@@ -1139,7 +1139,13 @@ static int handle_gdt_ldt_mapping_fault(
(((va) >= HYPERVISOR_VIRT_START))
#endif
-static int __spurious_page_fault(
+enum pf_type {
+ real_page_fault,
+ smep_fault,
+ spurious_fault
+};
+
+static enum pf_type __page_fault_type(
unsigned long addr, unsigned int error_code)
{
unsigned long mfn, cr3 = read_cr3();
@@ -1151,7 +1157,7 @@ static int __spurious_page_fault(
#endif
l2_pgentry_t l2e, *l2t;
l1_pgentry_t l1e, *l1t;
- unsigned int required_flags, disallowed_flags;
+ unsigned int required_flags, disallowed_flags, u_flag;
/*
* We do not take spurious page faults in IRQ handlers as we do not
@@ -1159,11 +1165,11 @@ static int __spurious_page_fault(
* map_domain_page() is not IRQ-safe.
*/
if ( in_irq() )
- return 0;
+ return real_page_fault;
/* Reserved bit violations are never spurious faults. */
if ( error_code & PFEC_reserved_bit )
- return 0;
+ return real_page_fault;
required_flags = _PAGE_PRESENT;
if ( error_code & PFEC_write_access )
@@ -1175,6 +1181,8 @@ static int __spurious_page_fault(
if ( error_code & PFEC_insn_fetch )
disallowed_flags |= _PAGE_NX_BIT;
+ u_flag = _PAGE_USER;
+
mfn = cr3 >> PAGE_SHIFT;
#if CONFIG_PAGING_LEVELS >= 4
@@ -1184,7 +1192,9 @@ static int __spurious_page_fault(
unmap_domain_page(l4t);
if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
(l4e_get_flags(l4e) & disallowed_flags) )
- return 0;
+ return real_page_fault;
+
+ u_flag &= l4e_get_flags(l4e);
#endif
#if CONFIG_PAGING_LEVELS >= 3
@@ -1197,13 +1207,23 @@ static int __spurious_page_fault(
unmap_domain_page(l3t);
#if CONFIG_PAGING_LEVELS == 3
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- return 0;
+ return real_page_fault;
#else
if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
(l3e_get_flags(l3e) & disallowed_flags) )
- return 0;
- if ( l3e_get_flags(l3e) & _PAGE_PSE )
- return 1;
+ return real_page_fault;
+
+ u_flag &= l3e_get_flags(l3e);
+
+ if ( l3e_get_flags(l3e) & _PAGE_PSE ) {
+ /* SMEP fault error code 10001b */
+ if ( (error_code & PFEC_insn_fetch) &&
+ !(error_code & PFEC_user_mode) &&
+ cpu_has_smep && u_flag )
+ return smep_fault;
+
+ return spurious_fault;
+ }
#endif
#endif
@@ -1213,9 +1233,19 @@ static int __spurious_page_fault(
unmap_domain_page(l2t);
if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
(l2e_get_flags(l2e) & disallowed_flags) )
- return 0;
- if ( l2e_get_flags(l2e) & _PAGE_PSE )
- return 1;
+ return real_page_fault;
+
+ u_flag &= l2e_get_flags(l2e);
+
+ if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
+ /* SMEP fault error code 10001b */
+ if ( (error_code & PFEC_insn_fetch) &&
+ !(error_code & PFEC_user_mode) &&
+ cpu_has_smep && u_flag )
+ return smep_fault;
+
+ return spurious_fault;
+ }
l1t = map_domain_page(mfn);
l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
@@ -1223,26 +1253,37 @@ static int __spurious_page_fault(
unmap_domain_page(l1t);
if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
(l1e_get_flags(l1e) & disallowed_flags) )
- return 0;
-
- return 1;
+ return real_page_fault;
+
+ u_flag &= l1e_get_flags(l1e);
+
+ /* SMEP fault error code 10001b */
+ if ( (error_code & PFEC_insn_fetch) &&
+ !(error_code & PFEC_user_mode) &&
+ cpu_has_smep && u_flag )
+ return smep_fault;
+
+ return spurious_fault;
}
static int spurious_page_fault(
unsigned long addr, unsigned int error_code)
{
unsigned long flags;
- int is_spurious;
+ enum pf_type pf_type;
/*
* Disabling interrupts prevents TLB flushing, and hence prevents
* page tables from becoming invalid under our feet during the walk.
*/
local_irq_save(flags);
- is_spurious = __spurious_page_fault(addr, error_code);
+ pf_type = __page_fault_type(addr, error_code);
local_irq_restore(flags);
- return is_spurious;
+ if ( pf_type == smep_fault )
+ domain_crash_synchronous();
+
+ return (pf_type == spurious_fault);
}
static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
diff -r 0791661a32d8 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/include/asm-x86/cpufeature.h Fri Jun 03 19:59:43 2011 +0800
@@ -141,8 +141,9 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
#define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
@@ -202,6 +203,8 @@
#define cpu_has_fsgsbase boot_cpu_has(X86_FEATURE_FSGSBASE)
#endif
+#define cpu_has_smep boot_cpu_has(X86_FEATURE_SMEP)
+
#define cpu_has_ffxsr ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) \
&& boot_cpu_has(X86_FEATURE_FFXSR))
diff -r 0791661a32d8 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/include/asm-x86/domain.h Fri Jun 03 19:59:43 2011 +0800
@@ -527,12 +527,14 @@ unsigned long pv_guest_cr4_fixup(const s
/* Convert between guest-visible and real CR4 values. */
#define pv_guest_cr4_to_real_cr4(v) \
(((v)->arch.pv_vcpu.ctrlreg[4] \
- | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE)) \
- | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \
- | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0)) \
- & ~X86_CR4_DE)
-#define real_cr4_to_pv_guest_cr4(c) \
- ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
+ | (mmu_cr4_features \
+ & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP)) \
+ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \
+ | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0)) \
+ & ~X86_CR4_DE)
+#define real_cr4_to_pv_guest_cr4(c) \
+ ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD \
+ | X86_CR4_OSXSAVE | X86_CR4_SMEP))
void domain_cpuid(struct domain *d,
unsigned int input,
diff -r 0791661a32d8 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/include/asm-x86/processor.h Fri Jun 03 19:59:43 2011 +0800
@@ -85,6 +85,7 @@
#define X86_CR4_SMXE 0x4000 /* enable SMX */
#define X86_CR4_FSGSBASE 0x10000 /* enable {rd,wr}{fs,gs}base */
#define X86_CR4_OSXSAVE 0x40000 /* enable XSAVE/XRSTOR */
+#define X86_CR4_SMEP 0x100000/* enable SMEP */
/*
* Trap/fault mnemonics.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|