To: <keir.xen@xxxxxxxxx>,<xin.li@xxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH v2] Enable SMEP CPU feature support for XEN hypervisor
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Fri, 03 Jun 2011 15:36:18 +0100
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Fri, 03 Jun 2011 07:36:58 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
>>> "Li, Xin" <xin.li@xxxxxxxxx> 06/03/11 3:59 PM >>>
>--- a/xen/arch/x86/cpu/common.c    Thu Jun 02 18:46:35 2011 +0100
>+++ b/xen/arch/x86/cpu/common.c    Fri Jun 03 19:59:43 2011 +0800
>@@ -222,6 +222,22 @@ static void __init early_cpu_detect(void
>    c->x86_capability[4] = cap4;
>}
>
>+/* nosmep: if true, Intel SMEP is disabled. */
>+static bool_t disable_smep __cpuinitdata;

__initdata

>+boolean_param("nosmep", disable_smep);
>+
>+static void __cpuinit setup_smep(struct cpuinfo_x86 *c)

__init

>+{
>+    if ( cpu_has(c, X86_FEATURE_SMEP) ) {
>+        if ( unlikely(disable_smep) )
>+            setup_clear_cpu_cap(X86_FEATURE_SMEP);
>+        else {
>+            set_in_cr4(X86_CR4_SMEP);
>+            mmu_cr4_features |= X86_CR4_SMEP;

Why? We told you on the previous patch version that
set_in_cr4() already does this.

>+        }
>+    }
>+}
>+
>void __cpuinit generic_identify(struct cpuinfo_x86 * c)
>{
>    u32 tfms, xlvl, capability, excap, ebx;
>@@ -268,6 +284,8 @@ void __cpuinit generic_identify(struct c
>        c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
>    }
>
>+    setup_smep(c);

Still misplaced.

>+
>    early_intel_workaround(c);
>
>#ifdef CONFIG_X86_HT
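
Put together, the annotation and set_in_cr4() remarks above amount to
something like this for the common.c side (just a sketch of the suggested
shape, not a replacement hunk; where to call setup_smep() from is a
separate matter):

static bool_t __initdata disable_smep;     /* __initdata rather than __cpuinitdata */
boolean_param("nosmep", disable_smep);

static void __init setup_smep(struct cpuinfo_x86 *c)  /* __init rather than __cpuinit */
{
    if ( cpu_has(c, X86_FEATURE_SMEP) ) {
        if ( unlikely(disable_smep) )
            setup_clear_cpu_cap(X86_FEATURE_SMEP);
        else
            /* set_in_cr4() already ORs the bit into mmu_cr4_features. */
            set_in_cr4(X86_CR4_SMEP);
    }
}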
>diff -r 0791661a32d8 xen/arch/x86/traps.c
>--- a/xen/arch/x86/traps.c    Thu Jun 02 18:46:35 2011 +0100
>+++ b/xen/arch/x86/traps.c    Fri Jun 03 19:59:43 2011 +0800
>@@ -1139,7 +1139,13 @@ static int handle_gdt_ldt_mapping_fault(
>    (((va) >= HYPERVISOR_VIRT_START))
>#endif
>
>-static int __spurious_page_fault(
>+enum pf_type {
>+    real_page_fault,
>+    smep_fault,
>+    spurious_fault
>+};
>+
>+static enum pf_type __page_fault_type(
>    unsigned long addr, unsigned int error_code)
>{
>    unsigned long mfn, cr3 = read_cr3();
>@@ -1151,7 +1157,7 @@ static int __spurious_page_fault(
>#endif
>    l2_pgentry_t l2e, *l2t;
>    l1_pgentry_t l1e, *l1t;
>-    unsigned int required_flags, disallowed_flags;
>+    unsigned int required_flags, disallowed_flags, u_flag;
>
>    /*
>     * We do not take spurious page faults in IRQ handlers as we do not
>@@ -1159,11 +1165,11 @@ static int __spurious_page_fault(
>     * map_domain_page() is not IRQ-safe.
>     */
>    if ( in_irq() )
>-        return 0;
>+        return real_page_fault;
>
>    /* Reserved bit violations are never spurious faults. */
>    if ( error_code & PFEC_reserved_bit )
>-        return 0;
>+        return real_page_fault;
>
>    required_flags = _PAGE_PRESENT;
>    if ( error_code & PFEC_write_access )
>@@ -1175,6 +1181,8 @@ static int __spurious_page_fault(
>    if ( error_code & PFEC_insn_fetch )
>        disallowed_flags |= _PAGE_NX_BIT;
>
>+    u_flag = _PAGE_USER;

How about

u_flag = cpu_has_smep ? _PAGE_USER : 0;

avoiding the repeated cpu_has_smep checks below.
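
For illustration only (a sketch, not the literal patch), each level's test
would then shrink to:

    u_flag = cpu_has_smep ? _PAGE_USER : 0;

    /* ... at each level of the walk ... */
    u_flag &= l2e_get_flags(l2e);

    /* Supervisor-mode instruction fetch from a user-accessible mapping. */
    if ( (error_code & PFEC_insn_fetch) &&
         !(error_code & PFEC_user_mode) &&
         u_flag )
        return smep_fault;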

>+
>    mfn = cr3 >> PAGE_SHIFT;
>
>#if CONFIG_PAGING_LEVELS >= 4
>@@ -1184,7 +1192,9 @@ static int __spurious_page_fault(
>    unmap_domain_page(l4t);
>    if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
>         (l4e_get_flags(l4e) & disallowed_flags) )
>-        return 0;
>+        return real_page_fault;
>+
>+    u_flag &= l4e_get_flags(l4e);
>#endif
>
>#if CONFIG_PAGING_LEVELS >= 3
>@@ -1197,13 +1207,23 @@ static int __spurious_page_fault(
>    unmap_domain_page(l3t);
>#if CONFIG_PAGING_LEVELS == 3
>    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
>-        return 0;
>+        return real_page_fault;
>#else
>    if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
>         (l3e_get_flags(l3e) & disallowed_flags) )
>-        return 0;
>-    if ( l3e_get_flags(l3e) & _PAGE_PSE )
>-        return 1;
>+        return real_page_fault;
>+
>+    u_flag &= l3e_get_flags(l3e);
>+
>+    if ( l3e_get_flags(l3e) & _PAGE_PSE ) {
>+        /* SMEP fault error code 10001b */
>+        if ( (error_code & PFEC_insn_fetch) &&
>+             !(error_code & PFEC_user_mode) &&
>+             cpu_has_smep && u_flag )
>+            return smep_fault;
>+
>+        return spurious_fault;
>+    }
>#endif
>#endif
>
>@@ -1213,9 +1233,19 @@ static int __spurious_page_fault(
>    unmap_domain_page(l2t);
>    if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
>         (l2e_get_flags(l2e) & disallowed_flags) )
>-        return 0;
>-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
>-        return 1;
>+        return real_page_fault;
>+
>+    u_flag &= l2e_get_flags(l2e);
>+
>+    if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
>+        /* SMEP fault error code 10001b */
>+        if ( (error_code & PFEC_insn_fetch) &&
>+             !(error_code & PFEC_user_mode) &&
>+             cpu_has_smep && u_flag )
>+            return smep_fault;
>+
>+        return spurious_fault;
>+    }
>
>    l1t = map_domain_page(mfn);
>    l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
>@@ -1223,26 +1253,37 @@ static int __spurious_page_fault(
>    unmap_domain_page(l1t);
>    if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
>         (l1e_get_flags(l1e) & disallowed_flags) )
>-        return 0;
>-
>-    return 1;
>+        return real_page_fault;
>+
>+    u_flag &= l1e_get_flags(l1e);
>+
>+    /* SMEP fault error code 10001b */
>+    if ( (error_code & PFEC_insn_fetch) &&
>+         !(error_code & PFEC_user_mode) &&
>+         cpu_has_smep && u_flag )
>+        return smep_fault;
>+
>+    return spurious_fault;
>}
>
>static int spurious_page_fault(
>    unsigned long addr, unsigned int error_code)
>{
>    unsigned long flags;
>-    int is_spurious;
>+    enum pf_type pf_type;
>
>    /*
>     * Disabling interrupts prevents TLB flushing, and hence prevents
>     * page tables from becoming invalid under our feet during the walk.
>     */
>    local_irq_save(flags);
>-    is_spurious = __spurious_page_fault(addr, error_code);
>+    pf_type = __page_fault_type(addr, error_code);
>    local_irq_restore(flags);
>
>-    return is_spurious;
>+    if ( pf_type == smep_fault )
>+        domain_crash_synchronous();

I don't think you can use domain_crash_synchronous() here; domain_crash()
is perhaps the only possibility (and even if it were possible, I think there
was agreement not to add new calls to the former function and to always
use the latter instead).

That said I'm not certain it's a good choice at all, as Keir had already
pointed out on the first patch version.

Finally, spurious_page_fault() has two call sites, only one of which is
for faults that occurred in hypervisor context (which you seem to
assume is always the case). For the one where it gets called for a
fault in guest context, using domain_crash() is probably correct,
but a (possibly guest-chosen) alternative would be to forward the
(non-spurious) fault to the guest. Again, I think Keir hinted in that
direction already in response to the first patch version.
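
As a rough sketch of that direction (not a concrete proposal; the call-site
details are assumed), spurious_page_fault() could simply hand the
classification back and let each caller decide:

static enum pf_type spurious_page_fault(
    unsigned long addr, unsigned int error_code)
{
    unsigned long flags;
    enum pf_type pf_type;

    /* As before: keep interrupts off so the tables can't change under us. */
    local_irq_save(flags);
    pf_type = __page_fault_type(addr, error_code);
    local_irq_restore(flags);

    return pf_type;
}

with the guest-context call site then doing, e.g.,

    if ( spurious_page_fault(addr, regs->error_code) == smep_fault )
        domain_crash(current->domain);  /* or forward the fault to the guest */

while the hypervisor-context one keeps treating smep_fault as fatal.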

>+
>+    return (pf_type == spurious_fault);
>}
>
>static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel