
[Xen-devel] [PATCH v3 17/17] x86: hide most hypervisor mappings in XPTI shadow page tables



Hide all but the absolutely necessary hypervisor mappings in the XPTI
shadow page tables. The following mappings are needed:

- guest-accessible areas, e.g. the RO M2P table
- IDT, TSS, GDT
- interrupt entry stacks
- interrupt handling code

For some of those mappings we need to set up lower-level page tables
with only some entries populated.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/pv/xpti.c             | 229 ++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/traps.c               |   2 +-
 xen/arch/x86/x86_64/compat/entry.S |   4 +
 xen/arch/x86/x86_64/entry.S        |   4 +
 xen/include/asm-x86/pv/mm.h        |   5 +
 5 files changed, 241 insertions(+), 3 deletions(-)
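
Reviewer note (not part of the patch): the commit message says only some
L4 slots keep their mappings in the shadow tables.  Below is a minimal,
stand-alone sketch of the per-slot policy that the new
xpti_update_l4_entry() applies; the enum and function names here are
invented for illustration, and the simplified types stand in for
l4_pgentry_t and the l4e_* accessors used by the real code.

/* Illustrative sketch only: policy for the Xen slots (256-271) of a
 * shadow L4.  Slot numbers and their meaning are taken from the patch. */
#include <stdio.h>

enum xpti_slot_action {
    SLOT_COPY,      /* copy the guest's L4 entry unchanged */
    SLOT_EMPTY,     /* hide: install l4e_empty() */
    SLOT_PERDOMAIN, /* point at the trimmed per-domain l3_shadow page */
    SLOT_SPARSE,    /* point at a sparse xpti_l3_shadows[] L3 table */
};

static enum xpti_slot_action slot_action(unsigned int slot)
{
    switch ( slot )
    {
    case 257 ... 259:   /* ioremap area, linear page tables */
        return SLOT_EMPTY;
    case 260:           /* per-domain mappings (GDT/LDT, XPTI area) */
        return SLOT_PERDOMAIN;
    case 261 ... 271:   /* Xen text/data, direct physical map */
        return SLOT_SPARSE;
    case 256:           /* read-only guest-accessible M2P table */
    default:            /* guest mappings below the Xen area */
        return SLOT_COPY;
    }
}

int main(void)
{
    for ( unsigned int slot = 256; slot <= 271; slot++ )
        printf("slot %u -> %d\n", slot, slot_action(slot));
    return 0;
}

The per-domain slot (260) cannot simply be copied or dropped: only the
GDT/LDT shadow mapping and the XPTI mappings (l3shadow[0] and l3shadow[3]
in xpti_domain_init()) are kept, hence the separate trimmed l3_shadow page.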

diff --git a/xen/arch/x86/pv/xpti.c b/xen/arch/x86/pv/xpti.c
index e08aa782bf..dea34322d7 100644
--- a/xen/arch/x86/pv/xpti.c
+++ b/xen/arch/x86/pv/xpti.c
@@ -19,13 +19,16 @@
  * along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/cpu.h>
 #include <xen/domain_page.h>
 #include <xen/errno.h>
 #include <xen/init.h>
 #include <xen/keyhandler.h>
 #include <xen/lib.h>
+#include <xen/notifier.h>
 #include <xen/sched.h>
 #include <asm/bitops.h>
+#include <asm/pv/mm.h>
 
 /*
  * For each L4 page table of the guest we need a shadow for the hypervisor.
@@ -118,6 +121,7 @@ struct xpti_domain {
     unsigned int unused_first; /* List of unused slots */
     spinlock_t lock;           /* Protects all shadow lists */
     struct domain *domain;
+    struct page_info *l3_shadow;
     struct tasklet tasklet;
     l1_pgentry_t **perdom_l1tab;
 #ifdef XPTI_DEBUG
@@ -140,6 +144,9 @@ static __read_mostly enum {
     XPTI_NODOM0
 } opt_xpti = XPTI_DEFAULT;
 
+static bool xpti_l3_shadow = false;
+static l3_pgentry_t *xpti_l3_shadows[11];
+
 static int parse_xpti(const char *s)
 {
     int rc = 0;
@@ -357,6 +364,34 @@ static unsigned int xpti_shadow_getforce(struct xpti_domain *xd)
     return idx;
 }
 
+static void xpti_update_l4_entry(struct xpti_domain *xd, l4_pgentry_t *dest,
+                                 l4_pgentry_t entry, unsigned int slot)
+{
+    l3_pgentry_t *l3pg;
+
+    switch ( slot )
+    {
+    case 257: /* ioremap area. */
+    case 258: /* linear page table (guest table). */
+    case 259: /* linear page table (shadow table). */
+        dest[slot] = l4e_empty();
+        break;
+    case 260: /* per-domain mappings. */
+        dest[slot] = l4e_from_page(xd->l3_shadow, __PAGE_HYPERVISOR);
+        break;
+    case 261 ... 271: /* hypervisor text and data, direct phys mapping. */
+        l3pg = xpti_l3_shadows[slot - 261];
+        dest[slot] = l3pg
+                     ? l4e_from_mfn(_mfn(virt_to_mfn(l3pg)), __PAGE_HYPERVISOR)
+                     : l4e_empty();
+        break;
+    case 256: /* read-only guest accessible m2p table. */
+    default:
+        dest[slot] = entry;
+        break;
+    }
+}
+
 static void xpti_init_xen_l4(struct xpti_domain *xd, struct xpti_l4pg *l4pg)
 {
     unsigned int i;
@@ -365,7 +400,7 @@ static void xpti_init_xen_l4(struct xpti_domain *xd, struct xpti_l4pg *l4pg)
     src = map_domain_page(_mfn(l4pg->guest_mfn));
     dest = mfn_to_virt(l4pg->xen_mfn);
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
-        dest[i] = src[i];
+        xpti_update_l4_entry(xd, dest, src[i], i);
     unmap_domain_page(src);
 }
 
@@ -432,7 +467,7 @@ void xpti_update_l4(const struct domain *d, unsigned long mfn,
     if ( idx != L4_INVALID )
     {
         l4 = mfn_to_virt(xd->l4pg[idx].xen_mfn);
-        l4[slot] = e;
+        xpti_update_l4_entry(xd, l4, e, slot);
     }
 
     spin_unlock_irqrestore(&xd->lock, flags);
@@ -550,6 +585,8 @@ void xpti_domain_destroy(struct domain *d)
         free_xenheap_page(xpti_shadow_free(xd, idx));
     }
 
+    if ( xd->l3_shadow )
+        free_domheap_page(xd->l3_shadow);
     xfree(xd->perdom_l1tab);
     xfree(xd->l4pg);
     xfree(xd->l4ref);
@@ -642,6 +679,125 @@ static int xpti_vcpu_init(struct vcpu *v)
     return rc;
 }
 
+static int xpti_add_mapping(unsigned long addr)
+{
+    unsigned int slot, flags, mapflags;
+    unsigned long mfn;
+    l3_pgentry_t *pl3e;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+
+    slot = l4_table_offset(addr);
+    pl3e = l4e_to_l3e(idle_pg_table[slot]);
+
+    slot = l3_table_offset(addr);
+    mapflags = l3e_get_flags(pl3e[slot]);
+    ASSERT(mapflags & _PAGE_PRESENT);
+    if ( mapflags & _PAGE_PSE )
+    {
+        mapflags &= ~_PAGE_PSE;
+        mfn = l3e_get_pfn(pl3e[slot]) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1);
+        mfn |= PFN_DOWN(addr) & ((1UL << (2 * PAGETABLE_ORDER)) - 1);
+    }
+    else
+    {
+        pl2e = l3e_to_l2e(pl3e[slot]);
+        slot = l2_table_offset(addr);
+        mapflags = l2e_get_flags(pl2e[slot]);
+        ASSERT(mapflags & _PAGE_PRESENT);
+        if ( mapflags & _PAGE_PSE )
+        {
+            mapflags &= ~_PAGE_PSE;
+            mfn = l2e_get_pfn(pl2e[slot]) & ~((1UL << PAGETABLE_ORDER) - 1);
+            mfn |= PFN_DOWN(addr) & ((1UL << PAGETABLE_ORDER) - 1);
+        }
+        else
+        {
+            pl1e = l2e_to_l1e(pl2e[slot]);
+            slot = l1_table_offset(addr);
+            mapflags = l1e_get_flags(pl1e[slot]);
+            ASSERT(mapflags & _PAGE_PRESENT);
+            mfn = l1e_get_pfn(pl1e[slot]);
+        }
+    }
+
+    slot = l4_table_offset(addr);
+    ASSERT(slot >= 261 && slot <= 271);
+    pl3e = xpti_l3_shadows[slot - 261];
+    if ( !pl3e )
+    {
+        pl3e = alloc_xen_pagetable();
+        if ( !pl3e )
+            return -ENOMEM;
+        clear_page(pl3e);
+        xpti_l3_shadows[slot - 261] = pl3e;
+    }
+
+    slot = l3_table_offset(addr);
+    flags = l3e_get_flags(pl3e[slot]);
+    if ( !(flags & _PAGE_PRESENT) )
+    {
+        pl2e = alloc_xen_pagetable();
+        if ( !pl2e )
+            return -ENOMEM;
+        clear_page(pl2e);
+        pl3e[slot] = l3e_from_mfn(_mfn(virt_to_mfn(pl2e)), __PAGE_HYPERVISOR);
+    }
+    else
+    {
+        pl2e = l3e_to_l2e(pl3e[slot]);
+    }
+
+    slot = l2_table_offset(addr);
+    flags = l2e_get_flags(pl2e[slot]);
+    if ( !(flags & _PAGE_PRESENT) )
+    {
+        pl1e = alloc_xen_pagetable();
+        if ( !pl1e )
+            return -ENOMEM;
+        clear_page(pl1e);
+        pl2e[slot] = l2e_from_mfn(_mfn(virt_to_mfn(pl1e)), __PAGE_HYPERVISOR);
+    }
+    else
+    {
+        pl1e = l2e_to_l1e(pl2e[slot]);
+    }
+
+    slot = l1_table_offset(addr);
+    pl1e[slot] = l1e_from_mfn(_mfn(mfn), mapflags);
+
+    return 0;
+}
+
+static void xpti_rm_mapping(unsigned long addr)
+{
+    unsigned int slot, flags;
+    l3_pgentry_t *pl3e;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+
+    slot = l4_table_offset(addr);
+    ASSERT(slot >= 261 && slot <= 271);
+    pl3e = xpti_l3_shadows[slot - 261];
+    if ( !pl3e )
+        return;
+
+    slot = l3_table_offset(addr);
+    flags = l3e_get_flags(pl3e[slot]);
+    if ( !(flags & _PAGE_PRESENT) )
+        return;
+
+    pl2e = l3e_to_l2e(pl3e[slot]);
+    slot = l2_table_offset(addr);
+    flags = l2e_get_flags(pl2e[slot]);
+    if ( !(flags & _PAGE_PRESENT) )
+        return;
+
+    pl1e = l2e_to_l1e(pl2e[slot]);
+    slot = l1_table_offset(addr);
+    pl1e[slot] = l1e_empty();
+}
+
 int xpti_domain_init(struct domain *d)
 {
     bool xpti = false;
@@ -649,7 +805,9 @@ int xpti_domain_init(struct domain *d)
     struct vcpu *v;
     struct xpti_domain *xd;
     void *virt;
+    unsigned long addr;
     unsigned int i, new;
+    l3_pgentry_t *l3tab, *l3shadow;
 
     if ( !is_pv_domain(d) || is_pv_32bit_domain(d) )
         return 0;
@@ -683,6 +841,27 @@ int xpti_domain_init(struct domain *d)
     xd->lru_last = L4_INVALID;
     xd->free_first = L4_INVALID;
 
+    if ( !xpti_l3_shadow )
+    {
+        xpti_l3_shadow = true;
+
+        for_each_online_cpu ( i )
+            if ( xpti_add_mapping((unsigned long)idt_tables[i]) )
+                goto done;
+
+        for ( addr = round_pgdown((unsigned long)&xpti_map_start);
+              addr <= round_pgdown((unsigned long)&xpti_map_end - 1);
+              addr += PAGE_SIZE )
+            if ( xpti_add_mapping(addr) )
+                goto done;
+
+        for ( addr = round_pgdown((unsigned long)&xpti_map_start_compat);
+              addr <= round_pgdown((unsigned long)&xpti_map_end_compat - 1);
+              addr += PAGE_SIZE )
+            if ( xpti_add_mapping(addr) )
+                goto done;
+    }
+
     spin_lock_init(&xd->lock);
     tasklet_init(&xd->tasklet, xpti_tasklet, (unsigned long)xd);
 
@@ -725,6 +904,16 @@ int xpti_domain_init(struct domain *d)
             goto done;
     }
 
+    xd->l3_shadow = alloc_domheap_page(d, MEMF_no_owner);
+    if ( !xd->l3_shadow )
+        goto done;
+    l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+    l3shadow = __map_domain_page(xd->l3_shadow);
+    clear_page(l3shadow);
+    l3shadow[0] = l3tab[0];          /* GDT/LDT shadow mapping. */
+    l3shadow[3] = l3tab[3];          /* XPTI mappings. */
+    unmap_domain_page(l3shadow);
+    unmap_domain_page(l3tab);
     ret = 0;
 
     printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n",
@@ -801,3 +990,39 @@ static int __init xpti_key_init(void)
     return 0;
 }
 __initcall(xpti_key_init);
+
+static int xpti_cpu_callback(struct notifier_block *nfb, unsigned long action,
+                             void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+    int rc = 0;
+
+    if ( !xpti_l3_shadow )
+        return NOTIFY_DONE;
+
+    switch ( action )
+    {
+    case CPU_DOWN_FAILED:
+    case CPU_ONLINE:
+        rc = xpti_add_mapping((unsigned long)idt_tables[cpu]);
+        break;
+    case CPU_DOWN_PREPARE:
+        xpti_rm_mapping((unsigned long)idt_tables[cpu]);
+        break;
+    default:
+        break;
+    }
+
+    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
+}
+
+static struct notifier_block xpti_cpu_nfb = {
+    .notifier_call = xpti_cpu_callback
+};
+
+static int __init xpti_presmp_init(void)
+{
+    register_cpu_notifier(&xpti_cpu_nfb);
+    return 0;
+}
+presmp_initcall(xpti_presmp_init);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 93b228dced..00cc7cd9d7 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -102,7 +102,7 @@ DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table);
 DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
 
 /* Master table, used by CPU0. */
-idt_entry_t idt_table[IDT_ENTRIES];
+idt_entry_t idt_table[IDT_ENTRIES] __aligned(PAGE_SIZE);
 
 /* Pointer to the IDT of every CPU. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 206bc9a05a..575a3e5d8e 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,6 +13,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+ENTRY(xpti_map_start_compat)
+
 ENTRY(entry_int82)
         ASM_CLAC
         pushq $0
@@ -367,3 +369,5 @@ compat_crash_page_fault:
         jmp   .Lft14
 .previous
         _ASM_EXTABLE(.Lft14, .Lfx14)
+
+ENTRY(xpti_map_end_compat)
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 909f6eea66..d1cb355044 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,6 +14,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+ENTRY(xpti_map_start)
+
 /* %rbx: struct vcpu, %r12: user_regs */
 ENTRY(switch_to_kernel)
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -735,6 +737,8 @@ ENTRY(enable_nmis)
 GLOBAL(trap_nop)
         iretq
 
+ENTRY(xpti_map_end)
+
 /* Table of automatically generated entry points.  One per vector. */
         .section .init.rodata, "a", @progbits
 GLOBAL(autogen_entrypoints)
diff --git a/xen/include/asm-x86/pv/mm.h b/xen/include/asm-x86/pv/mm.h
index 8a90af1084..36e1856b8d 100644
--- a/xen/include/asm-x86/pv/mm.h
+++ b/xen/include/asm-x86/pv/mm.h
@@ -23,6 +23,11 @@
 
 #ifdef CONFIG_PV
 
+extern void *xpti_map_start;
+extern void *xpti_map_end;
+extern void *xpti_map_start_compat;
+extern void *xpti_map_end_compat;
+
 int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs);
 
 long pv_set_gdt(struct vcpu *v, unsigned long *frames, unsigned int entries);
-- 
2.13.6

