[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 04/10] EPT: Make ept data structure or operations neutral



From: Zhang Xiantao <xiantao.zhang@xxxxxxxxx>

Share the current EPT logic with nested EPT case, so
make the related data structure or operations netural
to comment EPT and nested EPT.

Signed-off-by: Zhang Xiantao <xiantao.zhang@xxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vmcs.c        |    9 +++-
 xen/arch/x86/hvm/vmx/vmx.c         |   53 ++-----------------
 xen/arch/x86/mm/p2m-ept.c          |  104 ++++++++++++++++++++++++++++--------
 xen/arch/x86/mm/p2m.c              |   23 ++++++---
 xen/include/asm-x86/hvm/vmx/vmcs.h |   23 ++++----
 xen/include/asm-x86/hvm/vmx/vmx.h  |   10 +++-
 xen/include/asm-x86/p2m.h          |    4 ++
 7 files changed, 133 insertions(+), 93 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 9adc7a4..379b75c 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -941,8 +941,13 @@ static int construct_vmcs(struct vcpu *v)
         __vmwrite(TPR_THRESHOLD, 0);
     }
 
-    if ( paging_mode_hap(d) )
-        __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp);
+    if ( paging_mode_hap(d) ) {
+        struct p2m_domain *p2m = p2m_get_hostp2m(d);
+        struct ept_data *ept = &p2m->ept;
+
+        ept->asr  = pagetable_get_pfn(p2m_get_pagetable(p2m));
+        __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+    }
 
     if ( cpu_has_vmx_pat && paging_mode_hap(d) )
     {
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 4abfa90..d74aae0 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -74,38 +74,19 @@ static void vmx_fpu_dirty_intercept(void);
 static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
 static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
 static void vmx_invlpg_intercept(unsigned long vaddr);
-static void __ept_sync_domain(void *info);
 
 static int vmx_domain_initialise(struct domain *d)
 {
     int rc;
 
-    /* Set the memory type used when accessing EPT paging structures. */
-    d->arch.hvm_domain.vmx.ept_control.ept_mt = EPT_DEFAULT_MT;
-
-    /* set EPT page-walk length, now it's actual walk length - 1, i.e. 3 */
-    d->arch.hvm_domain.vmx.ept_control.ept_wl = 3;
-
-    d->arch.hvm_domain.vmx.ept_control.asr  =
-        pagetable_get_pfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
-
-    if ( !zalloc_cpumask_var(&d->arch.hvm_domain.vmx.ept_synced) )
-        return -ENOMEM;
-
     if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
-    {
-        free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced);
         return rc;
-    }
 
     return 0;
 }
 
 static void vmx_domain_destroy(struct domain *d)
 {
-    if ( paging_mode_hap(d) )
-        on_each_cpu(__ept_sync_domain, d, 1);
-    free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced);
     vmx_free_vlapic_mapping(d);
 }
 
@@ -641,6 +622,7 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
 {
     struct domain *d = v->domain;
     unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
+    struct ept_data *ept_data = &p2m_get_hostp2m(d)->ept;
 
     /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
     if ( old_cr4 != new_cr4 )
@@ -650,10 +632,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
     {
         unsigned int cpu = smp_processor_id();
         /* Test-and-test-and-set this CPU in the EPT-is-synced mask. */
-        if ( !cpumask_test_cpu(cpu, d->arch.hvm_domain.vmx.ept_synced) &&
+        if ( !cpumask_test_cpu(cpu, ept_get_synced_mask(ept_data)) &&
              !cpumask_test_and_set_cpu(cpu,
-                                       d->arch.hvm_domain.vmx.ept_synced) )
-            __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0);
+                                       ept_get_synced_mask(ept_data)) )
+            __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
     }
 
     vmx_restore_guest_msrs(v);
@@ -1216,33 +1198,6 @@ static void vmx_update_guest_efer(struct vcpu *v)
                    (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
 }
 
-static void __ept_sync_domain(void *info)
-{
-    struct domain *d = info;
-    __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0);
-}
-
-void ept_sync_domain(struct domain *d)
-{
-    /* Only if using EPT and this domain has some VCPUs to dirty. */
-    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
-        return;
-
-    ASSERT(local_irq_is_enabled());
-
-    /*
-     * Flush active cpus synchronously. Flush others the next time this domain
-     * is scheduled onto them. We accept the race of other CPUs adding to
-     * the ept_synced mask before on_selected_cpus() reads it, resulting in
-     * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack.
-     */
-    cpumask_and(d->arch.hvm_domain.vmx.ept_synced,
-                d->domain_dirty_cpumask, &cpu_online_map);
-
-    on_selected_cpus(d->arch.hvm_domain.vmx.ept_synced,
-                     __ept_sync_domain, d, 1);
-}
-
 void nvmx_enqueue_n2_exceptions(struct vcpu *v, 
             unsigned long intr_fields, int error_code)
 {
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index c964f54..e33f415 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -291,9 +291,11 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, 
mfn_t mfn,
     int need_modify_vtd_table = 1;
     int vtd_pte_present = 0;
     int needs_sync = 1;
-    struct domain *d = p2m->domain;
     ept_entry_t old_entry = { .epte = 0 };
+    struct ept_data *ept = &p2m->ept;
+    struct domain *d = p2m->domain;
 
+    ASSERT(ept);
     /*
      * the caller must make sure:
      * 1. passing valid gfn and mfn at order boundary.
@@ -301,17 +303,17 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, 
mfn_t mfn,
      * 3. passing a valid order.
      */
     if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
-         ((u64)gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) ||
+         ((u64)gfn >> ((ept_get_wl(ept) + 1) * EPT_TABLE_ORDER)) ||
          (order % EPT_TABLE_ORDER) )
         return 0;
 
-    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
-           (target == 1 && hvm_hap_has_2mb(d)) ||
+    ASSERT((target == 2 && hvm_hap_has_1gb()) ||
+           (target == 1 && hvm_hap_has_2mb()) ||
            (target == 0));
 
-    table = map_domain_page(ept_get_asr(d));
+    table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
 
-    for ( i = ept_get_wl(d); i > target; i-- )
+    for ( i = ept_get_wl(ept); i > target; i-- )
     {
         ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i);
         if ( !ret )
@@ -439,9 +441,11 @@ out:
     unmap_domain_page(table);
 
     if ( needs_sync )
-        ept_sync_domain(p2m->domain);
+        ept_sync_domain(p2m);
 
-    if ( rv && iommu_enabled && need_iommu(p2m->domain) && 
need_modify_vtd_table )
+    /* For non-nested p2m, may need to change VT-d page table.*/
+    if ( rv && !p2m_is_nestedp2m(p2m) && iommu_enabled && 
need_iommu(p2m->domain) &&
+                need_modify_vtd_table )
     {
         if ( iommu_hap_pt_share )
             iommu_pte_flush(d, gfn, (u64*)ept_entry, order, vtd_pte_present);
@@ -488,14 +492,14 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
                            unsigned long gfn, p2m_type_t *t, p2m_access_t* a,
                            p2m_query_t q, unsigned int *page_order)
 {
-    struct domain *d = p2m->domain;
-    ept_entry_t *table = map_domain_page(ept_get_asr(d));
+    ept_entry_t *table = 
map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
     unsigned long gfn_remainder = gfn;
     ept_entry_t *ept_entry;
     u32 index;
     int i;
     int ret = 0;
     mfn_t mfn = _mfn(INVALID_MFN);
+    struct ept_data *ept = &p2m->ept;
 
     *t = p2m_mmio_dm;
     *a = p2m_access_n;
@@ -506,7 +510,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
 
     /* Should check if gfn obeys GAW here. */
 
-    for ( i = ept_get_wl(d); i > 0; i-- )
+    for ( i = ept_get_wl(ept); i > 0; i-- )
     {
     retry:
         ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
@@ -588,19 +592,20 @@ out:
 static ept_entry_t ept_get_entry_content(struct p2m_domain *p2m,
     unsigned long gfn, int *level)
 {
-    ept_entry_t *table = map_domain_page(ept_get_asr(p2m->domain));
+    ept_entry_t *table =  
map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
     unsigned long gfn_remainder = gfn;
     ept_entry_t *ept_entry;
     ept_entry_t content = { .epte = 0 };
     u32 index;
     int i;
     int ret=0;
+    struct ept_data *ept = &p2m->ept;
 
     /* This pfn is higher than the highest the p2m map currently holds */
     if ( gfn > p2m->max_mapped_pfn )
         goto out;
 
-    for ( i = ept_get_wl(p2m->domain); i > 0; i-- )
+    for ( i = ept_get_wl(ept); i > 0; i-- )
     {
         ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
         if ( !ret || ret == GUEST_TABLE_POD_PAGE )
@@ -622,7 +627,8 @@ static ept_entry_t ept_get_entry_content(struct p2m_domain 
*p2m,
 void ept_walk_table(struct domain *d, unsigned long gfn)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    ept_entry_t *table = map_domain_page(ept_get_asr(d));
+    struct ept_data *ept = &p2m->ept;
+    ept_entry_t *table =  
map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
     unsigned long gfn_remainder = gfn;
 
     int i;
@@ -638,7 +644,7 @@ void ept_walk_table(struct domain *d, unsigned long gfn)
         goto out;
     }
 
-    for ( i = ept_get_wl(d); i >= 0; i-- )
+    for ( i = ept_get_wl(ept); i >= 0; i-- )
     {
         ept_entry_t *ept_entry, *next;
         u32 index;
@@ -778,24 +784,76 @@ static void ept_change_entry_type_page(mfn_t 
ept_page_mfn, int ept_page_level,
 static void ept_change_entry_type_global(struct p2m_domain *p2m,
                                          p2m_type_t ot, p2m_type_t nt)
 {
-    struct domain *d = p2m->domain;
-    if ( ept_get_asr(d) == 0 )
+    struct ept_data *ept = &p2m->ept;
+    if ( ept_get_asr(ept) == 0 )
         return;
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
     BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct));
 
-    ept_change_entry_type_page(_mfn(ept_get_asr(d)), ept_get_wl(d), ot, nt);
+    ept_change_entry_type_page(_mfn(ept_get_asr(ept)),
+            ept_get_wl(ept), ot, nt);
+
+    ept_sync_domain(p2m);
+}
+
+static void __ept_sync_domain(void *info)
+{
+    struct ept_data *ept = &((struct p2m_domain *)info)->ept;
 
-    ept_sync_domain(d);
+    __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
 }
 
-void ept_p2m_init(struct p2m_domain *p2m)
+void ept_sync_domain(struct p2m_domain *p2m)
 {
+    struct domain *d = p2m->domain;
+    struct ept_data *ept = &p2m->ept;
+    /* Only if using EPT and this domain has some VCPUs to dirty. */
+    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
+        return;
+
+    ASSERT(local_irq_is_enabled());
+
+    /*
+     * Flush active cpus synchronously. Flush others the next time this domain
+     * is scheduled onto them. We accept the race of other CPUs adding to
+     * the ept_synced mask before on_selected_cpus() reads it, resulting in
+     * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack.
+     */
+    cpumask_and(ept_get_synced_mask(ept),
+                d->domain_dirty_cpumask, &cpu_online_map);
+
+    on_selected_cpus(ept_get_synced_mask(ept),
+                     __ept_sync_domain, p2m, 1);
+}
+
+int ept_p2m_init(struct p2m_domain *p2m)
+{
+    struct ept_data *ept = &p2m->ept;
+
     p2m->set_entry = ept_set_entry;
     p2m->get_entry = ept_get_entry;
     p2m->change_entry_type_global = ept_change_entry_type_global;
     p2m->audit_p2m = NULL;
+
+    /* Set the memory type used when accessing EPT paging structures. */
+    ept->ept_mt = EPT_DEFAULT_MT;
+
+    /* set EPT page-walk length, now it's actual walk length - 1, i.e. 3 */
+    ept->ept_wl = 3;
+
+    if ( !zalloc_cpumask_var(&ept->synced_mask) )
+        return -ENOMEM;
+
+    on_each_cpu(__ept_sync_domain, p2m, 1);
+
+    return 0;
+}
+
+void ept_p2m_uninit(struct p2m_domain *p2m)
+{
+    struct ept_data *ept = &p2m->ept;
+    free_cpumask_var(ept->synced_mask);
 }
 
 static void ept_dump_p2m_table(unsigned char key)
@@ -811,6 +869,7 @@ static void ept_dump_p2m_table(unsigned char key)
     unsigned long gfn, gfn_remainder;
     unsigned long record_counter = 0;
     struct p2m_domain *p2m;
+    struct ept_data *ept;
 
     for_each_domain(d)
     {
@@ -818,15 +877,16 @@ static void ept_dump_p2m_table(unsigned char key)
             continue;
 
         p2m = p2m_get_hostp2m(d);
+        ept = &p2m->ept;
         printk("\ndomain%d EPT p2m table: \n", d->domain_id);
 
         for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += (1 << order) )
         {
             gfn_remainder = gfn;
             mfn = _mfn(INVALID_MFN);
-            table = map_domain_page(ept_get_asr(d));
+            table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
 
-            for ( i = ept_get_wl(d); i > 0; i-- )
+            for ( i = ept_get_wl(ept); i > 0; i-- )
             {
                 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 6a4bdd9..1f59410 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -57,8 +57,10 @@ boolean_param("hap_2mb", opt_hap_2mb);
 
 
 /* Init the datastructures for later use by the p2m code */
-static void p2m_initialise(struct domain *d, struct p2m_domain *p2m)
+static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
 {
+    int ret = 0;
+
     mm_rwlock_init(&p2m->lock);
     mm_lock_init(&p2m->pod.lock);
     INIT_LIST_HEAD(&p2m->np2m_list);
@@ -72,11 +74,11 @@ static void p2m_initialise(struct domain *d, struct 
p2m_domain *p2m)
     p2m->np2m_base = P2M_BASE_EADDR;
 
     if ( hap_enabled(d) && cpu_has_vmx )
-        ept_p2m_init(p2m);
+        ret = ept_p2m_init(p2m);
     else
         p2m_pt_init(p2m);
 
-    return;
+    return ret;
 }
 
 static int
@@ -119,7 +121,7 @@ int p2m_init(struct domain *d)
      * since nestedhvm_enabled(d) returns false here.
      * (p2m_init runs too early for HVM_PARAM_* options) */
     rc = p2m_init_nestedp2m(d);
-    if ( rc ) 
+    if ( rc )
         p2m_final_teardown(d);
     return rc;
 }
@@ -424,12 +426,16 @@ void p2m_teardown(struct p2m_domain *p2m)
 static void p2m_teardown_nestedp2m(struct domain *d)
 {
     uint8_t i;
+    struct p2m_domain *p2m;
 
     for (i = 0; i < MAX_NESTEDP2M; i++) {
         if ( !d->arch.nested_p2m[i] )
             continue;
-        free_cpumask_var(d->arch.nested_p2m[i]->dirty_cpumask);
-        xfree(d->arch.nested_p2m[i]);
+        p2m = d->arch.nested_p2m[i];
+        free_cpumask_var(p2m->dirty_cpumask);
+        if ( hap_enabled(d) && cpu_has_vmx )
+            ept_p2m_uninit(p2m);
+        xfree(p2m);
         d->arch.nested_p2m[i] = NULL;
     }
 }
@@ -437,9 +443,12 @@ static void p2m_teardown_nestedp2m(struct domain *d)
 void p2m_final_teardown(struct domain *d)
 {
     /* Iterate over all p2m tables per domain */
-    if ( d->arch.p2m )
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    if ( p2m )
     {
         free_cpumask_var(d->arch.p2m->dirty_cpumask);
+        if ( hap_enabled(d) && cpu_has_vmx )
+            ept_p2m_uninit(p2m);
         xfree(d->arch.p2m);
         d->arch.p2m = NULL;
     }
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 9a728b6..2d38b43 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -56,26 +56,27 @@ struct vmx_msr_state {
 
 #define EPT_DEFAULT_MT      MTRR_TYPE_WRBACK
 
-struct vmx_domain {
-    unsigned long apic_access_mfn;
+struct ept_data{
     union {
-        struct {
+    struct {
             u64 ept_mt :3,
                 ept_wl :3,
                 rsvd   :6,
                 asr    :52;
         };
         u64 eptp;
-    } ept_control;
-    cpumask_var_t ept_synced;
+    };
+    cpumask_var_t synced_mask;
+};
+
+struct vmx_domain {
+    unsigned long apic_access_mfn;
 };
 
-#define ept_get_wl(d)   \
-    ((d)->arch.hvm_domain.vmx.ept_control.ept_wl)
-#define ept_get_asr(d)  \
-    ((d)->arch.hvm_domain.vmx.ept_control.asr)
-#define ept_get_eptp(d) \
-    ((d)->arch.hvm_domain.vmx.ept_control.eptp)
+#define ept_get_wl(ept)   ((ept)->ept_wl)
+#define ept_get_asr(ept)  ((ept)->asr)
+#define ept_get_eptp(ept) ((ept)->eptp)
+#define ept_get_synced_mask(ept) ((ept)->synced_mask)
 
 struct arch_vmx_struct {
     /* Virtual address of VMCS. */
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index feaaa80..2600694 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -360,7 +360,7 @@ static inline void ept_sync_all(void)
     __invept(INVEPT_ALL_CONTEXT, 0, 0);
 }
 
-void ept_sync_domain(struct domain *d);
+void ept_sync_domain(struct p2m_domain *p2m);
 
 static inline void vpid_sync_vcpu_gva(struct vcpu *v, unsigned long gva)
 {
@@ -422,12 +422,18 @@ void vmx_get_segment_register(struct vcpu *, enum 
x86_segment,
 void vmx_inject_extint(int trap);
 void vmx_inject_nmi(void);
 
-void ept_p2m_init(struct p2m_domain *p2m);
+int ept_p2m_init(struct p2m_domain *p2m);
+void ept_p2m_uninit(struct p2m_domain *p2m);
+
 void ept_walk_table(struct domain *d, unsigned long gfn);
 void setup_ept_dump(void);
 
 void update_guest_eip(void);
 
+int alloc_p2m_hap_data(struct p2m_domain *p2m);
+void free_p2m_hap_data(struct p2m_domain *p2m);
+void p2m_init_hap_data(struct p2m_domain *p2m);
+
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION         0
 #define EPT_READ_VIOLATION          (1UL<<_EPT_READ_VIOLATION)
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index ce26594..b6a84b6 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -277,6 +277,10 @@ struct p2m_domain {
         mm_lock_t        lock;         /* Locking of private pod structs,   *
                                         * not relying on the p2m lock.      */
     } pod;
+    union {
+        struct ept_data ept;
+        /* NPT-equivalent structure could be added here. */
+    };
 };
 
 /* get host p2m table */
-- 
1.7.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.