ept: Remove lock in ept_get_entry, replace with access-once semantics. This mirrors the RVI/shadow situation, where p2m read access is lockless because it's done in the hardware (linear map of the p2m table). This fixes the original bug (call it bug A) without introducing bug B (a deadlock). Bug A was caused by a race when updating p2m entries: between testing if it's valid, and testing if it's populate-on-demand, it may have been changed from populate-on-demand to valid. My original patch simply introduced a lock into ept_get_entry, but that caused bug B, caused by circular locking order: p2m_change_type [grabs p2m lock] -> set_p2m_entry -> ept_set_entry -> ept_set_middle_level -> p2m_alloc [grabs hap lock] write cr4 -> hap_update_paging_modes [grabes hap lock] -> hap_update_cr3 -> gfn_to_mfn -> ept_get_entry -> [grabs p2m lock] Signed-off-by: George Dunlap diff -r 49d2aa5cee4e xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Thu Dec 09 16:17:33 2010 +0000 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Tue Dec 14 10:36:43 2010 +0000 @@ -211,7 +211,7 @@ int next_level) { unsigned long mfn; - ept_entry_t *ept_entry; + ept_entry_t *ept_entry, e; u32 shift, index; shift = next_level * EPT_TABLE_ORDER; @@ -223,9 +223,14 @@ ept_entry = (*table) + index; - if ( !is_epte_present(ept_entry) ) + /* ept_next_level() is called (sometimes) without a lock. Read + * the entry once, and act on the "cached" entry after that to + * avoid races. */ + e=*ept_entry; + + if ( !is_epte_present(&e) ) { - if ( ept_entry->avail1 == p2m_populate_on_demand ) + if ( e.avail1 == p2m_populate_on_demand ) return GUEST_TABLE_POD_PAGE; if ( read_only ) @@ -233,13 +238,15 @@ if ( !ept_set_middle_entry(p2m, ept_entry) ) return GUEST_TABLE_MAP_FAILED; + else + e=*ept_entry; /* Refresh */ } /* The only time sp would be set here is if we had hit a superpage */ - if ( is_epte_superpage(ept_entry) ) + if ( is_epte_superpage(&e) ) return GUEST_TABLE_SUPER_PAGE; - mfn = ept_entry->mfn; + mfn = e.mfn; unmap_domain_page(*table); *table = map_domain_page(mfn); *gfn_remainder &= (1UL << shift) - 1; @@ -318,19 +325,24 @@ if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_in_start) ) { - ept_entry->emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, + ept_entry_t new_entry; + + /* Construct the new entry, and then write it once */ + new_entry.emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, direct_mmio); - ept_entry->ipat = ipat; - ept_entry->sp = order ? 1 : 0; - ept_entry->avail1 = p2mt; - ept_entry->avail2 = 0; + new_entry.ipat = ipat; + new_entry.sp = order ? 1 : 0; + new_entry.avail1 = p2mt; + new_entry.avail2 = 0; - if ( ept_entry->mfn == mfn_x(mfn) ) + if ( new_entry.mfn == mfn_x(mfn) ) need_modify_vtd_table = 0; else - ept_entry->mfn = mfn_x(mfn); + new_entry.mfn = mfn_x(mfn); - ept_p2m_type_to_flags(ept_entry, p2mt); + ept_p2m_type_to_flags(&new_entry, p2mt); + + ept_entry->epte = new_entry.epte; } else ept_entry->epte = 0; @@ -339,6 +351,7 @@ { /* We need to split the original page. */ ept_entry_t split_ept_entry; + ept_entry_t new_entry; ASSERT(is_epte_superpage(ept_entry)); @@ -365,18 +378,20 @@ ept_entry = table + index; - ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); - ept_entry->ipat = ipat; - ept_entry->sp = i ? 1 : 0; - ept_entry->avail1 = p2mt; - ept_entry->avail2 = 0; + new_entry.emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); + new_entry.ipat = ipat; + new_entry.sp = i ? 1 : 0; + new_entry.avail1 = p2mt; + new_entry.avail2 = 0; - if ( ept_entry->mfn == mfn_x(mfn) ) + if ( new_entry.mfn == mfn_x(mfn) ) need_modify_vtd_table = 0; else /* the caller should take care of the previous page */ - ept_entry->mfn = mfn_x(mfn); + new_entry.mfn = mfn_x(mfn); - ept_p2m_type_to_flags(ept_entry, p2mt); + ept_p2m_type_to_flags(&new_entry, p2mt); + + ept_entry->epte = new_entry.epte; } /* Track the highest gfn for which we have ever had a valid mapping */ @@ -437,10 +452,6 @@ int i; int ret = 0; mfn_t mfn = _mfn(INVALID_MFN); - int do_locking = !p2m_locked_by_me(p2m); - - if ( do_locking ) - p2m_lock(p2m); *t = p2m_mmio_dm; @@ -517,8 +528,6 @@ } out: - if ( do_locking ) - p2m_unlock(p2m); unmap_domain_page(table); return mfn; }