# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1155121285 -32400
# Node ID 86d4ea768120579994cf79da9e5ece9c688f52f3
# Parent 44e6aea4077ead8633df36a7fa1b17f029307ad6
add tlb insert tracking so that the vTLB can be flushed over a finer-grained virtual address range when a page is unmapped from a domain. This functionality is enabled with the compile-time option xen_ia64_tlb_track=y. This patch focuses on grant table mappings. When a grant-mapped page is unmapped, a full vTLB flush would normally be necessary; by tracking tlb inserts on grant-mapped pages, the full flush can often be avoided. In particular, vbd does only DMA, so dom0 never inserts a tlb entry for the grant-mapped page, and in that case no vTLB flush is needed at all.
PATCHNAME: tlb_track
Signed-off-by: Isaku Yamahata
diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/Rules.mk Wed Aug 09 20:01:25 2006 +0900 @@ -39,6 +39,9 @@ ifeq ($(xen_ia64_pervcpu_vhpt),y) ifeq ($(xen_ia64_pervcpu_vhpt),y) CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT endif +ifeq ($(xen_ia64_tlb_track),y) +CFLAGS += -DCONFIG_XEN_IA64_TLB_TRACK +endif ifeq ($(no_warns),y) CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized endif diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/Makefile --- a/xen/arch/ia64/xen/Makefile Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/xen/Makefile Wed Aug 09 20:01:25 2006 +0900 @@ -27,3 +27,4 @@ obj-y += privop_stat.o obj-y += privop_stat.o obj-$(crash_debug) += gdbstub.o +obj-$(xen_ia64_tlb_track) += tlb_track.o diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/xen/domain.c Wed Aug 09 20:01:25 2006 +0900 @@ -48,6 +48,9 @@ #include #include #include +#ifdef CONFIG_XEN_IA64_TLB_TRACK +#include +#endif unsigned long dom0_size = 512*1024*1024; unsigned long dom0_align = 64*1024*1024; @@ -372,6 +375,10 @@ int arch_domain_create(struct domain *d) DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n", __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt); #endif +#ifdef CONFIG_XEN_IA64_TLB_TRACK + if (tlb_track_create(d) < 0) + goto fail_nomem1; +#endif d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT)); if (d->shared_info == NULL) goto fail_nomem; @@ -400,6 +407,8 @@ int arch_domain_create(struct domain *d) return 0; fail_nomem: + tlb_track_destroy(d); +fail_nomem1: if (d->arch.mm.pgd != NULL) pgd_free(d->arch.mm.pgd); if (d->shared_info != NULL) @@ -414,6 +423,10 @@ void arch_domain_destroy(struct domain * free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT)); if (d->arch.shadow_bitmap != NULL) xfree(d->arch.shadow_bitmap); + +#ifdef CONFIG_XEN_IA64_TLB_TRACK + tlb_track_destroy(d); +#endif /* Clear vTLB for the next domain.
*/ domain_flush_tlb_vhpt(d); diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/faults.c --- a/xen/arch/ia64/xen/faults.c Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/xen/faults.c Wed Aug 09 20:01:25 2006 +0900 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -200,8 +201,15 @@ void ia64_do_page_fault (unsigned long a fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { struct p2m_entry entry; - pteval = translate_domain_pte(pteval, address, itir, &logps, &entry); - vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps); + unsigned long m_pteval; + m_pteval = translate_domain_pte(pteval, address, itir, &logps, &entry); +#ifndef CONFIG_XEN_IA64_TLB_TRACK + vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4, + address, m_pteval, pteval, logps); +#else + vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4, + address, m_pteval, pteval, logps, &entry); +#endif if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) || p2m_entry_retry(&entry)) { /* dtlb has been purged in-between. This dtlb was diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/xen/mm.c Wed Aug 09 20:01:25 2006 +0900 @@ -171,10 +171,15 @@ #include #include #include +#include #include +#ifdef CONFIG_XEN_IA64_TLB_TRACK +#include +#endif static void domain_page_flush(struct domain* d, unsigned long mpaddr, - unsigned long old_mfn, unsigned long new_mfn); + volatile pte_t* ptep, pte_t old_pte); + extern unsigned long ia64_iobase; @@ -772,12 +777,15 @@ flags_to_prot (unsigned long flags) res |= flags & ASSIGN_readonly ? _PAGE_AR_R: _PAGE_AR_RWX; res |= flags & ASSIGN_nocache ? _PAGE_MA_UC: _PAGE_MA_WB; +#ifdef CONFIG_XEN_IA64_TLB_TRACK + res |= flags & ASSIGN_tlb_track ? _PAGE_TLB_TRACKING: 0; +#endif return res; } /* map a physical address to the specified metaphysical addr */ -// flags: currently only ASSIGN_readonly, ASSIGN_nocache +// flags: currently only ASSIGN_readonly, ASSIGN_nocache, ASSIGN_tlb_tack // This is called by assign_domain_mmio_page(). // So accessing to pte is racy. void @@ -991,7 +999,7 @@ assign_domain_mach_page(struct domain *d // caller must call set_gpfn_from_mfn() before call if necessary. // because set_gpfn_from_mfn() result must be visible before pte xchg // caller must use memory barrier. NOTE: xchg has acquire semantics. 
-// flags: currently only ASSIGN_readonly +// flags: ASSIGN_xxx static void assign_domain_page_replace(struct domain *d, unsigned long mpaddr, unsigned long mfn, unsigned long flags) @@ -1025,7 +1033,7 @@ assign_domain_page_replace(struct domain set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY); } - domain_page_flush(d, mpaddr, old_mfn, mfn); + domain_page_flush(d, mpaddr, pte, old_pte); try_to_clear_PGC_allocate(d, old_page); put_page(old_page); @@ -1045,7 +1053,7 @@ assign_domain_page_cmpxchg_rel(struct do struct mm_struct *mm = &d->arch.mm; volatile pte_t* pte; unsigned long old_mfn; - unsigned long old_arflags; + unsigned long old_prot; pte_t old_pte; unsigned long new_mfn; unsigned long new_prot; @@ -1055,12 +1063,12 @@ assign_domain_page_cmpxchg_rel(struct do pte = lookup_alloc_domain_pte(d, mpaddr); again: - old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK; + old_prot = pte_val(*pte) & ~_PAGE_PPN_MASK; old_mfn = page_to_mfn(old_page); - old_pte = pfn_pte(old_mfn, __pgprot(old_arflags)); + old_pte = pfn_pte(old_mfn, __pgprot(old_prot)); if (!pte_present(old_pte)) { - DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n", - __func__, pte_val(old_pte), old_arflags, old_mfn); + DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx\n", + __func__, pte_val(old_pte), old_prot, old_mfn); return -EINVAL; } @@ -1075,10 +1083,10 @@ assign_domain_page_cmpxchg_rel(struct do goto again; } - DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx " + DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx " "ret_pte 0x%lx ret_mfn 0x%lx\n", __func__, - pte_val(old_pte), old_arflags, old_mfn, + pte_val(old_pte), old_prot, old_mfn, pte_val(ret_pte), pte_pfn(ret_pte)); return -EINVAL; } @@ -1090,7 +1098,7 @@ assign_domain_page_cmpxchg_rel(struct do set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY); - domain_page_flush(d, mpaddr, old_mfn, new_mfn); + domain_page_flush(d, mpaddr, pte, old_pte); put_page(old_page); perfc_incrc(assign_domain_pge_cmpxchg_rel); return 0; @@ -1159,7 +1167,7 @@ zap_domain_page_one(struct domain *d, un set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); } - domain_page_flush(d, mpaddr, mfn, INVALID_MFN); + domain_page_flush(d, mpaddr, pte, old_pte); if (page_get_owner(page) != NULL) { try_to_clear_PGC_allocate(d, page); @@ -1254,8 +1262,12 @@ create_grant_host_mapping(unsigned long BUG_ON(ret == 0); BUG_ON(page_get_owner(mfn_to_page(mfn)) == d && get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); - assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)? - ASSIGN_readonly: ASSIGN_writable); + assign_domain_page_replace(d, gpaddr, mfn, +#ifdef CONFIG_XEN_IA64_TLB_TRACK + ASSIGN_tlb_track | +#endif + ((flags & GNTMAP_readonly) ? + ASSIGN_readonly: ASSIGN_writable)); perfc_incrc(create_grant_host_mapping); return GNTST_okay; } @@ -1310,7 +1322,7 @@ destroy_grant_host_mapping(unsigned long } BUG_ON(pte_pfn(old_pte) != mfn); - domain_page_flush(d, gpaddr, mfn, INVALID_MFN); + domain_page_flush(d, gpaddr, pte, old_pte); page = mfn_to_page(mfn); BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed. @@ -1482,12 +1494,43 @@ guest_physmap_remove_page(struct domain // flush finer range. 
static void domain_page_flush(struct domain* d, unsigned long mpaddr, - unsigned long old_mfn, unsigned long new_mfn) -{ + volatile pte_t* ptep, pte_t old_pte) +{ +#ifdef CONFIG_XEN_IA64_TLB_TRACK + struct tlb_track_entry* entry; +#endif + if (shadow_mode_enabled(d)) shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT); +#ifndef CONFIG_XEN_IA64_TLB_TRACK domain_flush_vtlb_all(); +#else + switch (tlb_track_search_and_remove(d->arch.tlb_track, + ptep, old_pte, &entry)) { + case TLB_TRACK_NOT_TRACKED: + //DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__); + domain_flush_vtlb_all(); + break; + case TLB_TRACK_NOT_FOUND: + // do nothing + //DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__); + break; + case TLB_TRACK_FOUND: + //DPRINTK("%s TLB_TRACK_FOUND\n", __func__); + domain_flush_vtlb_track_entry(d, entry); + tlb_track_free_entry(d->arch.tlb_track, entry); + break; + case TLB_TRACK_MANY: + DPRINTK("%s TLB_TRACK_MANY\n", __func__); + domain_flush_vtlb_all(); + break; + case TLB_TRACK_AGAIN: + DPRINTK("%s TLB_TRACK_AGAIN\n", __func__); + BUG(); + break; + } +#endif perfc_incrc(domain_page_flush); } diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/xen/vcpu.c Wed Aug 09 20:01:25 2006 +0900 @@ -22,6 +22,10 @@ #include #include #include +#include +#ifdef CONFIG_XEN_IA64_TLB_TRACK +#include +#endif /* FIXME: where these declarations should be there ? */ extern void getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs); @@ -1996,7 +2000,11 @@ IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 s VCPU translation cache access routines **************************************************************************/ +#ifndef CONFIG_XEN_IA64_TLB_TRACK void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps) +#else +void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps, struct p2m_entry* entry) +#endif { unsigned long psr; unsigned long ps = (vcpu->domain==dom0) ? 
logps : PAGE_SHIFT; @@ -2009,6 +2017,9 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 "smaller page size!\n"); BUG_ON(logps > PAGE_SHIFT); +#ifdef CONFIG_XEN_IA64_TLB_TRACK + vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry); +#endif psr = ia64_clear_ic(); ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings ia64_set_psr(psr); @@ -2026,7 +2037,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 // PAGE_SIZE mapping in the vhpt for now, else purging is complicated else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2); #endif - if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB + if (IorD & 0x4) // don't place in 1-entry TLB return; if (IorD & 0x1) { vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr); @@ -2051,7 +2062,11 @@ again: pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry); if (!pteval) return IA64_ILLOP_FAULT; if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0])); +#ifndef CONFIG_XEN_IA64_TLB_TRACK vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps); +#else + vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps,&entry); +#endif if (swap_rr0) set_metaphysical_rr0(); if (p2m_entry_retry(&entry)) { vcpu_flush_tlb_vhpt_range(ifa, logps); @@ -2074,7 +2089,11 @@ again: pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry); if (!pteval) return IA64_ILLOP_FAULT; if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0])); +#ifndef CONFIG_XEN_IA64_TLB_TRACK vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps); +#else + vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps,&entry); +#endif if (swap_rr0) set_metaphysical_rr0(); if (p2m_entry_retry(&entry)) { vcpu_flush_tlb_vhpt_range(ifa, logps); diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/arch/ia64/xen/vhpt.c Wed Aug 09 20:01:25 2006 +0900 @@ -39,11 +39,13 @@ void local_vhpt_flush(void) void local_vhpt_flush(void) { __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr)); + perfc_incrc(local_vhpt_flush); } static void vcpu_vhpt_flush(struct vcpu* v) { __vhpt_flush(vcpu_vhpt_maddr(v)); + perfc_incrc(vcpu_vhpt_flush); } static void vhpt_erase(unsigned long vhpt_maddr) @@ -312,6 +314,82 @@ void domain_flush_vtlb_range (struct dom perfc_incrc(domain_flush_vtlb_range); } +#ifdef CONFIG_XEN_IA64_TLB_TRACK +#include +#include +void +domain_flush_vtlb_track_entry(struct domain* d, + const struct tlb_track_entry* entry) +{ + unsigned long rr7_rid; + int swap_rr0 = 0; + unsigned long old_rid; + unsigned long vaddr = entry->vaddr; + struct vcpu* v; + int cpu; + int vcpu; + + BUG_ON((vaddr >> VRN_SHIFT) != VRN7); + // heuristic: + // dom0linux accesses grant mapped pages via the kernel + // straight mapped area and it doesn't change rr7 rid. + // So it is likey that rr7 == entry->rid so that + // we can avoid rid change. + // When blktap is supported, this heuristic should be revised. + vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid); + if (likely(rr7_rid == entry->rid)) { + perfc_incrc(tlb_track_use_rr7); + } else { + swap_rr0 = 1; + vaddr = (vaddr << 3) >> 3;// force vrn0 + perfc_incrc(tlb_track_swap_rr0); + } + + //tlb_track_entry_printf(entry); + if (swap_rr0) { + vcpu_get_rr(current, 0, &old_rid); + vcpu_set_rr(current, 0, entry->rid); + } + + for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) { + v = d->vcpu[vcpu]; + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) + continue; + + /* Purge TC entries. + FIXME: clear only if match. 
*/ + vcpu_purge_tr_entry(&PSCBX(v, dtlb)); + vcpu_purge_tr_entry(&PSCBX(v, itlb)); + } + smp_mb(); + + if (HAS_PERVCPU_VHPT(d)) { + for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) { + v = d->vcpu[vcpu]; + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) + continue; + + /* Invalidate VHPT entries. */ + vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE); + } + } else { + for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) { + /* Invalidate VHPT entries. */ + cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE); + } + } + // ptc.ga has release semantics. + + /* ptc.ga */ + ia64_global_tlb_purge(vaddr, vaddr + PAGE_SIZE, PAGE_SHIFT); + + if (swap_rr0) { + vcpu_set_rr(current, 0, old_rid); + } + perfc_incrc(domain_flush_vtlb_track_entry); +} +#endif + static void flush_tlb_vhpt_all (struct domain *d) { /* First VHPT. */ diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/include/asm-ia64/domain.h Wed Aug 09 20:01:25 2006 +0900 @@ -13,28 +13,10 @@ #include #include -struct p2m_entry { - volatile pte_t* pte; - pte_t used; -}; - -static inline void -p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used) -{ - entry->pte = pte; - entry->used = used; -} - -static inline int -p2m_entry_retry(struct p2m_entry* entry) -{ - //XXX see lookup_domain_pte(). - // NULL is set for invalid gpaddr for the time being. - if (entry->pte == NULL) - return 0; - - return (pte_val(*entry->pte) != pte_val(entry->used)); -} +struct p2m_entry; +#ifdef CONFIG_XEN_IA64_TLB_TRACK +struct tlb_track; +#endif extern void domain_relinquish_resources(struct domain *); struct vcpu; @@ -140,6 +122,10 @@ struct arch_domain { atomic64_t shadow_fault_count; struct last_vcpu last_vcpu[NR_CPUS]; + +#ifdef CONFIG_XEN_IA64_TLB_TRACK + struct tlb_track* tlb_track; +#endif }; #define INT_ENABLE_OFFSET(v) \ (sizeof(vcpu_info_t) * (v)->vcpu_id + \ diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/linux-xen/asm/pgtable.h --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Aug 09 20:01:25 2006 +0900 @@ -68,6 +68,25 @@ #ifdef XEN #define _PAGE_VIRT_D (__IA64_UL(1) << 53) /* Virtual dirty bit */ #define _PAGE_PROTNONE 0 + +#ifdef CONFIG_XEN_IA64_TLB_TRACK +#define _PAGE_TLB_TRACKING_BIT 54 +#define _PAGE_TLB_INSERTED_BIT 55 +#define _PAGE_TLB_INSERTED_MANY_BIT 56 + +#define _PAGE_TLB_TRACKING (1UL << _PAGE_TLB_TRACKING_BIT) +#define _PAGE_TLB_INSERTED (1UL << _PAGE_TLB_INSERTED_BIT) +#define _PAGE_TLB_INSERTED_MANY (1UL << _PAGE_TLB_INSERTED_MANY_BIT) +#define _PAGE_TLB_TRACK_MASK (_PAGE_TLB_TRACKING | _PAGE_TLB_INSERTED | _PAGE_TLB_INSERTED_MANY) + +#define pte_tlb_tracking(pte) \ + ((pte_val(pte) & _PAGE_TLB_TRACKING) != 0) +#define pte_tlb_inserted(pte) \ + ((pte_val(pte) & _PAGE_TLB_INSERTED) != 0) +#define pte_tlb_inserted_many(pte) \ + ((pte_val(pte) & _PAGE_TLB_INSERTED_MANY) != 0) +#endif // CONFIG_XEN_IA64_TLB_TRACK + #else #define _PAGE_PROTNONE (__IA64_UL(1) << 63) #endif diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/perfc_defn.h --- a/xen/include/asm-ia64/perfc_defn.h Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/include/asm-ia64/perfc_defn.h Wed Aug 09 20:01:25 2006 +0900 @@ -42,9 +42,12 @@ PERFCOUNTER_ARRAY(fast_reflect, "f PERFCOUNTER_ARRAY(fast_reflect, "fast reflection", 0x80) // vhpt.c +PERFCOUNTER_CPU(local_vhpt_flush, "local_vhpt_flush") +PERFCOUNTER_CPU(vcpu_vhpt_flush, "vcpu_vhpt_flush") PERFCOUNTER_CPU(vcpu_flush_vtlb_all, 
"vcpu_flush_vtlb_all") PERFCOUNTER_CPU(domain_flush_vtlb_all, "domain_flush_vtlb_all") PERFCOUNTER_CPU(vcpu_flush_tlb_vhpt_range, "vcpu_flush_tlb_vhpt_range") +PERFCOUNTER_CPU(domain_flush_vtlb_track_entry, "domain_flush_vtlb_track_entry") PERFCOUNTER_CPU(domain_flush_vtlb_range, "domain_flush_vtlb_range") // domain.c @@ -67,3 +70,30 @@ PERFCOUNTER_CPU(domain_page_flush, "dom // dom0vp PERFCOUNTER_CPU(dom0vp_phystomach, "dom0vp_phystomach") PERFCOUNTER_CPU(dom0vp_machtophys, "dom0vp_machtophys") + +#ifdef CONFIG_XEN_IA64_TLB_TRACK +// insert or dirty +PERFCOUNTER_CPU(tlb_track_iod, "tlb_track_iod") +PERFCOUNTER_CPU(tlb_track_iod_again, "tlb_track_iod_again") +PERFCOUNTER_CPU(tlb_track_iod_not_tracked, "tlb_track_iod_not_tracked") +PERFCOUNTER_CPU(tlb_track_iod_force_many, "tlb_track_iod_force_many") +PERFCOUNTER_CPU(tlb_track_iod_tracked_many, "tlb_track_iod_tracked_many") +PERFCOUNTER_CPU(tlb_track_iod_tracked_many_del, "tlb_track_iod_tracked_many_del") +PERFCOUNTER_CPU(tlb_track_iod_found, "tlb_track_iod_found") +PERFCOUNTER_CPU(tlb_track_iod_new_entry, "tlb_track_iod_new_entry") +PERFCOUNTER_CPU(tlb_track_iod_new_failed, "tlb_track_iod_new_failed") +PERFCOUNTER_CPU(tlb_track_iod_new_many, "tlb_track_iod_new_many") +PERFCOUNTER_CPU(tlb_track_iod_insert, "tlb_track_iod_insert") +PERFCOUNTER_CPU(tlb_track_iod_dirtied, "tlb_track_iod_dirtied") + +// search and remove +PERFCOUNTER_CPU(tlb_track_sar, "tlb_track_sar") +PERFCOUNTER_CPU(tlb_track_sar_not_tracked, "tlb_track_sar_not_tracked") +PERFCOUNTER_CPU(tlb_track_sar_not_found, "tlb_track_sar_not_found") +PERFCOUNTER_CPU(tlb_track_sar_found, "tlb_track_sar_found") +PERFCOUNTER_CPU(tlb_track_sar_many, "tlb_track_sar_many") + +// flush +PERFCOUNTER_CPU(tlb_track_use_rr7, "tlb_track_use_rr7") +PERFCOUNTER_CPU(tlb_track_swap_rr0, "tlb_track_swap_rr0") +#endif diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/tlbflush.h --- a/xen/include/asm-ia64/tlbflush.h Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/include/asm-ia64/tlbflush.h Wed Aug 09 20:01:25 2006 +0900 @@ -22,6 +22,13 @@ void domain_flush_vtlb_all (void); /* Global range-flush of vTLB. */ void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range); +#ifdef CONFIG_XEN_IA64_TLB_TRACK +struct tlb_track_entry; +/* Global entry-flush of vTLB */ +void domain_flush_vtlb_track_entry(struct domain* d, + const struct tlb_track_entry* entry); +#endif + /* Flush vhpt and mTLB on every dirty cpus. 
*/ void domain_flush_tlb_vhpt(struct domain *d); diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/vcpu.h --- a/xen/include/asm-ia64/vcpu.h Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/include/asm-ia64/vcpu.h Wed Aug 09 20:01:25 2006 +0900 @@ -158,7 +158,12 @@ extern void vcpu_set_next_timer(VCPU *vc extern void vcpu_set_next_timer(VCPU *vcpu); extern BOOLEAN vcpu_timer_expired(VCPU *vcpu); extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu); +#ifndef CONFIG_XEN_IA64_TLB_TRACK extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64); +#else +struct p2m_entry; +extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64, struct p2m_entry*); +#endif extern UINT64 vcpu_get_tmp(VCPU *, UINT64); extern void vcpu_set_tmp(VCPU *, UINT64, UINT64); diff -r 44e6aea4077e -r 86d4ea768120 xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Tue Aug 08 15:58:20 2006 +0900 +++ b/xen/include/public/arch-ia64.h Wed Aug 09 20:01:25 2006 +0900 @@ -362,6 +362,11 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte /* Internal only: memory attribute must be WC/UC/UCE. */ #define _ASSIGN_nocache 1 #define ASSIGN_nocache (1UL << _ASSIGN_nocache) +// tlb tracking +#ifdef CONFIG_XEN_IA64_TLB_TRACK +# define _ASSIGN_tlb_track 2 +# define ASSIGN_tlb_track (1UL << _ASSIGN_tlb_track) +#endif /* This structure has the same layout of struct ia64_boot_param, defined in . It is redefined here to ease use. */ diff -r 44e6aea4077e -r 86d4ea768120 xen/arch/ia64/xen/tlb_track.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/xen/tlb_track.c Wed Aug 09 20:01:25 2006 +0900 @@ -0,0 +1,506 @@ +/****************************************************************************** + * tlb_track.c + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include // for IA64_RR_SHIFT +#include // for VRN7 +#include // for PSCB() + +#define CONFIG_TLB_TRACK_DEBUG +#ifdef CONFIG_TLB_TRACK_DEBUG +# define tlb_track_printd(fmt, ...) \ + printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__) +#else +# define tlb_track_printd(fmt, ...) do { } while (0) +#endif + +static int +tlb_track_allocate_entries(struct tlb_track* tlb_track) +{ + struct page_info* entry_page; + struct tlb_track_entry* track_entries; + unsigned int allocated; + unsigned long i; + + BUG_ON(tlb_track->num_free > 0); + if (tlb_track->num_entries >= tlb_track->limit) { + DPRINTK("%s: num_entries %d limit %d\n", + __func__, tlb_track->num_entries, tlb_track->limit); + return -ENOMEM; + } + entry_page = alloc_domheap_page(NULL); + if (entry_page == NULL) { + DPRINTK("%s: domheap page failed. 
num_entries %d limit %d\n", + __func__, tlb_track->num_entries, tlb_track->limit); + return -ENOMEM; + } + + list_add(&entry_page->list, &tlb_track->page_list); + track_entries = (struct tlb_track_entry*)page_to_virt(entry_page); + allocated = PAGE_SIZE / sizeof(track_entries[0]); + tlb_track->num_entries += allocated; + tlb_track->num_free += allocated; + for (i = 0; i < allocated; i++) { + list_add(&track_entries[i].list, &tlb_track->free_list); + //tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]); + } + tlb_track_printd("allocated %d num_entries %d num_free %d\n", + allocated, tlb_track->num_entries, tlb_track->num_free); + return 0; +} + + +int +tlb_track_create(struct domain* d) +{ + struct tlb_track* tlb_track = NULL; + struct page_info* hash_page = NULL; + unsigned int hash_size; + unsigned int hash_shift; + unsigned int i; + + tlb_track = xmalloc(struct tlb_track); + if (tlb_track == NULL) { + goto out; + } + hash_page = alloc_domheap_page(NULL); + if (hash_page == NULL) { + goto out; + } + + spin_lock_init(&tlb_track->free_list_lock); + INIT_LIST_HEAD(&tlb_track->free_list); + tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES; + tlb_track->num_entries = 0; + tlb_track->num_free = 0; + INIT_LIST_HEAD(&tlb_track->page_list); + if (tlb_track_allocate_entries(tlb_track) < 0) { + goto out; + } + + spin_lock_init(&tlb_track->hash_lock); + //XXX hash size optimization + hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]); + for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++) + /* nothing */; + tlb_track->hash_size = (1 << hash_shift); + tlb_track->hash_shift = hash_shift; + tlb_track->hash_mask = (1 << hash_shift) - 1; + tlb_track->hash = page_to_virt(hash_page); + for (i = 0; i < tlb_track->hash_size; i++) { + INIT_LIST_HEAD(&tlb_track->hash[i]); + } + + smp_mb(); // make initialization visible before use. + d->arch.tlb_track = tlb_track; + printk("%s:%d hash 0x%p hash_size %d \n", + __func__, __LINE__, tlb_track->hash, tlb_track->hash_size); + + return 0; + +out: + if (hash_page != NULL) { + free_domheap_page(hash_page); + } + if (tlb_track != NULL) { + xfree(tlb_track); + } + return -ENOMEM; +} + +void +tlb_track_destroy(struct domain* d) +{ + struct tlb_track* tlb_track = d->arch.tlb_track; + struct page_info* page; + struct page_info* next; + + spin_lock(&tlb_track->free_list_lock); + BUG_ON(tlb_track->num_free != tlb_track->num_entries); + + list_for_each_entry_safe(page, next, &tlb_track->page_list, list) { + list_del(&page->list); + free_domheap_page(page); + } + + free_domheap_page(virt_to_page(tlb_track->hash)); + xfree(tlb_track); + //d->tlb_track = NULL; +} + +static struct tlb_track_entry* +tlb_track_get_entry(struct tlb_track* tlb_track) +{ + struct tlb_track_entry* entry = NULL; + spin_lock(&tlb_track->free_list_lock); + if (tlb_track->num_free == 0) { + (void)tlb_track_allocate_entries(tlb_track); + } + if (tlb_track->num_free > 0) { + BUG_ON(list_empty(&tlb_track->free_list)); + entry = list_entry(tlb_track->free_list.next, + struct tlb_track_entry, list); + tlb_track->num_free--; + list_del(&entry->list); + } + spin_unlock(&tlb_track->free_list_lock); + return entry; +} + +void +tlb_track_free_entry(struct tlb_track* tlb_track, + struct tlb_track_entry* entry) +{ + spin_lock(&tlb_track->free_list_lock); + list_add(&entry->list, &tlb_track->free_list); + tlb_track->num_free++; + spin_unlock(&tlb_track->free_list_lock); +} + + +#include +// XXX hash function. 
+static struct list_head* +tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep) +{ + unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift); + BUG_ON(hash >= tlb_track->hash_size); + BUG_ON((hash & tlb_track->hash_mask) != hash); + return &tlb_track->hash[hash]; +} + +static int +tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte) +{ + if (pte_pfn(old_pte) != pte_pfn(ret_pte) || + (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) != + (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) { + // Other thread zapped the p2m entry. + return 1; + } + return 0; +} + +static TLB_TRACK_RET_T +tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm, + volatile pte_t* ptep, pte_t old_pte, + unsigned long vaddr, unsigned long rid) +{ + unsigned long mfn = pte_pfn(old_pte); + struct list_head* head = tlb_track_hash_head(tlb_track, ptep); + struct tlb_track_entry* entry; + struct tlb_track_entry* new_entry = NULL; + unsigned long bit_to_be_set = _PAGE_TLB_INSERTED; + pte_t new_pte; + pte_t ret_pte; + + struct vcpu* v = current; + TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND; + +#if 0 // this is done at vcpu_tlb_track_insert_or_dirty() + perfc_incrc(tlb_track_iod); + if (!pte_tlb_tracking(old_pte)) { + perfc_incrc(tlb_track_iod_not_tracked); + return TLB_TRACK_NOT_TRACKED; + } +#endif + if (pte_tlb_inserted_many(old_pte)) { + perfc_incrc(tlb_track_iod_tracked_many); + return TLB_TRACK_MANY; + } + + // vaddr must be normalized so that it is in vrn7 and page aligned. + BUG_ON((vaddr >> IA64_RR_SHIFT) != VRN7); + BUG_ON((vaddr & ~PAGE_MASK) != 0); +#if 0 + tlb_track_printd("\n" + "\tmfn 0x%016lx\n" + "\told_pte 0x%016lx ptep 0x%p\n" + "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n" + "\ttlb_track 0x%p head 0x%p\n", + mfn, + pte_val(old_pte), ptep, pte_val(*ptep), + vaddr, rid, + tlb_track, head); +#endif + + again: + // zapping side may zap the p2m entry and then remove tlb track entry + // non-atomically. We may see the stale tlb track entry here. + // p2m_entry_retry() handles such a case. + // Or other thread may zap the p2m entry and remove tlb track entry + // and inserted new tlb track entry. + spin_lock(&tlb_track->hash_lock); + list_for_each_entry(entry, head, list) { + if (entry->ptep != ptep) { + continue; + } + + if (pte_pfn(entry->pte_val) == mfn) { + //tlb_track_entry_printf(entry); + if (entry->vaddr == vaddr && entry->rid == rid) { + //tlb_track_printd("TLB_TRACK_FOUND\n"); + ret = TLB_TRACK_FOUND; + perfc_incrc(tlb_track_iod_found); +#ifdef CONFIG_TLB_TRACK_CNT + entry->cnt++; + if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) { + // heuristics: + // If a page is used to transfer data by dev channel, + // it would be unmapped with small amount access + // (once or twice tlb insert) after real device + // I/O completion. It would be short period. + // However this page seems to be accessed many times. + // We guess that this page is used I/O ring + // so that tracking this entry might be useless. 
+ //tlb_track_entry_printf(entry); + //tlb_track_printd("cnt = %ld\n", entry->cnt); + perfc_incrc(tlb_track_iod_force_many); + goto force_many; + } +#endif + goto found; + } else { +#ifdef CONFIG_TLB_TRACK_CNT + force_many: +#endif + if (!pte_tlb_inserted(old_pte)) { + printk("%s:%d racy update\n", __func__, __LINE__); + old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED); + } + new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY); + ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte); + if (pte_val(ret_pte) != pte_val(old_pte)) { + //tlb_track_printd("TLB_TRACK_AGAIN\n"); + ret = TLB_TRACK_AGAIN; + perfc_incrc(tlb_track_iod_again); + } else { + //tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n", entry); + ret = TLB_TRACK_MANY; + list_del(&entry->list); + //tlb_track_entry_printf(entry); + perfc_incrc(tlb_track_iod_tracked_many_del); + } + goto out; + } + } + + // Other thread changed the p2m entry and removed and inserted new + // tlb tracn entry after we get old_pte, but before we get + // spinlock. + //tlb_track_printd("TLB_TRACK_AGAIN\n"); + ret = TLB_TRACK_AGAIN; + perfc_incrc(tlb_track_iod_again); + goto out; + } + + entry = NULL; // prevent freeing entry. + if (pte_tlb_inserted(old_pte)) { + // Other thread else removed the tlb_track_entry after we got old_pte + // before we got spin lock. + ret = TLB_TRACK_AGAIN; + perfc_incrc(tlb_track_iod_again); + goto out; + } + if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) { + spin_unlock(&tlb_track->hash_lock); + new_entry = tlb_track_get_entry(tlb_track); + if (new_entry == NULL) { + tlb_track_printd("get_entry failed\n"); + // entry can't be allocated. + // fall down into full flush mode. + bit_to_be_set |= _PAGE_TLB_INSERTED_MANY; + perfc_incrc(tlb_track_iod_new_failed); + } + //tlb_track_printd("new_entry 0x%p\n", new_entry); + perfc_incrc(tlb_track_iod_new_entry); + goto again; + } + + BUG_ON(pte_tlb_inserted_many(old_pte)); + new_pte = __pte(pte_val(old_pte) | bit_to_be_set); + ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte); + if (pte_val(old_pte) != pte_val(ret_pte)) { + if (tlb_track_pte_zapped(old_pte, ret_pte)) { + //tlb_track_printd("zapped TLB_TRACK_AGAIN\n"); + ret = TLB_TRACK_AGAIN; + perfc_incrc(tlb_track_iod_again); + goto out; + } + + // Other thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY + if (pte_tlb_inserted_many(ret_pte)) { + // Other thread already set _PAGE_TLB_INSERTED_MANY and + // removed the entry. 
+ //tlb_track_printd("iserted TLB_TRACK_MANY\n"); + BUG_ON(!pte_tlb_inserted(ret_pte)); + ret = TLB_TRACK_MANY; + perfc_incrc(tlb_track_iod_new_many); + goto out; + } + BUG_ON(pte_tlb_inserted(ret_pte)); + BUG(); + } + if (new_entry) { + //tlb_track_printd("iserting new_entry 0x%p\n", new_entry); + entry = new_entry; + new_entry = NULL; + + entry->ptep = ptep; + entry->pte_val = old_pte; + entry->vaddr = vaddr; + entry->rid = rid; + cpus_clear(entry->pcpu_dirty_mask); + vcpus_clear(entry->vcpu_dirty_mask); + list_add(&entry->list, head); + +#ifdef CONFIG_TLB_TRACK_CNT + entry->cnt = 0; +#endif + perfc_incrc(tlb_track_iod_insert); + //tlb_track_entry_printf(entry); + } else { + goto out; + } + + found: + BUG_ON(v->processor >= NR_CPUS); + cpu_set(v->processor, entry->pcpu_dirty_mask); + BUG_ON(v->vcpu_id >= NR_CPUS); + vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask); + perfc_incrc(tlb_track_iod_dirtied); + + out: + spin_unlock(&tlb_track->hash_lock); + if (ret == TLB_TRACK_MANY && entry != NULL) { + tlb_track_free_entry(tlb_track, entry); + } + if (new_entry != NULL) { + tlb_track_free_entry(tlb_track, new_entry); + } + return ret; +} + +void +__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr, + struct p2m_entry* entry) +{ + unsigned long vrn = vaddr >> IA64_RR_SHIFT; + unsigned long rid = PSCB(vcpu, rrs[vrn]); + TLB_TRACK_RET_T ret; + + // normalize vrn7 + // When linux dom0 case, vrn7 is the most common case. + vaddr |= VRN7 << VRN_SHIFT; + vaddr &= PAGE_MASK; + ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track, + &vcpu->domain->arch.mm, + entry->ptep, entry->used, + vaddr, rid); + if (ret == TLB_TRACK_AGAIN) { + p2m_entry_set_retry(entry); + } +} + +TLB_TRACK_RET_T +tlb_track_search_and_remove(struct tlb_track* tlb_track, + volatile pte_t* ptep, pte_t old_pte, + struct tlb_track_entry** entryp) +{ + unsigned long mfn = pte_pfn(old_pte); + struct list_head* head = tlb_track_hash_head(tlb_track, ptep); + struct tlb_track_entry* entry; + + perfc_incrc(tlb_track_sar); + if (!pte_tlb_tracking(old_pte)) { + perfc_incrc(tlb_track_sar_not_tracked); + return TLB_TRACK_NOT_TRACKED; + } + if (!pte_tlb_inserted(old_pte)) { + BUG_ON(pte_tlb_inserted_many(old_pte)); + perfc_incrc(tlb_track_sar_not_found); + return TLB_TRACK_NOT_FOUND; + } + if (pte_tlb_inserted_many(old_pte)) { + BUG_ON(!pte_tlb_inserted(old_pte)); + perfc_incrc(tlb_track_sar_many); + return TLB_TRACK_MANY; + } + + spin_lock(&tlb_track->hash_lock); + list_for_each_entry(entry, head, list) { + if (entry->ptep != ptep) { + continue; + } + if (pte_pfn(entry->pte_val) == mfn) { + list_del(&entry->list); + spin_unlock(&tlb_track->hash_lock); + *entryp = entry; + perfc_incrc(tlb_track_sar_found); + //tlb_track_entry_printf(entry); +#ifdef CONFIG_TLB_TRACK_CNT + //tlb_track_printd("cnt = %ld\n", entry->cnt); +#endif + return TLB_TRACK_FOUND; + } + BUG(); + } + BUG(); + spin_unlock(&tlb_track->hash_lock); + return TLB_TRACK_NOT_TRACKED; +} + +// for debug +void +__tlb_track_entry_printf(const char* func, int line, + const struct tlb_track_entry* entry) +{ + char pcpumask_buf[NR_CPUS + 1]; + char vcpumask_buf[MAX_VIRT_CPUS + 1]; + cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf), + entry->pcpu_dirty_mask); + vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf), + entry->vcpu_dirty_mask); + printk("%s:%d\n" + "\tmfn 0x%016lx\n" + "\told_pte 0x%016lx ptep 0x%p\n" + "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n" + "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n" + "\tentry 0x%p\n", + func, line, + 
pte_pfn(entry->pte_val), + pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep), + entry->vaddr, entry->rid, + pcpumask_buf, vcpumask_buf, + entry); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/p2m_entry.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-ia64/p2m_entry.h Wed Aug 09 20:01:25 2006 +0900 @@ -0,0 +1,76 @@ +/****************************************************************************** + * p2m_entry.h + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __ASM_P2M_ENTRY_H__ +#define __ASM_P2M_ENTRY_H__ + +#include + +struct p2m_entry { +#define P2M_PTE_ALWAYS_RETRY ((volatile pte_t*) -1) + volatile pte_t* ptep; + pte_t used; +}; + +static inline void +p2m_entry_set(struct p2m_entry* entry, volatile pte_t* ptep, pte_t used) +{ + entry->ptep = ptep; + entry->used = used; +} + +static inline void +p2m_entry_set_retry(struct p2m_entry* entry) +{ + entry->ptep = P2M_PTE_ALWAYS_RETRY; +} + +static inline int +p2m_entry_retry(struct p2m_entry* entry) +{ + //XXX see lookup_domain_pte(). + // NULL is set for invalid gpaddr for the time being. + if (entry->ptep == NULL) + return 0; + + if (entry->ptep == P2M_PTE_ALWAYS_RETRY) + return 1; + +#ifdef CONFIG_XEN_IA64_TLB_TRACK + return ((pte_val(*entry->ptep) & ~_PAGE_TLB_TRACK_MASK) != + (pte_val(entry->used) & ~_PAGE_TLB_TRACK_MASK)); +#else + return (pte_val(*entry->ptep) != pte_val(entry->used)); +#endif +} + +#endif // __ASM_P2M_ENTRY_H__ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 44e6aea4077e -r 86d4ea768120 xen/include/asm-ia64/tlb_track.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-ia64/tlb_track.h Wed Aug 09 20:01:25 2006 +0900 @@ -0,0 +1,196 @@ +/****************************************************************************** + * tlb_track.h + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __TLB_TRACK_H__ +#define __TLB_TRACK_H__ + +#ifdef CONFIG_XEN_IA64_TLB_TRACK + +#include +#include +#include +#include +#include + +// vcpu mask +// stolen from cpumask.h +typedef struct { DECLARE_BITMAP(bits, MAX_VIRT_CPUS); } vcpumask_t; + +#define vcpu_set(vcpu, dst) __vcpu_set((vcpu), &(dst)) +static inline void __vcpu_set(int vcpu, volatile vcpumask_t *dstp) +{ + set_bit(vcpu, dstp->bits); +} +#define vcpus_clear(dst) __vcpus_clear(&(dst), MAX_VIRT_CPUS) +static inline void __vcpus_clear(vcpumask_t *dstp, int nbits) +{ + bitmap_zero(dstp->bits, nbits); +} +/* No static inline type checking - see Subtlety (1) above. */ +#define vcpu_isset(vcpu, vcpumask) test_bit((vcpu), (vcpumask).bits) + +#define first_vcpu(src) __first_vcpu(&(src), MAX_VIRT_CPUS) +static inline int __first_vcpu(const vcpumask_t *srcp, int nbits) +{ + return min_t(int, nbits, find_first_bit(srcp->bits, nbits)); +} + +#define next_vcpu(n, src) __next_vcpu((n), &(src), MAX_VIRT_CPUS) +static inline int __next_vcpu(int n, const vcpumask_t *srcp, int nbits) +{ + return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1)); +} + +#if MAX_VIRT_CPUS > 1 +#define for_each_vcpu_mask(vcpu, mask) \ + for ((vcpu) = first_vcpu(mask); \ + (vcpu) < MAX_VIRT_CPUS; \ + (vcpu) = next_vcpu((vcpu), (mask))) +#else /* NR_CPUS == 1 */ +#define for_each_vcpu_mask(vcpu, mask) for ((vcpu) = 0; (vcpu) < 1; (vcpu)++) +#endif /* NR_CPUS */ + +#define vcpumask_scnprintf(buf, len, src) \ + __vcpumask_scnprintf((buf), (len), &(src), MAX_VIRT_CPUS) +static inline int __vcpumask_scnprintf(char *buf, int len, + const vcpumask_t *srcp, int nbits) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nbits); +} + + +// TODO: compact this structure. +struct tlb_track_entry { + struct list_head list; + + + volatile pte_t* ptep; // corresponding p2m entry + + //XXX should we use TR_ENTRY? + pte_t pte_val; // mfn and other flags + // pte_val.p = 1: + // tlb entry is inserted. + // pte_val.p = 0: + // once tlb entry is inserted, so + // this entry is created. But tlb + // purge is isseued, so this + // virtual address need not to be + // purged. + unsigned long vaddr; // virtual address + unsigned long rid; // rid + + cpumask_t pcpu_dirty_mask; + vcpumask_t vcpu_dirty_mask; + // tlbflush_timestamp; + +#define CONFIG_TLB_TRACK_CNT +#ifdef CONFIG_TLB_TRACK_CNT +#define TLB_TRACK_CNT_FORCE_MANY 256 //XXX how many? + unsigned long cnt; +#endif +}; + +struct tlb_track { + +// see __gnttab_map_grant_ref() +// A domain can map granted-page up to MAPTRACK_MAX_ENTRIES pages. 
+#define TLB_TRACK_LIMIT_ENTRIES \ + (MAPTRACK_MAX_ENTRIES * (PAGE_SIZE / sizeof(struct tlb_track))) + + spinlock_t free_list_lock; + struct list_head free_list; + unsigned int limit; + unsigned int num_entries; + unsigned int num_free; + struct list_head page_list; + + // XXX hash table size + spinlock_t hash_lock; + unsigned int hash_size; + unsigned int hash_shift; + unsigned int hash_mask; + struct list_head* hash; +}; + +int tlb_track_create(struct domain* d); +void tlb_track_destroy(struct domain* d); + +void tlb_track_free_entry(struct tlb_track* tlb_track, + struct tlb_track_entry* entry); + +void +__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr, + struct p2m_entry* entry); +static inline void +vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr, + struct p2m_entry* entry) +{ + // optimization. + // non-tracking pte is most common. + perfc_incrc(tlb_track_iod); + if (!pte_tlb_tracking(entry->used)) { + perfc_incrc(tlb_track_iod_not_tracked); + return; + } + + __vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry); +} + + +// return value +// NULL if this entry is used +// entry if this entry isn't used +enum TLB_TRACK_RET { + TLB_TRACK_NOT_TRACKED, + TLB_TRACK_NOT_FOUND, + TLB_TRACK_FOUND, + TLB_TRACK_MANY, + TLB_TRACK_AGAIN, +}; +typedef enum TLB_TRACK_RET TLB_TRACK_RET_T; + +TLB_TRACK_RET_T +tlb_track_search_and_remove(struct tlb_track* tlb_track, + volatile pte_t* ptep, pte_t old_pte, + struct tlb_track_entry** entryp); + +void +__tlb_track_entry_printf(const char* func, int line, + const struct tlb_track_entry* entry); +#define tlb_track_entry_printf(entry) \ + __tlb_track_entry_printf(__func__, __LINE__, (entry)) +#else +//define nop + +#endif // CONFIG_XEN_IA64_TLB_TRACK + +#endif // __TLB_TRACK_H__ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */
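For reviewers skimming the diff above, the following is a small standalone sketch (not part of the patch) of the decision the new unmap path makes: tlb_track_search_and_remove() classifies the page from the tracking bits this patch adds to the p2m pte, and domain_page_flush() picks the corresponding flush. The bit positions mirror _PAGE_TLB_TRACKING / _PAGE_TLB_INSERTED / _PAGE_TLB_INSERTED_MANY (bits 54-56) from pgtable.h above; the helper name and message strings here are invented purely for illustration.

/* Illustrative sketch only -- NOT part of the patch. */
#include <stdio.h>

#define SK_PAGE_TLB_TRACKING      (1ULL << 54) /* page opted in to tracking (grant map)   */
#define SK_PAGE_TLB_INSERTED      (1ULL << 55) /* one tracked vTLB insert was recorded    */
#define SK_PAGE_TLB_INSERTED_MANY (1ULL << 56) /* too many/ambiguous inserts: give up     */

/* Mirrors the switch in domain_page_flush() after tlb_track_search_and_remove(). */
static const char* unmap_flush_action(unsigned long long pte_val)
{
    if (!(pte_val & SK_PAGE_TLB_TRACKING))
        return "NOT_TRACKED: full domain_flush_vtlb_all()";
    if (!(pte_val & SK_PAGE_TLB_INSERTED))
        return "NOT_FOUND: no vTLB insert recorded, no flush needed";
    if (pte_val & SK_PAGE_TLB_INSERTED_MANY)
        return "MANY: fall back to full domain_flush_vtlb_all()";
    return "FOUND: flush only the recorded virtual address (domain_flush_vtlb_track_entry)";
}

int main(void)
{
    /* e.g. a vbd page used only for DMA: tracked but never inserted into the vTLB */
    unsigned long long dma_only_page = SK_PAGE_TLB_TRACKING;
    /* a grant-mapped page dom0 actually touched once through the vTLB */
    unsigned long long mapped_page   = SK_PAGE_TLB_TRACKING | SK_PAGE_TLB_INSERTED;

    printf("%s\n", unmap_flush_action(dma_only_page));
    printf("%s\n", unmap_flush_action(mapped_page));
    return 0;
}

The "tracked but never inserted" case is the one the commit message highlights: dom0's block backend only DMAs to grant-mapped pages, so at unmap time the pte still has only the tracking bit set and the expensive full vTLB flush is skipped entirely.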