Re: [PATCH v2 2/7] mm: introduce local state for lazy_mmu sections
On Fri, Sep 12, 2025 at 10:55:50AM +0200, David Hildenbrand wrote:

Hi David, Kevin,

> Great, looking forward to seeing this all getting cleaned up and done
> properly for good.

I am currently working on lazy mmu for s390 and this nesting initiative
kind of interferes. Well, in fact it looks like it does not, but I am a
bit lost in the last couple of iterations ;)

The prerequisite for s390 would be something like the change below.
With that change I can store the context in a per-cpu structure and use
it later in arch-specific ptep_* primitives (see the sketch at the end
of this message). Moreover, with a further (experimental) rework we
could use a custom kasan sanitizer to spot wrong, directly compiled PTE
accesses, as opposed to accesses via the set_pte()/ptep_get() accessors.

I am not quite sure whether this could be derailed by the new lazy mmu
API. At least I do not immediately see any obvious problem, but maybe
you do?

[PATCH] mm: Make lazy MMU mode context-aware

The lazy MMU mode is assumed to be context-independent, in the sense
that the MMU does not need any additional data while in lazy mode.
Yet, the s390 architecture may benefit strongly if it knows the exact
page table entries being changed while in lazy mode.

Introduce arch_enter_lazy_mmu_mode_pte(), which is provided with the
process memory space and the page table being operated on, as the
prerequisite for the s390 optimization. It is expected to be called
only against PTE page tables and to never cross a page table boundary.

There is no change for architectures that do not need any context.

Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
---
 fs/proc/task_mmu.c      | 2 +-
 include/linux/pgtable.h | 8 ++++++++
 mm/madvise.c            | 8 ++++----
 mm/memory.c             | 8 ++++----
 mm/mprotect.c           | 2 +-
 mm/mremap.c             | 2 +-
 mm/vmalloc.c            | 6 +++---
 7 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 751479eb128f..02fcd2771b2a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2493,7 +2493,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
 		return 0;
 	}
 
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(vma->vm_mm, start, end, start_pte);
 
 	if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
 		/* Fast path for performing exclusive WP */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 0b6e1f781d86..16235c198bcb 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -235,6 +235,14 @@ static inline int pmd_dirty(pmd_t pmd)
 #define arch_enter_lazy_mmu_mode()	do {} while (0)
 #define arch_leave_lazy_mmu_mode()	do {} while (0)
 #define arch_flush_lazy_mmu_mode()	do {} while (0)
+
+static inline void arch_enter_lazy_mmu_mode_pte(struct mm_struct *mm,
+						unsigned long addr,
+						unsigned long end,
+						pte_t *ptep)
+{
+	arch_enter_lazy_mmu_mode();
+}
 #endif
 
 #ifndef pte_batch_hint
diff --git a/mm/madvise.c b/mm/madvise.c
index 1d44a35ae85c..d36d4dc42378 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -448,7 +448,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 	if (!start_pte)
 		return 0;
 	flush_tlb_batched_pending(mm);
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(mm, addr, end, start_pte);
 	for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) {
 		nr = 1;
 		ptent = ptep_get(pte);
@@ -509,7 +509,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 			if (!start_pte)
 				break;
 			flush_tlb_batched_pending(mm);
-			arch_enter_lazy_mmu_mode();
+			arch_enter_lazy_mmu_mode_pte(mm, addr, end, start_pte);
 			if (!err)
 				nr = 0;
 			continue;
@@ -678,7 +678,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	if (!start_pte)
 		return 0;
 	flush_tlb_batched_pending(mm);
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(mm, addr, end, start_pte);
 	for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) {
 		nr = 1;
 		ptent = ptep_get(pte);
@@ -743,7 +743,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 			if (!start_pte)
 				break;
 			flush_tlb_batched_pending(mm);
-			arch_enter_lazy_mmu_mode();
+			arch_enter_lazy_mmu_mode_pte(mm, addr, end, pte);
 			if (!err)
 				nr = 0;
 			continue;
diff --git a/mm/memory.c b/mm/memory.c
index b0cda5aab398..93c0b8457eb0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1131,7 +1131,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 	orig_src_pte = src_pte;
 	orig_dst_pte = dst_pte;
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(src_mm, addr, end, src_pte);
 
 	do {
 		nr = 1;
@@ -1723,7 +1723,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		return addr;
 
 	flush_tlb_batched_pending(mm);
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(mm, addr, end, start_pte);
 	do {
 		bool any_skipped = false;
 
@@ -2707,7 +2707,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(mm, addr, end, mapped_pte);
 	do {
 		BUG_ON(!pte_none(ptep_get(pte)));
 		if (!pfn_modify_allowed(pfn, prot)) {
@@ -3024,7 +3024,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 			return -EINVAL;
 	}
 
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(mm, addr, end, mapped_pte);
 
 	if (fn) {
 		do {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 88608d0dc2c2..919c1dedff87 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -106,7 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 		target_node = numa_node_id();
 
 	flush_tlb_batched_pending(vma->vm_mm);
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(vma->vm_mm, addr, end, pte);
 	do {
 		oldpte = ptep_get(pte);
 		if (pte_present(oldpte)) {
diff --git a/mm/mremap.c b/mm/mremap.c
index 60f6b8d0d5f0..08b9cb3bb9ef 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -233,7 +233,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 	flush_tlb_batched_pending(vma->vm_mm);
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(mm, old_addr, old_end, old_pte);
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
 				   new_pte++, new_addr += PAGE_SIZE) {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6dbcdceecae1..29cfc64970a5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -105,7 +105,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	if (!pte)
 		return -ENOMEM;
 
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(&init_mm, addr, end, pte);
 
 	do {
 		if (unlikely(!pte_none(ptep_get(pte)))) {
@@ -359,7 +359,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	unsigned long size = PAGE_SIZE;
 
 	pte = pte_offset_kernel(pmd, addr);
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(&init_mm, addr, end, pte);
 
 	do {
 #ifdef CONFIG_HUGETLB_PAGE
@@ -526,7 +526,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	if (!pte)
 		return -ENOMEM;
 
-	arch_enter_lazy_mmu_mode();
+	arch_enter_lazy_mmu_mode_pte(&init_mm, addr, end, pte);
 
 	do {
 		struct page *page = pages[*nr];

> David / dhildenb

Thanks!
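For illustration only, here is a minimal sketch of how such a hook could be
consumed on the architecture side, assuming an s390-style override of
arch_enter_lazy_mmu_mode_pte() that caches the (mm, addr, end, ptep) tuple
in a per-cpu structure for later use by arch-specific ptep_* primitives.
The names lazy_mmu_pte_ctx and lazy_mmu_pte_covers() are invented for this
sketch and are not part of the patch above or of any existing s390 code:

/*
 * Illustrative sketch, not part of the patch above.  Something like this
 * would live in arch code (e.g. under arch/s390/), where the generic
 * fallback of arch_enter_lazy_mmu_mode_pte() is not used.  The structure
 * and helper names below are made up for illustration.
 */
#include <linux/mm_types.h>
#include <linux/percpu.h>

struct lazy_mmu_pte_ctx {
	struct mm_struct *mm;	/* address space the PTE table belongs to */
	unsigned long addr;	/* first address covered by the section */
	unsigned long end;	/* end of the covered range (exclusive) */
	pte_t *ptep;		/* PTE for @addr */
	bool active;		/* inside a lazy MMU section? */
};

static DEFINE_PER_CPU(struct lazy_mmu_pte_ctx, lazy_mmu_pte_ctx);

static inline void arch_enter_lazy_mmu_mode_pte(struct mm_struct *mm,
						unsigned long addr,
						unsigned long end,
						pte_t *ptep)
{
	/*
	 * Cache the context for this CPU.  Real arch code would also have
	 * to guarantee the context stays valid until the section is left
	 * (e.g. by relying on the page table lock or disabling preemption).
	 */
	struct lazy_mmu_pte_ctx *ctx = this_cpu_ptr(&lazy_mmu_pte_ctx);

	ctx->mm = mm;
	ctx->addr = addr;
	ctx->end = end;
	ctx->ptep = ptep;
	ctx->active = true;
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	/* Flush whatever was deferred for [addr, end), then drop the context. */
	this_cpu_ptr(&lazy_mmu_pte_ctx)->active = false;
}

/*
 * An arch-specific ptep_* primitive could then check whether the PTE it
 * is asked to modify lies within the currently cached lazy MMU section.
 */
static inline bool lazy_mmu_pte_covers(pte_t *ptep)
{
	struct lazy_mmu_pte_ctx *ctx = this_cpu_ptr(&lazy_mmu_pte_ctx);

	return ctx->active && ptep >= ctx->ptep &&
	       ptep < ctx->ptep + ((ctx->end - ctx->addr) >> PAGE_SHIFT);
}

The sketch only shows why the (mm, addr, end, ptep) tuple is wanted at enter
time; how such a context would interact with the nesting introduced by this
series is exactly the open question above.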