Loading Documentation/sysctl/vm.txt +22 −0 Original line number Diff line number Diff line Loading @@ -67,6 +67,7 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - watermark_scale_factor - zone_reclaim_mode - want_old_faultaround_pte ============================================================== Loading Loading @@ -945,4 +946,25 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. ============================================================= want_old_faultaround_pte: When want_old_faultaround_pte is set to 0, faultaround code produces young ptes. When want_old_faultaround_pte is set to 1, faultaround produces old ptes. During sparse file access, faultaround gets more pages mapped, and when all of them are young (want_old_faultaround_pte = 0), under memory pressure this makes vmscan swap out anon pages instead, or drop other page cache pages which would otherwise stay resident. Setting want_old_faultaround_pte to 1 avoids this. Making the faultaround ptes old can result in a performance regression on some architectures. This is due to cycles spent in micro-faults, each of which requires a page walk to set the young bit in the pte. One known test that shows such a regression on x86 is unixbench shell8. Set want_old_faultaround_pte to 1 on architectures which do not show this regression, or if the workload shows an overall performance benefit with old faultaround ptes. The default value is 1.
============ End of Document ================================= include/linux/mm.h +3 −0 Original line number Diff line number Diff line Loading @@ -318,6 +318,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ #define FAULT_FLAG_PREFAULT_OLD 0x400 /* Make faultaround ptes old */ #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ Loading Loading @@ -2796,5 +2797,7 @@ void __init setup_nr_node_ids(void); static inline void setup_nr_node_ids(void) {} #endif extern int want_old_faultaround_pte; #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -1550,6 +1550,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, { .procname = "want_old_faultaround_pte", .data = &want_old_faultaround_pte, .maxlen = sizeof(want_old_faultaround_pte), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", Loading mm/filemap.c +10 −0 Original line number Diff line number Diff line Loading @@ -48,6 +48,8 @@ #include <asm/mman.h> int want_old_faultaround_pte = 1; /* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. 
Loading Loading @@ -2676,6 +2678,14 @@ void filemap_map_pages(struct vm_fault *vmf, if (vmf->pte) vmf->pte += iter.index - last_pgoff; last_pgoff = iter.index; if (want_old_faultaround_pte) { if (iter.index == vmf->pgoff) vmf->flags &= ~FAULT_FLAG_PREFAULT_OLD; else vmf->flags |= FAULT_FLAG_PREFAULT_OLD; } if (alloc_set_pte(vmf, NULL, page)) goto unlock; unlock_page(page); Loading mm/memory.c +4 −0 Original line number Diff line number Diff line Loading @@ -3455,6 +3455,10 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (vmf->flags & FAULT_FLAG_PREFAULT_OLD) entry = pte_mkold(entry); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); Loading Loading
Documentation/sysctl/vm.txt +22 −0 Original line number Diff line number Diff line Loading @@ -67,6 +67,7 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - watermark_scale_factor - zone_reclaim_mode - want_old_faultaround_pte ============================================================== Loading Loading @@ -945,4 +946,25 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. ============================================================= want_old_faultaround_pte: When want_old_faultaround_pte is set to 0, faultaround code produces young ptes. When want_old_faultaround_pte is set to 1, faultaround produces old ptes. During sparse file access, faultaround gets more pages mapped, and when all of them are young (want_old_faultaround_pte = 0), under memory pressure this makes vmscan swap out anon pages instead, or drop other page cache pages which would otherwise stay resident. Setting want_old_faultaround_pte to 1 avoids this. Making the faultaround ptes old can result in a performance regression on some architectures. This is due to cycles spent in micro-faults, each of which requires a page walk to set the young bit in the pte. One known test that shows such a regression on x86 is unixbench shell8. Set want_old_faultaround_pte to 1 on architectures which do not show this regression, or if the workload shows an overall performance benefit with old faultaround ptes. The default value is 1. ============ End of Document =================================
include/linux/mm.h +3 −0 Original line number Diff line number Diff line Loading @@ -318,6 +318,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ #define FAULT_FLAG_PREFAULT_OLD 0x400 /* Make faultaround ptes old */ #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ Loading Loading @@ -2796,5 +2797,7 @@ void __init setup_nr_node_ids(void); static inline void setup_nr_node_ids(void) {} #endif extern int want_old_faultaround_pte; #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */
kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -1550,6 +1550,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, { .procname = "want_old_faultaround_pte", .data = &want_old_faultaround_pte, .maxlen = sizeof(want_old_faultaround_pte), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", Loading
mm/filemap.c +10 −0 Original line number Diff line number Diff line Loading @@ -48,6 +48,8 @@ #include <asm/mman.h> int want_old_faultaround_pte = 1; /* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. Loading Loading @@ -2676,6 +2678,14 @@ void filemap_map_pages(struct vm_fault *vmf, if (vmf->pte) vmf->pte += iter.index - last_pgoff; last_pgoff = iter.index; if (want_old_faultaround_pte) { if (iter.index == vmf->pgoff) vmf->flags &= ~FAULT_FLAG_PREFAULT_OLD; else vmf->flags |= FAULT_FLAG_PREFAULT_OLD; } if (alloc_set_pte(vmf, NULL, page)) goto unlock; unlock_page(page); Loading
mm/memory.c +4 −0 Original line number Diff line number Diff line Loading @@ -3455,6 +3455,10 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (vmf->flags & FAULT_FLAG_PREFAULT_OLD) entry = pte_mkold(entry); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); Loading