Loading Documentation/sysctl/vm.txt +22 −0 Original line number Diff line number Diff line Loading @@ -64,6 +64,7 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - watermark_scale_factor - zone_reclaim_mode - want_old_faultaround_pte ============================================================== Loading Loading @@ -891,4 +892,25 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. ============================================================= want_old_faultaround_pte: When want_old_faultaround_pte is set to 0, faultaround code produces young ptes. When want_old_faultaround_pte is set to 1, faultaround produces old ptes. During sparse file access faultaround gets more pages mapped, and when all of them are young (want_old_faultaround_pte set to 0), under memory pressure, this makes vmscan swap out anon pages instead, or drop other page cache pages which would otherwise stay resident. Setting want_old_faultaround_pte to 1 avoids this. Making the faultaround ptes old can result in a performance regression on some architectures. This is due to cycles spent in micro-faults, each of which takes a page walk to set the young bit in the pte. One such known test that shows a regression on x86 is unixbench shell8. Set want_old_faultaround_pte to 1 on architectures which do not show this regression or if the workload shows an overall performance benefit with old faultaround ptes. The default value is 1. 
============ End of Document ================================= include/linux/mm.h +4 −0 Original line number Diff line number Diff line Loading @@ -286,6 +286,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ /* Speculative fault, not holding mmap_sem */ #define FAULT_FLAG_SPECULATIVE 0x200 #define FAULT_FLAG_PREFAULT_OLD 0x400 /* Make faultaround ptes old */ /* * vm_fault is filled by the pagefault handler and passed to the vma's Loading Loading @@ -323,6 +324,7 @@ struct vm_fault { struct fault_env { struct vm_area_struct *vma; /* Target VMA */ unsigned long address; /* Faulting virtual address */ unsigned long fault_address; /* Saved faulting virtual address */ unsigned int flags; /* FAULT_FLAG_xxx flags */ pmd_t *pmd; /* Pointer to pmd entry matching * the 'address' Loading Loading @@ -2584,6 +2586,8 @@ void __init setup_nr_node_ids(void); static inline void setup_nr_node_ids(void) {} #endif extern int want_old_faultaround_pte; #ifdef CONFIG_PROCESS_RECLAIM struct reclaim_param { struct vm_area_struct *vma; Loading kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -1486,6 +1486,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, { .procname = "want_old_faultaround_pte", .data = &want_old_faultaround_pte, .maxlen = sizeof(want_old_faultaround_pte), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", Loading mm/filemap.c +10 −0 Original line number Diff line number Diff line Loading @@ -47,6 +47,8 @@ #include <asm/mman.h> int want_old_faultaround_pte = 1; /* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. 
Loading Loading @@ -2287,6 +2289,14 @@ void filemap_map_pages(struct fault_env *fe, if (fe->pte) fe->pte += iter.index - last_pgoff; last_pgoff = iter.index; if (want_old_faultaround_pte) { if (fe->address == fe->fault_address) fe->flags &= ~FAULT_FLAG_PREFAULT_OLD; else fe->flags |= FAULT_FLAG_PREFAULT_OLD; } if (alloc_set_pte(fe, NULL, page)) goto unlock; unlock_page(page); Loading mm/memory.c +13 −0 Original line number Diff line number Diff line Loading @@ -3212,6 +3212,10 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, entry = mk_pte(page, fe->vma_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), fe->vma_flags); if (fe->flags & FAULT_FLAG_PREFAULT_OLD) entry = pte_mkold(entry); /* copy-on-write page */ if (write && !(fe->vma_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); Loading @@ -3230,8 +3234,16 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, return 0; } /* * If the architecture emulates the "accessed" or "young" bit without HW support, * there is not much gain with fault_around. */ static unsigned long fault_around_bytes __read_mostly = #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS PAGE_SIZE; #else rounddown_pow_of_two(65536); #endif #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) Loading Loading @@ -3300,6 +3312,7 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) pgoff_t end_pgoff; int off, ret = 0; fe->fault_address = address; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; Loading Loading
Documentation/sysctl/vm.txt +22 −0 Original line number Diff line number Diff line Loading @@ -64,6 +64,7 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - watermark_scale_factor - zone_reclaim_mode - want_old_faultaround_pte ============================================================== Loading Loading @@ -891,4 +892,25 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. ============================================================= want_old_faultaround_pte: When want_old_faultaround_pte is set to 0, faultaround code produces young ptes. When want_old_faultaround_pte is set to 1, faultaround produces old ptes. During sparse file access faultaround gets more pages mapped, and when all of them are young (want_old_faultaround_pte set to 0), under memory pressure, this makes vmscan swap out anon pages instead, or drop other page cache pages which would otherwise stay resident. Setting want_old_faultaround_pte to 1 avoids this. Making the faultaround ptes old can result in a performance regression on some architectures. This is due to cycles spent in micro-faults, each of which takes a page walk to set the young bit in the pte. One such known test that shows a regression on x86 is unixbench shell8. Set want_old_faultaround_pte to 1 on architectures which do not show this regression or if the workload shows an overall performance benefit with old faultaround ptes. The default value is 1. ============ End of Document =================================
include/linux/mm.h +4 −0 Original line number Diff line number Diff line Loading @@ -286,6 +286,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ /* Speculative fault, not holding mmap_sem */ #define FAULT_FLAG_SPECULATIVE 0x200 #define FAULT_FLAG_PREFAULT_OLD 0x400 /* Make faultaround ptes old */ /* * vm_fault is filled by the pagefault handler and passed to the vma's Loading Loading @@ -323,6 +324,7 @@ struct vm_fault { struct fault_env { struct vm_area_struct *vma; /* Target VMA */ unsigned long address; /* Faulting virtual address */ unsigned long fault_address; /* Saved faulting virtual address */ unsigned int flags; /* FAULT_FLAG_xxx flags */ pmd_t *pmd; /* Pointer to pmd entry matching * the 'address' Loading Loading @@ -2584,6 +2586,8 @@ void __init setup_nr_node_ids(void); static inline void setup_nr_node_ids(void) {} #endif extern int want_old_faultaround_pte; #ifdef CONFIG_PROCESS_RECLAIM struct reclaim_param { struct vm_area_struct *vma; Loading
kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -1486,6 +1486,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, { .procname = "want_old_faultaround_pte", .data = &want_old_faultaround_pte, .maxlen = sizeof(want_old_faultaround_pte), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", Loading
mm/filemap.c +10 −0 Original line number Diff line number Diff line Loading @@ -47,6 +47,8 @@ #include <asm/mman.h> int want_old_faultaround_pte = 1; /* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. Loading Loading @@ -2287,6 +2289,14 @@ void filemap_map_pages(struct fault_env *fe, if (fe->pte) fe->pte += iter.index - last_pgoff; last_pgoff = iter.index; if (want_old_faultaround_pte) { if (fe->address == fe->fault_address) fe->flags &= ~FAULT_FLAG_PREFAULT_OLD; else fe->flags |= FAULT_FLAG_PREFAULT_OLD; } if (alloc_set_pte(fe, NULL, page)) goto unlock; unlock_page(page); Loading
mm/memory.c +13 −0 Original line number Diff line number Diff line Loading @@ -3212,6 +3212,10 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, entry = mk_pte(page, fe->vma_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), fe->vma_flags); if (fe->flags & FAULT_FLAG_PREFAULT_OLD) entry = pte_mkold(entry); /* copy-on-write page */ if (write && !(fe->vma_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); Loading @@ -3230,8 +3234,16 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, return 0; } /* * If the architecture emulates the "accessed" or "young" bit without HW support, * there is not much gain with fault_around. */ static unsigned long fault_around_bytes __read_mostly = #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS PAGE_SIZE; #else rounddown_pow_of_two(65536); #endif #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) Loading Loading @@ -3300,6 +3312,7 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) pgoff_t end_pgoff; int off, ret = 0; fe->fault_address = address; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; Loading