Merge "filemap: drop the mmap_sem for all blocking operations" (f3b8bbba) · Commits · e / devices / android_kernel_xiaomi_sm6125

include/linux/pagemap.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -256,6 +256,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
		#define FGP_WRITE 0x00000008
		#define FGP_NOFS 0x00000010
		#define FGP_NOWAIT 0x00000020
		#define FGP_FOR_MMAP 0x00000040

		struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
		int fgp_flags, gfp_t cache_gfp_mask);

mm/filemap.c

+112 −77

Original line number	Diff line number	Diff line
		@@ -1431,6 +1431,9 @@ EXPORT_SYMBOL(find_lock_entry);
		* @gfp_mask and added to the page cache and the VM's LRU
		* list. The page is returned locked and with an increased
		* refcount. Otherwise, NULL is returned.
		* - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
		* its own locking dance if the page is already in cache, or unlock the page
		* before returning if we had to add the page to pagecache.
		*
		* If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
		* if the GFP flags specified for FGP_CREAT are atomic.
		@@ -1483,7 +1486,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
		if (!page)
		return NULL;

		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
		fgp_flags |= FGP_LOCK;

		/* Init accessed so avoid atomic mark_page_accessed later */
		@@ -1497,6 +1500,13 @@ struct page pagecache_get_page(struct address_space mapping, pgoff_t offset,
		if (err == -EEXIST)
		goto repeat;
		}

		/*
		* add_to_page_cache_lru locks the page, and for mmap we expect
		* an unlocked page.
		*/
		if (fgp_flags & FGP_FOR_MMAP)
		unlock_page(page);
		}

		return page;
		@@ -2258,62 +2268,77 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
		EXPORT_SYMBOL(generic_file_read_iter);

		#ifdef CONFIG_MMU
		/**
		* page_cache_read - adds requested page to the page cache if not already there
		* @file: file to read
		* @offset: page index
		* @gfp_mask: memory allocation flags
		*
		* This adds the requested page to the page cache if it isn't already there,
		* and schedules an I/O to read in its contents from disk.
		*/
		static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
		#define MMAP_LOTSAMISS (100)
		static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
		struct file *fpin)
		{
		struct address_space *mapping = file->f_mapping;
		struct page *page;
		int ret;

		do {
		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
		if (!page)
		return -ENOMEM;

		ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
		if (ret == 0)
		ret = mapping->a_ops->readpage(file, page);
		else if (ret == -EEXIST)
		ret = 0; /* losing race to add is OK */

		put_page(page);
		int flags = vmf->flags;
		if (fpin)
		return fpin;
		if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
		FAULT_FLAG_ALLOW_RETRY) {
		fpin = get_file(vmf->vma->vm_file);
		up_read(&vmf->vma->vm_mm->mmap_sem);
		}
		return fpin;
		}

		} while (ret == AOP_TRUNCATED_PAGE);
		/*
		* Works similar to lock_page_or_retry, except it will pin the file and drop the
		* mmap_sem if necessary and then lock the page, and return 1 in this case.
		* This means the caller needs to deal with the fpin appropriately. 0 return is
		* the same as in lock_page_or_retry.
		*/
		static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
		struct file **fpin)
		{
		if (trylock_page(page))
		return 1;

		return ret;
		*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
		return 0;
		if (vmf->flags & FAULT_FLAG_KILLABLE) {
		if (__lock_page_killable(page)) {
		/*
		* We didn't have the right flags to drop the mmap_sem,
		* but all fault_handlers only check for fatal signals
		* if we return VM_FAULT_RETRY, so we need to drop the
		* mmap_sem here and return 0 if we don't have a fpin.
		*/
		if (*fpin == NULL)
		up_read(&vmf->vma->vm_mm->mmap_sem);
		return 0;
		}
		} else
		__lock_page(page);
		return 1;
		}

		#define MMAP_LOTSAMISS (100)

		/*
		* Synchronous readahead happens when we don't even find
		* a page in the page cache at all.
		*/
		static void do_sync_mmap_readahead(struct vm_area_struct *vma,
		struct file_ra_state *ra,
		struct file *file,
		pgoff_t offset)
		static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
		{
		struct file *file = vmf->vma->vm_file;
		struct file_ra_state *ra = &file->f_ra;
		struct address_space *mapping = file->f_mapping;
		struct file *fpin = NULL;
		pgoff_t offset = vmf->pgoff;

		/* If we don't want any read-ahead, don't bother */
		if (vma->vm_flags & VM_RAND_READ)
		return;
		if (vmf->vma->vm_flags & VM_RAND_READ)
		return fpin;
		if (!ra->ra_pages)
		return;
		return fpin;

		if (vma->vm_flags & VM_SEQ_READ) {
		if (vmf->vma->vm_flags & VM_SEQ_READ) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		page_cache_sync_readahead(mapping, ra, file, offset,
		ra->ra_pages);
		return;
		return fpin;
		}

		/* Avoid banging the cache line if not needed */
		@@ -2325,38 +2350,44 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
		* stop bothering with read-ahead. It will only hurt.
		*/
		if (ra->mmap_miss > MMAP_LOTSAMISS)
		return;
		return fpin;

		/*
		* mmap read-around
		*/
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
		ra->size = ra->ra_pages;
		ra->async_size = ra->ra_pages / 4;
		ra_submit(ra, mapping, file);
		return fpin;
		}

		/*
		* Asynchronous readahead happens when we find the page and PG_readahead,
		* so we want to possibly extend the readahead further..
		*/
		static void do_async_mmap_readahead(struct vm_area_struct *vma,
		struct file_ra_state *ra,
		struct file *file,
		struct page *page,
		pgoff_t offset)
		static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
		struct page *page)
		{
		struct file *file = vmf->vma->vm_file;
		struct file_ra_state *ra = &file->f_ra;
		struct address_space *mapping = file->f_mapping;
		struct file *fpin = NULL;
		pgoff_t offset = vmf->pgoff;

		/* If we don't want any read-ahead, don't bother */
		if (vma->vm_flags & VM_RAND_READ)
		return;
		if (vmf->vma->vm_flags & VM_RAND_READ)
		return fpin;
		if (ra->mmap_miss > 0)
		ra->mmap_miss--;
		if (PageReadahead(page))
		if (PageReadahead(page)) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		page_cache_async_readahead(mapping, ra, file,
		page, offset, ra->ra_pages);
		}
		return fpin;
		}

		/**
		* filemap_fault - read in file data for page fault handling
		@@ -2385,6 +2416,7 @@ int filemap_fault(struct vm_fault *vmf)
		{
		int error;
		struct file *file = vmf->vma->vm_file;
		struct file *fpin = NULL;
		struct address_space *mapping = file->f_mapping;
		struct file_ra_state *ra = &file->f_ra;
		struct inode *inode = mapping->host;
		@@ -2406,20 +2438,22 @@ int filemap_fault(struct vm_fault *vmf)
		* We found the page, so try async readahead before
		* waiting for the lock.
		*/
		do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
		fpin = do_async_mmap_readahead(vmf, page);
		} else if (!page) {
		/* No page in the page cache at all */
		do_sync_mmap_readahead(vmf->vma, ra, file, offset);
		fpin = do_sync_mmap_readahead(vmf);
		count_vm_event(PGMAJFAULT);
		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
		ret = VM_FAULT_MAJOR;
		retry_find:
		page = find_get_page(mapping, offset);
		page = pagecache_get_page(mapping, offset,
		FGP_CREAT|FGP_FOR_MMAP,
		vmf->gfp_mask);
		if (!page)
		goto no_cached_page;
		return VM_FAULT_OOM;
		}

		if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
		if (!lock_page_maybe_drop_mmap(vmf, page, &fpin)) {
		put_page(page);
		return ret | VM_FAULT_RETRY;
		}
		@@ -2439,6 +2473,16 @@ int filemap_fault(struct vm_fault *vmf)
		if (unlikely(!PageUptodate(page)))
		goto page_not_uptodate;

		/*
		* We've made it this far and we had to drop our mmap_sem, now is the
		* time to return to the upper layer and have it re-find the vma and
		* redo the fault.
		*/
		if (fpin) {
		unlock_page(page);
		goto out_retry;
		}

		/*
		* Found the page and have a reference on it.
		* We must recheck i_size under page lock.
		@@ -2453,30 +2497,6 @@ int filemap_fault(struct vm_fault *vmf)
		vmf->page = page;
		return ret | VM_FAULT_LOCKED;

		no_cached_page:
		/*
		* We're only likely to ever get here if MADV_RANDOM is in
		* effect.
		*/
		error = page_cache_read(file, offset, vmf->gfp_mask);

		/*
		* The page we want has now been added to the page cache.
		* In the unlikely event that someone removed it in the
		* meantime, we'll just come back here and read it again.
		*/
		if (error >= 0)
		goto retry_find;

		/*
		* An error return from page_cache_read can result if the
		* system is low on memory, or a problem occurs while trying
		* to schedule I/O.
		*/
		if (error == -ENOMEM)
		return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;

		page_not_uptodate:
		/*
		* Umm, take care of errors if the page isn't up-to-date.
		@@ -2485,12 +2505,15 @@ int filemap_fault(struct vm_fault *vmf)
		* and we need to check for errors.
		*/
		ClearPageError(page);
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		error = mapping->a_ops->readpage(file, page);
		if (!error) {
		wait_on_page_locked(page);
		if (!PageUptodate(page))
		error = -EIO;
		}
		if (fpin)
		goto out_retry;
		put_page(page);

		if (!error || error == AOP_TRUNCATED_PAGE)
		@@ -2499,6 +2522,18 @@ int filemap_fault(struct vm_fault *vmf)
		/* Things didn't work out. Return zero to tell the mm layer so. */
		shrink_readahead_size_eio(file, ra);
		return VM_FAULT_SIGBUS;

		out_retry:
		/*
		* We dropped the mmap_sem, we need to return to the fault handler to
		* re-find the vma and come back and find our hopefully still populated
		* page.
		*/
		if (page)
		put_page(page);
		if (fpin)
		fput(fpin);
		return ret | VM_FAULT_RETRY;
		}
		EXPORT_SYMBOL(filemap_fault);