Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 478a1469 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull DAX locking updates from Ross Zwisler:
 "Filesystem DAX locking for 4.7

   - We use a bit in an exceptional radix tree entry as a lock bit and
     use it similarly to how page lock is used for normal faults.  This
     fixes races between hole instantiation and read faults of the same
     index.

   - Filesystem DAX PMD faults are disabled, and will be re-enabled when
     PMD locking is implemented"

* tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: Remove i_mmap_lock protection
  dax: Use radix tree entry lock to protect cow faults
  dax: New fault locking
  dax: Allow DAX code to replace exceptional entries
  dax: Define DAX lock bit for radix tree exceptional entry
  dax: Make huge page handling depend of CONFIG_BROKEN
  dax: Fix condition for filling of PMD holes
parents 315227f6 4d9a2c87
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ config FS_DAX_PMD
	depends on FS_DAX
	depends on ZONE_DEVICE
	depends on TRANSPARENT_HUGEPAGE
	depends on BROKEN

endif # BLOCK

+421 −171

File changed.

Preview size limit exceeded, changes collapsed.

+15 −1
Original line number Diff line number Diff line
@@ -3,17 +3,25 @@

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>
#include <asm/pgtable.h>

/* We use lowest available exceptional entry bit for locking */
#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)

ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
		  get_block_t, dio_iodone_t, int flags);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
				   pgoff_t index, bool wake_all);

#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
		unsigned int offset, unsigned int length);
#else
@@ -22,6 +30,12 @@ static inline struct page *read_dax_sector(struct block_device *bdev,
{
	return ERR_PTR(-ENXIO);
}
/* Shouldn't ever be called when dax is disabled. */
static inline void dax_unlock_mapping_entry(struct address_space *mapping,
					    pgoff_t index)
{
	BUG();
}
static inline int __dax_zero_page_range(struct block_device *bdev,
		sector_t sector, unsigned int offset, unsigned int length)
{
@@ -29,7 +43,7 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
				unsigned int flags, get_block_t);
int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
+7 −0
Original line number Diff line number Diff line
@@ -303,6 +303,12 @@ struct vm_fault {
					 * is set (which is also implied by
					 * VM_FAULT_ERROR).
					 */
	void *entry;			/* ->fault handler can alternatively
					 * return locked DAX entry. In that
					 * case handler should return
					 * VM_FAULT_DAX_LOCKED and fill in
					 * entry here.
					 */
	/* for ->map_pages() only */
	pgoff_t max_pgoff;		/* map pages for offset from pgoff till
					 * max_pgoff inclusive */
@@ -1076,6 +1082,7 @@ static inline void clear_page_pfmemalloc(struct page *page)
#define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
#define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
#define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
#define VM_FAULT_DAX_LOCKED 0x1000	/* ->fault has locked DAX entry */

#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */

+21 −9
Original line number Diff line number Diff line
@@ -143,13 +143,15 @@ static void page_cache_tree_delete(struct address_space *mapping,
			return;

	/*
	 * Track node that only contains shadow entries.
	 * Track node that only contains shadow entries. DAX mappings contain
	 * no shadow entries and may contain other exceptional entries so skip
	 * those.
	 *
	 * Avoid acquiring the list_lru lock if already tracked.  The
	 * list_empty() test is safe as node->private_list is
	 * protected by mapping->tree_lock.
	 */
	if (!workingset_node_pages(node) &&
	if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
	    list_empty(&node->private_list)) {
		node->private_data = mapping;
		list_lru_add(&workingset_shadow_nodes, &node->private_list);
@@ -580,14 +582,24 @@ static int page_cache_tree_insert(struct address_space *mapping,
		if (!radix_tree_exceptional_entry(p))
			return -EEXIST;

		if (WARN_ON(dax_mapping(mapping)))
			return -EINVAL;

		mapping->nrexceptional--;
		if (!dax_mapping(mapping)) {
			if (shadowp)
				*shadowp = p;
		mapping->nrexceptional--;
			if (node)
				workingset_node_shadows_dec(node);
		} else {
			/* DAX can replace empty locked entry with a hole */
			WARN_ON_ONCE(p !=
				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
					 RADIX_DAX_ENTRY_LOCK));
			/* DAX accounts exceptional entries as normal pages */
			if (node)
				workingset_node_pages_dec(node);
			/* Wakeup waiters for exceptional entry lock */
			dax_wake_mapping_entry_waiter(mapping, page->index,
						      false);
		}
	}
	radix_tree_replace_slot(slot, page);
	mapping->nrpages++;
Loading