Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b89f4321 authored by Arnd Bergmann
Browse files

fs/locks.c: prepare for BKL removal



This prepares the removal of the big kernel lock from the
file locking code. We still use the BKL as long as fs/lockd
uses it and ceph might sleep, but we can flip the definition
to a private spinlock as soon as that's done.
All users outside of fs/lockd get converted to use
lock_flocks() instead of lock_kernel() where appropriate.

Based on an earlier patch to use a spinlock from Matthew
Wilcox, who has attempted this a few times before. The
earliest patch, from over 10 years ago, turned it into
a semaphore, which ended up being slower than the BKL
and was subsequently reverted.

Someone should do some serious performance testing when
this becomes a spinlock, since this has caused problems
before. Using a spinlock should be at least as good
as the BKL in theory, but who knows...

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Sage Weil <sage@newdream.net>
Cc: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
parent 2e54eb96
Loading
Loading
Loading
Loading
+2 −3
Original line number Original line Diff line number Diff line
@@ -9,7 +9,6 @@
 * 2 of the License, or (at your option) any later version.
 * 2 of the License, or (at your option) any later version.
 */
 */


#include <linux/smp_lock.h>
#include "internal.h"
#include "internal.h"


#define AFS_LOCK_GRANTED	0
#define AFS_LOCK_GRANTED	0
@@ -274,7 +273,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)


	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;


	lock_kernel();
	lock_flocks();


	/* make sure we've got a callback on this file and that our view of the
	/* make sure we've got a callback on this file and that our view of the
	 * data version is up to date */
	 * data version is up to date */
@@ -421,7 +420,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
	afs_vnode_fetch_status(vnode, NULL, key);
	afs_vnode_fetch_status(vnode, NULL, key);


error:
error:
	unlock_kernel();
	unlock_flocks();
	_leave(" = %d", ret);
	_leave(" = %d", ret);
	return ret;
	return ret;


+2 −2
Original line number Original line Diff line number Diff line
@@ -562,8 +562,8 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)


static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
{
{
	/* note that this is called by vfs setlease with the BKL held
	/* note that this is called by vfs setlease with lock_flocks held
	   although I doubt that BKL is needed here in cifs */
	   to protect *lease from going away */
	struct inode *inode = file->f_path.dentry->d_inode;
	struct inode *inode = file->f_path.dentry->d_inode;


	if (!(S_ISREG(inode->i_mode)))
	if (!(S_ISREG(inode->i_mode)))
+2 −0
Original line number Original line Diff line number Diff line
@@ -620,6 +620,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 * cluster; until we do, disable leases (by just returning -EINVAL),
 * cluster; until we do, disable leases (by just returning -EINVAL),
 * unless the administrator has requested purely local locking.
 * unless the administrator has requested purely local locking.
 *
 *
 * Locking: called under lock_flocks
 *
 * Returns: errno
 * Returns: errno
 */
 */


+68 −44
Original line number Original line Diff line number Diff line
@@ -143,6 +143,22 @@ int lease_break_time = 45;
static LIST_HEAD(file_lock_list);
static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
static LIST_HEAD(blocked_list);


/*
 * lock_flocks - take the lock that serializes file-lock bookkeeping.
 *
 * Protects the two list heads above, plus the inode->i_flock list.
 * For now this is only a wrapper around lock_kernel(), i.e. the big
 * kernel lock is still what actually gets taken.
 * FIXME: should use a spinlock, once lockd and ceph are ready.
 */
void lock_flocks(void)
{
	lock_kernel();
}
EXPORT_SYMBOL_GPL(lock_flocks);

/*
 * unlock_flocks - counterpart of lock_flocks().
 *
 * Currently just calls unlock_kernel(), mirroring the lock_kernel()
 * call made by lock_flocks().
 */
void unlock_flocks(void)
{
	unlock_kernel();
}
EXPORT_SYMBOL_GPL(unlock_flocks);

static struct kmem_cache *filelock_cache __read_mostly;
static struct kmem_cache *filelock_cache __read_mostly;


/* Allocate an empty lock structure. */
/* Allocate an empty lock structure. */
@@ -511,9 +527,9 @@ static void __locks_delete_block(struct file_lock *waiter)
 */
 */
static void locks_delete_block(struct file_lock *waiter)
static void locks_delete_block(struct file_lock *waiter)
{
{
	lock_kernel();
	lock_flocks();
	__locks_delete_block(waiter);
	__locks_delete_block(waiter);
	unlock_kernel();
	unlock_flocks();
}
}


/* Insert waiter into blocker's block list.
/* Insert waiter into blocker's block list.
@@ -644,7 +660,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
{
{
	struct file_lock *cfl;
	struct file_lock *cfl;


	lock_kernel();
	lock_flocks();
	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
		if (!IS_POSIX(cfl))
		if (!IS_POSIX(cfl))
			continue;
			continue;
@@ -657,7 +673,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
			fl->fl_pid = pid_vnr(cfl->fl_nspid);
			fl->fl_pid = pid_vnr(cfl->fl_nspid);
	} else
	} else
		fl->fl_type = F_UNLCK;
		fl->fl_type = F_UNLCK;
	unlock_kernel();
	unlock_flocks();
	return;
	return;
}
}
EXPORT_SYMBOL(posix_test_lock);
EXPORT_SYMBOL(posix_test_lock);
@@ -730,18 +746,16 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
	int error = 0;
	int error = 0;
	int found = 0;
	int found = 0;


	lock_kernel();
	if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
	if (request->fl_flags & FL_ACCESS)
		goto find_conflict;

	if (request->fl_type != F_UNLCK) {
		error = -ENOMEM;
		new_fl = locks_alloc_lock();
		new_fl = locks_alloc_lock();
		if (new_fl == NULL)
		if (!new_fl)
			goto out;
			return -ENOMEM;
		error = 0;
	}
	}


	lock_flocks();
	if (request->fl_flags & FL_ACCESS)
		goto find_conflict;

	for_each_lock(inode, before) {
	for_each_lock(inode, before) {
		struct file_lock *fl = *before;
		struct file_lock *fl = *before;
		if (IS_POSIX(fl))
		if (IS_POSIX(fl))
@@ -767,8 +781,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
	 * If a higher-priority process was blocked on the old file lock,
	 * If a higher-priority process was blocked on the old file lock,
	 * give it the opportunity to lock the file.
	 * give it the opportunity to lock the file.
	 */
	 */
	if (found)
	if (found) {
		unlock_flocks();
		cond_resched();
		cond_resched();
		lock_flocks();
	}


find_conflict:
find_conflict:
	for_each_lock(inode, before) {
	for_each_lock(inode, before) {
@@ -794,7 +811,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
	error = 0;
	error = 0;


out:
out:
	unlock_kernel();
	unlock_flocks();
	if (new_fl)
	if (new_fl)
		locks_free_lock(new_fl);
		locks_free_lock(new_fl);
	return error;
	return error;
@@ -823,7 +840,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
		new_fl2 = locks_alloc_lock();
		new_fl2 = locks_alloc_lock();
	}
	}


	lock_kernel();
	lock_flocks();
	if (request->fl_type != F_UNLCK) {
	if (request->fl_type != F_UNLCK) {
		for_each_lock(inode, before) {
		for_each_lock(inode, before) {
			fl = *before;
			fl = *before;
@@ -991,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
		locks_wake_up_blocks(left);
		locks_wake_up_blocks(left);
	}
	}
 out:
 out:
	unlock_kernel();
	unlock_flocks();
	/*
	/*
	 * Free any unused locks.
	 * Free any unused locks.
	 */
	 */
@@ -1066,14 +1083,14 @@ int locks_mandatory_locked(struct inode *inode)
	/*
	/*
	 * Search the lock list for this inode for any POSIX locks.
	 * Search the lock list for this inode for any POSIX locks.
	 */
	 */
	lock_kernel();
	lock_flocks();
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
		if (!IS_POSIX(fl))
		if (!IS_POSIX(fl))
			continue;
			continue;
		if (fl->fl_owner != owner)
		if (fl->fl_owner != owner)
			break;
			break;
	}
	}
	unlock_kernel();
	unlock_flocks();
	return fl ? -EAGAIN : 0;
	return fl ? -EAGAIN : 0;
}
}


@@ -1186,7 +1203,7 @@ int __break_lease(struct inode *inode, unsigned int mode)


	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);


	lock_kernel();
	lock_flocks();


	time_out_leases(inode);
	time_out_leases(inode);


@@ -1247,8 +1264,10 @@ int __break_lease(struct inode *inode, unsigned int mode)
			break_time++;
			break_time++;
	}
	}
	locks_insert_block(flock, new_fl);
	locks_insert_block(flock, new_fl);
	unlock_flocks();
	error = wait_event_interruptible_timeout(new_fl->fl_wait,
	error = wait_event_interruptible_timeout(new_fl->fl_wait,
						!new_fl->fl_next, break_time);
						!new_fl->fl_next, break_time);
	lock_flocks();
	__locks_delete_block(new_fl);
	__locks_delete_block(new_fl);
	if (error >= 0) {
	if (error >= 0) {
		if (error == 0)
		if (error == 0)
@@ -1263,7 +1282,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
	}
	}


out:
out:
	unlock_kernel();
	unlock_flocks();
	if (!IS_ERR(new_fl))
	if (!IS_ERR(new_fl))
		locks_free_lock(new_fl);
		locks_free_lock(new_fl);
	return error;
	return error;
@@ -1319,7 +1338,7 @@ int fcntl_getlease(struct file *filp)
	struct file_lock *fl;
	struct file_lock *fl;
	int type = F_UNLCK;
	int type = F_UNLCK;


	lock_kernel();
	lock_flocks();
	time_out_leases(filp->f_path.dentry->d_inode);
	time_out_leases(filp->f_path.dentry->d_inode);
	for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
	for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
			fl = fl->fl_next) {
			fl = fl->fl_next) {
@@ -1328,7 +1347,7 @@ int fcntl_getlease(struct file *filp)
			break;
			break;
		}
		}
	}
	}
	unlock_kernel();
	unlock_flocks();
	return type;
	return type;
}
}


@@ -1341,7 +1360,7 @@ int fcntl_getlease(struct file *filp)
 *	The (input) flp->fl_lmops->fl_break function is required
 *	The (input) flp->fl_lmops->fl_break function is required
 *	by break_lease().
 *	by break_lease().
 *
 *
 *	Called with kernel lock held.
 *	Called with file_lock_lock held.
 */
 */
int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
{
{
@@ -1436,6 +1455,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
}
}
EXPORT_SYMBOL(generic_setlease);
EXPORT_SYMBOL(generic_setlease);


/*
 * Route a lease request to the file's own ->setlease method when the
 * file provides one; otherwise fall back to generic_setlease().
 * The result of whichever handler ran is returned unchanged.
 */
static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
{
	/* No operations table, or no ->setlease hook: use the generic path. */
	if (!filp->f_op || !filp->f_op->setlease)
		return generic_setlease(filp, arg, lease);

	return filp->f_op->setlease(filp, arg, lease);
}

/**
/**
 *	vfs_setlease        -       sets a lease on an open file
 *	vfs_setlease        -       sets a lease on an open file
 *	@filp: file pointer
 *	@filp: file pointer
@@ -1467,12 +1494,9 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
{
{
	int error;
	int error;


	lock_kernel();
	lock_flocks();
	if (filp->f_op && filp->f_op->setlease)
	error = __vfs_setlease(filp, arg, lease);
		error = filp->f_op->setlease(filp, arg, lease);
	unlock_flocks();
	else
		error = generic_setlease(filp, arg, lease);
	unlock_kernel();


	return error;
	return error;
}
}
@@ -1499,9 +1523,9 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
	if (error)
	if (error)
		return error;
		return error;


	lock_kernel();
	lock_flocks();


	error = vfs_setlease(filp, arg, &flp);
	error = __vfs_setlease(filp, arg, &flp);
	if (error || arg == F_UNLCK)
	if (error || arg == F_UNLCK)
		goto out_unlock;
		goto out_unlock;


@@ -1516,7 +1540,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)


	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
out_unlock:
out_unlock:
	unlock_kernel();
	unlock_flocks();
	return error;
	return error;
}
}


@@ -2020,7 +2044,7 @@ void locks_remove_flock(struct file *filp)
			fl.fl_ops->fl_release_private(&fl);
			fl.fl_ops->fl_release_private(&fl);
	}
	}


	lock_kernel();
	lock_flocks();
	before = &inode->i_flock;
	before = &inode->i_flock;


	while ((fl = *before) != NULL) {
	while ((fl = *before) != NULL) {
@@ -2038,7 +2062,7 @@ void locks_remove_flock(struct file *filp)
 		}
 		}
		before = &fl->fl_next;
		before = &fl->fl_next;
	}
	}
	unlock_kernel();
	unlock_flocks();
}
}


/**
/**
@@ -2053,12 +2077,12 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter)
{
{
	int status = 0;
	int status = 0;


	lock_kernel();
	lock_flocks();
	if (waiter->fl_next)
	if (waiter->fl_next)
		__locks_delete_block(waiter);
		__locks_delete_block(waiter);
	else
	else
		status = -ENOENT;
		status = -ENOENT;
	unlock_kernel();
	unlock_flocks();
	return status;
	return status;
}
}


@@ -2172,7 +2196,7 @@ static int locks_show(struct seq_file *f, void *v)


static void *locks_start(struct seq_file *f, loff_t *pos)
static void *locks_start(struct seq_file *f, loff_t *pos)
{
{
	lock_kernel();
	lock_flocks();
	f->private = (void *)1;
	f->private = (void *)1;
	return seq_list_start(&file_lock_list, *pos);
	return seq_list_start(&file_lock_list, *pos);
}
}
@@ -2184,7 +2208,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)


static void locks_stop(struct seq_file *f, void *v)
static void locks_stop(struct seq_file *f, void *v)
{
{
	unlock_kernel();
	unlock_flocks();
}
}


static const struct seq_operations locks_seq_operations = {
static const struct seq_operations locks_seq_operations = {
@@ -2231,7 +2255,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
{
{
	struct file_lock *fl;
	struct file_lock *fl;
	int result = 1;
	int result = 1;
	lock_kernel();
	lock_flocks();
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
		if (IS_POSIX(fl)) {
		if (IS_POSIX(fl)) {
			if (fl->fl_type == F_RDLCK)
			if (fl->fl_type == F_RDLCK)
@@ -2248,7 +2272,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
		result = 0;
		result = 0;
		break;
		break;
	}
	}
	unlock_kernel();
	unlock_flocks();
	return result;
	return result;
}
}


@@ -2271,7 +2295,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
{
{
	struct file_lock *fl;
	struct file_lock *fl;
	int result = 1;
	int result = 1;
	lock_kernel();
	lock_flocks();
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
		if (IS_POSIX(fl)) {
		if (IS_POSIX(fl)) {
			if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
			if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
@@ -2286,7 +2310,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
		result = 0;
		result = 0;
		break;
		break;
	}
	}
	unlock_kernel();
	unlock_flocks();
	return result;
	return result;
}
}


+5 −5
Original line number Original line Diff line number Diff line
@@ -71,20 +71,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
	if (inode->i_flock == NULL)
	if (inode->i_flock == NULL)
		goto out;
		goto out;


	/* Protect inode->i_flock using the BKL */
	/* Protect inode->i_flock using the file locks lock */
	lock_kernel();
	lock_flocks();
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
			continue;
			continue;
		if (nfs_file_open_context(fl->fl_file) != ctx)
		if (nfs_file_open_context(fl->fl_file) != ctx)
			continue;
			continue;
		unlock_kernel();
		unlock_flocks();
		status = nfs4_lock_delegation_recall(state, fl);
		status = nfs4_lock_delegation_recall(state, fl);
		if (status < 0)
		if (status < 0)
			goto out;
			goto out;
		lock_kernel();
		lock_flocks();
	}
	}
	unlock_kernel();
	unlock_flocks();
out:
out:
	return status;
	return status;
}
}
Loading