
Commit 475f230c authored by David Teigland

dlm: fix unlock balance warnings



The in_recovery rw_semaphore has always been acquired and
released by different threads by design.  To work around
the "BUG: bad unlock balance detected!" messages, adjust
things so the dlm_recoverd thread always does both down_write
and up_write.

Signed-off-by: David Teigland <teigland@redhat.com>
parent 6ad22916
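
The handoff is easiest to see in a userspace analogue. The sketch below is illustrative only (not part of the commit; recoverd()/ls_stop() and the two bools are invented stand-ins for dlm_recoverd(), dlm_ls_stop(), LSFL_RECOVER_DOWN and LSFL_RECOVER_LOCK): the stop path no longer write-locks the rwlock itself; it raises a request flag, wakes the worker, and waits for the worker's acknowledgement, so the lock is acquired and released by a single thread.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t in_recovery = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t flag_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t flag_cond = PTHREAD_COND_INITIALIZER;
static bool recover_down;	/* cf. LSFL_RECOVER_DOWN: "please take the lock" */
static bool recover_lock;	/* cf. LSFL_RECOVER_LOCK: "I now hold the lock" */
static bool should_stop;

/* Worker: the only thread that ever locks or unlocks in_recovery. */
static void *recoverd(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&flag_mutex);
	while (!should_stop) {
		while (!recover_down && !should_stop)
			pthread_cond_wait(&flag_cond, &flag_mutex);
		if (recover_down) {
			recover_down = false;
			pthread_rwlock_wrlock(&in_recovery);
			recover_lock = true;		/* acknowledge */
			pthread_cond_broadcast(&flag_cond);
		}
	}
	/* Release before exiting, in the same thread that acquired. */
	if (recover_lock)
		pthread_rwlock_unlock(&in_recovery);
	pthread_mutex_unlock(&flag_mutex);
	return NULL;
}

/* Requester: ask the worker to take the write lock, wait for the ack. */
static void ls_stop(void)
{
	pthread_mutex_lock(&flag_mutex);
	recover_down = true;
	pthread_cond_broadcast(&flag_cond);
	while (!recover_lock)
		pthread_cond_wait(&flag_cond, &flag_mutex);
	pthread_mutex_unlock(&flag_mutex);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, recoverd, NULL);
	ls_stop();
	printf("in_recovery held; only the worker will release it\n");

	pthread_mutex_lock(&flag_mutex);
	should_stop = true;
	pthread_cond_broadcast(&flag_cond);
	pthread_mutex_unlock(&flag_mutex);
	pthread_join(t, NULL);
	return 0;
}

In the kernel the request/ack pair is the atomic flag bits plus the ls_recover_lock_wait waitqueue rather than a mutex and condvar, but the shape is the same.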
fs/dlm/dlm_internal.h  +36 −10
@@ -604,6 +604,7 @@ struct dlm_ls {
	struct idr		ls_recover_idr;
	spinlock_t		ls_recover_idr_lock;
	wait_queue_head_t	ls_wait_general;
+	wait_queue_head_t	ls_recover_lock_wait;
	struct mutex		ls_clear_proc_locks;

	struct list_head	ls_root_list;	/* root resources */
@@ -616,15 +617,40 @@ struct dlm_ls {
	char			ls_name[1];
};

-#define LSFL_WORK		0
-#define LSFL_RUNNING		1
-#define LSFL_RECOVERY_STOP	2
-#define LSFL_RCOM_READY		3
-#define LSFL_RCOM_WAIT		4
-#define LSFL_UEVENT_WAIT	5
-#define LSFL_TIMEWARN		6
-#define LSFL_CB_DELAY		7
-#define LSFL_NODIR		8
+/*
+ * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
+ * that they should abort what they're doing so new recovery can be started.
+ *
+ * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
+ * should do down_write() on the in_recovery rw_semaphore. (doing down_write
+ * within dlm_ls_stop causes complaints about the lock acquired/released
+ * in different contexts.)
+ *
+ * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
+ * It sets this after it is done with down_write() on the in_recovery
+ * rw_semaphore and clears it after it has released the rw_semaphore.
+ *
+ * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
+ * should begin recovery of the lockspace.
+ *
+ * LSFL_RUNNING - set when normal locking activity is enabled.
+ * dlm_ls_stop() clears this to tell dlm locking routines that they should
+ * quit what they are doing so recovery can run.  dlm_recoverd sets
+ * this after recovery is finished.
+ */
+
+#define LSFL_RECOVER_STOP	0
+#define LSFL_RECOVER_DOWN	1
+#define LSFL_RECOVER_LOCK	2
+#define LSFL_RECOVER_WORK	3
+#define LSFL_RUNNING		4
+
+#define LSFL_RCOM_READY		5
+#define LSFL_RCOM_WAIT		6
+#define LSFL_UEVENT_WAIT	7
+#define LSFL_TIMEWARN		8
+#define LSFL_CB_DELAY		9
+#define LSFL_NODIR		10

/* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)

static inline int dlm_recovery_stopped(struct dlm_ls *ls)
{
-	return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
}

static inline int dlm_no_directory(struct dlm_ls *ls)
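
As a reading aid, here is a hypothetical single-threaded trace of the flag sequence the comment block above describes (the struct and event names are invented; no real locking or atomics involved):

#include <stdbool.h>
#include <stdio.h>

/* One bool per LSFL_ bit of interest, in the order documented above. */
struct flags { bool stop, down, lock, work, running; };

static void show(const char *event, struct flags f)
{
	printf("%-22s STOP=%d DOWN=%d LOCK=%d WORK=%d RUNNING=%d\n",
	       event, f.stop, f.down, f.lock, f.work, f.running);
}

int main(void)
{
	struct flags f = { .running = true };

	show("locking enabled", f);

	/* dlm_ls_stop(): abort recovery, ask recoverd to take the rwsem */
	f.stop = true; f.down = true; f.running = false;
	show("dlm_ls_stop()", f);

	/* dlm_recoverd: down_write(), acknowledge with RECOVER_LOCK */
	f.down = false; f.lock = true;
	show("recoverd holds rwsem", f);

	/* dlm_ls_start(): queue new recovery work */
	f.work = true;
	show("dlm_ls_start()", f);

	/* dlm_recoverd: begin recovery, drop the stop request */
	f.work = false; f.stop = false;
	show("recovery running", f);

	/* enable_locking(): up_write(), normal locking resumes */
	f.running = true; f.lock = false;
	show("recovery finished", f);
	return 0;
}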
fs/dlm/lockspace.c  +12 −3
@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
	INIT_LIST_HEAD(&ls->ls_root_list);
	init_rwsem(&ls->ls_root_sem);

-	down_write(&ls->ls_in_recovery);
-
	spin_lock(&lslist_lock);
	ls->ls_create_count = 1;
	list_add(&ls->ls_list, &lslist);
@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
		}
	}

-	/* needs to find ls in lslist */
+	init_waitqueue_head(&ls->ls_recover_lock_wait);
+
+	/*
+	 * Once started, dlm_recoverd first looks for ls in lslist, then
+	 * initializes ls_in_recovery as locked in "down" mode.  We need
+	 * to wait for the wakeup from dlm_recoverd because in_recovery
+	 * has to start out in down mode.
+	 */
+
	error = dlm_recoverd_start(ls);
	if (error) {
		log_error(ls, "can't start dlm_recoverd %d", error);
		goto out_callback;
	}

+	wait_event(ls->ls_recover_lock_wait,
+		   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+
	ls->ls_kobj.kset = dlm_kset;
	error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
				     "%s", ls->ls_name);
fs/dlm/member.c  +11 −6
@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
	down_write(&ls->ls_recv_active);

	/*
-	 * Abort any recovery that's in progress (see RECOVERY_STOP,
+	 * Abort any recovery that's in progress (see RECOVER_STOP,
	 * dlm_recovery_stopped()) and tell any other threads running in the
	 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
	 */

	spin_lock(&ls->ls_recover_lock);
-	set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
	ls->ls_recover_seq++;
	spin_unlock(&ls->ls_recover_lock);
@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
	 *    when recovery is complete.
	 */

-	if (new)
-		down_write(&ls->ls_in_recovery);
+	if (new) {
+		set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
+		wake_up_process(ls->ls_recoverd_task);
+		wait_event(ls->ls_recover_lock_wait,
+			   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+	}

	/*
	 * The recoverd suspend/resume makes sure that dlm_recoverd (if
-	 * running) has noticed RECOVERY_STOP above and quit processing the
+	 * running) has noticed RECOVER_STOP above and quit processing the
	 * previous recovery.
	 */

@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
		kfree(rv_old);
	}

-	dlm_recoverd_kick(ls);
+	set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
+	wake_up_process(ls->ls_recoverd_task);
	return 0;

 fail:
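
Two details in dlm_ls_stop() are worth noting. The bit is set before wake_up_process(), and wait_event() re-tests LSFL_RECOVER_LOCK after any wakeup, so the request cannot be lost between the test and the sleep. And by blocking until dlm_recoverd acknowledges with LSFL_RECOVER_LOCK, dlm_ls_stop() keeps the guarantee the old inline down_write() gave: in_recovery is held, and normal locking blocked, by the time it returns.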
fs/dlm/rcom.c  +1 −1
@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)

	spin_lock(&ls->ls_recover_lock);
	status = ls->ls_recover_status;
-	stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	seq = ls->ls_recover_seq;
	spin_unlock(&ls->ls_recover_lock);

fs/dlm/recoverd.c  +18 −9
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
+		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
		error = 0;
	}
	spin_unlock(&ls->ls_recover_lock);
@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
	rv = ls->ls_recover_args;
	ls->ls_recover_args = NULL;
	if (rv && ls->ls_recover_seq == rv->seq)
-		clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	spin_unlock(&ls->ls_recover_lock);

	if (rv) {
@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
		return -1;
	}

+	down_write(&ls->ls_in_recovery);
+	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+	wake_up(&ls->ls_recover_lock_wait);
+
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
-		if (!test_bit(LSFL_WORK, &ls->ls_flags))
+		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
+		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
			schedule();
		set_current_state(TASK_RUNNING);

-		if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
+		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
+			down_write(&ls->ls_in_recovery);
+			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+			wake_up(&ls->ls_recover_lock_wait);
+		}
+
+		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
			do_ls_recovery(ls);
	}

+	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
+		up_write(&ls->ls_in_recovery);
+
	dlm_put_lockspace(ls);
	return 0;
}

-void dlm_recoverd_kick(struct dlm_ls *ls)
-{
-	set_bit(LSFL_WORK, &ls->ls_flags);
-	wake_up_process(ls->ls_recoverd_task);
-}
-
int dlm_recoverd_start(struct dlm_ls *ls)
{
	struct task_struct *p;
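
The loop ordering in dlm_recoverd() follows the standard kthread idiom: set_current_state(TASK_INTERRUPTIBLE) comes before the flag tests, so a set_bit() plus wake_up_process() that lands between the test and schedule() leaves the task runnable and the sleep falls through immediately. The final LSFL_RECOVER_LOCK test on exit makes the thread release the rw_semaphore it still owns, and dlm_recoverd_kick() disappears because dlm_ls_start() and dlm_ls_stop() now set the relevant bit and wake the task directly.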