[DLM] block dlm_recv in recovery transition (c36258b5) · Commits · e / devices / android_kernel_fairphone_FP5

fs/dlm/dlm_internal.h

+1 −0

Original line number	Original line	Diff line number	Diff line
	@@ -491,6 +491,7 @@ struct dlm_ls {
	uint64_t ls_recover_seq;		uint64_t ls_recover_seq;
	struct dlm_recover *ls_recover_args;		struct dlm_recover *ls_recover_args;
	struct rw_semaphore ls_in_recovery; /* block local requests */		struct rw_semaphore ls_in_recovery; /* block local requests */
			struct rw_semaphore ls_recv_active; /* block dlm_recv */
	struct list_head ls_requestqueue;/* queue remote requests */		struct list_head ls_requestqueue;/* queue remote requests */
	struct mutex ls_requestqueue_mutex;		struct mutex ls_requestqueue_mutex;
	char *ls_recover_buf;		char *ls_recover_buf;

fs/dlm/lock.c

+81 −55

Original line number	Original line	Diff line number	Diff line
	@@ -3638,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
	dlm_put_lkb(lkb);		dlm_put_lkb(lkb);
	}		}

	int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)		static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
	{		{
	struct dlm_message *ms = (struct dlm_message *) hd;
	struct dlm_ls *ls;
	int error = 0;

	if (!recovery)
	dlm_message_in(ms);

	ls = dlm_find_lockspace_global(hd->h_lockspace);
	if (!ls) {
	log_print("drop message %d from %d for unknown lockspace %d",
	ms->m_type, nodeid, hd->h_lockspace);
	return -EINVAL;
	}

	/* recovery may have just ended leaving a bunch of backed-up requests
	in the requestqueue; wait while dlm_recoverd clears them */

	if (!recovery)
	dlm_wait_requestqueue(ls);

	/* recovery may have just started while there were a bunch of
	in-flight requests -- save them in requestqueue to be processed
	after recovery. we can't let dlm_recvd block on the recovery
	lock. if dlm_recoverd is calling this function to clear the
	requestqueue, it needs to be interrupted (-EINTR) if another
	recovery operation is starting. */

	while (1) {
	if (dlm_locking_stopped(ls)) {
	if (recovery) {
	error = -EINTR;
	goto out;
	}
	error = dlm_add_requestqueue(ls, nodeid, hd);
	if (error == -EAGAIN)
	continue;
	else {
	error = -EINTR;
	goto out;
	}
	}

	if (dlm_lock_recovery_try(ls))
	break;
	schedule();
	}

	switch (ms->m_type) {		switch (ms->m_type) {

	/* messages sent to a master node */		/* messages sent to a master node */
	@@ -3761,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
	log_error(ls, "unknown message type %d", ms->m_type);		log_error(ls, "unknown message type %d", ms->m_type);
	}		}

	dlm_unlock_recovery(ls);
	out:
	dlm_put_lockspace(ls);
	dlm_astd_wake();		dlm_astd_wake();
	return error;
	}		}

			/* If the lockspace is in recovery mode (locking stopped), then normal
			messages are saved on the requestqueue for processing after recovery is
			done. When not in recovery mode, we wait for dlm_recoverd to drain saved
			messages off the requestqueue before we process new ones. This occurs right
			after recovery completes when we transition from saving all messages on
			requestqueue, to processing all the saved messages, to processing new
			messages as they arrive. */

	/*		static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
	* Recovery related		int nodeid)
	*/		{
			if (dlm_locking_stopped(ls)) {
			dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms);
			} else {
			dlm_wait_requestqueue(ls);
			_receive_message(ls, ms);
			}
			}

			/* This is called by dlm_recoverd to process messages that were saved on
			the requestqueue. */

			void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
			{
			_receive_message(ls, ms);
			}

			/* This is called by the midcomms layer when something is received for
			the lockspace. It could be either a MSG (normal message sent as part of
			standard locking activity) or an RCOM (recovery message sent as part of
			lockspace recovery). */

			void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
			{
			struct dlm_message *ms = (struct dlm_message *) hd;
			struct dlm_rcom *rc = (struct dlm_rcom *) hd;
			struct dlm_ls *ls;
			int type = 0;

			switch (hd->h_cmd) {
			case DLM_MSG:
			dlm_message_in(ms);
			type = ms->m_type;
			break;
			case DLM_RCOM:
			dlm_rcom_in(rc);
			type = rc->rc_type;
			break;
			default:
			log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
			return;
			}

			if (hd->h_nodeid != nodeid) {
			log_print("invalid h_nodeid %d from %d lockspace %x",
			hd->h_nodeid, nodeid, hd->h_lockspace);
			return;
			}

			ls = dlm_find_lockspace_global(hd->h_lockspace);
			if (!ls) {
			log_print("invalid h_lockspace %x from %d cmd %d type %d",
			hd->h_lockspace, nodeid, hd->h_cmd, type);

			if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
			dlm_send_ls_not_ready(nodeid, rc);
			return;
			}

			/* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
			be inactive (in this ls) before transitioning to recovery mode */

			down_read(&ls->ls_recv_active);
			if (hd->h_cmd == DLM_MSG)
			dlm_receive_message(ls, ms, nodeid);
			else
			dlm_receive_rcom(ls, rc, nodeid);
			up_read(&ls->ls_recv_active);

			dlm_put_lockspace(ls);
			}

	static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)		static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
	{		{

fs/dlm/lock.h

+2 −1

Original line number	Original line	Diff line number	Diff line
	@@ -16,7 +16,8 @@
	void dlm_print_rsb(struct dlm_rsb *r);		void dlm_print_rsb(struct dlm_rsb *r);
	void dlm_dump_rsb(struct dlm_rsb *r);		void dlm_dump_rsb(struct dlm_rsb *r);
	void dlm_print_lkb(struct dlm_lkb *lkb);		void dlm_print_lkb(struct dlm_lkb *lkb);
	int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery);		void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
			void dlm_receive_buffer(struct dlm_header *hd, int nodeid);
	int dlm_modes_compat(int mode1, int mode2);		int dlm_modes_compat(int mode1, int mode2);
	int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,		int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
	unsigned int flags, struct dlm_rsb **r_ret);		unsigned int flags, struct dlm_rsb **r_ret);

fs/dlm/lockspace.c

+1 −0

Original line number	Original line	Diff line number	Diff line
	@@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
	ls->ls_recover_seq = 0;		ls->ls_recover_seq = 0;
	ls->ls_recover_args = NULL;		ls->ls_recover_args = NULL;
	init_rwsem(&ls->ls_in_recovery);		init_rwsem(&ls->ls_in_recovery);
			init_rwsem(&ls->ls_recv_active);
	INIT_LIST_HEAD(&ls->ls_requestqueue);		INIT_LIST_HEAD(&ls->ls_requestqueue);
	mutex_init(&ls->ls_requestqueue_mutex);		mutex_init(&ls->ls_requestqueue_mutex);
	mutex_init(&ls->ls_clear_proc_locks);		mutex_init(&ls->ls_clear_proc_locks);

fs/dlm/member.c

+27 −14

Original line number	Original line	Diff line number	Diff line
	@@ -18,10 +18,6 @@
	#include "rcom.h"		#include "rcom.h"
	#include "config.h"		#include "config.h"

	/*
	* Following called by dlm_recoverd thread
	*/

	static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)		static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
	{		{
	struct dlm_member *memb = NULL;		struct dlm_member *memb = NULL;
	@@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
	return error;		return error;
	}		}

	/*		/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
	* Following called from lockspace.c		dlm_ls_start() is called on any of them to start the new recovery. */
	*/

	int dlm_ls_stop(struct dlm_ls *ls)		int dlm_ls_stop(struct dlm_ls *ls)
	{		{
	int new;		int new;

	/*		/*
	* A stop cancels any recovery that's in progress (see RECOVERY_STOP,		* Prevent dlm_recv from being in the middle of something when we do
	* dlm_recovery_stopped()) and prevents any new locks from being		* the stop. This includes ensuring dlm_recv isn't processing a
	* processed (see RUNNING, dlm_locking_stopped()).		* recovery message (rcom), while dlm_recoverd is aborting and
			* resetting things from an in-progress recovery. i.e. we want
			* dlm_recoverd to abort its recovery without worrying about dlm_recv
			* processing an rcom at the same time. Stopping dlm_recv also makes
			* it easy for dlm_receive_message() to check locking stopped and add a
			* message to the requestqueue without races.
			*/

			down_write(&ls->ls_recv_active);

			/*
			* Abort any recovery that's in progress (see RECOVERY_STOP,
			* dlm_recovery_stopped()) and tell any other threads running in the
			* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
	*/		*/

	spin_lock(&ls->ls_recover_lock);		spin_lock(&ls->ls_recover_lock);
	@@ -270,9 +278,15 @@ int dlm_ls_stop(struct dlm_ls *ls)
	ls->ls_recover_seq++;		ls->ls_recover_seq++;
	spin_unlock(&ls->ls_recover_lock);		spin_unlock(&ls->ls_recover_lock);

			/*
			* Let dlm_recv run again, now any normal messages will be saved on the
			* requestqueue for later.
			*/

			up_write(&ls->ls_recv_active);

	/*		/*
	* This in_recovery lock does two things:		* This in_recovery lock does two things:
	*
	* 1) Keeps this function from returning until all threads are out		* 1) Keeps this function from returning until all threads are out
	* of locking routines and locking is truly stopped.		* of locking routines and locking is truly stopped.
	* 2) Keeps any new requests from being processed until it's unlocked		* 2) Keeps any new requests from being processed until it's unlocked
	@@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls)

	/*		/*
	* The recoverd suspend/resume makes sure that dlm_recoverd (if		* The recoverd suspend/resume makes sure that dlm_recoverd (if
	* running) has noticed the clearing of RUNNING above and quit		* running) has noticed RECOVERY_STOP above and quit processing the
	* processing the previous recovery. This will be true for all nodes		* previous recovery.
	* before any nodes start the new recovery.
	*/		*/

	dlm_recoverd_suspend(ls);		dlm_recoverd_suspend(ls);