Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3ae1acf9 authored by David Teigland, committed by Steven Whitehouse
Browse files

[DLM] add lock timeouts and warnings [2/6]



New features: lock timeouts and time warnings.  If the DLM_LKF_TIMEOUT
flag is set, then the request/conversion will be canceled after waiting
the specified number of centiseconds (specified per lock).  This feature
is only available for locks requested through libdlm (can be enabled for
kernel dlm users if there's a use for it.)

If the new DLM_LSFL_TIMEWARN flag is set when creating the lockspace, then
a warning message will be sent to userspace (using genetlink) after a
request/conversion has been waiting for a given number of centiseconds
(configurable per node).  The time warnings will be used in the future
to do deadlock detection in userspace.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 85e86edf
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -8,6 +8,7 @@ dlm-y := ast.o \
				member.o \
				member.o \
				memory.o \
				memory.o \
				midcomms.o \
				midcomms.o \
				netlink.o \
				lowcomms.o \
				lowcomms.o \
				rcom.o \
				rcom.o \
				recover.o \
				recover.o \
+7 −1
Original line number Original line Diff line number Diff line
@@ -90,6 +90,7 @@ struct cluster {
	unsigned int cl_scan_secs;
	unsigned int cl_scan_secs;
	unsigned int cl_log_debug;
	unsigned int cl_log_debug;
	unsigned int cl_protocol;
	unsigned int cl_protocol;
	unsigned int cl_timewarn_cs;
};
};


enum {
enum {
@@ -103,6 +104,7 @@ enum {
	CLUSTER_ATTR_SCAN_SECS,
	CLUSTER_ATTR_SCAN_SECS,
	CLUSTER_ATTR_LOG_DEBUG,
	CLUSTER_ATTR_LOG_DEBUG,
	CLUSTER_ATTR_PROTOCOL,
	CLUSTER_ATTR_PROTOCOL,
	CLUSTER_ATTR_TIMEWARN_CS,
};
};


struct cluster_attribute {
struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(protocol, 0);
CLUSTER_ATTR(protocol, 0);
CLUSTER_ATTR(timewarn_cs, 1);


static struct configfs_attribute *cluster_attrs[] = {
static struct configfs_attribute *cluster_attrs[] = {
	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
	NULL,
	NULL,
};
};


@@ -916,6 +920,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_SCAN_SECS          5
#define DEFAULT_SCAN_SECS          5
#define DEFAULT_LOG_DEBUG          0
#define DEFAULT_LOG_DEBUG          0
#define DEFAULT_PROTOCOL           0
#define DEFAULT_PROTOCOL           0
#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */


struct dlm_config_info dlm_config = {
struct dlm_config_info dlm_config = {
	.ci_tcp_port = DEFAULT_TCP_PORT,
	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -927,6 +932,7 @@ struct dlm_config_info dlm_config = {
	.ci_toss_secs = DEFAULT_TOSS_SECS,
	.ci_toss_secs = DEFAULT_TOSS_SECS,
	.ci_scan_secs = DEFAULT_SCAN_SECS,
	.ci_scan_secs = DEFAULT_SCAN_SECS,
	.ci_log_debug = DEFAULT_LOG_DEBUG,
	.ci_log_debug = DEFAULT_LOG_DEBUG,
	.ci_protocol = DEFAULT_PROTOCOL
	.ci_protocol = DEFAULT_PROTOCOL,
	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS
};
};
+1 −0
Original line number Original line Diff line number Diff line
@@ -27,6 +27,7 @@ struct dlm_config_info {
	int ci_scan_secs;
	int ci_scan_secs;
	int ci_log_debug;
	int ci_log_debug;
	int ci_protocol;
	int ci_protocol;
	int ci_timewarn_cs;
};
};


extern struct dlm_config_info dlm_config;
extern struct dlm_config_info dlm_config;
+10 −0
Original line number Original line Diff line number Diff line
@@ -213,8 +213,10 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK  0x00080000
#define DLM_IFL_OVERLAP_UNLOCK  0x00080000
#define DLM_IFL_OVERLAP_CANCEL  0x00100000
#define DLM_IFL_OVERLAP_CANCEL  0x00100000
#define DLM_IFL_ENDOFLIFE	0x00200000
#define DLM_IFL_ENDOFLIFE	0x00200000
#define DLM_IFL_WATCH_TIMEWARN	0x00400000
#define DLM_IFL_USER		0x00000001
#define DLM_IFL_USER		0x00000001
#define DLM_IFL_ORPHAN		0x00000002
#define DLM_IFL_ORPHAN		0x00000002
#define DLM_IFL_TIMEOUT_CANCEL	0x00000004


struct dlm_lkb {
struct dlm_lkb {
	struct dlm_rsb		*lkb_resource;	/* the rsb */
	struct dlm_rsb		*lkb_resource;	/* the rsb */
@@ -243,6 +245,9 @@ struct dlm_lkb {
	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
	struct list_head	lkb_astqueue;	/* need ast to be sent */
	struct list_head	lkb_astqueue;	/* need ast to be sent */
	struct list_head	lkb_ownqueue;	/* list of locks for a process */
	struct list_head	lkb_ownqueue;	/* list of locks for a process */
	struct list_head	lkb_time_list;
	unsigned long		lkb_timestamp;
	unsigned long		lkb_timeout_cs;


	char			*lkb_lvbptr;
	char			*lkb_lvbptr;
	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
@@ -447,6 +452,9 @@ struct dlm_ls {
	struct mutex		ls_orphans_mutex;
	struct mutex		ls_orphans_mutex;
	struct list_head	ls_orphans;
	struct list_head	ls_orphans;


	struct mutex		ls_timeout_mutex;
	struct list_head	ls_timeout;

	struct list_head	ls_nodes;	/* current nodes in ls */
	struct list_head	ls_nodes;	/* current nodes in ls */
	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
	int			ls_num_nodes;	/* number of nodes in ls */
	int			ls_num_nodes;	/* number of nodes in ls */
@@ -472,6 +480,7 @@ struct dlm_ls {
	struct task_struct	*ls_recoverd_task;
	struct task_struct	*ls_recoverd_task;
	struct mutex		ls_recoverd_active;
	struct mutex		ls_recoverd_active;
	spinlock_t		ls_recover_lock;
	spinlock_t		ls_recover_lock;
	unsigned long		ls_recover_begin; /* jiffies timestamp */
	uint32_t		ls_recover_status; /* DLM_RS_ */
	uint32_t		ls_recover_status; /* DLM_RS_ */
	uint64_t		ls_recover_seq;
	uint64_t		ls_recover_seq;
	struct dlm_recover	*ls_recover_args;
	struct dlm_recover	*ls_recover_args;
@@ -501,6 +510,7 @@ struct dlm_ls {
#define LSFL_RCOM_READY		3
#define LSFL_RCOM_READY		3
#define LSFL_RCOM_WAIT		4
#define LSFL_RCOM_WAIT		4
#define LSFL_UEVENT_WAIT	5
#define LSFL_UEVENT_WAIT	5
#define LSFL_TIMEWARN		6


/* much of this is just saving user space pointers associated with the
/* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */
   lock that we pass back to the user lib with an ast */
+144 −2
Original line number Original line Diff line number Diff line
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
				    struct dlm_message *ms);
				    struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
static void del_timeout(struct dlm_lkb *lkb);
void dlm_timeout_warn(struct dlm_lkb *lkb);


/*
/*
 * Lock compatibility matrix - thanks Steve
 * Lock compatibility matrix - thanks Steve
@@ -286,8 +289,17 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
	if (is_master_copy(lkb))
	if (is_master_copy(lkb))
		return;
		return;


	del_timeout(lkb);

	DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
	DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););


	/* if the operation was a cancel, then return -DLM_ECANCEL, if a
	   timeout caused the cancel then return -ETIMEDOUT */
	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
		lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
		rv = -ETIMEDOUT;
	}

	lkb->lkb_lksb->sb_status = rv;
	lkb->lkb_lksb->sb_status = rv;
	lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
	lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;


@@ -581,6 +593,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
	kref_init(&lkb->lkb_ref);
	kref_init(&lkb->lkb_ref);
	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
	INIT_LIST_HEAD(&lkb->lkb_time_list);


	get_random_bytes(&bucket, sizeof(bucket));
	get_random_bytes(&bucket, sizeof(bucket));
	bucket &= (ls->ls_lkbtbl_size - 1);
	bucket &= (ls->ls_lkbtbl_size - 1);
@@ -993,6 +1006,125 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
	}
	}
}
}


/*
 * Start timing a lock that has just been queued to wait.
 *
 * The lkb is put on the lockspace timeout list when at least one of the
 * following applies:
 *   - the lockspace has LSFL_TIMEWARN set and the lock was not requested
 *     with DLM_LKF_NODLCKWT: DLM_IFL_WATCH_TIMEWARN is set on the lkb so
 *     dlm_scan_timeout() will issue a warning for it;
 *   - the lock was requested with DLM_LKF_TIMEOUT.
 *
 * Master copies are never timed.  While on the list the lkb carries an
 * extra reference, which del_timeout() drops.
 */
static void add_timeout(struct dlm_lkb *lkb)
{
	struct dlm_ls *ls = lkb->lkb_resource->res_ls;

	if (is_master_copy(lkb))
		return;

	/* Check the warning case first.  A lock may want both a time warning
	   and a timeout, and dlm_scan_timeout() handles the two independently.
	   Testing DLM_LKF_TIMEOUT first would skip setting WATCH_TIMEWARN and
	   silently suppress warnings for every lock that has a timeout. */
	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
		goto add_it;
	}

	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
		goto add_it;
	return;

 add_it:
	DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
	mutex_lock(&ls->ls_timeout_mutex);
	/* the list holds its own reference on the lkb */
	hold_lkb(lkb);
	lkb->lkb_timestamp = jiffies;
	list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
	mutex_unlock(&ls->ls_timeout_mutex);
}

/*
 * Take an lkb off its lockspace's timeout list, if it is on it, and drop
 * the reference that was taken when it was queued there.  An lkb that is
 * not on the list is left untouched.
 */
static void del_timeout(struct dlm_lkb *lkb)
{
	struct dlm_ls *lockspace = lkb->lkb_resource->res_ls;
	struct list_head *entry = &lkb->lkb_time_list;

	mutex_lock(&lockspace->ls_timeout_mutex);
	if (list_empty(entry))
		goto unlock;

	/* list_del_init re-initializes the entry, so the list_empty() test
	   above keeps working on later calls */
	list_del_init(entry);
	unhold_lkb(lkb);
 unlock:
	mutex_unlock(&lockspace->ls_timeout_mutex);
}

/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
   lkb_timeout_cs without lock_rsb?  Note: we can't lock timeout_mutex
   and then lock rsb because of lock ordering in add_timeout.  We may need
   to specify some special timeout-related bits in the lkb that are just to
   be accessed under the timeout_mutex. */

/* Scan the lockspace timeout list and act on locks that have waited too
   long.  Each pass of the outer loop picks the first lkb on ls_timeout whose
   per-lock timeout (DLM_LKF_TIMEOUT, lkb_timeout_cs) and/or time-warning
   period (DLM_IFL_WATCH_TIMEWARN, dlm_config.ci_timewarn_cs) has expired,
   then handles it under the rsb lock: a warning is issued via
   dlm_timeout_warn() and/or the lock is canceled via _cancel_lock().
   The loop ends when locking is stopped or no expired lkb remains. */

void dlm_scan_timeout(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	struct dlm_lkb *lkb;
	int do_cancel, do_warn;

	for (;;) {
		if (dlm_locking_stopped(ls))
			break;

		do_cancel = 0;
		do_warn = 0;
		mutex_lock(&ls->ls_timeout_mutex);
		list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {

			/* both periods are in centiseconds; "* HZ/100"
			   converts them to jiffies */
			if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
			    time_after_eq(jiffies, lkb->lkb_timestamp +
					  lkb->lkb_timeout_cs * HZ/100))
				do_cancel = 1;

			if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
			    time_after_eq(jiffies, lkb->lkb_timestamp +
				   	   dlm_config.ci_timewarn_cs * HZ/100))
				do_warn = 1;

			if (!do_cancel && !do_warn)
				continue;
			/* take a reference so the lkb can still be used
			   after the timeout mutex is dropped */
			hold_lkb(lkb);
			break;
		}
		mutex_unlock(&ls->ls_timeout_mutex);

		/* nothing on the list has expired: done */
		if (!do_cancel && !do_warn)
			break;

		/* the rsb is locked only after ls_timeout_mutex has been
		   released; del_timeout() below retakes the mutex */
		r = lkb->lkb_resource;
		hold_rsb(r);
		lock_rsb(r);

		if (do_warn) {
			/* clear flag so we only warn once */
			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
			/* without a timeout there is nothing left to watch
			   for, so the lkb can leave the list now */
			if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
				del_timeout(lkb);
			dlm_timeout_warn(lkb);
		}

		if (do_cancel) {
			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
			/* mark the cancel as timeout-driven; queue_cast()
			   turns -DLM_ECANCEL into -ETIMEDOUT when it sees
			   this flag */
			lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
			del_timeout(lkb);
			_cancel_lock(r, lkb);
		}

		unlock_rsb(r);
		unhold_rsb(r);
		/* drop the reference taken by hold_lkb() during the scan */
		dlm_put_lkb(lkb);
	}
}

/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
   dlm_recoverd before checking/setting ls_recover_begin. */

void dlm_adjust_timeouts(struct dlm_ls *ls)
{
	struct dlm_lkb *lkb;
	long adj = jiffies - ls->ls_recover_begin;

	ls->ls_recover_begin = 0;
	mutex_lock(&ls->ls_timeout_mutex);
	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
		lkb->lkb_timestamp += adj;
	mutex_unlock(&ls->ls_timeout_mutex);
}

/* lkb is master or local copy */
/* lkb is master or local copy */


static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1902,6 +2034,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
		if (is_overlap(lkb))
		if (is_overlap(lkb))
			goto out;
			goto out;


		/* don't let scand try to do a cancel */
		del_timeout(lkb);

		if (lkb->lkb_flags & DLM_IFL_RESEND) {
		if (lkb->lkb_flags & DLM_IFL_RESEND) {
			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
			rv = -EBUSY;
			rv = -EBUSY;
@@ -1933,6 +2068,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
		if (is_overlap_unlock(lkb))
		if (is_overlap_unlock(lkb))
			goto out;
			goto out;


		/* don't let scand try to do a cancel */
		del_timeout(lkb);

		if (lkb->lkb_flags & DLM_IFL_RESEND) {
		if (lkb->lkb_flags & DLM_IFL_RESEND) {
			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
			rv = -EBUSY;
			rv = -EBUSY;
@@ -1993,6 +2131,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
		error = -EINPROGRESS;
		error = -EINPROGRESS;
		add_lkb(r, lkb, DLM_LKSTS_WAITING);
		add_lkb(r, lkb, DLM_LKSTS_WAITING);
		send_blocking_asts(r, lkb);
		send_blocking_asts(r, lkb);
		add_timeout(lkb);
		goto out;
		goto out;
	}
	}


@@ -2040,6 +2179,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
		del_lkb(r, lkb);
		del_lkb(r, lkb);
		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
		send_blocking_asts(r, lkb);
		send_blocking_asts(r, lkb);
		add_timeout(lkb);
		goto out;
		goto out;
	}
	}


@@ -3110,9 +3250,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
		lkb->lkb_remid = ms->m_lkid;
		lkb->lkb_remid = ms->m_lkid;
		if (is_altmode(lkb))
		if (is_altmode(lkb))
			munge_altmode(lkb, ms);
			munge_altmode(lkb, ms);
		if (result)
		if (result) {
			add_lkb(r, lkb, DLM_LKSTS_WAITING);
			add_lkb(r, lkb, DLM_LKSTS_WAITING);
		else {
			add_timeout(lkb);
		} else {
			grant_lock_pc(r, lkb, ms);
			grant_lock_pc(r, lkb, ms);
			queue_cast(r, lkb, 0);
			queue_cast(r, lkb, 0);
		}
		}
@@ -3178,6 +3319,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
			munge_demoted(lkb, ms);
			munge_demoted(lkb, ms);
		del_lkb(r, lkb);
		del_lkb(r, lkb);
		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
		add_timeout(lkb);
		break;
		break;


	case 0:
	case 0:
Loading