Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f6656d26 authored by Sunil Mushran, committed by Joel Becker
Browse files

ocfs2/cluster: Make fence method configurable - v2



By default, o2cb fences the box by calling emergency_restart(). While this
scheme works well in production, it gets in the way during testing because
it does not let the tester take stack/core dumps for analysis.

This patch allows the user to dynamically change the fence method to panic() by:
# echo "panic" > /sys/kernel/config/cluster/<clustername>/fence_method

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
parent 57b09bb5
Loading
Loading
Loading
Loading
+51 −0
Original line number Diff line number Diff line
@@ -35,6 +35,10 @@
 * cluster references throughout where nodes are looked up */
struct o2nm_cluster *o2nm_single_cluster = NULL;

/*
 * Textual names of the fence methods.  The table has O2NM_FENCE_METHODS
 * slots; the trailing comment on each entry names the fence-method
 * constant that the entry corresponds to.
 */
char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
		"reset",	/* O2NM_FENCE_RESET */
		"panic",	/* O2NM_FENCE_PANIC */
};

struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
	return o2nm_cluster_attr_write(page, count,
	                               &cluster->cl_reconnect_delay_ms);
}

/*
 * Format the cluster's current fence method name, followed by a newline,
 * into @page.
 *
 * Returns the byte count reported by sprintf(), or 0 when @cluster is NULL
 * (in which case @page is left untouched).
 */
static ssize_t o2nm_cluster_attr_fence_method_read(
	struct o2nm_cluster *cluster, char *page)
{
	const char *method_name;

	if (!cluster)
		return 0;

	method_name = o2nm_fence_method_desc[cluster->cl_fence_method];

	return sprintf(page, "%s\n", method_name);
}

/*
 * Set the cluster's fence method from text in @page.
 *
 * @cluster: cluster whose cl_fence_method is updated
 * @page:    input bytes; must be a fence method name immediately
 *           followed by a single trailing '\n'
 * @count:   number of bytes available in @page
 *
 * The name is compared case-insensitively against every entry of
 * o2nm_fence_method_desc[].  A message is logged only when the method
 * actually changes.
 *
 * Returns @count on a match, or -EINVAL when the input is empty, lacks
 * the trailing newline, or names no known fence method.
 */
static ssize_t o2nm_cluster_attr_fence_method_write(
	struct o2nm_cluster *cluster, const char *page, size_t count)
{
	unsigned int i;

	/* With count == 0, page[count - 1] would read before the buffer. */
	if (count == 0 || page[count - 1] != '\n')
		goto bail;

	for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
		/* Length must be exactly the name plus the newline. */
		if (count != strlen(o2nm_fence_method_desc[i]) + 1)
			continue;
		if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
			continue;
		if (cluster->cl_fence_method != i) {
			printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
			       o2nm_fence_method_desc[i]);
			cluster->cl_fence_method = i;
		}
		return count;
	}

bail:
	return -EINVAL;
}

static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
	.attr	= { .ca_owner = THIS_MODULE,
		    .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
	.store	= o2nm_cluster_attr_reconnect_delay_ms_write,
};

/*
 * "fence_method" cluster attribute: mode is S_IRUGO | S_IWUSR, with the
 * fence_method read/write functions installed as its show/store handlers.
 */
static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
	.attr = {
		.ca_name  = "fence_method",
		.ca_mode  = S_IWUSR | S_IRUGO,
		.ca_owner = THIS_MODULE,
	},
	.store = o2nm_cluster_attr_fence_method_write,
	.show  = o2nm_cluster_attr_fence_method_read,
};

/*
 * NULL-terminated list of pointers to the embedded configfs attributes of
 * the cluster attribute objects (timeouts, delays and fence method).
 */
static struct configfs_attribute *o2nm_cluster_attrs[] = {
	&o2nm_cluster_attr_idle_timeout_ms.attr,
	&o2nm_cluster_attr_keepalive_delay_ms.attr,
	&o2nm_cluster_attr_reconnect_delay_ms.attr,
	&o2nm_cluster_attr_fence_method.attr,
	NULL,
};
static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
	cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
	cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
	cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
	cluster->cl_fence_method       = O2NM_FENCE_RESET;

	ret = &cluster->cl_group;
	o2nm_single_cluster = cluster;
+7 −0
Original line number Diff line number Diff line
@@ -33,6 +33,12 @@
#include <linux/configfs.h>
#include <linux/rbtree.h>

/*
 * Fence methods a cluster can be configured with.  Enumerators are
 * numbered consecutively from zero, so the final entry equals the number
 * of real methods.
 */
enum o2nm_fence_method {
	/* First enumerator: implicitly 0. */
	O2NM_FENCE_RESET,
	O2NM_FENCE_PANIC,
	/* Number of fence methods; keep last. */
	O2NM_FENCE_METHODS,
};

struct o2nm_node {
	spinlock_t		nd_lock;
	struct config_item	nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
	unsigned int		cl_idle_timeout_ms;
	unsigned int		cl_keepalive_delay_ms;
	unsigned int		cl_reconnect_delay_ms;
	enum o2nm_fence_method	cl_fence_method;

	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+14 −2
Original line number Diff line number Diff line
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
	 * threads can still schedule, etc, etc */
	o2hb_stop_all_regions();

	printk("ocfs2 is very sorry to be fencing this system by restarting\n");
	switch (o2nm_single_cluster->cl_fence_method) {
	case O2NM_FENCE_PANIC:
		panic("*** ocfs2 is very sorry to be fencing this system by "
		      "panicing ***\n");
		break;
	default:
		WARN_ON(o2nm_single_cluster->cl_fence_method >=
			O2NM_FENCE_METHODS);
	case O2NM_FENCE_RESET:
		printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
		       "system by restarting ***\n");
		emergency_restart();
		break;
	};
}

/* Indicate that a timeout occurred on a heartbeat region write. The