Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f5b2b966 authored by Jay Vosburgh, committed by Jeff Garzik
Browse files

[PATCH] bonding: Validate probe replies in ARP monitor



	Add logic to check ARP request / reply packets used for ARP
monitor link integrity checking.

	The current method simply examines the slave device to see if it
has sent and received traffic; this can be fooled by extraneous traffic.
For example, if multiple hosts running bonding are behind a common
switch, the probe traffic from the multiple instances of bonding will
update the tx/rx times on each other's slave devices.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
parent 70298705
Loading
Loading
Loading
Loading
+59 −0
Original line number Diff line number Diff line
@@ -192,6 +192,17 @@ or, for backwards compatibility, the option value. E.g.,
arp_interval

	Specifies the ARP link monitoring frequency in milliseconds.

	The ARP monitor works by periodically checking the slave
	devices to determine whether they have sent or received
	traffic recently (the precise criteria depend upon the
	bonding mode, and the state of the slave).  Regular traffic is
	generated via ARP probes issued for the addresses specified by
	the arp_ip_target option.

	This behavior can be modified by the arp_validate option,
	below.

	If ARP monitoring is used in an etherchannel compatible mode
	(modes 0 and 2), the switch should be configured in a mode
	that evenly distributes packets across all links. If the
@@ -213,6 +224,54 @@ arp_ip_target
	maximum number of targets that can be specified is 16.  The
	default value is no IP addresses.

arp_validate

	Specifies whether or not ARP probes and replies should be
	validated in the active-backup mode.  This causes the ARP
	monitor to examine the incoming ARP requests and replies, and
	only consider a slave to be up if it is receiving the
	appropriate ARP traffic.

	Possible values are:

	none or 0

		No validation is performed.  This is the default.

	active or 1

		Validation is performed only for the active slave.

	backup or 2

		Validation is performed only for backup slaves.

	all or 3

		Validation is performed for all slaves.

	For the active slave, the validation checks ARP replies to
	confirm that they were generated by an arp_ip_target.  Since
	backup slaves do not typically receive these replies, the
	validation performed for backup slaves is on the ARP request
	sent out via the active slave.  It is possible that some
	switch or network configurations may result in situations
	wherein the backup slaves do not receive the ARP requests; in
	such a situation, validation of backup slaves must be
	disabled.

	This option is useful in network configurations in which
	multiple bonding hosts are concurrently issuing ARPs to one or
	more targets beyond a common switch.  Should the link between
	the switch and target fail (but not the switch itself), the
	probe traffic generated by the multiple bonding instances will
	fool the standard ARP monitor into considering the links as
	still up.  Use of the arp_validate option can resolve this, as
	the ARP monitor will only consider ARP requests and replies
	associated with its own instance of bonding.

	This option was added in bonding version 3.1.0.

downdelay

	Specifies the time, in milliseconds, to wait before disabling
+176 −6
Original line number Diff line number Diff line
@@ -96,6 +96,7 @@ static char *lacp_rate = NULL;
static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
static char *arp_validate = NULL;
struct bond_params bonding_defaults;

module_param(max_bonds, int, 0);
@@ -127,6 +128,8 @@ module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");

/*----------------------------- Global variables ----------------------------*/

@@ -170,6 +173,14 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = {
{	NULL,			-1},
};

/*
 * Name <-> value table for the arp_validate option, terminated by a
 * { NULL, -1 } sentinel.
 *
 * Users of this table index it directly with the numeric arp_validate
 * value (arp_validate_tbl[value].modename), so each entry must sit at the
 * array position equal to its BOND_ARP_VALIDATE_* value.  Keep the order
 * in step with those definitions when adding entries.
 */
struct bond_parm_tbl arp_validate_tbl[] = {
{	"none",			BOND_ARP_VALIDATE_NONE},
{	"active",		BOND_ARP_VALIDATE_ACTIVE},
{	"backup",		BOND_ARP_VALIDATE_BACKUP},
{	"all",			BOND_ARP_VALIDATE_ALL},
{	NULL,			-1},
};

/*-------------------------- Forward declarations ---------------------------*/

static void bond_send_gratuitous_arp(struct bonding *bond);
@@ -1424,6 +1435,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)

	bond_compute_features(bond);

	new_slave->last_arp_rx = jiffies;

	if (bond->params.miimon && !bond->params.use_carrier) {
		link_reporting = bond_check_dev_link(bond, slave_dev, 1);

@@ -1785,7 +1798,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
	dev_set_mac_address(slave_dev, &addr);

	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
				   IFF_SLAVE_INACTIVE | IFF_BONDING);
				   IFF_SLAVE_INACTIVE | IFF_BONDING |
				   IFF_SLAVE_NEEDARP);

	kfree(slave);

@@ -2298,6 +2312,25 @@ static int bond_has_ip(struct bonding *bond)
	return 0;
}

/*
 * Report whether @ip belongs to this bond: returns 1 when it equals the
 * bond's master_ip or the vlan_ip of any entry on bond->vlan_list, and 0
 * otherwise.
 */
static int bond_has_this_ip(struct bonding *bond, u32 ip)
{
	struct vlan_entry *entry, *next_entry;

	if (bond->master_ip == ip)
		return 1;

	/* An empty VLAN list yields zero iterations and falls through. */
	list_for_each_entry_safe(entry, next_entry, &bond->vlan_list,
				 vlan_list) {
		if (entry->vlan_ip == ip)
			return 1;
	}

	return 0;
}

/*
 * We go to the (large) trouble of VLAN tagging ARP frames because
 * switches in VLAN mode (especially if ports are configured as
@@ -2436,6 +2469,93 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
	}
}

/*
 * Check one (source IP, target IP) pair against the bond's configuration.
 *
 * Walks the configured ARP targets (stopping at the first zero entry or
 * after BOND_MAX_ARP_TARGETS).  On the first target equal to @sip the
 * search ends; slave->last_arp_rx is stamped with the current jiffies only
 * if @tip is one of the bond's own addresses.
 */
static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip)
{
	u32 *targets = bond->params.arp_targets;
	int i;

	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {
		if (!targets[i])
			break;

		dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] "
			"%u.%u.%u.%u bhti(tip) %d\n",
		       NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]),
		       bond_has_this_ip(bond, tip));

		if (sip != targets[i])
			continue;

		/* Sender is a known target: accept only if aimed at us. */
		if (bond_has_this_ip(bond, tip))
			slave->last_arp_rx = jiffies;
		return;
	}
}

/*
 * Packet handler for ARP frames, installed by bond_register_arp().
 *
 * Frames whose @dev is not a bonding master are ignored.  Otherwise the
 * slave that corresponds to @orig_dev is looked up under bond->lock (read)
 * and, if ARP validation is enabled for that slave's current state, the
 * sender and target IPs are extracted and handed to bond_validate_arp(),
 * which may refresh slave->last_arp_rx.
 *
 * The skb is always freed here and NET_RX_SUCCESS is always returned,
 * whether or not the frame was accepted.
 */
static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct arphdr *arp;
	struct slave *slave;
	struct bonding *bond;
	unsigned char *arp_ptr;
	u32 sip, tip;

	/* Only frames delivered on a bonding master are of interest. */
	if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
		goto out;

	bond = dev->priv;
	read_lock(&bond->lock);

	dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
		bond->dev->name, skb->dev ? skb->dev->name : "NULL",
		orig_dev ? orig_dev->name : "NULL");

	/* Skip frames from non-slaves and slaves not subject to validation. */
	slave = bond_get_slave_by_dev(bond, orig_dev);
	if (!slave || !slave_do_arp_validate(bond, slave))
		goto out_unlock;

	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
				 (2 * dev->addr_len) +
				 (2 * sizeof(u32)))))
		goto out_unlock;

	/* Accept only Ethernet/IPv4 ARP addressed to us or broadcast. */
	arp = skb->nh.arph;
	if (arp->ar_hln != dev->addr_len ||
	    skb->pkt_type == PACKET_OTHERHOST ||
	    skb->pkt_type == PACKET_LOOPBACK ||
	    arp->ar_hrd != htons(ARPHRD_ETHER) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_pln != 4)
		goto out_unlock;

	/*
	 * Payload layout after the fixed header: sender hardware address,
	 * sender IP, target hardware address, target IP.  memcpy is used
	 * for the IPs rather than a cast of arp_ptr.
	 */
	arp_ptr = (unsigned char *)(arp + 1);
	arp_ptr += dev->addr_len;
	memcpy(&sip, arp_ptr, 4);
	arp_ptr += 4 + dev->addr_len;
	memcpy(&tip, arp_ptr, 4);

	dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u"
		" tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name,
		slave->state, bond->params.arp_validate,
		slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip));

	/*
	 * Backup slaves won't see the ARP reply, but do come through
	 * here for each ARP probe (so we swap the sip/tip to validate
	 * the probe).  In a "redundant switch, common router" type of
	 * configuration, the ARP probe will (hopefully) travel from
	 * the active, through one switch, the router, then the other
	 * switch before reaching the backup.
	 */
	if (slave->state == BOND_STATE_ACTIVE)
		bond_validate_arp(bond, slave, sip, tip);
	else
		bond_validate_arp(bond, slave, tip, sip);

out_unlock:
	read_unlock(&bond->lock);
out:
	/* Every path consumes the skb. */
	dev_kfree_skb(skb);
	return NET_RX_SUCCESS;
}

/*
 * this function is called regularly to monitor each slave's link
 * ensuring that traffic is being sent and received when arp monitoring
@@ -2600,7 +2720,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
	 */
	bond_for_each_slave(bond, slave, i) {
		if (slave->link != BOND_LINK_UP) {
			if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) {
			if ((jiffies - slave_last_rx(bond, slave)) <=
			     delta_in_ticks) {

				slave->link = BOND_LINK_UP;

@@ -2645,7 +2766,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)

			if ((slave != bond->curr_active_slave) &&
			    (!bond->current_arp_slave) &&
			    (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
			    (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) &&
			     bond_has_ip(bond))) {
				/* a backup slave has gone down; three times
				 * the delta allows the current slave to be
@@ -2692,7 +2813,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
		 * if it is up and needs to take over as the curr_active_slave
		 */
		if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
	    (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
	    (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) &&
	     bond_has_ip(bond))) &&
		    ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {

@@ -3315,6 +3436,21 @@ static void bond_unregister_lacpdu(struct bonding *bond)
	dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
}

/*
 * Install bond_arp_rcv() as the handler for ETH_P_ARP frames, using the
 * packet_type embedded in @bond (bond->arp_mon_pt).
 *
 * NOTE(review): this performs no check for an earlier registration of the
 * same packet_type; callers appear to be responsible for pairing it with
 * bond_unregister_arp() -- confirm.
 */
void bond_register_arp(struct bonding *bond)
{
	struct packet_type *arp_pt = &bond->arp_mon_pt;

	arp_pt->func = bond_arp_rcv;
	arp_pt->type = htons(ETH_P_ARP);
	/* Deliberately not bound to bond->dev; bond_arp_rcv() filters by device. */
	arp_pt->dev = NULL; /*bond->dev;XXX*/
	dev_add_pack(arp_pt);
}

/*
 * Remove the ARP packet handler stored in @bond (bond->arp_mon_pt) that
 * bond_register_arp() installed.
 */
void bond_unregister_arp(struct bonding *bond)
{
	struct packet_type *arp_pt = &bond->arp_mon_pt;

	dev_remove_pack(arp_pt);
}

/*---------------------------- Hashing Policies -----------------------------*/

/*
@@ -3401,6 +3537,9 @@ static int bond_open(struct net_device *bond_dev)
		} else {
			arp_timer->function = (void *)&bond_loadbalance_arp_mon;
		}
		if (bond->params.arp_validate)
			bond_register_arp(bond);

		add_timer(arp_timer);
	}

@@ -3428,6 +3567,9 @@ static int bond_close(struct net_device *bond_dev)
		bond_unregister_lacpdu(bond);
	}

	if (bond->params.arp_validate)
		bond_unregister_arp(bond);

	write_lock_bh(&bond->lock);


@@ -4281,6 +4423,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)

static int bond_check_params(struct bond_params *params)
{
	int arp_validate_value;

	/*
	 * Convert string parameters.
	 */
@@ -4484,6 +4628,29 @@ static int bond_check_params(struct bond_params *params)
		arp_interval = 0;
	}

	if (arp_validate) {
		if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
			printk(KERN_ERR DRV_NAME
	       ": arp_validate only supported in active-backup mode\n");
			return -EINVAL;
		}
		if (!arp_interval) {
			printk(KERN_ERR DRV_NAME
			       ": arp_validate requires arp_interval\n");
			return -EINVAL;
		}

		arp_validate_value = bond_parse_parm(arp_validate,
						     arp_validate_tbl);
		if (arp_validate_value == -1) {
			printk(KERN_ERR DRV_NAME
			       ": Error: invalid arp_validate \"%s\"\n",
			       arp_validate == NULL ? "NULL" : arp_validate);
			return -EINVAL;
		}
	} else
		arp_validate_value = 0;

	if (miimon) {
		printk(KERN_INFO DRV_NAME
		       ": MII link monitoring set to %d ms\n",
@@ -4492,8 +4659,10 @@ static int bond_check_params(struct bond_params *params)
		int i;

		printk(KERN_INFO DRV_NAME
		       ": ARP monitoring set to %d ms with %d target(s):",
		       arp_interval, arp_ip_count);
		       ": ARP monitoring set to %d ms, validate %s, with %d target(s):",
		       arp_interval,
		       arp_validate_tbl[arp_validate_value].modename,
		       arp_ip_count);

		for (i = 0; i < arp_ip_count; i++)
			printk (" %s", arp_ip_target[i]);
@@ -4527,6 +4696,7 @@ static int bond_check_params(struct bond_params *params)
	params->xmit_policy = xmit_hashtype;
	params->miimon = miimon;
	params->arp_interval = arp_interval;
	params->arp_validate = arp_validate_value;
	params->updelay = updelay;
	params->downdelay = downdelay;
	params->use_carrier = use_carrier;
+54 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ extern struct bond_params bonding_defaults;
extern struct bond_parm_tbl bond_mode_tbl[];
extern struct bond_parm_tbl bond_lacp_tbl[];
extern struct bond_parm_tbl xmit_hashtype_tbl[];
extern struct bond_parm_tbl arp_validate_tbl[];

static int expected_refcount = -1;
static struct class *netdev_class;
@@ -502,6 +503,53 @@ static ssize_t bonding_store_xmit_hash(struct class_device *cd, const char *buf,
}
static CLASS_DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, bonding_show_xmit_hash, bonding_store_xmit_hash);

/*
 * Show and set arp_validate.
 */
/*
 * sysfs show handler for arp_validate.
 *
 * Writes "<mode name> <numeric value>\n" for the bond's current
 * arp_validate setting into @buf and returns the number of characters
 * written.  The terminating NUL is not counted: a show() method must
 * report only the bytes of output, otherwise readers of the attribute
 * receive a stray '\0' after the newline.
 */
static ssize_t bonding_show_arp_validate(struct class_device *cd, char *buf)
{
	struct bonding *bond = to_bond(cd);

	return sprintf(buf, "%s %d\n",
		       arp_validate_tbl[bond->params.arp_validate].modename,
		       bond->params.arp_validate);
}

/*
 * sysfs store handler for arp_validate.
 *
 * Parses @buf against arp_validate_tbl.  An unrecognised value, or a
 * non-zero value while the bond is not in active-backup mode, is rejected
 * with -EINVAL.  Otherwise the ARP packet handler is registered when
 * validation goes from off to on, unregistered when it goes from on to
 * off, and the new value is stored.  Returns @count on success.
 *
 * NOTE(review): the register/unregister decision here depends only on the
 * old and new option values, not on whether the bond is open or whether
 * arp_interval is set -- confirm this cannot register the handler twice or
 * unregister one that was never added.
 */
static ssize_t bonding_store_arp_validate(struct class_device *cd, const char *buf, size_t count)
{
	struct bonding *bond = to_bond(cd);
	int requested = bond_parse_parm((char *)buf, arp_validate_tbl);
	int previous;

	if (requested < 0) {
		printk(KERN_ERR DRV_NAME
		       ": %s: Ignoring invalid arp_validate value %s\n",
		       bond->dev->name, buf);
		return -EINVAL;
	}

	if (requested != 0 && bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
		printk(KERN_ERR DRV_NAME
		       ": %s: arp_validate only supported in active-backup mode.\n",
		       bond->dev->name);
		return -EINVAL;
	}

	printk(KERN_INFO DRV_NAME ": %s: setting arp_validate to %s (%d).\n",
	       bond->dev->name, arp_validate_tbl[requested].modename,
	       requested);

	/* Only an off<->on transition touches the packet handler. */
	previous = bond->params.arp_validate;
	if (previous == 0 && requested != 0)
		bond_register_arp(bond);
	else if (previous != 0 && requested == 0)
		bond_unregister_arp(bond);

	bond->params.arp_validate = requested;

	return count;
}

static CLASS_DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate);

/*
 * Show and set the arp timer interval.  There are two tricky bits
 * here.  First, if ARP monitoring is activated, then we must disable
@@ -914,6 +962,11 @@ static ssize_t bonding_store_miimon(struct class_device *cd, const char *buf, si
			       "ARP monitoring. Disabling ARP monitoring...\n",
			       bond->dev->name);
			bond->params.arp_interval = 0;
			if (bond->params.arp_validate) {
				bond_unregister_arp(bond);
				bond->params.arp_validate =
					BOND_ARP_VALIDATE_NONE;
			}
			/* Kill ARP timer, else it brings bond's link down */
			if (bond->mii_timer.function) {
				printk(KERN_INFO DRV_NAME
@@ -1273,6 +1326,7 @@ static CLASS_DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, N
static struct attribute *per_bond_attrs[] = {
	&class_device_attr_slaves.attr,
	&class_device_attr_mode.attr,
	&class_device_attr_arp_validate.attr,
	&class_device_attr_arp_interval.attr,
	&class_device_attr_arp_ip_target.attr,
	&class_device_attr_downdelay.attr,
+29 −3
Original line number Diff line number Diff line
@@ -22,8 +22,8 @@
#include "bond_3ad.h"
#include "bond_alb.h"

#define DRV_VERSION	"3.0.3"
#define DRV_RELDATE	"March 23, 2006"
#define DRV_VERSION	"3.1.0-test"
#define DRV_RELDATE	"September 9, 2006"
#define DRV_NAME	"bonding"
#define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"

@@ -126,6 +126,7 @@ struct bond_params {
	int xmit_policy;
	int miimon;
	int arp_interval;
	int arp_validate;
	int use_carrier;
	int updelay;
	int downdelay;
@@ -151,6 +152,7 @@ struct slave {
	struct slave *prev;
	int    delay;
	u32    jiffies;
	u32    last_arp_rx;
	s8     link;    /* one of BOND_LINK_XXXX */
	s8     state;   /* one of BOND_STATE_XXXX */
	u32    original_flags;
@@ -198,6 +200,7 @@ struct bonding {
	struct   bond_params params;
	struct   list_head vlan_list;
	struct   vlan_group *vlgrp;
	struct   packet_type arp_mon_pt;
};

/**
@@ -228,6 +231,25 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
	return (struct bonding *)slave->dev->master->priv;
}

/*
 * arp_validate option values.  Each flag is the bit numbered by the
 * corresponding BOND_STATE_* value, so that slave_do_arp_validate() can
 * test the option with (1 << slave->state).
 */
#define BOND_ARP_VALIDATE_NONE		0
#define BOND_ARP_VALIDATE_ACTIVE	(1 << BOND_STATE_ACTIVE)
#define BOND_ARP_VALIDATE_BACKUP	(1 << BOND_STATE_BACKUP)
#define BOND_ARP_VALIDATE_ALL		(BOND_ARP_VALIDATE_ACTIVE | \
					 BOND_ARP_VALIDATE_BACKUP)

/*
 * Returns non-zero when ARP validation is enabled for @slave in its
 * current state (active or backup), zero otherwise.
 *
 * Declared static inline rather than extern inline: a header-defined
 * extern inline has no out-of-line copy under GNU89 rules (link failure
 * whenever the compiler declines to inline) and emits a conflicting
 * external definition in every includer under C99 rules.
 */
static inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave)
{
	return bond->params.arp_validate & (1 << slave->state);
}

/*
 * Returns the "last receive" timestamp the ARP monitor should use for
 * @slave: the time of the last validated ARP when validation applies to
 * this slave, else the device's generic last_rx time.
 *
 * The result is returned as unsigned long because callers subtract it from
 * jiffies; narrowing it to 32 bits first would make that difference
 * meaningless on 64-bit builds once jiffies no longer fits in 32 bits.
 *
 * NOTE(review): slave->last_arp_rx is itself declared u32, so the
 * validated path still carries a 32-bit timestamp; widening that field is
 * needed for the same reason -- confirm and fix in struct slave.
 */
static inline unsigned long slave_last_rx(struct bonding *bond, struct slave *slave)
{
	if (slave_do_arp_validate(bond, slave))
		return slave->last_arp_rx;

	return slave->dev->last_rx;
}

static inline void bond_set_slave_inactive_flags(struct slave *slave)
{
	struct bonding *bond = slave->dev->master->priv;
@@ -235,12 +257,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
	    bond->params.mode != BOND_MODE_ALB)
		slave->state = BOND_STATE_BACKUP;
	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
	if (slave_do_arp_validate(bond, slave))
		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
}

static inline void bond_set_slave_active_flags(struct slave *slave)
{
	slave->state = BOND_STATE_ACTIVE;
	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
}

static inline void bond_set_master_3ad_flags(struct bonding *bond)
@@ -284,6 +308,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl);
const char *bond_mode_name(int mode);
void bond_select_active_slave(struct bonding *bond);
void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
void bond_register_arp(struct bonding *);
void bond_unregister_arp(struct bonding *);

#endif /* _LINUX_BONDING_H */
+1 −0
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@
#define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
#define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
#define IFF_BONDING	0x20		/* bonding master or slave	*/
#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/

#define IF_GET_IFACE	0x0001		/* for querying only */
#define IF_GET_PROTO	0x0002
Loading