Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26bbf13c authored by Yosef Etigin's avatar Yosef Etigin Committed by Roland Dreier
Browse files

IPoIB: Handle P_Key table reordering



SM reconfiguration or failover possibly causes a shuffling of the values
in the P_Key table. Right now, IPoIB only queries for the P_Key index
once when it creates the device QP, and hence there are problems if the
index of a P_Key value changes.  Fix this by using the PKEY_CHANGE event
to trigger a recheck of the P_Key index.

Signed-off-by: default avatarYosef Etigin <yosefe@voltaire.com>
Acked-by: default avatarMichael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 1af4c435
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -201,15 +201,17 @@ struct ipoib_dev_priv {
	struct list_head multicast_list;
	struct rb_root multicast_tree;

	struct delayed_work pkey_task;
	struct delayed_work pkey_poll_task;
	struct delayed_work mcast_task;
	struct work_struct flush_task;
	struct work_struct restart_task;
	struct delayed_work ah_reap_task;
	struct work_struct pkey_event_task;

	struct ib_device *ca;
	u8            	  port;
	u16           	  pkey;
	u16               pkey_index;
	struct ib_pd  	 *pd;
	struct ib_mr  	 *mr;
	struct ib_cq  	 *cq;
@@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);

int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);

int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);

int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
+68 −17
Original line number Diff line number Diff line
@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;

	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
		ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		return -1;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	ret = ipoib_init_qp(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
	ret = ipoib_ib_post_receives(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
		ipoib_ib_dev_stop(dev);
		ipoib_ib_dev_stop(dev, 1);
		return -1;
	}

	ret = ipoib_cm_dev_open(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
		ipoib_ib_dev_stop(dev);
		ipoib_ib_dev_stop(dev, 1);
		return -1;
	}

@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
		mutex_lock(&pkey_mutex);
		set_bit(IPOIB_PKEY_STOP, &priv->flags);
		cancel_delayed_work(&priv->pkey_task);
		cancel_delayed_work(&priv->pkey_poll_task);
		mutex_unlock(&pkey_mutex);
		if (flush)
			flush_workqueue(ipoib_workqueue);
@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
	return pending;
}

int ipoib_ib_dev_stop(struct net_device *dev)
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
@@ -629,6 +636,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
	/* Wait for all AHs to be reaped */
	set_bit(IPOIB_STOP_REAPER, &priv->flags);
	cancel_delayed_work(&priv->ah_reap_task);
	if (flush)
		flush_workqueue(ipoib_workqueue);

	begin = jiffies;
@@ -673,11 +681,22 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
	return 0;
}

void ipoib_ib_dev_flush(struct work_struct *work)
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{
	struct ipoib_dev_priv *cpriv, *priv =
		container_of(work, struct ipoib_dev_priv, flush_task);
	struct ipoib_dev_priv *cpriv;
	struct net_device *dev = priv->dev;
	u16 new_index;

	mutex_lock(&priv->vlan_mutex);

	/*
	 * Flush any child interfaces too -- they might be up even if
	 * the parent is down.
	 */
	list_for_each_entry(cpriv, &priv->child_intfs, list)
		__ipoib_ib_dev_flush(cpriv, pkey_event);

	mutex_unlock(&priv->vlan_mutex);

	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
		return;
	}

	if (pkey_event) {
		if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
			ipoib_ib_dev_down(dev, 0);
			ipoib_pkey_dev_delay_open(dev);
			return;
		}
		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

		/* restart QP only if P_Key index is changed */
		if (new_index == priv->pkey_index) {
			ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
			return;
		}
		priv->pkey_index = new_index;
	}

	ipoib_dbg(priv, "flushing\n");

	ipoib_ib_dev_down(dev, 0);

	if (pkey_event) {
		ipoib_ib_dev_stop(dev, 0);
		ipoib_ib_dev_open(dev);
	}

	/*
	 * The device could have been brought down between the start and when
	 * we get here, don't bring it back up if it's not configured up
@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
		ipoib_ib_dev_up(dev);
		ipoib_mcast_restart_task(&priv->restart_task);
	}
}

	mutex_lock(&priv->vlan_mutex);
void ipoib_ib_dev_flush(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, flush_task);

	/* Flush any child interfaces too */
	list_for_each_entry(cpriv, &priv->child_intfs, list)
		ipoib_ib_dev_flush(&cpriv->flush_task);
	ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
	__ipoib_ib_dev_flush(priv, 0);
}

	mutex_unlock(&priv->vlan_mutex);
void ipoib_pkey_event(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, pkey_event_task);

	ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
	__ipoib_ib_dev_flush(priv, 1);
}

void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
void ipoib_pkey_poll(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, pkey_task.work);
		container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
	struct net_device *dev = priv->dev;

	ipoib_pkey_dev_check_presence(dev);
@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
		mutex_lock(&pkey_mutex);
		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->pkey_task,
					   &priv->pkey_poll_task,
					   HZ);
		mutex_unlock(&pkey_mutex);
	}
@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
		mutex_lock(&pkey_mutex);
		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
		queue_delayed_work(ipoib_workqueue,
				   &priv->pkey_task,
				   &priv->pkey_poll_task,
				   HZ);
		mutex_unlock(&pkey_mutex);
		return 1;
+4 −3
Original line number Diff line number Diff line
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
		return -EINVAL;

	if (ipoib_ib_dev_up(dev)) {
		ipoib_ib_dev_stop(dev);
		ipoib_ib_dev_stop(dev, 1);
		return -EINVAL;
	}

@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
	flush_workqueue(ipoib_workqueue);

	ipoib_ib_dev_down(dev, 1);
	ipoib_ib_dev_stop(dev);
	ipoib_ib_dev_stop(dev, 1);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;
@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);

	INIT_DELAYED_WORK(&priv->pkey_task,    ipoib_pkey_poll);
	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
	INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
+15 −23
Original line number Diff line number Diff line
@@ -33,8 +33,6 @@
 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
 */

#include <rdma/ib_cache.h>

#include "ipoib.h"

int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
	if (!qp_attr)
		goto out;

	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		ret = -ENXIO;
		goto out;
@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;
	u16 pkey_index;
	struct ib_qp_attr qp_attr;
	int attr_mask;

	/*
	 * Search through the port P_Key table for the requested pkey value.
	 * The port has to be assigned to the respective IB partition in
	 * advance.
	 */
	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
	if (ret) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		return ret;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
		return -1;

	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.qkey = 0;
	qp_attr.port_num = priv->port;
	qp_attr.pkey_index = pkey_index;
	qp_attr.pkey_index = priv->pkey_index;
	attr_mask =
	    IB_QP_QKEY |
	    IB_QP_PORT |
@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
	struct ipoib_dev_priv *priv =
		container_of(handler, struct ipoib_dev_priv, event_handler);

	if ((record->event == IB_EVENT_PORT_ERR    ||
	     record->event == IB_EVENT_PKEY_CHANGE ||
	if (record->element.port_num != priv->port)
		return;

	if (record->event == IB_EVENT_PORT_ERR    ||
	    record->event == IB_EVENT_PORT_ACTIVE ||
	    record->event == IB_EVENT_LID_CHANGE  ||
	    record->event == IB_EVENT_SM_CHANGE   ||
	     record->event == IB_EVENT_CLIENT_REREGISTER) &&
	    record->element.port_num == priv->port) {
	    record->event == IB_EVENT_CLIENT_REREGISTER) {
		ipoib_dbg(priv, "Port state change event\n");
		queue_work(ipoib_workqueue, &priv->flush_task);
	} else if (record->event == IB_EVENT_PKEY_CHANGE) {
		ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
		queue_work(ipoib_workqueue, &priv->pkey_event_task);
	}
}