Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2be8e3ee authored by Roland Dreier's avatar Roland Dreier
Browse files

IB/umad: Add P_Key index support



Add support for setting the P_Key index of sent MADs and getting the
P_Key index of received MADs.  This requires a change to the layout of
the ABI structure struct ib_user_mad_hdr, so to avoid breaking
compatibility, we default to the old (unchanged) ABI and add a new
ioctl IB_USER_MAD_ENABLE_PKEY that allows applications that are aware
of the new ABI to opt into using it.

We plan on switching to the new ABI by default in a year or so, and
this patch adds a warning that is printed when an application uses the
old ABI, to push people towards converting to the new ABI.

Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
Reviewed-by: default avatarSean Hefty <sean.hefty@intel.com>
Reviewed-by: default avatarHal Rosenstock <hal@xsigo.com>
parent c01759ce
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -99,6 +99,20 @@ Transaction IDs
  request/response pairs.  The upper 32 bits are reserved for use by
  the kernel and will be overwritten before a MAD is sent.

P_Key Index Handling

  The old ib_umad interface did not allow setting the P_Key index for
  MADs that are sent and did not provide a way for obtaining the P_Key
  index of received MADs.  A new layout for struct ib_user_mad_hdr
  with a pkey_index member has been defined; however, to preserve
  binary compatibility with older applications, this new layout will
  not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
  before a file descriptor is used for anything else.

  In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
  to 6, the new layout of struct ib_user_mad_hdr will be used by
  default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.

Setting IsSM Capability Bit

  To set the IsSM capability bit for a port, simply open the
+73 −29
Original line number Diff line number Diff line
@@ -118,6 +118,8 @@ struct ib_umad_file {
	wait_queue_head_t	recv_wait;
	struct ib_mad_agent    *agent[IB_UMAD_MAX_AGENTS];
	int			agents_dead;
	u8			use_pkey_index;
	u8			already_used;
};

struct ib_umad_packet {
@@ -147,6 +149,12 @@ static void ib_umad_release_dev(struct kref *ref)
	kfree(dev);
}

static int hdr_size(struct ib_umad_file *file)
{
	return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
		sizeof (struct ib_user_mad_hdr_old);
}

/* caller must hold port->mutex at least for reading */
static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
{
@@ -222,12 +230,12 @@ static void recv_handler(struct ib_mad_agent *agent,
	packet->recv_wc = mad_recv_wc;

	packet->mad.hdr.status	   = 0;
	packet->mad.hdr.length    = sizeof (struct ib_user_mad) +
				    mad_recv_wc->mad_len;
	packet->mad.hdr.length	   = hdr_size(file) + mad_recv_wc->mad_len;
	packet->mad.hdr.qpn	   = cpu_to_be32(mad_recv_wc->wc->src_qp);
	packet->mad.hdr.lid	   = cpu_to_be16(mad_recv_wc->wc->slid);
	packet->mad.hdr.sl	   = mad_recv_wc->wc->sl;
	packet->mad.hdr.path_bits  = mad_recv_wc->wc->dlid_path_bits;
	packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
	packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
	if (packet->mad.hdr.grh_present) {
		struct ib_ah_attr ah_attr;
@@ -253,8 +261,8 @@ static void recv_handler(struct ib_mad_agent *agent,
	ib_free_recv_mad(mad_recv_wc);
}

static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
			     size_t count)
static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
			     struct ib_umad_packet *packet, size_t count)
{
	struct ib_mad_recv_buf *recv_buf;
	int left, seg_payload, offset, max_seg_payload;
@@ -262,15 +270,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
	/* We need enough room to copy the first (or only) MAD segment. */
	recv_buf = &packet->recv_wc->recv_buf;
	if ((packet->length <= sizeof (*recv_buf->mad) &&
	     count < sizeof (packet->mad) + packet->length) ||
	     count < hdr_size(file) + packet->length) ||
	    (packet->length > sizeof (*recv_buf->mad) &&
	     count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
	     count < hdr_size(file) + sizeof (*recv_buf->mad)))
		return -EINVAL;

	if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
	if (copy_to_user(buf, &packet->mad, hdr_size(file)))
		return -EFAULT;

	buf += sizeof (packet->mad);
	buf += hdr_size(file);
	seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
	if (copy_to_user(buf, recv_buf->mad, seg_payload))
		return -EFAULT;
@@ -280,7 +288,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
		 * Multipacket RMPP MAD message. Copy remainder of message.
		 * Note that last segment may have a shorter payload.
		 */
		if (count < sizeof (packet->mad) + packet->length) {
		if (count < hdr_size(file) + packet->length) {
			/*
			 * The buffer is too small, return the first RMPP segment,
			 * which includes the RMPP message length.
@@ -300,18 +308,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
				return -EFAULT;
		}
	}
	return sizeof (packet->mad) + packet->length;
	return hdr_size(file) + packet->length;
}

static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
			     size_t count)
static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
			     struct ib_umad_packet *packet, size_t count)
{
	ssize_t size = sizeof (packet->mad) + packet->length;
	ssize_t size = hdr_size(file) + packet->length;

	if (count < size)
		return -EINVAL;

	if (copy_to_user(buf, &packet->mad, size))
	if (copy_to_user(buf, &packet->mad, hdr_size(file)))
		return -EFAULT;

	buf += hdr_size(file);

	if (copy_to_user(buf, packet->mad.data, packet->length))
		return -EFAULT;

	return size;
@@ -324,7 +337,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
	struct ib_umad_packet *packet;
	ssize_t ret;

	if (count < sizeof (struct ib_user_mad))
	if (count < hdr_size(file))
		return -EINVAL;

	spin_lock_irq(&file->recv_lock);
@@ -348,9 +361,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
	spin_unlock_irq(&file->recv_lock);

	if (packet->recv_wc)
		ret = copy_recv_mad(buf, packet, count);
		ret = copy_recv_mad(file, buf, packet, count);
	else
		ret = copy_send_mad(buf, packet, count);
		ret = copy_send_mad(file, buf, packet, count);

	if (ret < 0) {
		/* Requeue packet */
@@ -442,15 +455,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
	__be64 *tid;
	int ret, data_len, hdr_len, copy_offset, rmpp_active;

	if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
	if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
		return -EINVAL;

	packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
	if (!packet)
		return -ENOMEM;

	if (copy_from_user(&packet->mad, buf,
			    sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
	if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
		ret = -EFAULT;
		goto err;
	}
@@ -461,6 +473,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
		goto err;
	}

	buf += hdr_size(file);

	if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
		ret = -EFAULT;
		goto err;
	}

	down_read(&file->port->mutex);

	agent = __get_agent(file, packet->mad.hdr.id);
@@ -500,11 +519,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
			      IB_MGMT_RMPP_FLAG_ACTIVE;
	}

	data_len = count - sizeof (struct ib_user_mad) - hdr_len;
	data_len = count - hdr_size(file) - hdr_len;
	packet->msg = ib_create_send_mad(agent,
					 be32_to_cpu(packet->mad.hdr.qpn),
					 0, rmpp_active, hdr_len,
					 data_len, GFP_KERNEL);
					 packet->mad.hdr.pkey_index, rmpp_active,
					 hdr_len, data_len, GFP_KERNEL);
	if (IS_ERR(packet->msg)) {
		ret = PTR_ERR(packet->msg);
		goto err_ah;
@@ -517,7 +536,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,

	/* Copy MAD header.  Any RMPP header is already in place. */
	memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
	buf += sizeof (struct ib_user_mad);

	if (!rmpp_active) {
		if (copy_from_user(packet->msg->mad + copy_offset,
@@ -646,6 +664,16 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
		goto out;
	}

	if (!file->already_used) {
		file->already_used = 1;
		if (!file->use_pkey_index) {
			printk(KERN_WARNING "user_mad: process %s did not enable "
			       "P_Key index support.\n", current->comm);
			printk(KERN_WARNING "user_mad:   Documentation/infiniband/user_mad.txt "
			       "has info on the new ABI.\n");
		}
	}

	file->agent[agent_id] = agent;
	ret = 0;

@@ -682,6 +710,20 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
	return ret;
}

static long ib_umad_enable_pkey(struct ib_umad_file *file)
{
	int ret = 0;

	down_write(&file->port->mutex);
	if (file->already_used)
		ret = -EINVAL;
	else
		file->use_pkey_index = 1;
	up_write(&file->port->mutex);

	return ret;
}

static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
@@ -690,6 +732,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
		return ib_umad_reg_agent(filp->private_data, arg);
	case IB_USER_MAD_UNREGISTER_AGENT:
		return ib_umad_unreg_agent(filp->private_data, arg);
	case IB_USER_MAD_ENABLE_PKEY:
		return ib_umad_enable_pkey(filp->private_data);
	default:
		return -ENOIOCTLCMD;
	}
+48 −0
Original line number Diff line number Diff line
@@ -51,8 +51,51 @@
 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
 */

/**
 * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index
 * @id - ID of agent MAD received with/to be sent with
 * @status - 0 on successful receive, ETIMEDOUT if no response
 *   received (transaction ID in data[] will be set to TID of original
 *   request) (ignored on send)
 * @timeout_ms - Milliseconds to wait for response (unset on receive)
 * @retries - Number of automatic retries to attempt
 * @qpn - Remote QP number received from/to be sent to
 * @qkey - Remote Q_Key to be sent with (unset on receive)
 * @lid - Remote lid received from/to be sent to
 * @sl - Service level received with/to be sent with
 * @path_bits - Local path bits received with/to be sent with
 * @grh_present - If set, GRH was received/should be sent
 * @gid_index - Local GID index to send with (unset on receive)
 * @hop_limit - Hop limit in GRH
 * @traffic_class - Traffic class in GRH
 * @gid - Remote GID in GRH
 * @flow_label - Flow label in GRH
 */
struct ib_user_mad_hdr_old {
	__u32	id;
	__u32	status;
	__u32	timeout_ms;
	__u32	retries;
	__u32	length;
	__be32	qpn;
	__be32  qkey;
	__be16	lid;
	__u8	sl;
	__u8	path_bits;
	__u8	grh_present;
	__u8	gid_index;
	__u8	hop_limit;
	__u8	traffic_class;
	__u8	gid[16];
	__be32	flow_label;
};

/**
 * ib_user_mad_hdr - MAD packet header
 *   This layout allows specifying/receiving the P_Key index.  To use
 *   this capability, an application must call the
 *   IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before
 *   any other actions with the file handle.
 * @id - ID of agent MAD received with/to be sent with
 * @status - 0 on successful receive, ETIMEDOUT if no response
 *   received (transaction ID in data[] will be set to TID of original
@@ -70,6 +113,7 @@
 * @traffic_class - Traffic class in GRH
 * @gid - Remote GID in GRH
 * @flow_label - Flow label in GRH
 * @pkey_index - P_Key index
 */
struct ib_user_mad_hdr {
	__u32	id;
@@ -88,6 +132,8 @@ struct ib_user_mad_hdr {
	__u8	traffic_class;
	__u8	gid[16];
	__be32	flow_label;
	__u16	pkey_index;
	__u8	reserved[6];
};

/**
@@ -134,4 +180,6 @@ struct ib_user_mad_reg_req {

#define IB_USER_MAD_UNREGISTER_AGENT	_IOW(IB_IOCTL_MAGIC, 2, __u32)

#define IB_USER_MAD_ENABLE_PKEY		_IO(IB_IOCTL_MAGIC, 3)

#endif /* IB_USER_MAD_H */