Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3eb45036 authored by Santosh Shilimkar
Browse files

rds: add type of service(tos) infrastructure



The RDS type of service (TOS) is user-defined and must be configured
through the RDS ioctl interface. It must be set before any traffic is
initiated, and once set it cannot be changed. All outgoing traffic
from the socket is associated with its TOS.

Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
[yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes]
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
parent d021fabf
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -69,6 +69,12 @@
#define RDS_TRANS_COUNT	3
#define	RDS_TRANS_NONE	(~0)

/*
 * IOCTL commands for SOL_RDS.
 *
 * SIOCRDSSETTOS: store the type of service (TOS) for a socket; the argument
 *                points to an rds_tos_t holding the requested value.
 * SIOCRDSGETTOS: read the socket's stored TOS back into the rds_tos_t the
 *                argument points to.
 */
#define SIOCRDSSETTOS		(SIOCPROTOPRIVATE)
#define SIOCRDSGETTOS		(SIOCPROTOPRIVATE + 1)

/* Type used to hold an RDS type of service (TOS) value. */
typedef __u8	rds_tos_t;

/*
 * Control message types for SOL_RDS.
 *
@@ -149,6 +155,7 @@ struct rds_info_connection {
	__be32		faddr;
	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
	__u8		flags;
	__u8		tos;
} __attribute__((packed));

struct rds6_info_connection {
@@ -171,6 +178,7 @@ struct rds_info_message {
	__be16		lport;
	__be16		fport;
	__u8		flags;
	__u8		tos;
} __attribute__((packed));

struct rds6_info_message {
@@ -214,6 +222,7 @@ struct rds_info_tcp_socket {
	__u32           last_sent_nxt;
	__u32           last_expected_una;
	__u32           last_seen_una;
	__u8		tos;
} __attribute__((packed));

struct rds6_info_tcp_socket {
@@ -240,6 +249,7 @@ struct rds_info_rdma_connection {
	__u32		max_send_sge;
	__u32		rdma_mr_max;
	__u32		rdma_mr_size;
	__u8		tos;
};

struct rds6_info_rdma_connection {
@@ -253,6 +263,7 @@ struct rds6_info_rdma_connection {
	__u32		max_send_sge;
	__u32		rdma_mr_max;
	__u32		rdma_mr_size;
	__u8		tos;
};

/* RDS message Receive Path Latency points */
+34 −1
Original line number Diff line number Diff line
@@ -254,9 +254,40 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,

static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
	rds_tos_t tos;

	switch (cmd) {
	case SIOCRDSSETTOS:
		if (get_user(tos, (rds_tos_t __user *)arg))
			return -EFAULT;

		if (rs->rs_transport &&
		    rs->rs_transport->t_type == RDS_TRANS_TCP)
			tos = 0;

		spin_lock_bh(&rds_sock_lock);
		if (rs->rs_tos || rs->rs_conn) {
			spin_unlock_bh(&rds_sock_lock);
			return -EINVAL;
		}
		rs->rs_tos = tos;
		spin_unlock_bh(&rds_sock_lock);
		break;
	case SIOCRDSGETTOS:
		spin_lock_bh(&rds_sock_lock);
		tos = rs->rs_tos;
		spin_unlock_bh(&rds_sock_lock);
		if (put_user(tos, (rds_tos_t __user *)arg))
			return -EFAULT;
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return 0;
}

static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
			      int len)
{
@@ -650,6 +681,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
	spin_lock_init(&rs->rs_rdma_lock);
	rs->rs_rdma_keys = RB_ROOT;
	rs->rs_rx_traces = 0;
	rs->rs_tos = 0;
	rs->rs_conn = NULL;

	spin_lock_bh(&rds_sock_lock);
	list_add_tail(&rs->rs_item, &rds_sock_list);
+11 −9
Original line number Diff line number Diff line
@@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
					      const struct in6_addr *laddr,
					      const struct in6_addr *faddr,
					      struct rds_transport *trans,
					      int dev_if)
					      u8 tos, int dev_if)
{
	struct rds_connection *conn, *ret = NULL;

@@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
		if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
		    ipv6_addr_equal(&conn->c_laddr, laddr) &&
		    conn->c_trans == trans &&
		    conn->c_tos == tos &&
		    net == rds_conn_net(conn) &&
		    conn->c_dev_if == dev_if) {
			ret = conn;
@@ -160,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
						const struct in6_addr *laddr,
						const struct in6_addr *faddr,
						struct rds_transport *trans,
						gfp_t gfp,
						gfp_t gfp, u8 tos,
						int is_outgoing,
						int dev_if)
{
@@ -172,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);

	rcu_read_lock();
	conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
	conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
	if (conn &&
	    conn->c_loopback &&
	    conn->c_trans != &rds_loop_transport &&
@@ -206,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
	conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
	conn->c_faddr = *faddr;
	conn->c_dev_if = dev_if;
	conn->c_tos = tos;

#if IS_ENABLED(CONFIG_IPV6)
	/* If the local address is link local, set c_bound_if to be the
@@ -298,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
		struct rds_connection *found;

		found = rds_conn_lookup(net, head, laddr, faddr, trans,
					dev_if);
					tos, dev_if);
		if (found) {
			struct rds_conn_path *cp;
			int i;
@@ -333,10 +335,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_connection *rds_conn_create(struct net *net,
				       const struct in6_addr *laddr,
				       const struct in6_addr *faddr,
				       struct rds_transport *trans, gfp_t gfp,
				       int dev_if)
				       struct rds_transport *trans, u8 tos,
				       gfp_t gfp, int dev_if)
{
	return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create);

@@ -344,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
						const struct in6_addr *laddr,
						const struct in6_addr *faddr,
						struct rds_transport *trans,
						gfp_t gfp, int dev_if)
						u8 tos, gfp_t gfp, int dev_if)
{
	return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);

+1 −0
Original line number Diff line number Diff line
@@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,

	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
	iinfo->tos = conn->c_tos;

	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
+1 −1
Original line number Diff line number Diff line
@@ -786,7 +786,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,

	/* RDS/IB is not currently netns aware, thus init_net */
	conn = rds_conn_create(&init_net, daddr6, saddr6,
			       &rds_ib_transport, GFP_KERNEL, ifindex);
			       &rds_ib_transport, 0, GFP_KERNEL, ifindex);
	if (IS_ERR(conn)) {
		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
		conn = NULL;
Loading