Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ebeeb1ad authored by Sowmini Varadhan, committed by David S. Miller
Browse files

rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and...


rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and rds connection/workq management

An rds_connection can get added during netns deletion between lines 528
and 529 of

  506 static void rds_tcp_kill_sock(struct net *net)
  :
  /* code to pull out all the rds_connections that should be destroyed */
  :
  528         spin_unlock_irq(&rds_tcp_conn_lock);
  529         list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
  530                 rds_conn_destroy(tc->t_cpath->cp_conn);

Such an rds_connection would miss the rds_conn_destroy()
loop (which cancels all pending work) and, if it was scheduled
after netns deletion, could trigger a use-after-free.

A similar race-window exists for the module unload path
in rds_tcp_exit -> rds_tcp_destroy_conns

Concurrency with netns deletion (rds_tcp_kill_sock()) must be handled
by checking check_net() before enqueuing new work or adding new
connections.

Concurrency with module-unload is handled by maintaining a module
specific flag that is set at the start of the module exit function,
and must be checked before enqueuing new work or adding new connections.

This commit refactors existing RDS_DESTROY_PENDING checks added by
commit 3db6e0d1 ("rds: use RCU to synchronize work-enqueue with
connection teardown") and consolidates all the concurrency checks
listed above into the function rds_destroy_pending().

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 79a8a642
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -223,7 +223,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)


		rcu_read_lock();
		rcu_read_lock();
		if (!test_and_set_bit(0, &conn->c_map_queued) &&
		if (!test_and_set_bit(0, &conn->c_map_queued) &&
		    !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
		    !rds_destroy_pending(cp->cp_conn)) {
			rds_stats_inc(s_cong_update_queued);
			rds_stats_inc(s_cong_update_queued);
			/* We cannot inline the call to rds_send_xmit() here
			/* We cannot inline the call to rds_send_xmit() here
			 * for two reasons (both pertaining to a TCP transport):
			 * for two reasons (both pertaining to a TCP transport):
+9 −6
Original line number Original line Diff line number Diff line
@@ -220,8 +220,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
				     is_outgoing);
				     is_outgoing);
		conn->c_path[i].cp_index = i;
		conn->c_path[i].cp_index = i;
	}
	}
	rcu_read_lock();
	if (rds_destroy_pending(conn))
		ret = -ENETDOWN;
	else
		ret = trans->conn_alloc(conn, gfp);
		ret = trans->conn_alloc(conn, gfp);
	if (ret) {
	if (ret) {
		rcu_read_unlock();
		kfree(conn->c_path);
		kfree(conn->c_path);
		kmem_cache_free(rds_conn_slab, conn);
		kmem_cache_free(rds_conn_slab, conn);
		conn = ERR_PTR(ret);
		conn = ERR_PTR(ret);
@@ -283,6 +288,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
		}
		}
	}
	}
	spin_unlock_irqrestore(&rds_conn_lock, flags);
	spin_unlock_irqrestore(&rds_conn_lock, flags);
	rcu_read_unlock();


out:
out:
	return conn;
	return conn;
@@ -382,13 +388,10 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
{
{
	struct rds_message *rm, *rtmp;
	struct rds_message *rm, *rtmp;


	set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);

	if (!cp->cp_transport_data)
	if (!cp->cp_transport_data)
		return;
		return;


	/* make sure lingering queued work won't try to ref the conn */
	/* make sure lingering queued work won't try to ref the conn */
	synchronize_rcu();
	cancel_delayed_work_sync(&cp->cp_send_w);
	cancel_delayed_work_sync(&cp->cp_send_w);
	cancel_delayed_work_sync(&cp->cp_recv_w);
	cancel_delayed_work_sync(&cp->cp_recv_w);


@@ -691,7 +694,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
	atomic_set(&cp->cp_state, RDS_CONN_ERROR);


	rcu_read_lock();
	rcu_read_lock();
	if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
	if (!destroy && rds_destroy_pending(cp->cp_conn)) {
		rcu_read_unlock();
		rcu_read_unlock();
		return;
		return;
	}
	}
@@ -714,7 +717,7 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
{
{
	rcu_read_lock();
	rcu_read_lock();
	if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
	if (rds_destroy_pending(cp->cp_conn)) {
		rcu_read_unlock();
		rcu_read_unlock();
		return;
		return;
	}
	}
+17 −0
Original line number Original line Diff line number Diff line
@@ -48,6 +48,7 @@
static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
static atomic_t rds_ib_unloading;


module_param(rds_ib_mr_1m_pool_size, int, 0444);
module_param(rds_ib_mr_1m_pool_size, int, 0444);
MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
@@ -378,8 +379,23 @@ static void rds_ib_unregister_client(void)
	flush_workqueue(rds_wq);
	flush_workqueue(rds_wq);
}
}


/* Mark the IB transport as unloading by setting the module-wide
 * rds_ib_unloading flag to 1. rds_ib_exit() calls this as its first
 * step and then calls synchronize_rcu(); rds_ib_is_unloading() reads
 * the flag.
 */
static void rds_ib_set_unloading(void)
{
	atomic_set(&rds_ib_unloading, 1);
}

/* t_unloading callback for the IB transport.
 *
 * Returns true when the connection's first path (c_path[0]) has
 * RDS_DESTROY_PENDING set in cp_flags, or when the module-wide
 * rds_ib_unloading flag is non-zero; false otherwise.
 */
static bool rds_ib_is_unloading(struct rds_connection *conn)
{
	/* Per-connection teardown is checked first, as in the flag test order. */
	if (test_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags))
		return true;

	/* Otherwise report whether module exit has begun. */
	return atomic_read(&rds_ib_unloading) != 0;
}

void rds_ib_exit(void)
void rds_ib_exit(void)
{
{
	rds_ib_set_unloading();
	synchronize_rcu();
	rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
	rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
	rds_ib_unregister_client();
	rds_ib_unregister_client();
	rds_ib_destroy_nodev_conns();
	rds_ib_destroy_nodev_conns();
@@ -413,6 +429,7 @@ struct rds_transport rds_ib_transport = {
	.flush_mrs		= rds_ib_flush_mrs,
	.flush_mrs		= rds_ib_flush_mrs,
	.t_owner		= THIS_MODULE,
	.t_owner		= THIS_MODULE,
	.t_name			= "infiniband",
	.t_name			= "infiniband",
	.t_unloading		= rds_ib_is_unloading,
	.t_type			= RDS_TRANS_IB
	.t_type			= RDS_TRANS_IB
};
};


+1 −0
Original line number Original line Diff line number Diff line
@@ -117,6 +117,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
			  &conn->c_laddr, &conn->c_faddr,
			  &conn->c_laddr, &conn->c_faddr,
			  RDS_PROTOCOL_MAJOR(conn->c_version),
			  RDS_PROTOCOL_MAJOR(conn->c_version),
			  RDS_PROTOCOL_MINOR(conn->c_version));
			  RDS_PROTOCOL_MINOR(conn->c_version));
		set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
		rds_conn_destroy(conn);
		rds_conn_destroy(conn);
		return;
		return;
	} else {
	} else {
+7 −0
Original line number Original line Diff line number Diff line
@@ -518,6 +518,7 @@ struct rds_transport {
	void (*sync_mr)(void *trans_private, int direction);
	void (*sync_mr)(void *trans_private, int direction);
	void (*free_mr)(void *trans_private, int invalidate);
	void (*free_mr)(void *trans_private, int invalidate);
	void (*flush_mrs)(void);
	void (*flush_mrs)(void);
	bool (*t_unloading)(struct rds_connection *conn);
};
};


struct rds_sock {
struct rds_sock {
@@ -862,6 +863,12 @@ static inline void rds_mr_put(struct rds_mr *mr)
		__rds_put_mr_final(mr);
		__rds_put_mr_final(mr);
}
}


/* Consolidated "is this connection being torn down?" check.
 *
 * Returns true when check_net() fails for the connection's network
 * namespace, or when the connection's transport provides a t_unloading
 * callback and that callback returns true. A transport without a
 * t_unloading callback is treated as not unloading.
 *
 * The callers visible alongside this helper invoke it inside an
 * rcu_read_lock() section before queuing work or adding connections.
 */
static inline bool rds_destroy_pending(struct rds_connection *conn)
{
	/* Namespace is going away: nothing new may be queued or created. */
	if (!check_net(rds_conn_net(conn)))
		return true;

	/* The callback is optional. */
	if (!conn->c_trans->t_unloading)
		return false;

	return conn->c_trans->t_unloading(conn);
}

/* stats.c */
/* stats.c */
DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
#define rds_stats_inc_which(which, member) do {		\
#define rds_stats_inc_which(which, member) do {		\
Loading