Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a285c1fa authored by David S. Miller's avatar David S. Miller
Browse files


David Howells says:

====================
rxrpc: Fix use of skb_cow_data()

Here's a series of patches that replaces the use of skb_cow_data() in rxrpc
with skb_unshare() early on in the input process.  The problem that is
being seen is that skb_cow_data() indirectly requires that the maximum
usage count on an sk_buff be 1, and it may generate an assertion failure in
pskb_expand_head() if not.

This can occur because rxrpc_input_data() may be still holding a ref when
it has just attached the sk_buff to the rx ring and given that attachment
its own ref.  If recvmsg happens fast enough, skb_cow_data() can see the
ref still held by the softirq handler.

Further, a packet may contain multiple subpackets, each of which gets its
own attachment to the ring and its own ref - also making skb_cow_data() go
bang.

Fix this by:

 (1) The DATA packet is currently parsed for subpackets twice by the input
     routines.  Parse it just once instead and make notes in the sk_buff
     private data.

 (2) Use the notes from (1) when attaching the packet to the ring multiple
     times.  Once the packet is attached to the ring, recvmsg can see it
     and start modifying it, so the softirq handler is not permitted to
     look inside it from that point.

 (3) Pass the ref from the input code to the ring rather than getting an
     extra ref.  rxrpc_input_data() uses a ref on the second refcount to
     prevent the packet from evaporating under it.

 (4) Call skb_unshare() on secured DATA packets in rxrpc_input_packet()
     before we take call->input_lock.  Other sorts of packets don't get
     modified and so can be left.

     A trace is emitted if skb_unshare() eats the skb.  Note that
     skb_share() for our accounting in this regard as we can't see the
     parameters in the packet to log in a trace line if it releases it.

 (5) Remove the calls to skb_cow_data().  These are then no longer
     necessary.

There are also patches to improve the rxrpc_skb tracepoint to make sure
that Tx-derived buffers are identified separately from Rx-derived buffers
in the trace.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3b25528e d0d5c0cd
Loading
Loading
Loading
Loading
+28 −31
Original line number Diff line number Diff line
@@ -23,20 +23,17 @@
#define __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY

enum rxrpc_skb_trace {
	rxrpc_skb_rx_cleaned,
	rxrpc_skb_rx_freed,
	rxrpc_skb_rx_got,
	rxrpc_skb_rx_lost,
	rxrpc_skb_rx_purged,
	rxrpc_skb_rx_received,
	rxrpc_skb_rx_rotated,
	rxrpc_skb_rx_seen,
	rxrpc_skb_tx_cleaned,
	rxrpc_skb_tx_freed,
	rxrpc_skb_tx_got,
	rxrpc_skb_tx_new,
	rxrpc_skb_tx_rotated,
	rxrpc_skb_tx_seen,
	rxrpc_skb_cleaned,
	rxrpc_skb_freed,
	rxrpc_skb_got,
	rxrpc_skb_lost,
	rxrpc_skb_new,
	rxrpc_skb_purged,
	rxrpc_skb_received,
	rxrpc_skb_rotated,
	rxrpc_skb_seen,
	rxrpc_skb_unshared,
	rxrpc_skb_unshared_nomem,
};

enum rxrpc_local_trace {
@@ -228,20 +225,17 @@ enum rxrpc_tx_point {
 * Declare tracing information enums and their string mappings for display.
 */
#define rxrpc_skb_traces \
	EM(rxrpc_skb_rx_cleaned,		"Rx CLN") \
	EM(rxrpc_skb_rx_freed,			"Rx FRE") \
	EM(rxrpc_skb_rx_got,			"Rx GOT") \
	EM(rxrpc_skb_rx_lost,			"Rx *L*") \
	EM(rxrpc_skb_rx_purged,			"Rx PUR") \
	EM(rxrpc_skb_rx_received,		"Rx RCV") \
	EM(rxrpc_skb_rx_rotated,		"Rx ROT") \
	EM(rxrpc_skb_rx_seen,			"Rx SEE") \
	EM(rxrpc_skb_tx_cleaned,		"Tx CLN") \
	EM(rxrpc_skb_tx_freed,			"Tx FRE") \
	EM(rxrpc_skb_tx_got,			"Tx GOT") \
	EM(rxrpc_skb_tx_new,			"Tx NEW") \
	EM(rxrpc_skb_tx_rotated,		"Tx ROT") \
	E_(rxrpc_skb_tx_seen,			"Tx SEE")
	EM(rxrpc_skb_cleaned,			"CLN") \
	EM(rxrpc_skb_freed,			"FRE") \
	EM(rxrpc_skb_got,			"GOT") \
	EM(rxrpc_skb_lost,			"*L*") \
	EM(rxrpc_skb_new,			"NEW") \
	EM(rxrpc_skb_purged,			"PUR") \
	EM(rxrpc_skb_received,			"RCV") \
	EM(rxrpc_skb_rotated,			"ROT") \
	EM(rxrpc_skb_seen,			"SEE") \
	EM(rxrpc_skb_unshared,			"UNS") \
	E_(rxrpc_skb_unshared_nomem,		"US0")

#define rxrpc_local_traces \
	EM(rxrpc_local_got,			"GOT") \
@@ -643,13 +637,14 @@ TRACE_EVENT(rxrpc_call,

TRACE_EVENT(rxrpc_skb,
	    TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op,
		     int usage, int mod_count, const void *where),
		     int usage, int mod_count, u8 flags,    const void *where),

	    TP_ARGS(skb, op, usage, mod_count, where),
	    TP_ARGS(skb, op, usage, mod_count, flags, where),

	    TP_STRUCT__entry(
		    __field(struct sk_buff *,		skb		)
		    __field(enum rxrpc_skb_trace,	op		)
		    __field(u8,				flags		)
		    __field(int,			usage		)
		    __field(int,			mod_count	)
		    __field(const void *,		where		)
@@ -657,14 +652,16 @@ TRACE_EVENT(rxrpc_skb,

	    TP_fast_assign(
		    __entry->skb = skb;
		    __entry->flags = flags;
		    __entry->op = op;
		    __entry->usage = usage;
		    __entry->mod_count = mod_count;
		    __entry->where = where;
			   ),

	    TP_printk("s=%p %s u=%d m=%d p=%pSR",
	    TP_printk("s=%p %cx %s u=%d m=%d p=%pSR",
		      __entry->skb,
		      __entry->flags & RXRPC_SKB_TX_BUFFER ? 'T' : 'R',
		      __print_symbolic(__entry->op, rxrpc_skb_traces),
		      __entry->usage,
		      __entry->mod_count,
+11 −5
Original line number Diff line number Diff line
@@ -185,11 +185,17 @@ struct rxrpc_host_header {
 * - max 48 bytes (struct sk_buff::cb)
 */
struct rxrpc_skb_priv {
	union {
		u8		nr_jumbo;	/* Number of jumbo subpackets */
	};
	atomic_t	nr_ring_pins;		/* Number of rxtx ring pins */
	u8		nr_subpackets;		/* Number of subpackets */
	u8		rx_flags;		/* Received packet flags */
#define RXRPC_SKB_INCL_LAST	0x01		/* - Includes last packet */
#define RXRPC_SKB_TX_BUFFER	0x02		/* - Is transmit buffer */
	union {
		int		remain;		/* amount of space remaining for next write */

		/* List of requested ACKs on subpackets */
		unsigned long	rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) /
					   BITS_PER_LONG];
	};

	struct rxrpc_host_header hdr;		/* RxRPC packet header from this packet */
@@ -613,8 +619,7 @@ struct rxrpc_call {
#define RXRPC_TX_ANNO_LAST	0x04
#define RXRPC_TX_ANNO_RESENT	0x08

#define RXRPC_RX_ANNO_JUMBO	0x3f		/* Jumbo subpacket number + 1 if not zero */
#define RXRPC_RX_ANNO_JLAST	0x40		/* Set if last element of a jumbo packet */
#define RXRPC_RX_ANNO_SUBPACKET	0x3f		/* Subpacket number in jumbogram */
#define RXRPC_RX_ANNO_VERIFIED	0x80		/* Set if verified and decrypted */
	rxrpc_seq_t		tx_hard_ack;	/* Dead slot in buffer; the first transmitted but
						 * not hard-ACK'd packet follows this.
@@ -1105,6 +1110,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
void rxrpc_packet_destructor(struct sk_buff *);
void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_purge_queue(struct sk_buff_head *);
+4 −4
Original line number Diff line number Diff line
@@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
			continue;

		skb = call->rxtx_buffer[ix];
		rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
		rxrpc_see_skb(skb, rxrpc_skb_seen);

		if (anno_type == RXRPC_TX_ANNO_UNACK) {
			if (ktime_after(skb->tstamp, max_age)) {
@@ -255,18 +255,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
			continue;

		skb = call->rxtx_buffer[ix];
		rxrpc_get_skb(skb, rxrpc_skb_tx_got);
		rxrpc_get_skb(skb, rxrpc_skb_got);
		spin_unlock_bh(&call->lock);

		if (rxrpc_send_data_packet(call, skb, true) < 0) {
			rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
			rxrpc_free_skb(skb, rxrpc_skb_freed);
			return;
		}

		if (rxrpc_is_client_call(call))
			rxrpc_expose_client_call(call);

		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
		rxrpc_free_skb(skb, rxrpc_skb_freed);
		spin_lock_bh(&call->lock);

		/* We need to clear the retransmit state, but there are two
+16 −17
Original line number Diff line number Diff line
@@ -421,6 +421,19 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
	trace_rxrpc_call(call, op, n, here, NULL);
}

/*
 * Clean up the RxTx skb ring.
 */
static void rxrpc_cleanup_ring(struct rxrpc_call *call)
{
	int i;

	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
		rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
		call->rxtx_buffer[i] = NULL;
	}
}

/*
 * Detach a call from its owning socket.
 */
@@ -429,7 +442,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
	const void *here = __builtin_return_address(0);
	struct rxrpc_connection *conn = call->conn;
	bool put = false;
	int i;

	_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));

@@ -479,13 +491,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
	if (conn)
		rxrpc_disconnect_call(call);

	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
		rxrpc_free_skb(call->rxtx_buffer[i],
			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
				rxrpc_skb_rx_cleaned));
		call->rxtx_buffer[i] = NULL;
	}

	rxrpc_cleanup_ring(call);
	_leave("");
}

@@ -568,8 +574,6 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
 */
void rxrpc_cleanup_call(struct rxrpc_call *call)
{
	int i;

	_net("DESTROY CALL %d", call->debug_id);

	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
@@ -580,13 +584,8 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
	ASSERTCMP(call->conn, ==, NULL);

	/* Clean up the Rx/Tx buffer */
	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++)
		rxrpc_free_skb(call->rxtx_buffer[i],
			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
				rxrpc_skb_rx_cleaned));

	rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned);
	rxrpc_cleanup_ring(call);
	rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);

	call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
}
+3 −3
Original line number Diff line number Diff line
@@ -472,7 +472,7 @@ void rxrpc_process_connection(struct work_struct *work)
	/* go through the conn-level event packets, releasing the ref on this
	 * connection that each one has when we've finished with it */
	while ((skb = skb_dequeue(&conn->rx_queue))) {
		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
		rxrpc_see_skb(skb, rxrpc_skb_seen);
		ret = rxrpc_process_event(conn, skb, &abort_code);
		switch (ret) {
		case -EPROTO:
@@ -484,7 +484,7 @@ void rxrpc_process_connection(struct work_struct *work)
			goto requeue_and_leave;
		case -ECONNABORTED:
		default:
			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
			rxrpc_free_skb(skb, rxrpc_skb_freed);
			break;
		}
	}
@@ -501,6 +501,6 @@ void rxrpc_process_connection(struct work_struct *work)
protocol_error:
	if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
		goto requeue_and_leave;
	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
	rxrpc_free_skb(skb, rxrpc_skb_freed);
	goto out;
}
Loading