Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d260e9e6 authored by David S. Miller
Browse files

Merge branch 'tcp-sw-rx-timestamps'



Mike Maloney says:

====================
net: Add software rx timestamp for TCP.

Add software rx timestamps for TCP, and a test to ensure consistency of
behavior between the IP, UDP, and TCP implementations.

Changes since v1:
  -Initialize tss->ts[1] to 0 if caller requested any timestamps.
  -Fix test case to validate that tss->ts[1] is zero.
  -Fix tests to actually use a raw socket.
  -Fix --tcp flag to work on the test.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b2854772 16e78122
Loading
Loading
Loading
Loading
+8 −1
Original line number Original line Diff line number Diff line
@@ -774,6 +774,12 @@ struct tcp_skb_cb {
			u16	tcp_gso_segs;
			u16	tcp_gso_segs;
			u16	tcp_gso_size;
			u16	tcp_gso_size;
		};
		};

		/* Used to stash the receive timestamp while this skb is in the
		 * out of order queue, as skb->tstamp is overwritten by the
		 * rbnode.
		 */
		ktime_t		swtstamp;
	};
	};
	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/


@@ -790,7 +796,8 @@ struct tcp_skb_cb {
	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
	__u8		txstamp_ack:1,	/* Record TX timestamp for ack? */
	__u8		txstamp_ack:1,	/* Record TX timestamp for ack? */
			eor:1,		/* Is skb MSG_EOR marked? */
			eor:1,		/* Is skb MSG_EOR marked? */
			unused:6;
			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
			unused:5;
	__u32		ack_seq;	/* Sequence number ACK'd	*/
	__u32		ack_seq;	/* Sequence number ACK'd	*/
	union {
	union {
		struct {
		struct {
+65 −0
Original line number Original line Diff line number Diff line
@@ -269,6 +269,7 @@
#include <linux/err.h>
#include <linux/err.h>
#include <linux/time.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/errqueue.h>


#include <net/icmp.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/inet_common.h>
@@ -1695,6 +1696,61 @@ int tcp_peek_len(struct socket *sock)
}
}
EXPORT_SYMBOL(tcp_peek_len);
EXPORT_SYMBOL(tcp_peek_len);


/* Load the receive timestamps carried by @skb into @tss.
 *
 * ts[0] receives skb->tstamp and ts[2] receives the skb's hwtstamp, each
 * converted to a timespec.  A slot whose source value is zero is left as an
 * all-zero timespec.  ts[1] is not touched here.
 */
static void tcp_update_recv_tstamps(struct sk_buff *skb,
				    struct scm_timestamping *tss)
{
	/* Start both slots out as "no timestamp" ... */
	tss->ts[0] = (struct timespec) {0};
	tss->ts[2] = (struct timespec) {0};

	/* ... then overwrite whichever ones the skb actually carries. */
	if (skb->tstamp)
		tss->ts[0] = ktime_to_timespec(skb->tstamp);
	if (skb_hwtstamps(skb)->hwtstamp)
		tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
}

/* Similar to __sock_recv_timestamp, but does not require an skb */
void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
			struct scm_timestamping *tss)
{
	struct timeval tv;
	bool has_timestamping = false;

	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
					 sizeof(tss->ts[0]), &tss->ts[0]);
			} else {
				tv.tv_sec = tss->ts[0].tv_sec;
				tv.tv_usec = tss->ts[0].tv_nsec / 1000;

				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
					 sizeof(tv), &tv);
			}
		}

		if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
			has_timestamping = true;
		else
			tss->ts[0] = (struct timespec) {0};
	}

	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
		if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
			has_timestamping = true;
		else
			tss->ts[2] = (struct timespec) {0};
	}

	if (has_timestamping) {
		tss->ts[1] = (struct timespec) {0};
		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
			 sizeof(*tss), tss);
	}
}

/*
/*
 *	This routine copies from a sock struct into the user buffer.
 *	This routine copies from a sock struct into the user buffer.
 *
 *
@@ -1716,6 +1772,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
	long timeo;
	long timeo;
	struct sk_buff *skb, *last;
	struct sk_buff *skb, *last;
	u32 urg_hole = 0;
	u32 urg_hole = 0;
	struct scm_timestamping tss;
	bool has_tss = false;


	if (unlikely(flags & MSG_ERRQUEUE))
	if (unlikely(flags & MSG_ERRQUEUE))
		return inet_recv_error(sk, msg, len, addr_len);
		return inet_recv_error(sk, msg, len, addr_len);
@@ -1911,6 +1969,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		if (used + offset < skb->len)
		if (used + offset < skb->len)
			continue;
			continue;


		if (TCP_SKB_CB(skb)->has_rxtstamp) {
			tcp_update_recv_tstamps(skb, &tss);
			has_tss = true;
		}
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			goto found_fin_ok;
			goto found_fin_ok;
		if (!(flags & MSG_PEEK))
		if (!(flags & MSG_PEEK))
@@ -1929,6 +1991,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
	 * on connected socket. I was just happy when found this 8) --ANK
	 * on connected socket. I was just happy when found this 8) --ANK
	 */
	 */


	if (has_tss)
		tcp_recv_timestamp(msg, sk, &tss);

	/* Clean up data we have read: This will do ACK frames. */
	/* Clean up data we have read: This will do ACK frames. */
	tcp_cleanup_rbuf(sk, copied);
	tcp_cleanup_rbuf(sk, copied);


+31 −4
Original line number Original line Diff line number Diff line
@@ -4246,9 +4246,15 @@ static void tcp_sack_remove(struct tcp_sock *tp)
	tp->rx_opt.num_sacks = num_sacks;
	tp->rx_opt.num_sacks = num_sacks;
}
}


/* Destination queue selector passed to tcp_try_coalesce(). */
enum tcp_queue {
	OOO_QUEUE = 0,	/* merging into an skb on the out of order queue */
	RCV_QUEUE = 1,	/* merging into the tail of sk_receive_queue */
};

/**
/**
 * tcp_try_coalesce - try to merge skb to prior one
 * tcp_try_coalesce - try to merge skb to prior one
 * @sk: socket
 * @sk: socket
 * @dest: destination queue
 * @to: prior buffer
 * @to: prior buffer
 * @from: buffer to add in queue
 * @from: buffer to add in queue
 * @fragstolen: pointer to boolean
 * @fragstolen: pointer to boolean
@@ -4260,6 +4266,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 * Returns true if caller should free @from instead of queueing it
 * Returns true if caller should free @from instead of queueing it
 */
 */
static bool tcp_try_coalesce(struct sock *sk,
static bool tcp_try_coalesce(struct sock *sk,
			     enum tcp_queue dest,
			     struct sk_buff *to,
			     struct sk_buff *to,
			     struct sk_buff *from,
			     struct sk_buff *from,
			     bool *fragstolen)
			     bool *fragstolen)
@@ -4281,6 +4288,15 @@ static bool tcp_try_coalesce(struct sock *sk,
	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;

	if (TCP_SKB_CB(from)->has_rxtstamp) {
		TCP_SKB_CB(to)->has_rxtstamp = true;
		if (dest == OOO_QUEUE)
			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
		else
			to->tstamp = from->tstamp;
	}

	return true;
	return true;
}
}


@@ -4315,6 +4331,9 @@ static void tcp_ofo_queue(struct sock *sk)
		}
		}
		p = rb_next(p);
		p = rb_next(p);
		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
		/* Replace tstamp which was stomped by rbnode */
		if (TCP_SKB_CB(skb)->has_rxtstamp)
			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;


		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
			SOCK_DEBUG(sk, "ofo packet was already received\n");
			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4326,7 +4345,8 @@ static void tcp_ofo_queue(struct sock *sk)
			   TCP_SKB_CB(skb)->end_seq);
			   TCP_SKB_CB(skb)->end_seq);


		tail = skb_peek_tail(&sk->sk_receive_queue);
		tail = skb_peek_tail(&sk->sk_receive_queue);
		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
						 tail, skb, &fragstolen);
		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
		if (!eaten)
		if (!eaten)
@@ -4380,6 +4400,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
		return;
		return;
	}
	}


	/* Stash tstamp to avoid being stomped on by rbnode */
	if (TCP_SKB_CB(skb)->has_rxtstamp)
		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;

	inet_csk_schedule_ack(sk);
	inet_csk_schedule_ack(sk);


	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@ -4405,7 +4429,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
	/* In the typical case, we are adding an skb to the end of the list.
	/* In the typical case, we are adding an skb to the end of the list.
	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
	 */
	 */
	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
			     skb, &fragstolen)) {
coalesce_done:
coalesce_done:
		tcp_grow_window(sk, skb);
		tcp_grow_window(sk, skb);
		kfree_skb_partial(skb, fragstolen);
		kfree_skb_partial(skb, fragstolen);
@@ -4455,7 +4480,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
				__kfree_skb(skb1);
				__kfree_skb(skb1);
				goto merge_right;
				goto merge_right;
			}
			}
		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
					    skb, &fragstolen)) {
			goto coalesce_done;
			goto coalesce_done;
		}
		}
		p = &parent->rb_right;
		p = &parent->rb_right;
@@ -4506,7 +4532,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int


	__skb_pull(skb, hdrlen);
	__skb_pull(skb, hdrlen);
	eaten = (tail &&
	eaten = (tail &&
		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
				  skb, fragstolen)) ? 1 : 0;
	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
	if (!eaten) {
	if (!eaten) {
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
+2 −0
Original line number Original line Diff line number Diff line
@@ -1637,6 +1637,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;
	TCP_SKB_CB(skb)->sacked	 = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;


lookup:
lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+2 −0
Original line number Original line Diff line number Diff line
@@ -1394,6 +1394,8 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
}


static int tcp_v6_rcv(struct sk_buff *skb)
static int tcp_v6_rcv(struct sk_buff *skb)
Loading