Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6703a605 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-tls-small-TX-offload-optimizations'



Jakub Kicinski says:

====================
net/tls: small TX offload optimizations

This set brings small TLS TX device optimizations. The biggest
gain comes from fixing a misuse of non temporal copy instructions.
On a synthetic workload modelled after customer's RFC application
I see 3-5% percent gain.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents fcd8c627 e681cc60
Loading
Loading
Loading
Loading
+91 −39
Original line number Diff line number Diff line
@@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)

static void destroy_record(struct tls_record_info *record)
{
	int nr_frags = record->num_frags;
	skb_frag_t *frag;
	int i;

	while (nr_frags-- > 0) {
		frag = &record->frags[nr_frags];
		__skb_frag_unref(frag);
	}
	for (i = 0; i < record->num_frags; i++)
		__skb_frag_unref(&record->frags[i]);
	kfree(record);
}

@@ -259,33 +256,15 @@ static int tls_push_record(struct sock *sk,
			   struct tls_context *ctx,
			   struct tls_offload_context_tx *offload_ctx,
			   struct tls_record_info *record,
			   struct page_frag *pfrag,
			   int flags,
			   unsigned char record_type)
			   int flags)
{
	struct tls_prot_info *prot = &ctx->prot_info;
	struct tcp_sock *tp = tcp_sk(sk);
	struct page_frag dummy_tag_frag;
	skb_frag_t *frag;
	int i;

	/* fill prepend */
	frag = &record->frags[0];
	tls_fill_prepend(ctx,
			 skb_frag_address(frag),
			 record->len - prot->prepend_size,
			 record_type,
			 prot->version);

	/* HW doesn't care about the data in the tag, because it fills it. */
	dummy_tag_frag.page = skb_frag_page(frag);
	dummy_tag_frag.offset = 0;

	tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
	record->end_seq = tp->write_seq + record->len;
	spin_lock_irq(&offload_ctx->lock);
	list_add_tail(&record->list, &offload_ctx->records_list);
	spin_unlock_irq(&offload_ctx->lock);
	list_add_tail_rcu(&record->list, &offload_ctx->records_list);
	offload_ctx->open_record = NULL;

	if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags))
@@ -307,6 +286,38 @@ static int tls_push_record(struct sock *sk,
	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
}

static int tls_device_record_close(struct sock *sk,
				   struct tls_context *ctx,
				   struct tls_record_info *record,
				   struct page_frag *pfrag,
				   unsigned char record_type)
{
	struct tls_prot_info *prot = &ctx->prot_info;
	int ret;

	/* append tag
	 * device will fill in the tag, we just need to append a placeholder
	 * use socket memory to improve coalescing (re-using a single buffer
	 * increases frag count)
	 * if we can't allocate memory now, steal some back from data
	 */
	if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
					sk->sk_allocation))) {
		ret = 0;
		tls_append_frag(record, pfrag, prot->tag_size);
	} else {
		ret = prot->tag_size;
		if (record->len <= prot->overhead_size)
			return -ENOMEM;
	}

	/* fill prepend */
	tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
			 record->len - prot->overhead_size,
			 record_type, prot->version);
	return ret;
}

static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
				 struct page_frag *pfrag,
				 size_t prepend_size)
@@ -361,6 +372,31 @@ static int tls_do_allocation(struct sock *sk,
	return 0;
}

static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t pre_copy, nocache;

	pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1);
	if (pre_copy) {
		pre_copy = min(pre_copy, bytes);
		if (copy_from_iter(addr, pre_copy, i) != pre_copy)
			return -EFAULT;
		bytes -= pre_copy;
		addr += pre_copy;
	}

	nocache = round_down(bytes, SMP_CACHE_BYTES);
	if (copy_from_iter_nocache(addr, nocache, i) != nocache)
		return -EFAULT;
	bytes -= nocache;
	addr += nocache;

	if (bytes && copy_from_iter(addr, bytes, i) != bytes)
		return -EFAULT;

	return 0;
}

static int tls_push_data(struct sock *sk,
			 struct iov_iter *msg_iter,
			 size_t size, int flags,
@@ -434,12 +470,10 @@ static int tls_push_data(struct sock *sk,
		copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
		copy = min_t(size_t, copy, (max_open_record_len - record->len));

		if (copy_from_iter_nocache(page_address(pfrag->page) +
					       pfrag->offset,
					   copy, msg_iter) != copy) {
			rc = -EFAULT;
		rc = tls_device_copy_data(page_address(pfrag->page) +
					  pfrag->offset, copy, msg_iter);
		if (rc)
			goto handle_error;
		}
		tls_append_frag(record, pfrag, copy);

		size -= copy;
@@ -457,13 +491,24 @@ static int tls_push_data(struct sock *sk,

		if (done || record->len >= max_open_record_len ||
		    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
			rc = tls_device_record_close(sk, tls_ctx, record,
						     pfrag, record_type);
			if (rc) {
				if (rc > 0) {
					size += rc;
				} else {
					size = orig_size;
					destroy_record(record);
					ctx->open_record = NULL;
					break;
				}
			}

			rc = tls_push_record(sk,
					     tls_ctx,
					     ctx,
					     record,
					     pfrag,
					     tls_push_record_flags,
					     record_type);
					     tls_push_record_flags);
			if (rc < 0)
				break;
		}
@@ -538,12 +583,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
		/* if retransmit_hint is irrelevant start
		 * from the beggining of the list
		 */
		info = list_first_entry(&context->records_list,
		info = list_first_entry_or_null(&context->records_list,
						struct tls_record_info, list);
		if (!info)
			return NULL;
		record_sn = context->unacked_record_sn;
	}

	list_for_each_entry_from(info, &context->records_list, list) {
	/* We just need the _rcu for the READ_ONCE() */
	rcu_read_lock();
	list_for_each_entry_from_rcu(info, &context->records_list, list) {
		if (before(seq, info->end_seq)) {
			if (!context->retransmit_hint ||
			    after(info->end_seq,
@@ -552,12 +601,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
				context->retransmit_hint = info;
			}
			*p_record_sn = record_sn;
			return info;
			goto exit_rcu_unlock;
		}
		record_sn++;
	}
	info = NULL;

	return NULL;
exit_rcu_unlock:
	rcu_read_unlock();
	return info;
}
EXPORT_SYMBOL(tls_get_record);