Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3c97f5de authored by Rajkumar Manoharan, committed by Kalle Valo
Browse files

ath10k: implement NAPI support



Add NAPI support for rx and tx completion. The NAPI poll is scheduled
from the interrupt handler. The design is as follows:

 - on interrupt
     - schedule napi and mask interrupts
 - on poll
   - process all pipes (no actual Tx/Rx)
   - process Rx within budget
   - if quota exceeds budget reschedule napi poll by returning budget
   - process Tx completions and update budget if necessary
   - process Tx fetch indications (pull-push)
   - push any other pending Tx (if possible)
   - before resched or napi completion replenish htt rx ring buffer
   - if work done < budget, complete napi poll and unmask interrupts

This change also gets rid of two tasklets (intr_tq and txrx_compl_task).

Peak throughput was measured with NAPI on the IPQ4019 platform in a
controlled environment. No noticeable reduction in throughput was seen,
and CPU usage improved: it dropped by approximately 15% in the UDP
uplink case.

DL: AP DUT Tx
UL: AP DUT Rx

IPQ4019 (avg. cpu usage %)

========
                TOT              +NAPI
              ===========      =============
TCP DL       644 Mbps (42%)    645 Mbps (36%)
TCP UL       673 Mbps (30%)    675 Mbps (26%)
UDP DL       682 Mbps (49%)    680 Mbps (49%)
UDP UL       720 Mbps (28%)    717 Mbps (11%)

Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
parent c39265f7
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -462,13 +462,13 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar)
static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg)
{
	struct ath10k *ar = arg;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);

	if (!ath10k_pci_irq_pending(ar))
		return IRQ_NONE;

	ath10k_pci_disable_and_clear_legacy_irq(ar);
	tasklet_schedule(&ar_pci->intr_tq);
	ath10k_pci_irq_msi_fw_mask(ar);
	napi_schedule(&ar->napi);

	return IRQ_HANDLED;
}
@@ -717,6 +717,9 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar)
	synchronize_irq(ar_ahb->irq);

	ath10k_pci_flush(ar);

	napi_synchronize(&ar->napi);
	napi_disable(&ar->napi);
}

static int ath10k_ahb_hif_power_up(struct ath10k *ar)
@@ -748,6 +751,7 @@ static int ath10k_ahb_hif_power_up(struct ath10k *ar)
		ath10k_err(ar, "could not wake up target CPU: %d\n", ret);
		goto err_ce_deinit;
	}
	napi_enable(&ar->napi);

	return 0;

@@ -831,7 +835,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev)
		goto err_resource_deinit;
	}

	ath10k_pci_init_irq_tasklets(ar);
	ath10k_pci_init_napi(ar);

	ret = ath10k_ahb_request_irq_legacy(ar);
	if (ret)
+2 −0
Original line number Diff line number Diff line
@@ -2322,6 +2322,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
	INIT_WORK(&ar->register_work, ath10k_core_register_work);
	INIT_WORK(&ar->restart_work, ath10k_core_restart);

	init_dummy_netdev(&ar->napi_dev);

	ret = ath10k_debug_create(ar);
	if (ret)
		goto err_free_aux_wq;
+8 −0
Original line number Diff line number Diff line
@@ -65,6 +65,10 @@
#define ATH10K_KEEPALIVE_MAX_IDLE 3895
#define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900

/* NAPI poll budget */
#define ATH10K_NAPI_BUDGET      64
#define ATH10K_NAPI_QUOTA_LIMIT 60

struct ath10k;

enum ath10k_bus {
@@ -954,6 +958,10 @@ struct ath10k {
	struct ath10k_thermal thermal;
	struct ath10k_wow wow;

	/* NAPI */
	struct net_device napi_dev;
	struct napi_struct napi;

	/* must be last */
	u8 drv_priv[0] __aligned(sizeof(void *));
};
+1 −1
Original line number Diff line number Diff line
@@ -1665,7 +1665,6 @@ struct ath10k_htt {

	/* This is used to group tx/rx completions separately and process them
	 * in batches to reduce cache stalls */
	struct tasklet_struct txrx_compl_task;
	struct sk_buff_head rx_compl_q;
	struct sk_buff_head rx_in_ord_compl_q;
	struct sk_buff_head tx_fetch_ind_q;
@@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt,
		  struct sk_buff *msdu);
void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar,
					     struct sk_buff *skb);
int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget);

#endif
+94 −60
Original line number Diff line number Diff line
@@ -34,7 +34,6 @@
#define HTT_RX_RING_REFILL_RESCHED_MS 5

static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb);
static void ath10k_htt_txrx_compl_task(unsigned long ptr);

static struct sk_buff *
ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr)
@@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar)
void ath10k_htt_rx_free(struct ath10k_htt *htt)
{
	del_timer_sync(&htt->rx_ring.refill_retry_timer);
	tasklet_kill(&htt->txrx_compl_task);

	skb_queue_purge(&htt->rx_compl_q);
	skb_queue_purge(&htt->rx_in_ord_compl_q);
@@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
	skb_queue_head_init(&htt->tx_fetch_ind_q);
	atomic_set(&htt->num_mpdus_ready, 0);

	tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
		     (unsigned long)htt);

	ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n",
		   htt->rx_ring.size, htt->rx_ring.fill_level);
	return 0;
@@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar,
	trace_ath10k_rx_hdr(ar, skb->data, skb->len);
	trace_ath10k_rx_payload(ar, skb->data, skb->len);

	ieee80211_rx(ar->hw, skb);
	ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi);
}

static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar,
@@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
	struct ath10k *ar = htt->ar;
	static struct ieee80211_rx_status rx_status;
	struct sk_buff_head amsdu;
	int ret;
	int ret, num_msdus;

	__skb_queue_head_init(&amsdu);

@@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
		return ret;
	}

	num_msdus = skb_queue_len(&amsdu);
	ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff);
	ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
	ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status);
	ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status);
	ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status);

	return 0;
	return num_msdus;
}

static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
@@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
		mpdu_count += mpdu_ranges[i].mpdu_count;

	atomic_add(mpdu_count, &htt->num_mpdus_ready);

	tasklet_schedule(&htt->txrx_compl_task);
}

static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt)
{
	atomic_inc(&htt->num_mpdus_ready);

	tasklet_schedule(&htt->txrx_compl_task);
}

static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar,
@@ -1772,7 +1759,7 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status,
			RX_FLAG_MMIC_STRIPPED;
}

static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
				      struct sk_buff_head *list)
{
	struct ath10k_htt *htt = &ar->htt;
@@ -1780,6 +1767,7 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
	struct htt_rx_offload_msdu *rx;
	struct sk_buff *msdu;
	size_t offset;
	int num_msdu = 0;

	while ((msdu = __skb_dequeue(list))) {
		/* Offloaded frames don't have Rx descriptor. Instead they have
@@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
		ath10k_htt_rx_h_rx_offload_prot(status, msdu);
		ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id);
		ath10k_process_rx(ar, status, msdu);
		num_msdu++;
	}
	return num_msdu;
}

static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
{
	struct ath10k_htt *htt = &ar->htt;
	struct htt_resp *resp = (void *)skb->data;
@@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
	u8 tid;
	bool offload;
	bool frag;
	int ret;
	int ret, num_msdus = 0;

	lockdep_assert_held(&htt->rx_ring.lock);

	if (htt->rx_confused)
		return;
		return -EIO;

	skb_pull(skb, sizeof(resp->hdr));
	skb_pull(skb, sizeof(resp->rx_in_ord_ind));
@@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)

	if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) {
		ath10k_warn(ar, "dropping invalid in order rx indication\n");
		return;
		return -EINVAL;
	}

	/* The event can deliver more than 1 A-MSDU. Each A-MSDU is later
@@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
	if (ret < 0) {
		ath10k_warn(ar, "failed to pop paddr list: %d\n", ret);
		htt->rx_confused = true;
		return;
		return -EIO;
	}

	/* Offloaded frames are very different and need to be handled
	 * separately.
	 */
	if (offload)
		ath10k_htt_rx_h_rx_offload(ar, &list);
		num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list);

	while (!skb_queue_empty(&list)) {
		__skb_queue_head_init(&amsdu);
@@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
			 * better to report something than nothing though. This
			 * should still give an idea about rx rate to the user.
			 */
			num_msdus += skb_queue_len(&amsdu);
			ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
			ath10k_htt_rx_h_filter(ar, &amsdu, status);
			ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
@@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
			ath10k_warn(ar, "failed to extract amsdu: %d\n", ret);
			htt->rx_confused = true;
			__skb_queue_purge(&list);
			return;
			return -EIO;
		}
	}
	return num_msdus;
}

static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
@@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
	}
	case HTT_T2H_MSG_TYPE_TX_COMPL_IND:
		ath10k_htt_rx_tx_compl_ind(htt->ar, skb);
		tasklet_schedule(&htt->txrx_compl_task);
		break;
	case HTT_T2H_MSG_TYPE_SEC_IND: {
		struct ath10k *ar = htt->ar;
@@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
	case HTT_T2H_MSG_TYPE_RX_FRAG_IND: {
		ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ",
				skb->data, skb->len);
		ath10k_htt_rx_frag_handler(htt);
		atomic_inc(&htt->num_mpdus_ready);
		break;
	}
	case HTT_T2H_MSG_TYPE_TEST:
@@ -2320,8 +2311,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
		break;
	}
	case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: {
		skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
		tasklet_schedule(&htt->txrx_compl_task);
		__skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
		return false;
	}
	case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND:
@@ -2347,7 +2337,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
			break;
		}
		skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind);
		tasklet_schedule(&htt->txrx_compl_task);
		break;
	}
	case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
@@ -2376,27 +2365,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar,
}
EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler);

static void ath10k_htt_txrx_compl_task(unsigned long ptr)
int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget)
{
	struct ath10k_htt *htt = (struct ath10k_htt *)ptr;
	struct ath10k *ar = htt->ar;
	struct ath10k_htt *htt = &ar->htt;
	struct htt_tx_done tx_done = {};
	struct sk_buff_head rx_ind_q;
	struct sk_buff_head tx_ind_q;
	struct sk_buff *skb;
	unsigned long flags;
	int num_mpdus;
	int quota = 0, done, num_rx_msdus;
	bool resched_napi = false;

	__skb_queue_head_init(&rx_ind_q);
	__skb_queue_head_init(&tx_ind_q);

	spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags);
	skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q);
	spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags);
	/* Since in-ord-ind can deliver more than 1 A-MSDU in single event,
	 * process it first to utilize full available quota.
	 */
	while (quota < budget) {
		if (skb_queue_empty(&htt->rx_in_ord_compl_q))
			break;

	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
		skb = __skb_dequeue(&htt->rx_in_ord_compl_q);
		if (!skb) {
			resched_napi = true;
			goto exit;
		}

		spin_lock_bh(&htt->rx_ring.lock);
		num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb);
		spin_unlock_bh(&htt->rx_ring.lock);
		if (num_rx_msdus < 0) {
			resched_napi = true;
			goto exit;
		}

		dev_kfree_skb_any(skb);
		if (num_rx_msdus > 0)
			quota += num_rx_msdus;

		if ((quota > ATH10K_NAPI_QUOTA_LIMIT) &&
		    !skb_queue_empty(&htt->rx_in_ord_compl_q)) {
			resched_napi = true;
			goto exit;
		}
	}

	while (quota < budget) {
		/* no more data to receive */
		if (!atomic_read(&htt->num_mpdus_ready))
			break;

		num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt);
		if (num_rx_msdus < 0) {
			resched_napi = true;
			goto exit;
		}

		quota += num_rx_msdus;
		atomic_dec(&htt->num_mpdus_ready);
		if ((quota > ATH10K_NAPI_QUOTA_LIMIT) &&
		    atomic_read(&htt->num_mpdus_ready)) {
			resched_napi = true;
			goto exit;
		}
	}

	/* From NAPI documentation:
	 *  The napi poll() function may also process TX completions, in which
	 *  case if it processes the entire TX ring then it should count that
	 *  work as the rest of the budget.
	 */
	if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo))
		quota = budget;

	/* kfifo_get: called only within txrx_tasklet so it's neatly serialized.
	 * From kfifo_get() documentation:
@@ -2406,27 +2445,22 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr)
	while (kfifo_get(&htt->txdone_fifo, &tx_done))
		ath10k_txrx_tx_unref(htt, &tx_done);

	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);

	while ((skb = __skb_dequeue(&tx_ind_q))) {
		ath10k_htt_rx_tx_fetch_ind(ar, skb);
		dev_kfree_skb_any(skb);
	}

	num_mpdus = atomic_read(&htt->num_mpdus_ready);

	while (num_mpdus) {
		if (ath10k_htt_rx_handle_amsdu(htt))
			break;

		num_mpdus--;
		atomic_dec(&htt->num_mpdus_ready);
	}

	while ((skb = __skb_dequeue(&rx_ind_q))) {
		spin_lock_bh(&htt->rx_ring.lock);
		ath10k_htt_rx_in_ord_ind(ar, skb);
		spin_unlock_bh(&htt->rx_ring.lock);
		dev_kfree_skb_any(skb);
	}

exit:
	ath10k_htt_rx_msdu_buff_replenish(htt);
	/* In case of rx failure or more data to read, report budget
	 * to reschedule NAPI poll
	 */
	done = resched_napi ? budget : quota;

	return done;
}
EXPORT_SYMBOL(ath10k_htt_txrx_compl_task);
Loading