author      Rajkumar Manoharan <rmanohar@qti.qualcomm.com>  2016-03-22 17:22:18 +0530
committer   Kalle Valo <kvalo@qca.qualcomm.com>  2016-04-04 17:03:21 +0300
commit      128abd09134a5b415fef4373841ea6d3fb7b680f (patch)
tree        f044f94436a670bcd38d9aa22dd9c12258fc872d /drivers/net/wireless/ath/ath10k/pci.c
parent      24d9ef5eff5057bb6339ed1cf852a2b2a7be324d (diff)
ath10k: reuse copy engine 5 (htt rx) descriptors
Whenever an htt rx indication, i.e. a target-to-host message, is received on the rx copy engine (CE 5), the message is freed after the response has been processed, and CE 5 is then refilled with new descriptors during post-rx processing. These memory alloc and free operations can be avoided by reusing the same descriptors.

During CE pipe allocation the full ring is not initialized, i.e. only n-1 entries are filled; so for CE 5 the full ring must be filled up to reuse descriptors. Moreover, the CE 5 write index is now updated in a single shot instead of with incremental accesses, which avoids multiple pci_write and ce_ring accesses.

From experiments, this improves CPU usage by ~3% on the IPQ4019 platform.

Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Diffstat (limited to 'drivers/net/wireless/ath/ath10k/pci.c')
-rw-r--r--  drivers/net/wireless/ath/ath10k/pci.c  63
1 file changed, 61 insertions(+), 2 deletions(-)
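The commit message above describes two independent savings: descriptor reuse (no per-message alloc/free) and a single batched write-index update per interrupt instead of one register write per recycled buffer. The self-contained C sketch below illustrates only the batching idea; mmio_write(), post_one() and post_batch() are hypothetical stand-ins for illustration, not ath10k API (the real batched update in the patch is ath10k_ce_rx_update_write_idx()).

#include <stdio.h>

#define RING_SIZE 512			/* CE rings are power-of-two sized */

static unsigned int sw_index;		/* host-side shadow of the write index */

static void mmio_write(unsigned int val)
{
	/* stand-in for the comparatively expensive PCI register write */
	printf("pci write: write_index <- %u\n", val);
}

/* per-entry publish: n recycled descriptors cost n register writes */
static void post_one(void)
{
	sw_index = (sw_index + 1) & (RING_SIZE - 1);
	mmio_write(sw_index);
}

/* batched publish: n recycled descriptors cost a single register write */
static void post_batch(unsigned int nentries)
{
	sw_index = (sw_index + nentries) & (RING_SIZE - 1);
	mmio_write(sw_index);
}

int main(void)
{
	post_one(); post_one(); post_one();	/* three PCI writes */
	post_batch(3);				/* one PCI write for three entries */
	return 0;
}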
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 290a61afde1a3..0b305efe6c946 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -809,7 +809,8 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
spin_lock_bh(&ar_pci->ce_lock);
num = __ath10k_ce_rx_num_free_bufs(ce_pipe);
spin_unlock_bh(&ar_pci->ce_lock);
- while (num--) {
+
+ while (num >= 0) {
ret = __ath10k_pci_rx_post_buf(pipe);
if (ret) {
if (ret == -ENOSPC)
@@ -819,6 +820,7 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
ATH10K_PCI_RX_POST_RETRY_MS);
break;
}
+ num--;
}
}
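A note on this hunk: "while (num--)" iterates exactly num times, whereas "while (num >= 0)" with the decrement moved after a successful post iterates num + 1 times, so one extra buffer is posted and the ring slot that the old n-1 fill left empty is now used. A standalone demo of the two loop shapes (plain C, not driver code):

#include <stdio.h>

int main(void)
{
	int num, posted;

	num = 3;
	posted = 0;
	while (num--)		/* old loop: runs num times */
		posted++;
	printf("old loop posts %d buffers\n", posted);	/* prints 3 */

	num = 3;
	posted = 0;
	while (num >= 0) {	/* new loop: runs num + 1 times */
		posted++;
		num--;
	}
	printf("new loop posts %d buffers\n", posted);	/* prints 4 */
	return 0;
}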
@@ -1212,6 +1214,63 @@ static void ath10k_pci_process_rx_cb(struct ath10k_ce_pipe *ce_state,
ath10k_pci_rx_post_pipe(pipe_info);
}
+static void ath10k_pci_process_htt_rx_cb(struct ath10k_ce_pipe *ce_state,
+ void (*callback)(struct ath10k *ar,
+ struct sk_buff *skb))
+{
+ struct ath10k *ar = ce_state->ar;
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_pci_pipe *pipe_info = &ar_pci->pipe_info[ce_state->id];
+ struct ath10k_ce_pipe *ce_pipe = pipe_info->ce_hdl;
+ struct sk_buff *skb;
+ struct sk_buff_head list;
+ void *transfer_context;
+ unsigned int nbytes, max_nbytes, nentries;
+ int orig_len;
+
+ /* No need to acquire ce_lock for CE5, since this is the only place CE5
+ * is processed other than init and deinit. Before releasing CE5
+ * buffers, interrupts are disabled. Thus CE5 access is serialized.
+ */
+ __skb_queue_head_init(&list);
+ while (ath10k_ce_completed_recv_next_nolock(ce_state, &transfer_context,
+ &nbytes) == 0) {
+ skb = transfer_context;
+ max_nbytes = skb->len + skb_tailroom(skb);
+
+ if (unlikely(max_nbytes < nbytes)) {
+ ath10k_warn(ar, "rxed more than expected (nbytes %d, max %d)",
+ nbytes, max_nbytes);
+ continue;
+ }
+
+ dma_sync_single_for_cpu(ar->dev, ATH10K_SKB_RXCB(skb)->paddr,
+ max_nbytes, DMA_FROM_DEVICE);
+ skb_put(skb, nbytes);
+ __skb_queue_tail(&list, skb);
+ }
+
+ nentries = skb_queue_len(&list);
+ while ((skb = __skb_dequeue(&list))) {
+ ath10k_dbg(ar, ATH10K_DBG_PCI, "pci rx ce pipe %d len %d\n",
+ ce_state->id, skb->len);
+ ath10k_dbg_dump(ar, ATH10K_DBG_PCI_DUMP, NULL, "pci rx: ",
+ skb->data, skb->len);
+
+ orig_len = skb->len;
+ callback(ar, skb);
+ skb_push(skb, orig_len - skb->len);
+ skb_reset_tail_pointer(skb);
+ skb_trim(skb, 0);
+
+ /* let the device gain the buffer again */
+ dma_sync_single_for_device(ar->dev, ATH10K_SKB_RXCB(skb)->paddr,
+ skb->len + skb_tailroom(skb),
+ DMA_FROM_DEVICE);
+ }
+ ath10k_ce_rx_update_write_idx(ce_pipe, nentries);
+}
+
/* Called by lower (CE) layer when data is received from the Target. */
static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state)
{
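In ath10k_pci_process_htt_rx_cb() above, the sequence skb_push(skb, orig_len - skb->len); skb_reset_tail_pointer(skb); skb_trim(skb, 0) undoes whatever header bytes the callback consumed and leaves the skb empty with its full tailroom, ready for the device to DMA into again. A userspace mock of that pointer arithmetic (mock_skb and its fields are illustrative stand-ins, not the kernel's sk_buff):

#include <assert.h>
#include <stddef.h>

struct mock_skb {
	unsigned char buf[2048];
	size_t data;	/* offset of first payload byte */
	size_t tail;	/* offset one past the last payload byte */
};

static size_t skb_len(const struct mock_skb *s)
{
	return s->tail - s->data;
}

int main(void)
{
	struct mock_skb s = { .data = 0, .tail = 0 };

	s.tail = 300;				/* skb_put(skb, nbytes): 300 bytes received */
	size_t orig_len = skb_len(&s);

	s.data += 40;				/* callback pulls a 40-byte header */

	s.data -= orig_len - skb_len(&s);	/* skb_push(skb, orig_len - skb->len) */
	s.tail = s.data;			/* skb_reset_tail_pointer() + skb_trim(skb, 0) */

	assert(skb_len(&s) == 0 && s.data == 0);	/* empty, full tailroom again */
	return 0;
}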
@@ -1268,7 +1327,7 @@ static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state)
*/
ath10k_ce_per_engine_service(ce_state->ar, 4);
- ath10k_pci_process_rx_cb(ce_state, ath10k_pci_htt_rx_deliver);
+ ath10k_pci_process_htt_rx_cb(ce_state, ath10k_pci_htt_rx_deliver);
}
int ath10k_pci_hif_tx_sg(struct ath10k *ar, u8 pipe_id,