cxgb4: Add support for FW_ETH_TX_PKT_VM_WR (d5fbda61) · Commits · e / devices / android_kernel_xiaomi_nabu

drivers/net/ethernet/chelsio/cxgb4/cxgb4.h

+12 −1

Original line number	Diff line number	Diff line
		@@ -522,6 +522,15 @@ enum {
		MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
		};

		/* Bit positions of the per-port private flags. */
		enum {
		PRIV_FLAG_PORT_TX_VM_BIT,
		};

		/* Mask form of PRIV_FLAG_PORT_TX_VM_BIT. t4_start_xmit() tests this bit
		 * in port_info::eth_flags to choose the cxgb4_vf_eth_xmit() transmit
		 * routine instead of cxgb4_eth_xmit().
		 */
		#define PRIV_FLAG_PORT_TX_VM BIT(PRIV_FLAG_PORT_TX_VM_BIT)

		/* NOTE(review): presumably the masks of all valid adapter-wide and
		 * per-port private flags; no adapter-wide flag is defined here, hence 0.
		 * Confirm against the users of these masks.
		 */
		#define PRIV_FLAGS_ADAP 0
		#define PRIV_FLAGS_PORT PRIV_FLAG_PORT_TX_VM

		struct adapter;
		struct sge_rspq;

		@@ -558,6 +567,7 @@ struct port_info {
		struct hwtstamp_config tstamp_config;
		bool ptp_enable;
		struct sched_table *sched_tbl;
		u32 eth_flags;
		};

		struct dentry;
		@@ -868,6 +878,7 @@ struct adapter {
		unsigned int flags;
		unsigned int adap_idx;
		enum chip_type chip;
		u32 eth_flags;

		int msg_enable;
		__be16 vxlan_port;
		@@ -1335,7 +1346,7 @@ void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat);
		void t4_free_sge_resources(struct adapter *adap);
		void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q);
		irq_handler_t t4_intr_handler(struct adapter *adap);
		netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev);
		/* Transmit entry point installed as .ndo_start_xmit; takes the skb and
		 * the egress net device by pointer.
		 */
		netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev);
		int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
				     const struct pkt_gl *gl);
		int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);

drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -3217,7 +3217,7 @@ static netdev_features_t cxgb_fix_features(struct net_device *dev,
		static const struct net_device_ops cxgb4_netdev_ops = {
		.ndo_open = cxgb_open,
		.ndo_stop = cxgb_close,
		.ndo_start_xmit = t4_eth_xmit,
		.ndo_start_xmit = t4_start_xmit,
		.ndo_select_queue = cxgb_select_queue,
		.ndo_get_stats64 = cxgb_get_stats,
		.ndo_set_rx_mode = cxgb_set_rxmode,

drivers/net/ethernet/chelsio/cxgb4/sge.c

+370 −2

Original line number	Diff line number	Diff line
		@@ -1288,13 +1288,13 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
		}

		/**
		* t4_eth_xmit - add a packet to an Ethernet Tx queue
		* cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
		* @skb: the packet
		* @dev: the egress net device
		*
		* Add a packet to an SGE Ethernet Tx queue. Runs with softirqs disabled.
		*/
		netdev_tx_t t4_eth_xmit(struct sk_buff skb, struct net_device dev)
		static netdev_tx_t cxgb4_eth_xmit(struct sk_buff skb, struct net_device dev)
		{
		u32 wr_mid, ctrl0, op;
		u64 cntrl, end, sgl;
		@@ -1547,6 +1547,374 @@ out_free: dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
		}

		/* Sizing constants. T4VF_ETHTXQ_MAX_HDR and TXD_PER_EQ_UNIT are checked
		 * by cxgb4_vf_eth_xmit() to make sure the Work Request header fits in a
		 * single TX Descriptor.
		 */
		enum {
		/* Egress Queue sizes, producer and consumer indices are all in units
		 * of EQ_UNIT (Egress Context Unit) bytes. Note that as far as the
		 * hardware is concerned, the free list is an Egress Queue (the host
		 * produces free buffers which the hardware consumes) and free list
		 * entries are 64-bit PCI DMA addresses.
		 *
		 * FL_PER_EQ_UNIT and TXD_PER_EQ_UNIT are both the number of 64-bit
		 * words that fit in one EQ_UNIT.
		 */
		EQ_UNIT = SGE_EQ_IDXSIZE,
		FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
		TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),

		/* Largest Work Request header, in 64-bit words: the
		 * FW_ETH_TX_PKT_VM_WR header followed by an LSO CPL and a TX Packet
		 * CPL.
		 */
		T4VF_ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
		sizeof(struct cpl_tx_pkt_lso_core) +
		sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
		};

		/**
		 * t4vf_is_eth_imm - can an Ethernet packet be sent as immediate data?
		 * @skb: the packet (currently not examined)
		 *
		 * Returns whether an Ethernet packet is small enough to fit completely as
		 * immediate data. With the current Work Request this is always false.
		 */
		static inline int t4vf_is_eth_imm(const struct sk_buff *skb)
		{
		/* The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
		 * which does not accommodate immediate data. We could dike out all
		 * of the support code for immediate data but that would tie our hands
		 * too much if we ever want to enhance the firmware. It would also
		 * create more differences between the PF and VF Drivers.
		 */
		return false;
		}

		/**
		* t4vf_calc_tx_flits - calculate the number of flits for a packet TX WR
		* @skb: the packet
		*
		* Returns the number of flits needed for a TX Work Request for the
		* given Ethernet packet, including the needed WR and CPL headers.
		*/
		static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb)
		{
		unsigned int flits;

		/* If the skb is small enough, we can pump it out as a work request
		* with only immediate data. In that case we just have to have the
		* TX Packet header plus the skb data in the Work Request.
		*/
		if (t4vf_is_eth_imm(skb))
		return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
		sizeof(__be64));

		/* Otherwise, we're going to have to construct a Scatter gather list
		* of the skb body and fragments. We also include the flits necessary
		* for the TX Packet Work Request and CPL. We always have a firmware
		* Write Header (incorporated as part of the cpl_tx_pkt_lso and
		* cpl_tx_pkt structures), followed by either a TX Packet Write CPL
		* message or, if we're doing a Large Send Offload, an LSO CPL message
		* with an embedded TX Packet Write CPL message.
		*/
		flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
		if (skb_shinfo(skb)->gso_size)
		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
		sizeof(struct cpl_tx_pkt_lso_core) +
		sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
		else
		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
		sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
		return flits;
		}

		/**
		 * cxgb4_vf_eth_xmit - add a packet to an Ethernet TX queue
		 * @skb: the packet
		 * @dev: the egress net device
		 *
		 * Add a packet to an SGE Ethernet TX queue using the FW_ETH_TX_PKT_VM_WR
		 * firmware Work Request. Runs with softirqs disabled.
		 *
		 * Returns NETDEV_TX_BUSY if the TX queue has no room for the Work
		 * Request (the skb is left untouched), otherwise NETDEV_TX_OK. Packets
		 * that are too short, too long or cannot be DMA-mapped are freed and
		 * reported as NETDEV_TX_OK.
		 */
		static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
						     struct net_device *dev)
		{
			dma_addr_t addr[MAX_SKB_FRAGS + 1];
			const struct skb_shared_info *ssi;
			struct fw_eth_tx_pkt_vm_wr *wr;
			int qidx, credits, max_pkt_len;
			struct cpl_tx_pkt_core *cpl;
			const struct port_info *pi;
			unsigned int flits, ndesc;
			struct sge_eth_txq *txq;
			struct adapter *adapter;
			u64 cntrl, *end;
			u32 wr_mid;
			const size_t fw_hdr_copy_len = sizeof(wr->ethmacdst) +
						       sizeof(wr->ethmacsrc) +
						       sizeof(wr->ethtype) +
						       sizeof(wr->vlantci);

			/* The chip minimum packet length is 10 octets but the firmware
			 * command that we are using requires that we copy the Ethernet header
			 * (including the VLAN tag) into the header so we reject anything
			 * smaller than that ...
			 */
			if (unlikely(skb->len < fw_hdr_copy_len))
				goto out_free;

			/* Discard the packet if the length is greater than mtu */
			max_pkt_len = ETH_HLEN + dev->mtu;
			if (skb_vlan_tag_present(skb))
				max_pkt_len += VLAN_HLEN;
			if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
				goto out_free;

			/* Figure out which TX Queue we're going to use. */
			pi = netdev_priv(dev);
			adapter = pi->adapter;
			qidx = skb_get_queue_mapping(skb);
			WARN_ON(qidx >= pi->nqsets);
			txq = &adapter->sge.ethtxq[pi->first_qset + qidx];

			/* Take this opportunity to reclaim any TX Descriptors whose DMA
			 * transfers have completed.
			 */
			cxgb4_reclaim_completed_tx(adapter, &txq->q, true);

			/* Calculate the number of flits and TX Descriptors we're going to
			 * need along with how many TX Descriptors will be left over after
			 * we inject our Work Request.
			 */
			flits = t4vf_calc_tx_flits(skb);
			ndesc = flits_to_desc(flits);
			credits = txq_avail(&txq->q) - ndesc;

			if (unlikely(credits < 0)) {
				/* Not enough room for this packet's Work Request. Stop the
				 * TX Queue and return a "busy" condition. The queue will get
				 * started later on when the firmware informs us that space
				 * has opened up.
				 */
				eth_txq_stop(txq);
				dev_err(adapter->pdev_dev,
					"%s: TX ring %u full while queue awake!\n",
					dev->name, qidx);
				return NETDEV_TX_BUSY;
			}

			if (!t4vf_is_eth_imm(skb) &&
			    unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
				/* We need to map the skb into PCI DMA space (because it can't
				 * be in-lined directly into the Work Request) and the mapping
				 * operation failed. Record the error and drop the packet.
				 */
				txq->mapping_err++;
				goto out_free;
			}

			wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
			if (unlikely(credits < ETHTXQ_STOP_THRES)) {
				/* After we're done injecting the Work Request for this
				 * packet, we'll be below our "stop threshold" so stop the TX
				 * Queue now and schedule a request for an SGE Egress Queue
				 * Update message. The queue will get started later on when
				 * the firmware processes this Work Request and sends us an
				 * Egress Queue Status Update message indicating that space
				 * has opened up.
				 */
				eth_txq_stop(txq);
				wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
			}

			/* Start filling in our Work Request. Note that we do _not_ handle
			 * the WR Header wrapping around the TX Descriptor Ring. If our
			 * maximum header size ever exceeds one TX Descriptor, we'll need to
			 * do something else here.
			 */
			WARN_ON(DIV_ROUND_UP(T4VF_ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
			wr = (void *)&txq->q.desc[txq->q.pidx];
			wr->equiq_to_len16 = cpu_to_be32(wr_mid);
			wr->r3[0] = cpu_to_be32(0);
			wr->r3[1] = cpu_to_be32(0);
			skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
			end = (u64 *)wr + flits;

			/* If this is a Large Send Offload packet we'll put in an LSO CPL
			 * message with an encapsulated TX Packet CPL message. Otherwise we
			 * just use a TX Packet CPL message.
			 */
			ssi = skb_shinfo(skb);
			if (ssi->gso_size) {
				/* The LSO CPL sits directly behind the Work Request header. */
				struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
				bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
				int l3hdr_len = skb_network_header_len(skb);
				int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;

				wr->op_immdlen =
					cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
						    FW_WR_IMMDLEN_V(sizeof(*lso) +
								    sizeof(*cpl)));
				/* Fill in the LSO CPL message. */
				lso->lso_ctrl =
					cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
						    LSO_FIRST_SLICE_F |
						    LSO_LAST_SLICE_F |
						    LSO_IPV6_V(v6) |
						    LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
						    LSO_IPHDR_LEN_V(l3hdr_len / 4) |
						    LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
				lso->ipid_ofst = cpu_to_be16(0);
				lso->mss = cpu_to_be16(ssi->gso_size);
				lso->seqno_offset = cpu_to_be32(0);
				if (is_t4(adapter->params.chip))
					lso->len = cpu_to_be32(skb->len);
				else
					lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len));

				/* Set up TX Packet CPL pointer, control word and perform
				 * accounting.
				 */
				cpl = (void *)(lso + 1);

				if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
					cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
				else
					cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);

				cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
							   TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
					 TXPKT_IPHDR_LEN_V(l3hdr_len);
				txq->tso++;
				txq->tx_cso += ssi->gso_segs;
			} else {
				int len;

				len = (t4vf_is_eth_imm(skb)
				       ? skb->len + sizeof(*cpl)
				       : sizeof(*cpl));
				wr->op_immdlen =
					cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
						    FW_WR_IMMDLEN_V(len));

				/* Set up TX Packet CPL pointer, control word and perform
				 * accounting.
				 */
				cpl = (void *)(wr + 1);
				if (skb->ip_summed == CHECKSUM_PARTIAL) {
					cntrl = hwcsum(adapter->params.chip, skb) |
						TXPKT_IPCSUM_DIS_F;
					txq->tx_cso++;
				} else {
					cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
				}
			}

			/* If there's a VLAN tag present, add that to the list of things to
			 * do in this Work Request.
			 */
			if (skb_vlan_tag_present(skb)) {
				txq->vlan_ins++;
				cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
			}

			/* Fill in the TX Packet CPL message header. */
			cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
						 TXPKT_INTF_V(pi->port_id) |
						 TXPKT_PF_V(0));
			cpl->pack = cpu_to_be16(0);
			cpl->len = cpu_to_be16(skb->len);
			cpl->ctrl1 = cpu_to_be64(cntrl);

			/* Fill in the body of the TX Packet CPL message with either in-lined
			 * data or a Scatter/Gather List.
			 */
			if (t4vf_is_eth_imm(skb)) {
				/* In-line the packet's data and free the skb since we don't
				 * need it any longer.
				 */
				cxgb4_inline_tx_skb(skb, &txq->q, cpl + 1);
				dev_consume_skb_any(skb);
			} else {
				/* Write the skb's Scatter/Gather list into the TX Packet CPL
				 * message and retain a pointer to the skb so we can free it
				 * later when its DMA completes. (We store the skb pointer
				 * in the Software Descriptor corresponding to the last TX
				 * Descriptor used by the Work Request.)
				 *
				 * The retained skb will be freed when the corresponding TX
				 * Descriptors are reclaimed after their DMAs complete.
				 * However, this could take quite a while since, in general,
				 * the hardware is set up to be lazy about sending DMA
				 * completion notifications to us and we mostly perform TX
				 * reclaims in the transmit routine.
				 *
				 * This is good for performance but means that we rely on new
				 * TX packets arriving to run the destructors of completed
				 * packets, which open up space in their sockets' send queues.
				 * Sometimes we do not get such new packets causing TX to
				 * stall. A single UDP transmitter is a good example of this
				 * situation. We have a clean up timer that periodically
				 * reclaims completed packets but it doesn't run often enough
				 * (nor do we want it to) to prevent lengthy stalls. A
				 * solution to this problem is to run the destructor early,
				 * after the packet is queued but before it's DMAd. A con is
				 * that we lie to socket memory accounting, but the amount of
				 * extra memory is reasonable (limited by the number of TX
				 * descriptors), the packets do actually get freed quickly by
				 * new packets almost always, and for protocols like TCP that
				 * wait for acks to really free up the data the extra memory
				 * is even less. On the positive side we run the destructors
				 * on the sending CPU rather than on a potentially different
				 * completing CPU, usually a good thing.
				 *
				 * Run the destructor before telling the DMA engine about the
				 * packet to make sure it doesn't complete and get freed
				 * prematurely.
				 */
				struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
				struct sge_txq *tq = &txq->q;
				int last_desc;

				/* If the Work Request header was an exact multiple of our TX
				 * Descriptor length, then it's possible that the starting SGL
				 * pointer lines up exactly with the end of our TX Descriptor
				 * ring. If that's the case, wrap around to the beginning
				 * here ...
				 */
				if (unlikely((void *)sgl == (void *)tq->stat)) {
					sgl = (void *)tq->desc;
					end = (void *)((void *)tq->desc +
						       ((void *)end - (void *)tq->stat));
				}

				cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
				skb_orphan(skb);

				/* Index of the last TX Descriptor this Work Request uses,
				 * wrapped around the ring if necessary.
				 */
				last_desc = tq->pidx + ndesc - 1;
				if (last_desc >= tq->size)
					last_desc -= tq->size;
				tq->sdesc[last_desc].skb = skb;
				tq->sdesc[last_desc].sgl = sgl;
			}

			/* Advance our internal TX Queue state, tell the hardware about
			 * the new TX descriptors and return success.
			 */
			txq_advance(&txq->q, ndesc);

			cxgb4_ring_tx_db(adapter, &txq->q, ndesc);
			return NETDEV_TX_OK;

		out_free:
			/* An error of some sort happened. Free the TX skb and tell the
			 * OS that we've "dealt" with the packet ...
			 */
			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}

		/**
		 * t4_start_xmit - transmit entry point for cxgb4 Ethernet ports
		 * @skb: the packet
		 * @dev: the egress net device
		 *
		 * Hands the packet to cxgb4_vf_eth_xmit() when PRIV_FLAG_PORT_TX_VM is
		 * set in the port's eth_flags, and to cxgb4_eth_xmit() otherwise.
		 * Returns whatever the selected transmit routine returns.
		 */
		netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
		{
			struct port_info *pi = netdev_priv(dev);

			/* The VM Work Request path is the exception, not the rule. */
			if (unlikely(pi->eth_flags & PRIV_FLAG_PORT_TX_VM))
				return cxgb4_vf_eth_xmit(skb, dev);

			return cxgb4_eth_xmit(skb, dev);
		}

		/**
		* reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
		* @q: the SGE control Tx queue