diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tx.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 334 |
1 files changed, 190 insertions, 144 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5532aa3675c75..f29deb44bf3b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,12 +35,21 @@ #include <net/dsfield.h> #include "en.h" #include "ipoib/ipoib.h" -#include "en_accel/ipsec_rxtx.h" +#include "en_accel/en_accel.h" #include "lib/clock.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS + +#ifndef CONFIG_MLX5_EN_TLS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) +#else +/* TLS offload requires MLX5E_SQ_STOP_ROOM to have + * enough room for a resync SKB, a normal SKB and a NOP + */ +#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) +#endif static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) @@ -179,28 +188,16 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, return min_t(u16, hlen, skb_headlen(skb)); } -static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, - unsigned int *skb_len, - unsigned int len) -{ - *skb_len -= len; - *skb_data += len; -} - -static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs, - unsigned char **skb_data, - unsigned int *skb_len) +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; - memcpy(vhdr, *skb_data, cpy1_sz); - mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy1_sz); + memcpy(vhdr, skb->data, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); - memcpy(&vhdr->h_vlan_encapsulated_proto, *skb_data, cpy2_sz); - mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz); + memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } static inline void @@ -211,34 +208,31 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct if (skb->encapsulation) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM; - sq->stats.csum_partial_inner++; + sq->stats->csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; - sq->stats.csum_partial++; + sq->stats->csum_partial++; } } else - sq->stats.csum_none++; + sq->stats->csum_none++; } static inline u16 -mlx5e_txwqe_build_eseg_gso(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5_wqe_eth_seg *eseg, unsigned int *num_bytes) +mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb) { + struct mlx5e_sq_stats *stats = sq->stats; u16 ihs; - eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - if (skb->encapsulation) { ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); - sq->stats.tso_inner_packets++; - sq->stats.tso_inner_bytes += skb->len - ihs; + stats->tso_inner_packets++; + stats->tso_inner_bytes += skb->len - ihs; } else { ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); - sq->stats.tso_packets++; - sq->stats.tso_bytes += skb->len - ihs; + stats->tso_packets++; + stats->tso_bytes += skb->len - ihs; } - *num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; return ihs; } @@ -291,17 +285,34 @@ dma_unmap_wqe_err: return -ENOMEM; } +static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, + struct mlx5_wq_cyc *wq, + u16 pi, u16 frag_pi) +{ + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->skb = NULL; + wi->num_wqebbs = 1; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += nnops; +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, - u8 opcode, u16 ds_cnt, u32 num_bytes, u8 num_dma, + u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg) { struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi; wi->num_bytes = num_bytes; wi->num_dma = num_dma; - wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + wi->num_wqebbs = num_wqebbs; wi->skb = skb; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); @@ -315,84 +326,108 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->pc += wi->num_wqebbs; if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); - sq->stats.stopped++; + sq->stats->stopped++; } if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); - - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.wqe_info[pi].skb = NULL; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - sq->stats.nop++; - } } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi) -{ - struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; +netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; - unsigned char *skb_data = skb->data; - unsigned int skb_len = skb->len; - u8 opcode = MLX5_OPCODE_SEND; - unsigned int num_bytes; + struct mlx5e_sq_stats *stats = sq->stats; + u16 ds_cnt, ds_cnt_inl = 0; + u16 headlen, ihs, frag_pi; + u8 num_wqebbs, opcode; + u32 num_bytes; int num_dma; - u16 headlen; - u16 ds_cnt; - u16 ihs; - - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + __be16 mss; + /* Calc ihs and ds cnt, no writes to wqe yet */ + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (skb_is_gso(skb)) { - opcode = MLX5_OPCODE_LSO; - ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); - sq->stats.packets += skb_shinfo(skb)->gso_segs; + opcode = MLX5_OPCODE_LSO; + mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + ihs = mlx5e_tx_get_gso_ihs(sq, skb); + num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + stats->packets += skb_shinfo(skb)->gso_segs; } else { - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + opcode = MLX5_OPCODE_SEND; + mss = 0; + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - sq->stats.packets++; + stats->packets++; } - sq->stats.bytes += num_bytes; - sq->stats.xmit_more += skb->xmit_more; - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + stats->bytes += num_bytes; + stats->xmit_more += skb->xmit_more; + + headlen = skb->len - ihs - skb->data_len; + ds_cnt += !!headlen; + ds_cnt += skb_shinfo(skb)->nr_frags; + + if (ihs) { + ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN; + + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); + if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + } + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + eseg->mss = mss; + if (ihs) { + eseg->inline_hdr.sz = cpu_to_be16(ihs); if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); - ihs += VLAN_HLEN; - sq->stats.added_vlan_packets++; + ihs -= VLAN_HLEN; + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs); + stats->added_vlan_packets++; } else { - memcpy(eseg->inline_hdr.start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + memcpy(eseg->inline_hdr.start, skb->data, ihs); } - eseg->inline_hdr.sz = cpu_to_be16(ihs); - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + dseg += ds_cnt_inl; } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); - sq->stats.added_vlan_packets++; + stats->added_vlan_packets++; } - headlen = skb_len - skb->data_len; - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, - (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, - num_bytes, num_dma, wi, cseg); + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, + num_dma, wi, cseg); return NETDEV_TX_OK; err_drop: - sq->stats.dropped++; + stats->dropped++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -401,21 +436,19 @@ err_drop: netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; - struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe *wqe; + struct mlx5e_txqsq *sq; + u16 pi; - memset(wqe, 0, sizeof(*wqe)); + sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); -#ifdef CONFIG_MLX5_EN_IPSEC - if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) { - skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb); - if (unlikely(!skb)) - return NETDEV_TX_OK; - } +#ifdef CONFIG_MLX5_ACCEL + /* might send skbs and update wqe and pi */ + skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); + if (unlikely(!skb)) + return NETDEV_TX_OK; #endif - return mlx5e_sq_xmit(sq, skb, wqe, pi); } @@ -443,7 +476,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_txqsq, cq); - if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; cqe = mlx5_cqwq_get_cqe(&cq->wq); @@ -478,7 +511,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) queue_work(cq->channel->priv->wq, &sq->recover.recover_work); } - sq->stats.cqe_err++; + sq->stats->cqe_err++; } do { @@ -489,7 +522,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); - ci = sqcc & sq->wq.sz_m1; + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; skb = wi->skb; @@ -538,7 +571,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) MLX5E_SQ_STOP_ROOM) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); - sq->stats.wake++; + sq->stats->wake++; } return (i == MLX5E_TX_CQ_POLL_BUDGET); @@ -552,7 +585,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) int i; while (sq->cc != sq->pc) { - ci = sq->cc & sq->wq.sz_m1; + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); wi = &sq->db.wqe_info[ci]; skb = wi->skb; @@ -574,18 +607,6 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) } #ifdef CONFIG_MLX5_CORE_IPOIB - -struct mlx5_wqe_eth_pad { - u8 rsvd0[16]; -}; - -struct mlx5i_tx_wqe { - struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_datagram_seg datagram; - struct mlx5_wqe_eth_pad pad; - struct mlx5_wqe_eth_seg eth; -}; - static inline void mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, struct mlx5_wqe_datagram_seg *dseg) @@ -598,67 +619,92 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey) { - struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = sq->pc & wq->sz_m1; - struct mlx5i_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_datagram_seg *datagram = &wqe->datagram; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - - unsigned char *skb_data = skb->data; - unsigned int skb_len = skb->len; - u8 opcode = MLX5_OPCODE_SEND; - unsigned int num_bytes; - int num_dma; - u16 headlen; - u16 ds_cnt; - u16 ihs; - - memset(wqe, 0, sizeof(*wqe)); + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5i_tx_wqe *wqe; - mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + struct mlx5_wqe_datagram_seg *datagram; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + struct mlx5e_sq_stats *stats = sq->stats; + u16 headlen, ihs, pi, frag_pi; + u16 ds_cnt, ds_cnt_inl = 0; + u8 num_wqebbs, opcode; + u32 num_bytes; + int num_dma; + __be16 mss; + /* Calc ihs and ds cnt, no writes to wqe yet */ + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (skb_is_gso(skb)) { - opcode = MLX5_OPCODE_LSO; - ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); - sq->stats.packets += skb_shinfo(skb)->gso_segs; + opcode = MLX5_OPCODE_LSO; + mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + ihs = mlx5e_tx_get_gso_ihs(sq, skb); + num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + stats->packets += skb_shinfo(skb)->gso_segs; } else { - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + opcode = MLX5_OPCODE_SEND; + mss = 0; + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - sq->stats.packets++; + stats->packets++; } - sq->stats.bytes += num_bytes; - sq->stats.xmit_more += skb->xmit_more; + stats->bytes += num_bytes; + stats->xmit_more += skb->xmit_more; + + headlen = skb->len - ihs - skb->data_len; + ds_cnt += !!headlen; + ds_cnt += skb_shinfo(skb)->nr_frags; + + if (ihs) { + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); + if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); + } + + mlx5i_sq_fetch_wqe(sq, &wqe, &pi); + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + datagram = &wqe->datagram; + eseg = &wqe->eth; + dseg = wqe->data; + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + eseg->mss = mss; - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (ihs) { - memcpy(eseg->inline_hdr.start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + memcpy(eseg->inline_hdr.start, skb->data, ihs); eseg->inline_hdr.sz = cpu_to_be16(ihs); - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + dseg += ds_cnt_inl; } - headlen = skb_len - skb->data_len; - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, - (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, - num_bytes, num_dma, wi, cseg); + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, + num_dma, wi, cseg); return NETDEV_TX_OK; err_drop: - sq->stats.dropped++; + stats->dropped++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; } - #endif |