summaryrefslogtreecommitdiffstats
path: root/include/linux/netdevice.h
diff options
context:
space:
mode:
authorJesper Dangaard Brouer <brouer@redhat.com>2018-05-24 16:46:12 +0200
committerAlexei Starovoitov <ast@kernel.org>2018-05-24 18:36:15 -0700
commit735fc4054b3a25034445c6713d259da0f96f8131 (patch)
tree355f7a0672e6239fa4227d562f7d5b65fac9c011 /include/linux/netdevice.h
parent389ab7f01af988c2a1ec5617eb0c7e220df1ef1c (diff)
downloadlinux-0-day-735fc4054b3a25034445c6713d259da0f96f8131.tar.gz
linux-0-day-735fc4054b3a25034445c6713d259da0f96f8131.tar.xz
xdp: change ndo_xdp_xmit API to support bulking
This patch change the API for ndo_xdp_xmit to support bulking xdp_frames. When kernel is compiled with CONFIG_RETPOLINE, XDP sees a huge slowdown. Most of the slowdown is caused by DMA API indirect function calls, but also the net_device->ndo_xdp_xmit() call. Benchmarked patch with CONFIG_RETPOLINE, using xdp_redirect_map with single flow/core test (CPU E5-1650 v4 @ 3.60GHz), showed performance improved: for driver ixgbe: 6,042,682 pps -> 6,853,768 pps = +811,086 pps for driver i40e : 6,187,169 pps -> 6,724,519 pps = +537,350 pps With frames avail as a bulk inside the driver ndo_xdp_xmit call, further optimizations are possible, like bulk DMA-mapping for TX. Testing without CONFIG_RETPOLINE show the same performance for physical NIC drivers. The virtual NIC driver tun sees a huge performance boost, as it can avoid doing per frame producer locking, but instead amortize the locking cost over the bulk. V2: Fix compile errors reported by kbuild test robot <lkp@intel.com> V4: Isolated ndo, driver changes and callers. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--include/linux/netdevice.h14
1 files changed, 9 insertions, 5 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 03ed492c4e14a..debdb6286170d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1185,9 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- * This function is used to submit a XDP packet for transmit on a
- * netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * This function is used to submit @n XDP packets for transmit on a
+ * netdevice. Returns number of frames successfully transmitted, frames
+ * that got dropped are freed/returned via xdp_return_frame().
+ * Returns negative number, means general error invoking ndo, meaning
+ * no frames were xmit'ed and core-caller will free all frames.
+ * TODO: Consider add flag to allow sending flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1375,8 +1379,8 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
- int (*ndo_xdp_xmit)(struct net_device *dev,
- struct xdp_frame *xdp);
+ int (*ndo_xdp_xmit)(struct net_device *dev, int n,
+ struct xdp_frame **xdp);
void (*ndo_xdp_flush)(struct net_device *dev);
};