summaryrefslogtreecommitdiffstats
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--drivers/net/tun.c182
1 files changed, 104 insertions, 78 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 23e9eb66197fb..85e14adf52074 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -70,6 +70,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
+#include <net/xdp.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
@@ -80,6 +81,9 @@
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
+static void tun_default_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd);
+
/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */
@@ -241,6 +245,7 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
+ struct ethtool_link_ksettings link_ksettings;
};
struct veth {
@@ -248,11 +253,11 @@ struct veth {
__be16 h_vlan_TCI;
};
-bool tun_is_xdp_buff(void *ptr)
+bool tun_is_xdp_frame(void *ptr)
{
return (unsigned long)ptr & TUN_XDP_FLAG;
}
-EXPORT_SYMBOL(tun_is_xdp_buff);
+EXPORT_SYMBOL(tun_is_xdp_frame);
void *tun_xdp_to_ptr(void *ptr)
{
@@ -525,11 +530,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
rcu_read_lock();
- /* We may get a very small possibility of OOO during switching, not
- * worth to optimize.*/
- if (tun->numqueues == 1 || tfile->detached)
- goto unlock;
-
e = tun_flow_find(head, rxhash);
if (likely(e)) {
/* TODO: keep queueing to old queue until it's empty? */
@@ -548,7 +548,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
spin_unlock_bh(&tun->lock);
}
-unlock:
rcu_read_unlock();
}
@@ -660,10 +659,10 @@ void tun_ptr_free(void *ptr)
{
if (!ptr)
return;
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- put_page(virt_to_head_page(xdp->data));
+ xdp_return_frame(xdpf);
} else {
__skb_array_destroy_skb(ptr);
}
@@ -848,6 +847,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
tun->dev, tfile->queue_index);
if (err < 0)
goto out;
+ err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0) {
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ goto out;
+ }
err = 0;
}
@@ -1284,65 +1289,69 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static void __tun_xdp_flush_tfile(struct tun_file *tfile)
+{
+ /* Notify and wake up reader process */
+ if (tfile->flags & TUN_FASYNC)
+ kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+}
+
+static int tun_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
{
struct tun_struct *tun = netdev_priv(dev);
- struct xdp_buff *buff = xdp->data_hard_start;
- int headroom = xdp->data - xdp->data_hard_start;
struct tun_file *tfile;
u32 numqueues;
- int ret = 0;
-
- /* Assure headroom is available and buff is properly aligned */
- if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
- return -ENOSPC;
+ int drops = 0;
+ int cnt = n;
+ int i;
- *buff = *xdp;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) {
- ret = -ENOSPC;
- goto out;
+ rcu_read_unlock();
+ return -ENXIO; /* Caller will free/return all frames */
}
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
- /* Encode the XDP flag into lowest bit for consumer to differ
- * XDP buffer from sk_buff.
- */
- if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
- this_cpu_inc(tun->pcpu_stats->tx_dropped);
- ret = -ENOSPC;
+
+ spin_lock(&tfile->tx_ring.producer_lock);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdp = frames[i];
+ /* Encode the XDP flag into lowest bit for consumer to differ
+ * XDP buffer from sk_buff.
+ */
+ void *frame = tun_xdp_to_ptr(xdp);
+
+ if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
+ this_cpu_inc(tun->pcpu_stats->tx_dropped);
+ xdp_return_frame_rx_napi(xdp);
+ drops++;
+ }
}
+ spin_unlock(&tfile->tx_ring.producer_lock);
+
+ if (flags & XDP_XMIT_FLUSH)
+ __tun_xdp_flush_tfile(tfile);
-out:
rcu_read_unlock();
- return ret;
+ return cnt - drops;
}
-static void tun_xdp_flush(struct net_device *dev)
+static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
{
- struct tun_struct *tun = netdev_priv(dev);
- struct tun_file *tfile;
- u32 numqueues;
+ struct xdp_frame *frame = convert_to_xdp_frame(xdp);
- rcu_read_lock();
+ if (unlikely(!frame))
+ return -EOVERFLOW;
- numqueues = READ_ONCE(tun->numqueues);
- if (!numqueues)
- goto out;
-
- tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
- numqueues]);
- /* Notify and wake up reader process */
- if (tfile->flags & TUN_FASYNC)
- kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
- tfile->socket.sk->sk_data_ready(tfile->socket.sk);
-
-out:
- rcu_read_unlock();
+ return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
}
static const struct net_device_ops tap_netdev_ops = {
@@ -1363,7 +1372,6 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
.ndo_bpf = tun_xdp,
.ndo_xdp_xmit = tun_xdp_xmit,
- .ndo_xdp_flush = tun_xdp_flush,
};
static void tun_flow_init(struct tun_struct *tun)
@@ -1680,14 +1688,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
case XDP_TX:
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- if (tun_xdp_xmit(tun->dev, &xdp))
+ if (tun_xdp_tx(tun->dev, &xdp))
goto err_redirect;
- tun_xdp_flush(tun->dev);
rcu_read_unlock();
local_bh_enable();
return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
+ len = xdp.data_end - xdp.data;
break;
default:
bpf_warn_invalid_xdp_action(act);
@@ -1708,7 +1716,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
}
skb_reserve(skb, pad - delta);
- skb_put(skb, len + delta);
+ skb_put(skb, len);
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
@@ -1932,10 +1940,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
local_bh_enable();
}
- rcu_read_lock();
- if (!rcu_dereference(tun->steering_prog))
+ /* Compute the costly rx hash only if needed for flow updates.
+ * We may get a very small possibility of OOO during switching, not
+ * worth to optimize.
+ */
+ if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
+ !tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
- rcu_read_unlock();
if (frags) {
/* Exercise flow dissector code path. */
@@ -2004,11 +2015,11 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
struct tun_file *tfile,
- struct xdp_buff *xdp,
+ struct xdp_frame *xdp_frame,
struct iov_iter *iter)
{
int vnet_hdr_sz = 0;
- size_t size = xdp->data_end - xdp->data;
+ size_t size = xdp_frame->len;
struct tun_pcpu_stats *stats;
size_t ret;
@@ -2024,7 +2035,7 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
}
- ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+ ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
@@ -2192,11 +2203,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
return err;
}
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- ret = tun_put_user_xdp(tun, tfile, xdp, to);
- put_page(virt_to_head_page(xdp->data));
+ ret = tun_put_user_xdp(tun, tfile, xdpf, to);
+ xdp_return_frame(xdpf);
} else {
struct sk_buff *skb = ptr;
@@ -2278,6 +2289,7 @@ static void tun_setup(struct net_device *dev)
tun->owner = INVALID_UID;
tun->group = INVALID_GID;
+ tun_default_link_ksettings(dev, &tun->link_ksettings);
dev->ethtool_ops = &tun_ethtool_ops;
dev->needs_free_netdev = true;
@@ -2435,10 +2447,10 @@ out_free:
static int tun_ptr_peek_len(void *ptr)
{
if (likely(ptr)) {
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- return xdp->data_end - xdp->data;
+ return xdpf->len;
}
return __skb_array_len_with_tag(ptr);
} else {
@@ -2852,10 +2864,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg, int ifreq_len)
{
struct tun_file *tfile = file->private_data;
+ struct net *net = sock_net(&tfile->sk);
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
- struct net *net;
kuid_t owner;
kgid_t group;
int sndbuf;
@@ -2879,14 +2891,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
*/
return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
(unsigned int __user*)argp);
- } else if (cmd == TUNSETQUEUE)
+ } else if (cmd == TUNSETQUEUE) {
return tun_set_queue(file, &ifr);
+ } else if (cmd == SIOCGSKNS) {
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ return open_related_ns(&net->ns, get_net_ns);
+ }
ret = 0;
rtnl_lock();
tun = tun_get(tfile);
- net = sock_net(&tfile->sk);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
@@ -2916,14 +2932,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tfile->ifindex = ifindex;
goto unlock;
}
- if (cmd == SIOCGSKNS) {
- ret = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto unlock;
-
- ret = open_related_ns(&net->ns, get_net_ns);
- goto unlock;
- }
ret = -EBADFD;
if (!tun)
@@ -3313,8 +3321,8 @@ static struct miscdevice tun_miscdev = {
/* ethtool interface */
-static int tun_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
+static void tun_default_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
ethtool_link_ksettings_zero_link_mode(cmd, supported);
ethtool_link_ksettings_zero_link_mode(cmd, advertising);
@@ -3323,6 +3331,23 @@ static int tun_get_link_ksettings(struct net_device *dev,
cmd->base.port = PORT_TP;
cmd->base.phy_address = 0;
cmd->base.autoneg = AUTONEG_DISABLE;
+}
+
+static int tun_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ memcpy(cmd, &tun->link_ksettings, sizeof(*cmd));
+ return 0;
+}
+
+static int tun_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ memcpy(&tun->link_ksettings, cmd, sizeof(*cmd));
return 0;
}
@@ -3393,6 +3418,7 @@ static const struct ethtool_ops tun_ethtool_ops = {
.get_coalesce = tun_get_coalesce,
.set_coalesce = tun_set_coalesce,
.get_link_ksettings = tun_get_link_ksettings,
+ .set_link_ksettings = tun_set_link_ksettings,
};
static int tun_queue_resize(struct tun_struct *tun)