summaryrefslogtreecommitdiffstats
path: root/drivers/net/vxlan.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-15 12:16:14 -0500
committerDavid S. Miller <davem@davemloft.net>2016-11-15 12:16:14 -0500
commit81fea579aa57750bb42ed9de5790c48357435cea (patch)
tree0f662099486d53745d3bb4f2d5914875c839b083 /drivers/net/vxlan.c
parentc915fe13cbaae5c7aa7b44f367d05addd60c9008 (diff)
parent9efdb92d68c726e70066e5b4189c1186c9b6f90c (diff)
downloadlinux-81fea579aa57750bb42ed9de5790c48357435cea.tar.gz
linux-81fea579aa57750bb42ed9de5790c48357435cea.tar.xz
Merge branch 'vxlan-xmit-improvements'
Pravin B Shelar says: ==================== vxlan: xmit improvements. Following patch series improves vxlan fast path, removes duplicate code and simplifies vxlan xmit code path. v2-v3: Removed unrelated warning fix from patch 2. rearranged error handling from patch 3 Fixed stats updates in vxlan route lookup in patch 4 v1-v2: Fix compilation error when IPv6 support is not enabled. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--drivers/net/vxlan.c285
1 files changed, 137 insertions, 148 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5264c1a49d86..d536a9340cd5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1750,21 +1750,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
}
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
- + VXLAN_HLEN + iphdr_len
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+ + VXLAN_HLEN + iphdr_len;
/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
if (unlikely(err))
- goto out_free;
-
- skb = vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb))
- return -ENOMEM;
+ return err;
err = iptunnel_handle_offloads(skb, type);
if (err)
- goto out_free;
+ return err;
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = VXLAN_HF_VNI;
@@ -1788,19 +1783,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
if (vxflags & VXLAN_F_GPE) {
err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
if (err < 0)
- goto out_free;
+ return err;
inner_protocol = skb->protocol;
}
skb_set_inner_protocol(skb, inner_protocol);
return 0;
-
-out_free:
- kfree_skb(skb);
- return err;
}
-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
+static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
+ struct vxlan_sock *sock4,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
@@ -1810,6 +1802,9 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct rtable *rt = NULL;
struct flowi4 fl4;
+ if (!sock4)
+ return ERR_PTR(-EIO);
+
if (tos && !info)
use_cache = false;
if (use_cache) {
@@ -1827,16 +1822,27 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
fl4.saddr = *saddr;
rt = ip_route_output_key(vxlan->net, &fl4);
- if (!IS_ERR(rt)) {
+ if (likely(!IS_ERR(rt))) {
+ if (rt->dst.dev == dev) {
+ netdev_dbg(dev, "circular route to %pI4\n", &daddr);
+ ip_rt_put(rt);
+ return ERR_PTR(-ELOOP);
+ }
+
*saddr = fl4.saddr;
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+ } else {
+ netdev_dbg(dev, "no route to %pI4\n", &daddr);
+ return ERR_PTR(-ENETUNREACH);
}
return rt;
}
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+ struct net_device *dev,
+ struct vxlan_sock *sock6,
struct sk_buff *skb, int oif, u8 tos,
__be32 label,
const struct in6_addr *daddr,
@@ -1844,7 +1850,6 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
- struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst;
struct flowi6 fl6;
@@ -1872,8 +1877,16 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
sock6->sock->sk,
&ndst, &fl6);
- if (err < 0)
- return ERR_PTR(err);
+ if (unlikely(err < 0)) {
+ netdev_dbg(dev, "no route to %pI6\n", daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+
+ if (unlikely(ndst->dev == dev)) {
+ netdev_dbg(dev, "circular route to %pI6\n", daddr);
+ dst_release(ndst);
+ return ERR_PTR(-ELOOP);
+ }
*saddr = fl6.saddr;
if (use_cache)
@@ -1927,23 +1940,55 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
}
}
+static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
+ struct vxlan_dev *vxlan, union vxlan_addr *daddr,
+ __be32 dst_port, __be32 vni, struct dst_entry *dst,
+ u32 rt_flags)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
+ * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
+ * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
+ */
+ BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
+#endif
+ /* Bypass encapsulation if the destination is local */
+ if (rt_flags & RTCF_LOCAL &&
+ !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+ struct vxlan_dev *dst_vxlan;
+
+ dst_release(dst);
+ dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+ daddr->sa.sa_family, dst_port,
+ vxlan->flags);
+ if (!dst_vxlan) {
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+
+ return -ENOENT;
+ }
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan);
+ return 1;
+ }
+
+ return 0;
+}
+
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct sock *sk;
- struct rtable *rt = NULL;
- const struct iphdr *old_iph;
+ const struct iphdr *old_iph = ip_hdr(skb);
union vxlan_addr *dst;
union vxlan_addr remote_ip, local_ip;
union vxlan_addr *src;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
+ struct dst_entry *ndst = NULL;
__be32 vni, label;
- __be16 df = 0;
__u8 tos, ttl;
int err;
u32 flags = vxlan->flags;
@@ -1953,19 +1998,40 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
info = skb_tunnel_info(skb);
if (rdst) {
+ dst = &rdst->remote_ip;
+ if (vxlan_addr_any(dst)) {
+ if (did_rsc) {
+ /* short-circuited back to local bridge */
+ vxlan_encap_bypass(skb, vxlan, vxlan);
+ return;
+ }
+ goto drop;
+ }
+
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
- dst = &rdst->remote_ip;
src = &vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
+ md->gbp = skb->mark;
+ ttl = vxlan->cfg.ttl;
+ if (!ttl && vxlan_addr_multicast(dst))
+ ttl = 1;
+
+ tos = vxlan->cfg.tos;
+ if (tos == 1)
+ tos = ip_tunnel_get_dsfield(old_iph, skb);
+
+ if (dst->sa.sa_family == AF_INET)
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+ else
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+ label = vxlan->cfg.label;
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
dev->name);
goto drop;
}
- dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- vni = tunnel_id_to_key32(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET) {
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
@@ -1975,182 +2041,108 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
}
dst = &remote_ip;
+ dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
+ vni = tunnel_id_to_key32(info->key.tun_id);
src = &local_ip;
dst_cache = &info->dst_cache;
- }
-
- if (vxlan_addr_any(dst)) {
- if (did_rsc) {
- /* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan);
- return;
- }
- goto drop;
- }
-
- old_iph = ip_hdr(skb);
-
- ttl = vxlan->cfg.ttl;
- if (!ttl && vxlan_addr_multicast(dst))
- ttl = 1;
-
- tos = vxlan->cfg.tos;
- if (tos == 1)
- tos = ip_tunnel_get_dsfield(old_iph, skb);
-
- label = vxlan->cfg.label;
- src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
- vxlan->cfg.port_max, true);
-
- if (info) {
+ if (info->options_len)
+ md = ip_tunnel_info_opts(info);
ttl = info->key.ttl;
tos = info->key.tos;
label = info->key.label;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
-
- if (info->options_len)
- md = ip_tunnel_info_opts(info);
- } else {
- md->gbp = skb->mark;
}
+ src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+ vxlan->cfg.port_max, true);
if (dst->sa.sa_family == AF_INET) {
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+ struct rtable *rt;
+ __be16 df = 0;
- if (!sock4)
- goto drop;
- sk = sock4->sock->sk;
-
- rt = vxlan_get_route(vxlan, skb,
+ rt = vxlan_get_route(vxlan, dev, sock4, skb,
rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr,
&src->sin.sin_addr.s_addr,
dst_cache, info);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n",
- &dst->sin.sin_addr.s_addr);
- dev->stats.tx_carrier_errors++;
+ if (IS_ERR(rt))
goto tx_error;
- }
-
- if (rt->dst.dev == dev) {
- netdev_dbg(dev, "circular route to %pI4\n",
- &dst->sin.sin_addr.s_addr);
- dev->stats.collisions++;
- goto rt_tx_error;
- }
/* Bypass encapsulation if the destination is local */
- if (!info && rt->rt_flags & RTCF_LOCAL &&
- !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
- struct vxlan_dev *dst_vxlan;
-
- ip_rt_put(rt);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port,
- vxlan->flags);
- if (!dst_vxlan)
- goto tx_error;
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
- return;
- }
-
- if (!info)
- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
- else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (!info) {
+ err = encap_bypass_if_local(skb, dev, vxlan, dst,
+ dst_port, vni, &rt->dst,
+ rt->rt_flags);
+ if (err)
+ return;
+ } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
+ }
+ ndst = &rt->dst;
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
+ err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
vni, md, flags, udp_sum);
if (err < 0)
- goto xmit_tx_error;
+ goto tx_error;
- udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
+ udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
- struct dst_entry *ndst;
- u32 rt6i_flags;
- if (!sock6)
- goto drop;
- sk = sock6->sock->sk;
-
- ndst = vxlan6_get_route(vxlan, skb,
+ ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
rdst ? rdst->remote_ifindex : 0, tos,
label, &dst->sin6.sin6_addr,
&src->sin6.sin6_addr,
dst_cache, info);
if (IS_ERR(ndst)) {
- netdev_dbg(dev, "no route to %pI6\n",
- &dst->sin6.sin6_addr);
- dev->stats.tx_carrier_errors++;
+ ndst = NULL;
goto tx_error;
}
- if (ndst->dev == dev) {
- netdev_dbg(dev, "circular route to %pI6\n",
- &dst->sin6.sin6_addr);
- dst_release(ndst);
- dev->stats.collisions++;
- goto tx_error;
- }
+ if (!info) {
+ u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- /* Bypass encapsulation if the destination is local */
- rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- if (!info && rt6i_flags & RTF_LOCAL &&
- !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
- struct vxlan_dev *dst_vxlan;
-
- dst_release(ndst);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port,
- vxlan->flags);
- if (!dst_vxlan)
- goto tx_error;
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
- return;
+ err = encap_bypass_if_local(skb, dev, vxlan, dst,
+ dst_port, vni, ndst,
+ rt6i_flags);
+ if (err)
+ return;
}
- if (!info)
- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
-
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
vni, md, flags, udp_sum);
- if (err < 0) {
- dst_release(ndst);
- dev->stats.tx_errors++;
- return;
- }
- udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
+ if (err < 0)
+ goto tx_error;
+
+ udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
&src->sin6.sin6_addr,
&dst->sin6.sin6_addr, tos, ttl,
label, src_port, dst_port, !udp_sum);
#endif
}
-
return;
drop:
dev->stats.tx_dropped++;
- goto tx_free;
+ dev_kfree_skb(skb);
+ return;
-xmit_tx_error:
- /* skb is already freed. */
- skb = NULL;
-rt_tx_error:
- ip_rt_put(rt);
tx_error:
+ if (err == -ELOOP)
+ dev->stats.collisions++;
+ else if (err == -ENETUNREACH)
+ dev->stats.tx_carrier_errors++;
+ dst_release(ndst);
dev->stats.tx_errors++;
-tx_free:
- dev_kfree_skb(skb);
+ kfree_skb(skb);
}
/* Transmit local packets over Vxlan
@@ -2429,9 +2421,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
- if (!sock4)
- return -EINVAL;
- rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+ rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
info->key.u.ipv4.dst,
&info->key.u.ipv4.src, NULL, info);
if (IS_ERR(rt))
@@ -2439,9 +2429,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
ip_rt_put(rt);
} else {
#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
struct dst_entry *ndst;
- ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+ ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
info->key.label, &info->key.u.ipv6.dst,
&info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
@@ -2529,10 +2520,8 @@ static void vxlan_setup(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->vlan_features = dev->features;
- dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netif_keep_dst(dev);
dev->priv_flags |= IFF_NO_QUEUE;