summaryrefslogtreecommitdiffstats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 10:15:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 10:15:09 -0800
commit3051bf36c25d5153051704291782f8d44e744d36 (patch)
tree72dfc8a1d12675c6f2981d13102df954b678f11b /net/ipv4/udp.c
parent1e74a2eb1f5cc7f2f2b5aa9c9eeecbcf352220a3 (diff)
parent005c3490e9db23738d91e02788606c0fe4734723 (diff)
downloadlinux-3051bf36c25d5153051704291782f8d44e744d36.tar.gz
linux-3051bf36c25d5153051704291782f8d44e744d36.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini Varadhan. 2) Simplify classifier state on sk_buff in order to shrink it a bit. From Willem de Bruijn. 3) Introduce SIPHASH and it's usage for secure sequence numbers and syncookies. From Jason A. Donenfeld. 4) Reduce CPU usage for ICMP replies we are going to limit or suppress, from Jesper Dangaard Brouer. 5) Introduce Shared Memory Communications socket layer, from Ursula Braun. 6) Add RACK loss detection and allow it to actually trigger fast recovery instead of just assisting after other algorithms have triggered it. From Yuchung Cheng. 7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot. 8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert. 9) Export MPLS packet stats via netlink, from Robert Shearman. 10) Significantly improve inet port bind conflict handling, especially when an application is restarted and changes it's setting of reuseport. From Josef Bacik. 11) Implement TX batching in vhost_net, from Jason Wang. 12) Extend the dummy device so that VF (virtual function) features, such as configuration, can be more easily tested. From Phil Sutter. 13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric Dumazet. 14) Add new bpf MAP, implementing a longest prefix match trie. From Daniel Mack. 15) Packet sample offloading support in mlxsw driver, from Yotam Gigi. 16) Add new aquantia driver, from David VomLehn. 17) Add bpf tracepoints, from Daniel Borkmann. 18) Add support for port mirroring to b53 and bcm_sf2 drivers, from Florian Fainelli. 19) Remove custom busy polling in many drivers, it is done in the core networking since 4.5 times. From Eric Dumazet. 20) Support XDP adjust_head in virtio_net, from John Fastabend. 21) Fix several major holes in neighbour entry confirmation, from Julian Anastasov. 22) Add XDP support to bnxt_en driver, from Michael Chan. 23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan. 24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi. 25) Support GRO in IPSEC protocols, from Steffen Klassert" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits) Revert "ath10k: Search SMBIOS for OEM board file extension" net: socket: fix recvmmsg not returning error from sock_error bnxt_en: use eth_hw_addr_random() bpf: fix unlocking of jited image when module ronx not set arch: add ARCH_HAS_SET_MEMORY config net: napi_watchdog() can use napi_schedule_irqoff() tcp: Revert "tcp: tcp_probe: use spin_lock_bh()" net/hsr: use eth_hw_addr_random() net: mvpp2: enable building on 64-bit platforms net: mvpp2: switch to build_skb() in the RX path net: mvpp2: simplify MVPP2_PRS_RI_* definitions net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT net: mvpp2: remove unused register definitions net: mvpp2: simplify mvpp2_bm_bufs_add() net: mvpp2: drop useless fields in mvpp2_bm_pool and related code net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue' net: mvpp2: release reference to txq_cpu[] entry after unmapping net: mvpp2: handle too large value in mvpp2_rx_time_coal_set() net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set() net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set ...
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--net/ipv4/udp.c117
1 files changed, 53 insertions, 64 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8aab7d78d25b..ea6e4cff9faf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -134,14 +134,21 @@ EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
+/* IPCB reference means this can not be used from early demux */
+static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (!net->ipv4.sysctl_udp_l3mdev_accept &&
+ skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+ return true;
+#endif
+ return false;
+}
+
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
- unsigned int log)
+ struct sock *sk, unsigned int log)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -153,13 +160,18 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!sk2->sk_reuseport || !sk->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
- !uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2, true)) {
- if (!bitmap)
- return 1;
- __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap);
+ inet_rcv_saddr_equal(sk, sk2, true)) {
+ if (sk2->sk_reuseport && sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(uid, sock_i_uid(sk2))) {
+ if (!bitmap)
+ return 0;
+ } else {
+ if (!bitmap)
+ return 1;
+ __set_bit(udp_sk(sk2)->udp_port_hash >> log,
+ bitmap);
+ }
}
}
return 0;
@@ -171,10 +183,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
*/
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct sock *sk)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -188,11 +197,14 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!sk2->sk_reuseport || !sk->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
- !uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2, true)) {
- res = 1;
+ inet_rcv_saddr_equal(sk, sk2, true)) {
+ if (sk2->sk_reuseport && sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(uid, sock_i_uid(sk2))) {
+ res = 0;
+ } else {
+ res = 1;
+ }
break;
}
}
@@ -200,10 +212,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
return res;
}
-static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
{
struct net *net = sock_net(sk);
kuid_t uid = sock_i_uid(sk);
@@ -217,7 +226,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- (*saddr_same)(sk, sk2, false)) {
+ inet_rcv_saddr_equal(sk, sk2, false)) {
return reuseport_add_sock(sk, sk2);
}
}
@@ -233,14 +242,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
*
* @sk: socket struct in question
* @snum: port number to look up
- * @saddr_comp: AF-dependent comparison of bound local IP addresses
* @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
* with NULL address
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -269,7 +274,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock);
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
- saddr_comp, udptable->log);
+ udptable->log);
snum = first;
/*
@@ -285,6 +290,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
snum += rand;
} while (snum != first);
spin_unlock_bh(&hslot->lock);
+ cond_resched();
} while (++first != last);
goto fail;
} else {
@@ -301,12 +307,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
if (hslot->count < hslot2->count)
goto scan_primary_hash;
- exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
if (!exist && (hash2_nulladdr != slot2)) {
hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ sk);
}
if (exist)
goto fail_unlock;
@@ -314,8 +319,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
goto found;
}
scan_primary_hash:
- if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
- saddr_comp, 0))
+ if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
goto fail_unlock;
}
found:
@@ -324,7 +328,7 @@ found:
udp_sk(sk)->udp_portaddr_hash ^= snum;
if (sk_unhashed(sk)) {
if (sk->sk_reuseport &&
- udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
+ udp_reuseport_add_sock(sk, hslot)) {
inet_sk(sk)->inet_num = 0;
udp_sk(sk)->udp_port_hash = 0;
udp_sk(sk)->udp_portaddr_hash ^= snum;
@@ -356,24 +360,6 @@ fail:
}
EXPORT_SYMBOL(udp_lib_get_port);
-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * 0.0.0.0 only equals to 0.0.0.0
- */
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
-{
- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
-
- if (!ipv6_only_sock(sk2)) {
- if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
- return 1;
- if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
- return match_wildcard;
- }
- return 0;
-}
-
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
unsigned int port)
{
@@ -389,12 +375,13 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static int compute_score(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum, int dif)
+ __be32 daddr, unsigned short hnum, int dif,
+ bool exact_dif)
{
int score;
struct inet_sock *inet;
@@ -425,7 +412,7 @@ static int compute_score(struct sock *sk, struct net *net,
score += 4;
}
- if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if || exact_dif) {
if (sk->sk_bound_dev_if != dif)
return -1;
score += 4;
@@ -450,7 +437,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
- __be32 daddr, unsigned int hnum, int dif,
+ __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
@@ -462,7 +449,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif);
+ daddr, hnum, dif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -497,6 +484,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ bool exact_dif = udp_lib_exact_dif_match(net, skb);
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
@@ -509,7 +497,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, skb);
+ exact_dif, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
@@ -524,7 +512,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, skb);
+ exact_dif, hslot2, skb);
}
return result;
}
@@ -533,7 +521,7 @@ begin:
badness = 0;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif);
+ daddr, hnum, dif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -1113,7 +1101,8 @@ out:
return err;
do_confirm:
- dst_confirm(&rt->dst);
+ if (msg->msg_flags & MSG_PROBE)
+ dst_confirm_neigh(&rt->dst, &fl4->daddr);
if (!(msg->msg_flags&MSG_PROBE) || len)
goto back_from_confirm;
err = 0;