summaryrefslogtreecommitdiffstats
path: root/net/netfilter/xt_TPROXY.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-06 18:39:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-06 18:39:49 -0700
commit1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (patch)
treedcc97181d4d187252e0cc8fdf29d9b365fa3ffd0 /net/netfilter/xt_TPROXY.c
parent285767604576148fc1be7fcd112e4a90eb0d6ad2 (diff)
parent7170e6045a6a8b33f4fa5753589dc77b16198e2d (diff)
downloadlinux-0-day-1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21.tar.gz
linux-0-day-1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add Maglev hashing scheduler to IPVS, from Inju Song. 2) Lots of new TC subsystem tests from Roman Mashak. 3) Add TCP zero copy receive and fix delayed acks and autotuning with SO_RCVLOWAT, from Eric Dumazet. 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard Brouer. 5) Add ttl inherit support to vxlan, from Hangbin Liu. 6) Properly separate ipv6 routes into their logically independant components. fib6_info for the routing table, and fib6_nh for sets of nexthops, which thus can be shared. From David Ahern. 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP messages from XDP programs. From Nikita V. Shirokov. 8) Lots of long overdue cleanups to the r8169 driver, from Heiner Kallweit. 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau. 10) Add traffic condition monitoring to iwlwifi, from Luca Coelho. 11) Plumb extack down into fib_rules, from Roopa Prabhu. 12) Add Flower classifier offload support to igb, from Vinicius Costa Gomes. 13) Add UDP GSO support, from Willem de Bruijn. 14) Add documentation for eBPF helpers, from Quentin Monnet. 15) Add TLS tx offload to mlx5, from Ilya Lesokhin. 16) Allow applications to be given the number of bytes available to read on a socket via a control message returned from recvmsg(), from Soheil Hassas Yeganeh. 17) Add x86_32 eBPF JIT compiler, from Wang YanQing. 18) Add AF_XDP sockets, with zerocopy support infrastructure as well. From Björn Töpel. 19) Remove indirect load support from all of the BPF JITs and handle these operations in the verifier by translating them into native BPF instead. From Daniel Borkmann. 20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha. 21) Allow XDP programs to do lookups in the main kernel routing tables for forwarding. From David Ahern. 22) Allow drivers to store hardware state into an ELF section of kernel dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy. 23) Various RACK and loss detection improvements in TCP, from Yuchung Cheng. 24) Add TCP SACK compression, from Eric Dumazet. 25) Add User Mode Helper support and basic bpfilter infrastructure, from Alexei Starovoitov. 26) Support ports and protocol values in RTM_GETROUTE, from Roopa Prabhu. 27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard Brouer. 28) Add lots of forwarding selftests, from Petr Machata. 29) Add generic network device failover driver, from Sridhar Samudrala. * ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits) strparser: Add __strp_unpause and use it in ktls. rxrpc: Fix terminal retransmission connection ID to include the channel net: hns3: Optimize PF CMDQ interrupt switching process net: hns3: Fix for VF mailbox receiving unknown message net: hns3: Fix for VF mailbox cannot receiving PF response bnx2x: use the right constant Revert "net: sched: cls: Fix offloading when ingress dev is vxlan" net: dsa: b53: Fix for brcm tag issue in Cygnus SoC enic: fix UDP rss bits netdev-FAQ: clarify DaveM's position for stable backports rtnetlink: validate attributes in do_setlink() mlxsw: Add extack messages for port_{un, }split failures netdevsim: Add extack error message for devlink reload devlink: Add extack to reload and port_{un, }split operations net: metrics: add proper netlink validation ipmr: fix error path when ipmr_new_table fails ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds net: hns3: remove unused hclgevf_cfg_func_mta_filter netfilter: provide udp*_lib_lookup for nf_tproxy qed*: Utilize FW 8.37.2.0 ...
Diffstat (limited to 'net/netfilter/xt_TPROXY.c')
-rw-r--r--net/netfilter/xt_TPROXY.c366
1 files changed, 18 insertions, 348 deletions
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 8c89323c06afe..58fce4e749a97 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -33,264 +33,9 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
+#include <net/netfilter/nf_tproxy.h>
#include <linux/netfilter/xt_TPROXY.h>
-enum nf_tproxy_lookup_t {
- NFT_LOOKUP_LISTENER,
- NFT_LOOKUP_ESTABLISHED,
-};
-
-static bool tproxy_sk_is_transparent(struct sock *sk)
-{
- switch (sk->sk_state) {
- case TCP_TIME_WAIT:
- if (inet_twsk(sk)->tw_transparent)
- return true;
- break;
- case TCP_NEW_SYN_RECV:
- if (inet_rsk(inet_reqsk(sk))->no_srccheck)
- return true;
- break;
- default:
- if (inet_sk(sk)->transparent)
- return true;
- }
-
- sock_gen_put(sk);
- return false;
-}
-
-static inline __be32
-tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
-{
- struct in_device *indev;
- __be32 laddr;
-
- if (user_laddr)
- return user_laddr;
-
- laddr = 0;
- indev = __in_dev_get_rcu(skb->dev);
- for_primary_ifa(indev) {
- laddr = ifa->ifa_local;
- break;
- } endfor_ifa(indev);
-
- return laddr ? laddr : daddr;
-}
-
-/*
- * This is used when the user wants to intercept a connection matching
- * an explicit iptables rule. In this case the sockets are assumed
- * matching in preference order:
- *
- * - match: if there's a fully established connection matching the
- * _packet_ tuple, it is returned, assuming the redirection
- * already took place and we process a packet belonging to an
- * established connection
- *
- * - match: if there's a listening socket matching the redirection
- * (e.g. on-port & on-ip of the connection), it is returned,
- * regardless if it was bound to 0.0.0.0 or an explicit
- * address. The reasoning is that if there's an explicit rule, it
- * does not really matter if the listener is bound to an interface
- * or to 0. The user already stated that he wants redirection
- * (since he added the rule).
- *
- * Please note that there's an overlap between what a TPROXY target
- * and a socket match will match. Normally if you have both rules the
- * "socket" match will be the first one, effectively all packets
- * belonging to established connections going through that one.
- */
-static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
- const u8 protocol,
- const __be32 saddr, const __be32 daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in,
- const enum nf_tproxy_lookup_t lookup_type)
-{
- struct sock *sk;
- struct tcphdr *tcph;
-
- switch (protocol) {
- case IPPROTO_TCP:
- switch (lookup_type) {
- case NFT_LOOKUP_LISTENER:
- tcph = hp;
- sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
- ip_hdrlen(skb) +
- __tcp_hdrlen(tcph),
- saddr, sport,
- daddr, dport,
- in->ifindex, 0);
-
- if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
- sk = NULL;
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- break;
- case NFT_LOOKUP_ESTABLISHED:
- sk = inet_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
- break;
- default:
- BUG();
- }
- break;
- case IPPROTO_UDP:
- sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- if (sk) {
- int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
- (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
- sock_put(sk);
- sk = NULL;
- }
- }
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- }
-
- pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
- protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
-
- return sk;
-}
-
-#ifdef XT_TPROXY_HAVE_IPV6
-static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
- const u8 protocol,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in,
- const enum nf_tproxy_lookup_t lookup_type)
-{
- struct sock *sk;
- struct tcphdr *tcph;
-
- switch (protocol) {
- case IPPROTO_TCP:
- switch (lookup_type) {
- case NFT_LOOKUP_LISTENER:
- tcph = hp;
- sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
- thoff + __tcp_hdrlen(tcph),
- saddr, sport,
- daddr, ntohs(dport),
- in->ifindex, 0);
-
- if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
- sk = NULL;
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- break;
- case NFT_LOOKUP_ESTABLISHED:
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, ntohs(dport),
- in->ifindex, 0);
- break;
- default:
- BUG();
- }
- break;
- case IPPROTO_UDP:
- sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- if (sk) {
- int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
- (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
- sock_put(sk);
- sk = NULL;
- }
- }
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- }
-
- pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
- protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
-
- return sk;
-}
-#endif
-
-/**
- * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
- * @skb: The skb being processed.
- * @laddr: IPv4 address to redirect to or zero.
- * @lport: TCP port to redirect to or zero.
- * @sk: The TIME_WAIT TCP socket found by the lookup.
- *
- * We have to handle SYN packets arriving to TIME_WAIT sockets
- * differently: instead of reopening the connection we should rather
- * redirect the new connection to the proxy if there's a listener
- * socket present.
- *
- * tproxy_handle_time_wait4() consumes the socket reference passed in.
- *
- * Returns the listener socket if there's one, the TIME_WAIT socket if
- * no such listener is found, or NULL if the TCP header is incomplete.
- */
-static struct sock *
-tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
- __be32 laddr, __be16 lport, struct sock *sk)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- inet_twsk_put(inet_twsk(sk));
- return NULL;
- }
-
- if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
- /* SYN to a TIME_WAIT socket, we'd rather redirect it
- * to a listener socket if there's one */
- struct sock *sk2;
-
- sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
- iph->saddr, laddr ? laddr : iph->daddr,
- hp->source, lport ? lport : hp->dest,
- skb->dev, NFT_LOOKUP_LISTENER);
- if (sk2) {
- inet_twsk_deschedule_put(inet_twsk(sk));
- sk = sk2;
- }
- }
-
- return sk;
-}
-
/* assign a socket to the skb -- consumes sk */
static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
@@ -319,26 +64,26 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
- skb->dev, NFT_LOOKUP_ESTABLISHED);
+ skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
- laddr = tproxy_laddr4(skb, laddr, iph->daddr);
+ laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr);
if (!lport)
lport = hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT)
/* reopening a TIME_WAIT connection needs special handling */
- sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
+ sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
- skb->dev, NFT_LOOKUP_LISTENER);
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
- if (sk && tproxy_sk_is_transparent(sk)) {
+ if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
@@ -377,87 +122,6 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef XT_TPROXY_HAVE_IPV6
-static inline const struct in6_addr *
-tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
- const struct in6_addr *daddr)
-{
- struct inet6_dev *indev;
- struct inet6_ifaddr *ifa;
- struct in6_addr *laddr;
-
- if (!ipv6_addr_any(user_laddr))
- return user_laddr;
- laddr = NULL;
-
- indev = __in6_dev_get(skb->dev);
- if (indev) {
- read_lock_bh(&indev->lock);
- list_for_each_entry(ifa, &indev->addr_list, if_list) {
- if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
- continue;
-
- laddr = &ifa->addr;
- break;
- }
- read_unlock_bh(&indev->lock);
- }
-
- return laddr ? laddr : daddr;
-}
-
-/**
- * tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
- * @skb: The skb being processed.
- * @tproto: Transport protocol.
- * @thoff: Transport protocol header offset.
- * @par: Iptables target parameters.
- * @sk: The TIME_WAIT TCP socket found by the lookup.
- *
- * We have to handle SYN packets arriving to TIME_WAIT sockets
- * differently: instead of reopening the connection we should rather
- * redirect the new connection to the proxy if there's a listener
- * socket present.
- *
- * tproxy_handle_time_wait6() consumes the socket reference passed in.
- *
- * Returns the listener socket if there's one, the TIME_WAIT socket if
- * no such listener is found, or NULL if the TCP header is incomplete.
- */
-static struct sock *
-tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
- const struct xt_action_param *par,
- struct sock *sk)
-{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct tcphdr _hdr, *hp;
- const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
-
- hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- inet_twsk_put(inet_twsk(sk));
- return NULL;
- }
-
- if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
- /* SYN to a TIME_WAIT socket, we'd rather redirect it
- * to a listener socket if there's one */
- struct sock *sk2;
-
- sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
- &iph->saddr,
- tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
- hp->source,
- tgi->lport ? tgi->lport : hp->dest,
- skb->dev, NFT_LOOKUP_LISTENER);
- if (sk2) {
- inet_twsk_deschedule_put(inet_twsk(sk));
- sk = sk2;
- }
- }
-
- return sk;
-}
-
static unsigned int
tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -489,25 +153,31 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
- xt_in(par), NFT_LOOKUP_ESTABLISHED);
+ xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
- laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
+ laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
lport = tgi->lport ? tgi->lport : hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
- if (sk && sk->sk_state == TCP_TIME_WAIT)
+ if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
/* reopening a TIME_WAIT connection needs special handling */
- sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
+ sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff,
+ xt_net(par),
+ &tgi->laddr.in6,
+ tgi->lport,
+ sk);
+ }
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
tproto, &iph->saddr, laddr,
hp->source, lport,
- xt_in(par), NFT_LOOKUP_LISTENER);
+ xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
- if (sk && tproxy_sk_is_transparent(sk)) {
+ if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;