summaryrefslogtreecommitdiffstats
path: root/net/netfilter/nft_ct.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 10:15:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 10:15:09 -0800
commit3051bf36c25d5153051704291782f8d44e744d36 (patch)
tree72dfc8a1d12675c6f2981d13102df954b678f11b /net/netfilter/nft_ct.c
parent1e74a2eb1f5cc7f2f2b5aa9c9eeecbcf352220a3 (diff)
parent005c3490e9db23738d91e02788606c0fe4734723 (diff)
downloadlinux-3051bf36c25d5153051704291782f8d44e744d36.tar.gz
linux-3051bf36c25d5153051704291782f8d44e744d36.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini Varadhan. 2) Simplify classifier state on sk_buff in order to shrink it a bit. From Willem de Bruijn. 3) Introduce SIPHASH and it's usage for secure sequence numbers and syncookies. From Jason A. Donenfeld. 4) Reduce CPU usage for ICMP replies we are going to limit or suppress, from Jesper Dangaard Brouer. 5) Introduce Shared Memory Communications socket layer, from Ursula Braun. 6) Add RACK loss detection and allow it to actually trigger fast recovery instead of just assisting after other algorithms have triggered it. From Yuchung Cheng. 7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot. 8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert. 9) Export MPLS packet stats via netlink, from Robert Shearman. 10) Significantly improve inet port bind conflict handling, especially when an application is restarted and changes it's setting of reuseport. From Josef Bacik. 11) Implement TX batching in vhost_net, from Jason Wang. 12) Extend the dummy device so that VF (virtual function) features, such as configuration, can be more easily tested. From Phil Sutter. 13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric Dumazet. 14) Add new bpf MAP, implementing a longest prefix match trie. From Daniel Mack. 15) Packet sample offloading support in mlxsw driver, from Yotam Gigi. 16) Add new aquantia driver, from David VomLehn. 17) Add bpf tracepoints, from Daniel Borkmann. 18) Add support for port mirroring to b53 and bcm_sf2 drivers, from Florian Fainelli. 19) Remove custom busy polling in many drivers, it is done in the core networking since 4.5 times. From Eric Dumazet. 20) Support XDP adjust_head in virtio_net, from John Fastabend. 21) Fix several major holes in neighbour entry confirmation, from Julian Anastasov. 22) Add XDP support to bnxt_en driver, from Michael Chan. 23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan. 24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi. 25) Support GRO in IPSEC protocols, from Steffen Klassert" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits) Revert "ath10k: Search SMBIOS for OEM board file extension" net: socket: fix recvmmsg not returning error from sock_error bnxt_en: use eth_hw_addr_random() bpf: fix unlocking of jited image when module ronx not set arch: add ARCH_HAS_SET_MEMORY config net: napi_watchdog() can use napi_schedule_irqoff() tcp: Revert "tcp: tcp_probe: use spin_lock_bh()" net/hsr: use eth_hw_addr_random() net: mvpp2: enable building on 64-bit platforms net: mvpp2: switch to build_skb() in the RX path net: mvpp2: simplify MVPP2_PRS_RI_* definitions net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT net: mvpp2: remove unused register definitions net: mvpp2: simplify mvpp2_bm_bufs_add() net: mvpp2: drop useless fields in mvpp2_bm_pool and related code net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue' net: mvpp2: release reference to txq_cpu[] entry after unmapping net: mvpp2: handle too large value in mvpp2_rx_time_coal_set() net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set() net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set ...
Diffstat (limited to 'net/netfilter/nft_ct.c')
-rw-r--r--net/netfilter/nft_ct.c220
1 files changed, 199 insertions, 21 deletions
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index e6baeaebe653..c6b8022c0e47 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,11 @@ struct nft_ct {
};
};
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
+static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+#endif
+
static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
enum nft_ct_keys k,
enum ip_conntrack_dir d)
@@ -129,12 +134,40 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
memcpy(dest, &count, sizeof(count));
return;
}
+ case NFT_CT_AVGPKT: {
+ const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
+ u64 avgcnt = 0, bcnt = 0, pcnt = 0;
+
+ if (acct) {
+ pcnt = nft_ct_get_eval_counter(acct->counter,
+ NFT_CT_PKTS, priv->dir);
+ bcnt = nft_ct_get_eval_counter(acct->counter,
+ NFT_CT_BYTES, priv->dir);
+ if (pcnt != 0)
+ avgcnt = div64_u64(bcnt, pcnt);
+ }
+
+ memcpy(dest, &avgcnt, sizeof(avgcnt));
+ return;
+ }
case NFT_CT_L3PROTOCOL:
*dest = nf_ct_l3num(ct);
return;
case NFT_CT_PROTOCOL:
*dest = nf_ct_protonum(ct);
return;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE: {
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+
+ if (priv->dir < IP_CT_DIR_MAX)
+ *dest = nf_ct_zone_id(zone, priv->dir);
+ else
+ *dest = zone->id;
+
+ return;
+ }
+#endif
default:
break;
}
@@ -163,6 +196,53 @@ err:
regs->verdict.code = NFT_BREAK;
}
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ u16 value = regs->data[priv->sreg];
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) /* already tracked */
+ return;
+
+ zone.id = value;
+
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ zone.dir = NF_CT_ZONE_DIR_ORIG;
+ break;
+ case IP_CT_DIR_REPLY:
+ zone.dir = NF_CT_ZONE_DIR_REPL;
+ break;
+ default:
+ break;
+ }
+
+ ct = this_cpu_read(nft_ct_pcpu_template);
+
+ if (likely(atomic_read(&ct->ct_general.use) == 1)) {
+ nf_ct_zone_add(ct, &zone);
+ } else {
+ /* previous skb got queued to userspace */
+ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
+ if (!ct) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+ }
+
+ atomic_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+}
+#endif
+
static void nft_ct_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -241,6 +321,45 @@ static void nft_ct_netns_put(struct net *net, uint8_t family)
nf_ct_netns_put(net, family);
}
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_tmpl_put_pcpu(void)
+{
+ struct nf_conn *ct;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ct = per_cpu(nft_ct_pcpu_template, cpu);
+ if (!ct)
+ break;
+ nf_ct_put(ct);
+ per_cpu(nft_ct_pcpu_template, cpu) = NULL;
+ }
+}
+
+static bool nft_ct_tmpl_alloc_pcpu(void)
+{
+ struct nf_conntrack_zone zone = { .id = 0 };
+ struct nf_conn *tmp;
+ int cpu;
+
+ if (nft_ct_pcpu_template_refcnt)
+ return true;
+
+ for_each_possible_cpu(cpu) {
+ tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
+ if (!tmp) {
+ nft_ct_tmpl_put_pcpu();
+ return false;
+ }
+
+ atomic_set(&tmp->ct_general.use, 1);
+ per_cpu(nft_ct_pcpu_template, cpu) = tmp;
+ }
+
+ return true;
+}
+#endif
+
static int nft_ct_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -250,6 +369,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+ priv->dir = IP_CT_DIR_MAX;
switch (priv->key) {
case NFT_CT_DIRECTION:
if (tb[NFTA_CT_DIRECTION] != NULL)
@@ -316,11 +436,14 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
case NFT_CT_BYTES:
case NFT_CT_PKTS:
- /* no direction? return sum of original + reply */
- if (tb[NFTA_CT_DIRECTION] == NULL)
- priv->dir = IP_CT_DIR_MAX;
+ case NFT_CT_AVGPKT:
len = sizeof(u64);
break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ len = sizeof(u16);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -346,21 +469,41 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS)
+ if (priv->key == NFT_CT_BYTES ||
+ priv->key == NFT_CT_PKTS ||
+ priv->key == NFT_CT_AVGPKT)
nf_ct_set_acct(ctx->net, true);
return 0;
}
+static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
+{
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_put(ctx->net);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ if (--nft_ct_pcpu_template_refcnt == 0)
+ nft_ct_tmpl_put_pcpu();
+#endif
+ default:
+ break;
+ }
+}
+
static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
- bool label_got = false;
unsigned int len;
int err;
+ priv->dir = IP_CT_DIR_MAX;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -378,13 +521,30 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
if (err)
return err;
- label_got = true;
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ if (!nft_ct_tmpl_alloc_pcpu())
+ return -ENOMEM;
+ nft_ct_pcpu_template_refcnt++;
break;
#endif
default:
return -EOPNOTSUPP;
}
+ if (tb[NFTA_CT_DIRECTION]) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
@@ -397,8 +557,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
return 0;
err1:
- if (label_got)
- nf_connlabels_put(ctx->net);
+ __nft_ct_set_destroy(ctx, priv);
return err;
}
@@ -413,16 +572,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
{
struct nft_ct *priv = nft_expr_priv(expr);
- switch (priv->key) {
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- case NFT_CT_LABELS:
- nf_connlabels_put(ctx->net);
- break;
-#endif
- default:
- break;
- }
-
+ __nft_ct_set_destroy(ctx, priv);
nft_ct_netns_put(ctx->net, ctx->afi->family);
}
@@ -445,6 +595,8 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
break;
case NFT_CT_BYTES:
case NFT_CT_PKTS:
+ case NFT_CT_AVGPKT:
+ case NFT_CT_ZONE:
if (priv->dir < IP_CT_DIR_MAX &&
nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
goto nla_put_failure;
@@ -467,6 +619,17 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_ZONE:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
+
return 0;
nla_put_failure:
@@ -492,6 +655,17 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.dump = nft_ct_set_dump,
};
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static const struct nft_expr_ops nft_ct_set_zone_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_set_zone_eval,
+ .init = nft_ct_set_init,
+ .destroy = nft_ct_set_destroy,
+ .dump = nft_ct_set_dump,
+};
+#endif
+
static const struct nft_expr_ops *
nft_ct_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -505,8 +679,13 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DREG])
return &nft_ct_get_ops;
- if (tb[NFTA_CT_SREG])
+ if (tb[NFTA_CT_SREG]) {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
+ return &nft_ct_set_zone_ops;
+#endif
return &nft_ct_set_ops;
+ }
return ERR_PTR(-EINVAL);
}
@@ -534,8 +713,7 @@ static void nft_notrack_eval(const struct nft_expr *expr,
ct = nf_ct_untracked_get();
atomic_inc(&ct->ct_general.use);
- skb->nfct = &ct->ct_general;
- skb->nfctinfo = IP_CT_NEW;
+ nf_ct_set(skb, ct, IP_CT_NEW);
}
static struct nft_expr_type nft_notrack_type;