summaryrefslogtreecommitdiffstats
path: root/net/netfilter/nft_set_rbtree.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-09-06 12:45:26 -0700
committerDavid S. Miller <davem@davemloft.net>2016-09-06 12:45:26 -0700
commit60175ccdf46ac5076725cb3e66f6bc2e2766ad2b (patch)
treea5433388291a0151a00c54db596d2baf8ce649ab /net/netfilter/nft_set_rbtree.c
parent2f5281ba2a8feaf6f0aee93356f350855bb530fc (diff)
parent779994fa3636d46848edb402fe7517968e036e6f (diff)
downloadlinux-60175ccdf46ac5076725cb3e66f6bc2e2766ad2b.tar.gz
linux-60175ccdf46ac5076725cb3e66f6bc2e2766ad2b.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. Most relevant updates are the removal of per-conntrack timers to use a workqueue/garbage collection approach instead from Florian Westphal, the hash and numgen expression for nf_tables from Laura Garcia, updates on nf_tables hash set to honor the NLM_F_EXCL flag, removal of ip_conntrack sysctl and many other incremental updates on our Netfilter codebase. More specifically, they are: 1) Retrieve only 4 bytes to fetch ports in case of non-linear skb transport area in dccp, sctp, tcp, udp and udplite protocol conntrackers, from Gao Feng. 2) Missing whitespace on error message in physdev match, from Hangbin Liu. 3) Skip redundant IPv4 checksum calculation in nf_dup_ipv4, from Liping Zhang. 4) Add nf_ct_expires() helper function and use it, from Florian Westphal. 5) Replace opencoded nf_ct_kill() call in IPVS conntrack support, also from Florian. 6) Rename nf_tables set implementation to nft_set_{name}.c 7) Introduce the hash expression to allow arbitrary hashing of selector concatenations, from Laura Garcia Liebana. 8) Remove ip_conntrack sysctl backward compatibility code, this code has been around for long time already, and we have two interfaces to do this already: nf_conntrack sysctl and ctnetlink. 9) Use nf_conntrack_get_ht() helper function whenever possible, instead of opencoding fetch of hashtable pointer and size, patch from Liping Zhang. 10) Add quota expression for nf_tables. 11) Add number generator expression for nf_tables, this supports incremental and random generators that can be combined with maps, very useful for load balancing purpose, again from Laura Garcia Liebana. 12) Fix a typo in a debug message in FTP conntrack helper, from Colin Ian King. 13) Introduce a nft_chain_parse_hook() helper function to parse chain hook configuration, this is used by a follow up patch to perform better chain update validation. 14) Add rhashtable_lookup_get_insert_key() to rhashtable and use it from the nft_set_hash implementation to honor the NLM_F_EXCL flag. 15) Missing nulls check in nf_conntrack from nf_conntrack_tuple_taken(), patch from Florian Westphal. 16) Don't use the DYING bit to know if the conntrack event has been already delivered, instead a state variable to track event re-delivery states, also from Florian. 17) Remove the per-conntrack timer, use the workqueue approach that was discussed during the NFWS, from Florian Westphal. 18) Use the netlink conntrack table dump path to kill stale entries, again from Florian. 19) Add a garbage collector to get rid of stale conntracks, from Florian. 20) Reschedule garbage collector if eviction rate is high. 21) Get rid of the __nf_ct_kill_acct() helper. 22) Use ARPHRD_ETHER instead of hardcoded 1 from ARP logger. 23) Make nf_log_set() interface assertive on unsupported families. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter/nft_set_rbtree.c')
-rw-r--r--net/netfilter/nft_set_rbtree.c320
1 files changed, 320 insertions, 0 deletions
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
new file mode 100644
index 000000000000..38b5bda242f8
--- /dev/null
+++ b/net/netfilter/nft_set_rbtree.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+static DEFINE_SPINLOCK(nft_rbtree_lock);
+
+struct nft_rbtree {
+ struct rb_root root;
+};
+
+struct nft_rbtree_elem {
+ struct rb_node node;
+ struct nft_set_ext ext;
+};
+
+static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
+}
+
+static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+ const struct nft_rbtree_elem *interval)
+{
+ return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+}
+
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ const struct nft_rbtree_elem *rbe, *interval = NULL;
+ u8 genmask = nft_genmask_cur(net);
+ const struct rb_node *parent;
+ const void *this;
+ int d;
+
+ spin_lock_bh(&nft_rbtree_lock);
+ parent = priv->root.rb_node;
+ while (parent != NULL) {
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
+ if (d < 0) {
+ parent = parent->rb_left;
+ /* In case of adjacent ranges, we always see the high
+ * part of the range in first place, before the low one.
+ * So don't update interval if the keys are equal.
+ */
+ if (interval && nft_rbtree_equal(set, this, interval))
+ continue;
+ interval = rbe;
+ } else if (d > 0)
+ parent = parent->rb_right;
+ else {
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ if (nft_rbtree_interval_end(rbe))
+ goto out;
+ spin_unlock_bh(&nft_rbtree_lock);
+
+ *ext = &rbe->ext;
+ return true;
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_rbtree_interval_end(interval)) {
+ spin_unlock_bh(&nft_rbtree_lock);
+ *ext = &interval->ext;
+ return true;
+ }
+out:
+ spin_unlock_bh(&nft_rbtree_lock);
+ return false;
+}
+
+static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ struct nft_rbtree_elem *new,
+ struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ struct nft_rbtree_elem *rbe;
+ struct rb_node *parent, **p;
+ int d;
+
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = memcmp(nft_set_ext_key(&rbe->ext),
+ nft_set_ext_key(&new->ext),
+ set->klen);
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else {
+ if (nft_set_elem_active(&rbe->ext, genmask)) {
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(new))
+ p = &parent->rb_left;
+ else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(new))
+ p = &parent->rb_right;
+ else {
+ *ext = &rbe->ext;
+ return -EEXIST;
+ }
+ }
+ }
+ }
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, &priv->root);
+ return 0;
+}
+
+static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_rbtree_elem *rbe = elem->priv;
+ int err;
+
+ spin_lock_bh(&nft_rbtree_lock);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
+ spin_unlock_bh(&nft_rbtree_lock);
+
+ return err;
+}
+
+static void nft_rbtree_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ spin_lock_bh(&nft_rbtree_lock);
+ rb_erase(&rbe->node, &priv->root);
+ spin_unlock_bh(&nft_rbtree_lock);
+}
+
+static void nft_rbtree_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ nft_set_elem_change_active(net, set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent = priv->root.rb_node;
+ struct nft_rbtree_elem *rbe, *this = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ int d;
+
+ while (parent != NULL) {
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+ set->klen);
+ if (d < 0)
+ parent = parent->rb_left;
+ else if (d > 0)
+ parent = parent->rb_right;
+ else {
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(this)) {
+ parent = parent->rb_left;
+ continue;
+ } else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(this)) {
+ parent = parent->rb_right;
+ continue;
+ }
+ nft_set_elem_change_active(net, set, &rbe->ext);
+ return rbe;
+ }
+ }
+ return NULL;
+}
+
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct nft_set_elem elem;
+ struct rb_node *node;
+
+ spin_lock_bh(&nft_rbtree_lock);
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+ if (iter->count < iter->skip)
+ goto cont;
+ if (!nft_set_elem_active(&rbe->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = rbe;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0) {
+ spin_unlock_bh(&nft_rbtree_lock);
+ return;
+ }
+cont:
+ iter->count++;
+ }
+ spin_unlock_bh(&nft_rbtree_lock);
+}
+
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_rbtree);
+}
+
+static int nft_rbtree_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ priv->root = RB_ROOT;
+ return 0;
+}
+
+static void nft_rbtree_destroy(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct rb_node *node;
+
+ while ((node = priv->root.rb_node) != NULL) {
+ rb_erase(node, &priv->root);
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ nft_set_elem_destroy(set, rbe);
+ }
+}
+
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int nsize;
+
+ nsize = sizeof(struct nft_rbtree_elem);
+ if (desc->size)
+ est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+ else
+ est->size = nsize;
+
+ est->class = NFT_SET_CLASS_O_LOG_N;
+
+ return true;
+}
+
+static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+ .privsize = nft_rbtree_privsize,
+ .elemsize = offsetof(struct nft_rbtree_elem, ext),
+ .estimate = nft_rbtree_estimate,
+ .init = nft_rbtree_init,
+ .destroy = nft_rbtree_destroy,
+ .insert = nft_rbtree_insert,
+ .remove = nft_rbtree_remove,
+ .deactivate = nft_rbtree_deactivate,
+ .activate = nft_rbtree_activate,
+ .lookup = nft_rbtree_lookup,
+ .walk = nft_rbtree_walk,
+ .features = NFT_SET_INTERVAL | NFT_SET_MAP,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_rbtree_module_init(void)
+{
+ return nft_register_set(&nft_rbtree_ops);
+}
+
+static void __exit nft_rbtree_module_exit(void)
+{
+ nft_unregister_set(&nft_rbtree_ops);
+}
+
+module_init(nft_rbtree_module_init);
+module_exit(nft_rbtree_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();