diff options
Diffstat (limited to 'net/netfilter')
110 files changed, 3377 insertions, 1554 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8d56d9a4df2..63729b489c2c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -57,6 +57,10 @@ config NF_CONNTRACK config NF_LOG_COMMON tristate +config NF_LOG_NETDEV + tristate "Netdev packet logging" + select NF_LOG_COMMON + if NF_CONNTRACK config NF_CONNTRACK_MARK @@ -142,38 +146,38 @@ config NF_CONNTRACK_LABELS to connection tracking entries. It selected by the connlabel match. config NF_CT_PROTO_DCCP - tristate 'DCCP protocol connection tracking support' + bool 'DCCP protocol connection tracking support' depends on NETFILTER_ADVANCED - default IP_DCCP + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on DCCP connections. - If unsure, say 'N'. + If unsure, say Y. config NF_CT_PROTO_GRE tristate config NF_CT_PROTO_SCTP - tristate 'SCTP protocol connection tracking support' + bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED - default IP_SCTP + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + If unsure, say Y. config NF_CT_PROTO_UDPLITE - tristate 'UDP-Lite protocol connection tracking support' + bool 'UDP-Lite protocol connection tracking support' depends on NETFILTER_ADVANCED + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on UDP-Lite connections. - To compile it as a module, choose M here. If unsure, say N. + If unsure, say Y. config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" @@ -380,17 +384,17 @@ config NF_NAT_NEEDED default y config NF_NAT_PROTO_DCCP - tristate + bool depends on NF_NAT && NF_CT_PROTO_DCCP default NF_NAT && NF_CT_PROTO_DCCP config NF_NAT_PROTO_UDPLITE - tristate + bool depends on NF_NAT && NF_CT_PROTO_UDPLITE default NF_NAT && NF_CT_PROTO_UDPLITE config NF_NAT_PROTO_SCTP - tristate + bool default NF_NAT && NF_CT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP select LIBCRC32C @@ -474,6 +478,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_RT + tristate "Netfilter nf_tables routing module" + help + This option adds the "rt" expression that you can use to match + packet routing information such as the packet nexthop. + config NFT_NUMGEN tristate "Netfilter nf_tables number generator module" help @@ -541,6 +551,12 @@ config NFT_NAT This option adds the "nat" expression that you can use to perform typical Network Address Translation (NAT) packet transformations. +config NFT_OBJREF + tristate "Netfilter nf_tables stateful object reference module" + help + This option adds the "objref" expression that allows you to refer to + stateful objects, such as counters and quotas. + config NFT_QUEUE depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" @@ -581,6 +597,19 @@ config NFT_HASH This option adds the "hash" expression that you can use to perform a hash operation on registers. +config NFT_FIB + tristate + +config NFT_FIB_INET + depends on NF_TABLES_INET + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables fib inet support" + help + This option allows using the FIB expression from the inet table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. + if NF_TABLES_NETDEV config NF_DUP_NETDEV @@ -1409,9 +1438,10 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on !NF_CONNTRACK || NF_CONNTRACK depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n + depends on NF_SOCKET_IPV4 + depends on NF_SOCKET_IPV6 select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c23c3c84416f..ca30d1960f1d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -5,6 +5,9 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -16,11 +19,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o # connection tracking obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o -# SCTP protocol connection tracking -obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o -obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o -obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o @@ -45,17 +44,20 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o +# NAT protocols (nf_nat) +nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o +nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o +nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o + # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o +# packet logging for netdev family +obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o + obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o -# NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o -obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o -obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o - # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o @@ -81,10 +83,12 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o +obj-$(CONFIG_NFT_OBJREF) += nft_objref.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o @@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_FIB) += nft_fib.o +obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 004af030ef1a..ce6adfae521a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -102,17 +102,14 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) if (!entry) return -ENOMEM; - entry->orig_ops = reg; - entry->ops = *reg; - entry->next = NULL; + nf_hook_entry_init(entry, reg); mutex_lock(&nf_hook_mutex); /* Find the spot in the list */ - while ((p = nf_entry_dereference(*pp)) != NULL) { - if (reg->priority < p->orig_ops->priority) + for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { + if (reg->priority < nf_hook_entry_priority(p)) break; - pp = &p->next; } rcu_assign_pointer(entry->next, p); rcu_assign_pointer(*pp, entry); @@ -139,12 +136,11 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) return; mutex_lock(&nf_hook_mutex); - while ((p = nf_entry_dereference(*pp)) != NULL) { - if (p->orig_ops == reg) { + for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { + if (nf_hook_entry_ops(p) == reg) { rcu_assign_pointer(*pp, p->next); break; } - pp = &p->next; } mutex_unlock(&nf_hook_mutex); if (!p) { @@ -302,70 +298,40 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(_nf_unregister_hooks); -unsigned int nf_iterate(struct sk_buff *skb, - struct nf_hook_state *state, - struct nf_hook_entry **entryp) +/* Returns 1 if okfn() needs to be executed by the caller, + * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry) { unsigned int verdict; + int ret; - /* - * The caller must not block between calls to this - * function because of risk of continuing from deleted element. - */ - while (*entryp) { - if (state->thresh > (*entryp)->ops.priority) { - *entryp = rcu_dereference((*entryp)->next); - continue; - } - - /* Optimization: we don't need to hold module - reference here, since function can't sleep. --RR */ -repeat: - verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); - if (verdict != NF_ACCEPT) { -#ifdef CONFIG_NETFILTER_DEBUG - if (unlikely((verdict & NF_VERDICT_MASK) - > NF_MAX_VERDICT)) { - NFDEBUG("Evil return from %p(%u).\n", - (*entryp)->ops.hook, state->hook); - *entryp = rcu_dereference((*entryp)->next); + do { + verdict = nf_hook_entry_hookfn(entry, skb, state); + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + entry = rcu_dereference(entry->next); + break; + case NF_DROP: + kfree_skb(skb); + ret = NF_DROP_GETERR(verdict); + if (ret == 0) + ret = -EPERM; + return ret; + case NF_QUEUE: + ret = nf_queue(skb, state, &entry, verdict); + if (ret == 1 && entry) continue; - } -#endif - if (verdict != NF_REPEAT) - return verdict; - goto repeat; + return ret; + default: + /* Implicit handling for NF_STOLEN, as well as any other + * non conventional verdicts. + */ + return 0; } - *entryp = rcu_dereference((*entryp)->next); - } - return NF_ACCEPT; -} - + } while (entry); -/* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) -{ - struct nf_hook_entry *entry; - unsigned int verdict; - int ret = 0; - - entry = rcu_dereference(state->hook_entries); -next_hook: - verdict = nf_iterate(skb, state, &entry); - if (verdict == NF_ACCEPT || verdict == NF_STOP) { - ret = 1; - } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { - kfree_skb(skb); - ret = NF_DROP_GETERR(verdict); - if (ret == 0) - ret = -EPERM; - } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - ret = nf_queue(skb, state, &entry, verdict); - if (ret == 1 && entry) - goto next_hook; - } - return ret; + return 1; } EXPORT_SYMBOL(nf_hook_slow); diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 234a8ec82076..4083a8051f0f 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPMAC + tristate "hash:ip,mac set support" + depends on IP_SET + help + This option adds the hash:ip,mac set type support, by which + one can store IPv4/IPv6 address and MAC (ethernet address) pairs in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_MAC tristate "hash:mac set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 3dbd5e958489..28ec148df02d 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o +obj-$(CONFIG_IP_SET_HASH_IPMAC) += ip_set_hash_ipmac.o obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 2e8e7e5fb4a6..6f09a99298cd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -22,6 +22,7 @@ #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_memsize IPSET_TOKEN(MTYPE, _memsize) #define mtype_flush IPSET_TOKEN(MTYPE, _flush) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) @@ -40,11 +41,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct mtype *map = set->data; - init_timer(&map->gc); - map->gc.data = (unsigned long)set; - map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&map->gc); + setup_timer(&map->gc, gc, (unsigned long)set); + mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } static void @@ -82,6 +80,16 @@ mtype_flush(struct ip_set *set) if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); + set->elements = 0; + set->ext_size = 0; +} + +/* Calculate the actual memory size of the set data */ +static size_t +mtype_memsize(const struct mtype *map, size_t dsize) +{ + return sizeof(*map) + map->memsize + + map->elements * dsize; } static int @@ -89,14 +97,15 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct mtype *map = set->data; struct nlattr *nested; - size_t memsize = sizeof(*map) + map->memsize; + size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -140,6 +149,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(x, set))) { + set->elements--; ret = 0; } else if (!(flags & IPSET_FLAG_EXIST)) { set_bit(e->id, map->members); @@ -148,6 +158,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Element is re-added, cleanup extensions */ ip_set_ext_destroy(set, x); } + if (ret > 0) + set->elements--; if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT @@ -159,12 +171,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(x, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(x, set), ext); + ip_set_init_comment(set, ext_comment(x, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(x, set), ext); /* Activate element */ set_bit(e->id, map->members); + set->elements++; return 0; } @@ -181,6 +194,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, return -IPSET_ERR_EXIST; ip_set_ext_destroy(set, x); + set->elements--; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; @@ -276,6 +290,7 @@ mtype_gc(unsigned long ul_set) if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); ip_set_ext_destroy(set, x); + set->elements--; } } spin_unlock_bh(&set->lock); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a748b0c2c981..c296f9b606d4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -36,7 +36,7 @@ struct ip_set_net { bool is_destroyed; /* all sets are destroyed */ }; -static int ip_set_net_id __read_mostly; +static unsigned int ip_set_net_id __read_mostly; static inline struct ip_set_net *ip_set_pernet(struct net *net) { @@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); -typedef void (*destroyer)(void *); +typedef void (*destroyer)(struct ip_set *, void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type ip_set_extensions[] = { @@ -426,20 +426,20 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); - ext->skbmark = fullmark >> 32; - ext->skbmarkmask = fullmark & 0xffffffff; + ext->skbinfo.skbmark = fullmark >> 32; + ext->skbinfo.skbmarkmask = fullmark & 0xffffffff; } if (tb[IPSET_ATTR_SKBPRIO]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; - ext->skbprio = be32_to_cpu(nla_get_be32( - tb[IPSET_ATTR_SKBPRIO])); + ext->skbinfo.skbprio = + be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO])); } if (tb[IPSET_ATTR_SKBQUEUE]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; - ext->skbqueue = be16_to_cpu(nla_get_be16( - tb[IPSET_ATTR_SKBQUEUE])); + ext->skbinfo.skbqueue = + be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE])); } return 0; } @@ -541,7 +541,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); @@ -579,7 +579,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret; BUG_ON(!set); @@ -601,7 +601,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index d32fd6b036bf..1b05d4a7d5a1 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -85,6 +85,8 @@ struct htable { }; #define hbucket(h, i) ((h)->bucket[i]) +#define ext_size(n, dsize) \ + (sizeof(struct hbucket) + (n) * (dsize)) #ifndef IPSET_NET_COUNT #define IPSET_NET_COUNT 1 @@ -150,24 +152,34 @@ htable_bits(u32 hashsize) #define INIT_CIDR(cidr, host_mask) \ DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) -#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) - #ifdef IP_SET_HASH_WITH_NET0 -/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */ -#define NLEN(family) (SET_HOST_MASK(family) + 1) +/* cidr from 0 to HOST_MASK value and c = cidr + 1 */ +#define NLEN (HOST_MASK + 1) #define CIDR_POS(c) ((c) - 1) #else -/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */ -#define NLEN(family) SET_HOST_MASK(family) +/* cidr from 1 to HOST_MASK value and c = cidr + 1 */ +#define NLEN HOST_MASK #define CIDR_POS(c) ((c) - 2) #endif #else -#define NLEN(family) 0 +#define NLEN 0 #endif /* IP_SET_HASH_WITH_NETS */ #endif /* _IP_SET_HASH_GEN_H */ +#ifndef MTYPE +#error "MTYPE is not defined!" +#endif + +#ifndef HTYPE +#error "HTYPE is not defined!" +#endif + +#ifndef HOST_MASK +#error "HOST_MASK is not defined!" +#endif + /* Family dependent templates */ #undef ahash_data @@ -191,7 +203,6 @@ htable_bits(u32 hashsize) #undef mtype_same_set #undef mtype_kadt #undef mtype_uadt -#undef mtype #undef mtype_add #undef mtype_del @@ -207,6 +218,7 @@ htable_bits(u32 hashsize) #undef mtype_variant #undef mtype_data_match +#undef htype #undef HKEY #define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) @@ -233,7 +245,6 @@ htable_bits(u32 hashsize) #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) -#define mtype MTYPE #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) @@ -249,62 +260,54 @@ htable_bits(u32 hashsize) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) -#ifndef MTYPE -#error "MTYPE is not defined!" -#endif - -#ifndef HOST_MASK -#error "HOST_MASK is not defined!" -#endif - #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) #endif -#define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \ - & jhash_mask(htable_bits)) +#define htype MTYPE -#ifndef htype -#ifndef HTYPE -#error "HTYPE is not defined!" -#endif /* HTYPE */ -#define htype HTYPE +#define HKEY(data, initval, htable_bits) \ +({ \ + const u32 *__k = (const u32 *)data; \ + u32 __l = HKEY_DATALEN / sizeof(u32); \ + \ + BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \ + \ + jhash2(__k, __l, initval) & jhash_mask(htable_bits); \ +}) /* The generic hash structure */ struct htype { struct htable __rcu *table; /* the hash table */ + struct timer_list gc; /* garbage collection when timeout enabled */ u32 maxelem; /* max elements in the hash */ - u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK u32 markmask; /* markmask value for mark mask to store */ #endif - struct timer_list gc; /* garbage collection when timeout enabled */ - struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI u8 ahash_max; /* max elements in an array block */ #endif #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif + struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_NETS - struct net_prefixes nets[0]; /* book-keeping of prefixes */ + struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ #endif }; -#endif /* htype */ #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different * sized networks. cidr == real cidr + 1 to support /0. */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) +mtype_add_cidr(struct htype *h, u8 cidr, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { + for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { if (j != -1) { continue; } else if (h->nets[i].cidr[n] < cidr) { @@ -323,11 +326,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) +mtype_del_cidr(struct htype *h, u8 cidr, u8 n) { - u8 i, j, net_end = nets_length - 1; + u8 i, j, net_end = NLEN - 1; - for (i = 0; i < nets_length; i++) { + for (i = 0; i < NLEN; i++) { if (h->nets[i].cidr[n] != cidr) continue; h->nets[CIDR_POS(cidr)].nets[n]--; @@ -343,24 +346,9 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Calculate the actual memory size of the set data */ static size_t -mtype_ahash_memsize(const struct htype *h, const struct htable *t, - u8 nets_length, size_t dsize) +mtype_ahash_memsize(const struct htype *h, const struct htable *t) { - u32 i; - struct hbucket *n; - size_t memsize = sizeof(*h) + sizeof(*t); - -#ifdef IP_SET_HASH_WITH_NETS - memsize += sizeof(struct net_prefixes) * nets_length; -#endif - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = rcu_dereference_bh(hbucket(t, i)); - if (!n) - continue; - memsize += sizeof(struct hbucket) + n->size * dsize; - } - - return memsize; + return sizeof(*h) + sizeof(*t); } /* Get the ith element from the array block n */ @@ -398,9 +386,10 @@ mtype_flush(struct ip_set *set) kfree_rcu(n, rcu); } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); + memset(h->nets, 0, sizeof(h->nets)); #endif - h->elements = 0; + set->elements = 0; + set->ext_size = 0; } /* Destroy the hashtable part of the set */ @@ -444,11 +433,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct htype *h = set->data; - init_timer(&h->gc); - h->gc.data = (unsigned long)set; - h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&h->gc); + setup_timer(&h->gc, gc, (unsigned long)set); + mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); pr_debug("gc initialized, run in every %u\n", IPSET_GC_PERIOD(set->timeout)); } @@ -473,12 +459,13 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Delete expired elements from the hashtable */ static void -mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h) { struct htable *t; struct hbucket *n, *tmp; struct mtype_elem *data; u32 i, j, d; + size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif @@ -494,21 +481,20 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) continue; } data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, set))) { - pr_debug("expired %u/%u\n", i, j); - clear_bit(j, n->used); - smp_mb__after_atomic(); + if (!ip_set_timeout_expired(ext_timeout(data, set))) + continue; + pr_debug("expired %u/%u\n", i, j); + clear_bit(j, n->used); + smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS - for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, - NCIDR_PUT(DCIDR_GET(data->cidr, - k)), - nets_length, k); + for (k = 0; k < IPSET_NET_COUNT; k++) + mtype_del_cidr(h, + NCIDR_PUT(DCIDR_GET(data->cidr, k)), + k); #endif - ip_set_ext_destroy(set, data); - h->elements--; - d++; - } + ip_set_ext_destroy(set, data); + set->elements--; + d++; } if (d >= AHASH_INIT_SIZE) { if (d >= n->size) { @@ -532,6 +518,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) d++; } tmp->pos = d; + set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } @@ -546,7 +533,7 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); spin_lock_bh(&set->lock); - mtype_expire(set, h, NLEN(set->family), set->dsize); + mtype_expire(set, h); spin_unlock_bh(&set->lock); h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; @@ -563,7 +550,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t dsize = set->dsize; + size_t extsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -606,6 +593,7 @@ retry: /* There can't be another parallel resizing, but dumping is possible */ atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); + extsize = 0; pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); for (i = 0; i < jhash_size(orig->htable_bits); i++) { @@ -636,6 +624,7 @@ retry: goto cleanup; } m->size = AHASH_INIT_SIZE; + extsize = ext_size(AHASH_INIT_SIZE, dsize); RCU_INIT_POINTER(hbucket(t, key), m); } else if (m->pos >= m->size) { struct hbucket *ht; @@ -655,6 +644,7 @@ retry: memcpy(ht, m, sizeof(struct hbucket) + m->size * dsize); ht->size = m->size + AHASH_INIT_SIZE; + extsize += ext_size(AHASH_INIT_SIZE, dsize); kfree(m); m = ht; RCU_INIT_POINTER(hbucket(t, key), ht); @@ -668,6 +658,7 @@ retry: } } rcu_assign_pointer(h->table, t); + set->ext_size = extsize; spin_unlock_bh(&set->lock); @@ -715,11 +706,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool deleted = false, forceadd = false, reuse = false; u32 key, multi = 0; - if (h->elements >= h->maxelem) { + if (set->elements >= h->maxelem) { if (SET_WITH_TIMEOUT(set)) /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h, NLEN(set->family), set->dsize); - if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + mtype_expire(set, h); + if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } @@ -727,20 +718,15 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = __ipset_dereference_protected(hbucket(t, key), 1); if (!n) { - if (forceadd) { - if (net_ratelimit()) - pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; - } else if (h->elements >= h->maxelem) { + if (forceadd || set->elements >= h->maxelem) goto set_full; - } old = NULL; n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC); if (!n) return -ENOMEM; n->size = AHASH_INIT_SIZE; + set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } for (i = 0; i < n->pos; i++) { @@ -778,14 +764,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, for (i = 0; i < IPSET_NET_COUNT; i++) mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), - NLEN(set->family), i); + i); #endif ip_set_ext_destroy(set, data); - h->elements--; + set->elements--; } goto copy_data; } - if (h->elements >= h->maxelem) + if (set->elements >= h->maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { @@ -804,17 +790,17 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; + set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); } copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: - h->elements++; + set->elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), - NLEN(set->family), i); + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); #endif memcpy(data, d, sizeof(struct mtype_elem)); overwrite_extensions: @@ -824,7 +810,7 @@ overwrite_extensions: if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(data, set), ext); + ip_set_init_comment(set, ext_comment(data, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(data, set), ext); /* Must come last for the case when timed out entry is reused */ @@ -883,11 +869,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; - h->elements--; + set->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), - NLEN(set->family), j); + j); #endif ip_set_ext_destroy(set, data); @@ -896,6 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } if (n->pos == 0 && k == 0) { + set->ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { @@ -914,6 +901,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } tmp->pos = k; + set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } @@ -957,14 +945,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, int i, j = 0; #endif u32 key, multi = 0; - u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { + for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 mtype_data_reset_elem(d, &orig); mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); - for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; + for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi; k++) { mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), true); @@ -1021,7 +1008,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) - if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) + if (DCIDR_GET(d->cidr, i) != HOST_MASK) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); @@ -1062,7 +1049,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) rcu_read_lock_bh(); t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); + memsize = mtype_ahash_memsize(h, t) + set->ext_size; htable_bits = t->htable_bits; rcu_read_unlock_bh(); @@ -1083,7 +1070,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -1238,41 +1226,35 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, struct htype *h; struct htable *t; + pr_debug("Create set %s with family %s\n", + set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); + #ifndef IP_SET_PROTO_UNDEF if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; #endif -#ifdef IP_SET_HASH_WITH_MARKMASK - markmask = 0xffffffff; -#endif -#ifdef IP_SET_HASH_WITH_NETMASK - netmask = set->family == NFPROTO_IPV4 ? 32 : 128; - pr_debug("Create set %s with family %s\n", - set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); -#endif - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; + #ifdef IP_SET_HASH_WITH_MARKMASK /* Separated condition in order to avoid directive in argument list */ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK))) return -IPSET_ERR_PROTOCOL; -#endif - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; + markmask = 0xffffffff; + if (tb[IPSET_ATTR_MARKMASK]) { + markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); + if (markmask == 0) + return -IPSET_ERR_INVALID_MARKMASK; } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); +#endif #ifdef IP_SET_HASH_WITH_NETMASK + netmask = set->family == NFPROTO_IPV4 ? 32 : 128; if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); @@ -1282,33 +1264,21 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, return -IPSET_ERR_INVALID_NETMASK; } #endif -#ifdef IP_SET_HASH_WITH_MARKMASK - if (tb[IPSET_ATTR_MARKMASK]) { - markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); - if (markmask == 0) - return -IPSET_ERR_INVALID_MARKMASK; + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; } -#endif + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); hsize = sizeof(*h); -#ifdef IP_SET_HASH_WITH_NETS - hsize += sizeof(struct net_prefixes) * NLEN(set->family); -#endif h = kzalloc(hsize, GFP_KERNEL); if (!h) return -ENOMEM; - h->maxelem = maxelem; -#ifdef IP_SET_HASH_WITH_NETMASK - h->netmask = netmask; -#endif -#ifdef IP_SET_HASH_WITH_MARKMASK - h->markmask = markmask; -#endif - get_random_bytes(&h->initval, sizeof(h->initval)); - set->timeout = IPSET_NO_TIMEOUT; - hbits = htable_bits(hashsize); hsize = htable_size(hbits); if (hsize == 0) { @@ -1320,8 +1290,17 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, kfree(h); return -ENOMEM; } + h->maxelem = maxelem; +#ifdef IP_SET_HASH_WITH_NETMASK + h->netmask = netmask; +#endif +#ifdef IP_SET_HASH_WITH_MARKMASK + h->markmask = markmask; +#endif + get_random_bytes(&h->initval, sizeof(h->initval)); + t->htable_bits = hbits; - rcu_assign_pointer(h->table, t); + RCU_INIT_POINTER(h->table, t); set->data = h; #ifndef IP_SET_PROTO_UNDEF @@ -1339,6 +1318,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem))); } #endif + set->timeout = IPSET_NO_TIMEOUT; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); #ifndef IP_SET_PROTO_UNDEF diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 9d6bf19f7b78..20bfbd315f61 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -82,7 +82,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ip *h = set->data; + const struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -101,7 +101,7 @@ static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip *h = set->data; + const struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -199,7 +199,7 @@ nla_put_failure: } static inline void -hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e) +hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e) { } @@ -217,7 +217,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ip *h = set->data; + const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -234,7 +234,7 @@ static int hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip *h = set->data; + const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c new file mode 100644 index 000000000000..1ab5ed2f6839 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -0,0 +1,315 @@ +/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,mac type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <linux/if_ether.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> +#include <net/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>"); +IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:ip,mac"); + +/* Type specific function prefix */ +#define HTYPE hash_ipmac + +/* Zero valued element is not supported */ +static const unsigned char invalid_ether[ETH_ALEN] = { 0 }; + +/* IPv4 variant */ + +/* Member elements */ +struct hash_ipmac4_elem { + /* Zero valued IP addresses cannot be stored */ + __be32 ip; + union { + unsigned char ether[ETH_ALEN]; + __be32 foo[2]; + }; +}; + +/* Common functions */ + +static inline bool +hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1, + const struct hash_ipmac4_elem *e2, + u32 *multi) +{ + return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether); +} + +static bool +hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e) +{ + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) || + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) + goto nla_put_failure; + return false; + +nla_put_failure: + return true; +} + +static inline void +hash_ipmac4_data_next(struct hash_ipmac4_elem *next, + const struct hash_ipmac4_elem *e) +{ + next->ip = e->ip; +} + +#define MTYPE hash_ipmac4 +#define PF 4 +#define HOST_MASK 32 +#define HKEY_DATALEN sizeof(struct hash_ipmac4_elem) +#include "ip_set_hash_gen.h" + +static int +hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + /* MAC can be src only */ + if (!(opt->flags & IPSET_DIM_TWO_SRC)) + return 0; + + if (skb_mac_header(skb) < skb->head || + (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -EINVAL; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_ETHER] || + nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -IPSET_ERR_HASH_ELEM; + + return adtfn(set, &e, &ext, &ext, flags); +} + +/* IPv6 variant */ + +/* Member elements */ +struct hash_ipmac6_elem { + /* Zero valued IP addresses cannot be stored */ + union nf_inet_addr ip; + union { + unsigned char ether[ETH_ALEN]; + __be32 foo[2]; + }; +}; + +/* Common functions */ + +static inline bool +hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1, + const struct hash_ipmac6_elem *e2, + u32 *multi) +{ + return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) && + ether_addr_equal(e1->ether, e2->ether); +} + +static bool +hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e) +{ + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) + goto nla_put_failure; + return false; + +nla_put_failure: + return true; +} + +static inline void +hash_ipmac6_data_next(struct hash_ipmac6_elem *next, + const struct hash_ipmac6_elem *e) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK +#undef HKEY_DATALEN + +#define MTYPE hash_ipmac6 +#define PF 6 +#define HOST_MASK 128 +#define HKEY_DATALEN sizeof(struct hash_ipmac6_elem) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac6_elem e = { + { .all = { 0 } }, + { .foo[0] = 0, .foo[1] = 0 } + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + /* MAC can be src only */ + if (!(opt->flags & IPSET_DIM_TWO_SRC)) + return 0; + + if (skb_mac_header(skb) < skb->head || + (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -EINVAL; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac6_elem e = { + { .all = { 0 } }, + { .foo[0] = 0, .foo[1] = 0 } + }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_ETHER] || + nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -IPSET_ERR_HASH_ELEM; + + return adtfn(set, &e, &ext, &ext, flags); +} + +static struct ip_set_type hash_ipmac_type __read_mostly = { + .name = "hash:ip,mac", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_ipmac_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, + .len = ETH_ALEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipmac_init(void) +{ + return ip_set_type_register(&hash_ipmac_type); +} + +static void __exit +hash_ipmac_fini(void) +{ + ip_set_type_unregister(&hash_ipmac_type); +} + +module_init(hash_ipmac_init); +module_exit(hash_ipmac_fini); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index a0695a2ab585..b64cf14e8352 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -85,7 +85,7 @@ hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -101,7 +101,7 @@ static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -193,7 +193,7 @@ nla_put_failure: } static inline void -hash_ipmark6_data_next(struct hash_ipmark4_elem *next, +hash_ipmark6_data_next(struct hash_ipmark6_elem *next, const struct hash_ipmark6_elem *d) { } @@ -211,7 +211,7 @@ hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -227,7 +227,7 @@ static int hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9d84b3dff603..f438740e6c6a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -108,7 +108,7 @@ static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport *h = set->data; + const struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -231,7 +231,7 @@ nla_put_failure: } static inline void -hash_ipport6_data_next(struct hash_ipport4_elem *next, +hash_ipport6_data_next(struct hash_ipport6_elem *next, const struct hash_ipport6_elem *d) { next->port = d->port; @@ -266,7 +266,7 @@ static int hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport *h = set->data; + const struct hash_ipport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 215b7b942038..6215fb898c50 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -111,7 +111,7 @@ static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip *h = set->data; + const struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -241,7 +241,7 @@ nla_put_failure: } static inline void -hash_ipportip6_data_next(struct hash_ipportip4_elem *next, +hash_ipportip6_data_next(struct hash_ipportip6_elem *next, const struct hash_ipportip6_elem *d) { next->port = d->port; @@ -277,7 +277,7 @@ static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip *h = set->data; + const struct hash_ipportip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 9ca719625ea3..5ab1b99a53c2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -138,7 +138,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -163,7 +163,7 @@ static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -370,7 +370,7 @@ nla_put_failure: } static inline void -hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next, +hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next, const struct hash_ipportnet6_elem *d) { next->port = d->port; @@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -414,7 +414,7 @@ static int hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 3e4bffdc1cc0..5d9e895452e7 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -117,7 +117,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_net *h = set->data; + const struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -139,7 +139,7 @@ static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; + const struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -268,7 +268,7 @@ nla_put_failure: } static inline void -hash_net6_data_next(struct hash_net4_elem *next, +hash_net6_data_next(struct hash_net6_elem *next, const struct hash_net6_elem *d) { } @@ -286,7 +286,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_net *h = set->data; + const struct hash_net6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f0f688db6213..44cf11939c91 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -156,7 +156,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct hash_netiface *h = set->data; + struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); -#define IFACE(dir) (par->dir ? par->dir->name : "") +#define IFACE(dir) (par->state->dir ? par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { @@ -196,7 +196,7 @@ static int hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct hash_netiface *h = set->data; + struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -348,7 +348,7 @@ nla_put_failure: } static inline void -hash_netiface6_data_next(struct hash_netiface4_elem *next, +hash_netiface6_data_next(struct hash_netiface6_elem *next, const struct hash_netiface6_elem *d) { } @@ -367,7 +367,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct hash_netiface *h = set->data; + struct hash_netiface6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index a93dfebffa81..db614e13b193 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -143,7 +143,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netnet *h = set->data; + const struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -165,7 +165,7 @@ static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netnet *h = set->data; + const struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -352,7 +352,7 @@ nla_put_failure: } static inline void -hash_netnet6_data_next(struct hash_netnet4_elem *next, +hash_netnet6_data_next(struct hash_netnet6_elem *next, const struct hash_netnet6_elem *d) { } @@ -377,7 +377,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netnet *h = set->data; + const struct hash_netnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 731813e0f08c..54b64b6cd0cd 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -133,7 +133,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netport *h = set->data; + const struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -157,7 +157,7 @@ static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport *h = set->data; + const struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -329,7 +329,7 @@ nla_put_failure: } static inline void -hash_netport6_data_next(struct hash_netport4_elem *next, +hash_netport6_data_next(struct hash_netport6_elem *next, const struct hash_netport6_elem *d) { next->port = d->port; @@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netport *h = set->data; + const struct hash_netport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -372,7 +372,7 @@ static int hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport *h = set->data; + const struct hash_netport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 9a14c237830f..aff846960ac4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -154,7 +154,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -180,7 +180,7 @@ static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -406,7 +406,7 @@ nla_put_failure: } static inline void -hash_netportnet6_data_next(struct hash_netportnet4_elem *next, +hash_netportnet6_data_next(struct hash_netportnet6_elem *next, const struct hash_netportnet6_elem *d) { next->port = d->port; @@ -432,7 +432,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -458,7 +458,7 @@ static int hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a2a89e4e0a14..51077c53d76b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + set->elements--; list_del_rcu(&e->list); call_rcu(&e->rcu, __list_set_del_rcu); } @@ -227,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); + ip_set_init_comment(set, ext_comment(e, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Update timeout last */ @@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, list_add_rcu(&e->list, &prev->list); else list_add_tail_rcu(&e->list, &map->members); + set->elements++; return 0; } @@ -419,6 +421,8 @@ list_set_flush(struct ip_set *set) list_for_each_entry_safe(e, n, &map->members, list) list_set_del(set, e); + set->elements = 0; + set->ext_size = 0; } static void @@ -441,12 +445,12 @@ list_set_destroy(struct ip_set *set) set->data = NULL; } -static int -list_set_head(struct ip_set *set, struct sk_buff *skb) +/* Calculate the actual memory size of the set data */ +static size_t +list_set_memsize(const struct list_set *map, size_t dsize) { - const struct list_set *map = set->data; - struct nlattr *nested; struct set_elem *e; + size_t memsize; u32 n = 0; rcu_read_lock(); @@ -454,13 +458,25 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) n++; rcu_read_unlock(); + memsize = sizeof(*map) + n * dsize; + + return memsize; +} + +static int +list_set_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct list_set *map = set->data; + struct nlattr *nested; + size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + n * set->dsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -570,11 +586,8 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct list_set *map = set->data; - init_timer(&map->gc); - map->gc.data = (unsigned long)set; - map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&map->gc); + setup_timer(&map->gc, gc, (unsigned long)set); + mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } /* Create list:set type of sets */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2c1b498a7a27..db40050f8785 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -70,7 +70,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); -static int ip_vs_net_id __read_mostly; +static unsigned int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a6e44ef2ec9a..3d02b0c13547 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { */ /* IPVS genetlink family */ -static struct genl_family ip_vs_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = IPVS_GENL_NAME, - .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_ATTR_MAX, - .netnsok = true, /* Make ipvsadm to work on netns */ -}; +static struct genl_family ip_vs_genl_family; /* Policy used for first-level command attributes */ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { @@ -3267,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); - if (IS_ERR(svc) || svc == NULL) + if (IS_ERR_OR_NULL(svc)) goto out_err; /* Dump the destinations */ @@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = { }, }; +static struct genl_family ip_vs_genl_family __ro_after_init = { + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_ATTR_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ + .module = THIS_MODULE, + .ops = ip_vs_genl_ops, + .n_ops = ARRAY_SIZE(ip_vs_genl_ops), +}; + static int __init ip_vs_genl_register(void) { - return genl_register_family_with_ops(&ip_vs_genl_family, - ip_vs_genl_ops); + return genl_register_family(&ip_vs_genl_family); } static void ip_vs_genl_unregister(void) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 01d3d894de46..4e1a98fcc8c3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -254,6 +254,54 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, return true; } +static inline bool decrement_ttl(struct netns_ipvs *ipvs, + int skb_af, + struct sk_buff *skb) +{ + struct net *net = ipvs->net; + +#ifdef CONFIG_IP_VS_IPV6 + if (skb_af == AF_INET6) { + struct dst_entry *dst = skb_dst(skb); + + /* check and decrement ttl */ + if (ipv6_hdr(skb)->hop_limit <= 1) { + /* Force OUTPUT device used as source address */ + skb->dev = dst->dev; + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); + + return false; + } + + /* don't propagate ttl change to cloned packets */ + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + return false; + + ipv6_hdr(skb)->hop_limit--; + } else +#endif + { + if (ip_hdr(skb)->ttl <= 1) { + /* Tell the sender its packet died... */ + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); + icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); + return false; + } + + /* don't propagate ttl change to cloned packets */ + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return false; + + /* Decrease ttl */ + ip_decrease_ttl(ip_hdr(skb)); + } + + return true; +} + /* Get route to destination or remote server */ static int __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, @@ -326,6 +374,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, return local; } + if (!decrement_ttl(ipvs, skb_af, skb)) + goto err_put; + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { mtu = dst_mtu(&rt->dst); } else { @@ -473,6 +524,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, return local; } + if (!decrement_ttl(ipvs, skb_af, skb)) + goto err_put; + /* MTU checking */ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) mtu = dst_mtu(&rt->dst); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0f87e5d21be7..6a0bbfa8e702 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1338,7 +1338,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (skb->nfct) goto out; } - +repeat: ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { @@ -1370,6 +1370,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); + /* Special case: TCP tracker reports an attempt to reopen a + * closed/aborted connection. We have to go back and create a + * fresh conntrack. + */ + if (ret == -NF_REPEAT) + goto repeat; ret = -ret; goto out; } @@ -1377,15 +1383,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: - if (tmpl) { - /* Special case: we have to repeat this hook, assign the - * template again to this packet. We assume that this packet - * has no conntrack assigned. This is used by nf_ct_tcp. */ - if (ret == NF_REPEAT) - skb->nfct = (struct nf_conntrack *)tmpl; - else - nf_ct_put(tmpl); - } + if (tmpl) + nf_ct_put(tmpl); return ret; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8d2c7d8c666a..2d6ee1803415 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -125,6 +125,54 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + const struct nf_conntrack_l3proto *l3proto; + int ret; + + might_sleep(); + + ret = nf_ct_l3proto_try_module_get(nfproto); + if (ret < 0) + return ret; + + /* we already have a reference, can't fail */ + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nfproto); + rcu_read_unlock(); + + if (!l3proto->net_ns_get) + return 0; + + ret = l3proto->net_ns_get(net); + if (ret < 0) + nf_ct_l3proto_module_put(nfproto); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_netns_get); + +void nf_ct_netns_put(struct net *net, u8 nfproto) +{ + const struct nf_conntrack_l3proto *l3proto; + + might_sleep(); + + /* same as nf_conntrack_netns_get(), reference assumed */ + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nfproto); + rcu_read_unlock(); + + if (WARN_ON(!l3proto)) + return; + + if (l3proto->net_ns_put) + l3proto->net_ns_put(net); + + nf_ct_l3proto_module_put(nfproto); +} +EXPORT_SYMBOL_GPL(nf_ct_netns_put); + struct nf_conntrack_l4proto * nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) { @@ -190,20 +238,19 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); +#ifdef CONFIG_SYSCTL +extern unsigned int nf_conntrack_default_on; + int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret; - - if (proto->init_net) { - ret = proto->init_net(net); - if (ret < 0) - return ret; - } + if (nf_conntrack_default_on == 0) + return 0; - return 0; + return proto->net_ns_get ? proto->net_ns_get(net) : 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); +#endif void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { @@ -224,6 +271,16 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { + /* + * nf_conntrack_default_on *might* have registered hooks. + * ->net_ns_put must cope with more puts() than get(), i.e. + * if nf_conntrack_default_on was 0 at time of + * nf_ct_l3proto_pernet_register invocation this net_ns_put() + * should be a noop. + */ + if (proto->net_ns_put) + proto->net_ns_put(net); + /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } @@ -281,15 +338,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) { int ret = 0; if (l4proto->l3proto >= PF_MAX) return -EBUSY; - if ((l4proto->to_nlattr && !l4proto->nlattr_size) - || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) + if ((l4proto->to_nlattr && !l4proto->nlattr_size) || + (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) return -EINVAL; mutex_lock(&nf_ct_proto_mutex); @@ -307,7 +364,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) } for (i = 0; i < MAX_NF_CT_PROTO; i++) - RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); + RCU_INIT_POINTER(proto_array[i], + &nf_conntrack_l4proto_generic); /* Before making proto_array visible to lockless readers, * we must make sure its content is committed to memory. @@ -335,10 +393,10 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); -int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nf_proto_net *pn = NULL; @@ -361,9 +419,9 @@ int nf_ct_l4proto_pernet_register(struct net *net, out: return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -378,10 +436,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); -void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; @@ -395,6 +453,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); + +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + int ret = -EINVAL, ver; + unsigned int i; + + for (i = 0; i < num_proto; i++) { + ret = nf_ct_l4proto_register_one(l4proto[i]); + if (ret < 0) + break; + } + if (i != num_proto) { + ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4; + pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n", + ver, l4proto[i]->name, ver); + nf_ct_l4proto_unregister(l4proto, i); + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); + +int nf_ct_l4proto_pernet_register(struct net *net, + struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + int ret = -EINVAL; + unsigned int i; + + for (i = 0; i < num_proto; i++) { + ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]); + if (ret < 0) + break; + } + if (i != num_proto) { + pr_err("nf_conntrack_%s%d: pernet registration failed\n", + l4proto[i]->name, + l4proto[i]->l3proto == PF_INET6 ? 6 : 4); + nf_ct_l4proto_pernet_unregister(net, l4proto, i); + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); + +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_unregister_one(l4proto[num_proto]); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); + +void nf_ct_l4proto_pernet_unregister(struct net *net, + struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); +} EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); int nf_conntrack_proto_pernet_init(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a45bee52dccc..b68ce6ac13b3 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -9,7 +9,6 @@ * */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/spinlock.h> @@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; -/* this module per-net specifics */ -static int dccp_net_id __read_mostly; -struct dccp_net { - struct nf_proto_net pn; - int dccp_loose; - unsigned int dccp_timeout[CT_DCCP_MAX + 1]; -}; - -static inline struct dccp_net *dccp_pernet(struct net *net) +static inline struct nf_dccp_net *dccp_pernet(struct net *net) { - return net_generic(net, dccp_net_id); + return &net->ct.nf_ct_proto.dccp; } static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); - struct dccp_net *dn; + struct nf_dccp_net *dn; struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -719,7 +710,7 @@ static int dccp_nlattr_size(void) static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = dccp_pernet(net); unsigned int *timeouts = data; int i; @@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = { #endif /* CONFIG_SYSCTL */ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, - struct dccp_net *dn) + struct nf_dccp_net *dn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, static int dccp_init_net(struct net *net, u_int16_t proto) { - struct dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = dccp_pernet(net); struct nf_proto_net *pn = &dn->pn; if (!pn->users) { @@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto) return dccp_kmemdup_sysctl_table(net, pn, dn); } -static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, .name = "dccp", @@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &dccp_net_id, .init_net = dccp_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); -static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, .name = "dccp", @@ -932,78 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &dccp_net_id, .init_net = dccp_init_net, }; - -static __net_init int dccp_net_init(struct net *net) -{ - int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4); - if (ret < 0) { - pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); - if (ret < 0) { - pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); - goto cleanup_dccp4; - } - return 0; -cleanup_dccp4: - nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); -out: - return ret; -} - -static __net_exit void dccp_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); - nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); -} - -static struct pernet_operations dccp_net_ops = { - .init = dccp_net_init, - .exit = dccp_net_exit, - .id = &dccp_net_id, - .size = sizeof(struct dccp_net), -}; - -static int __init nf_conntrack_proto_dccp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&dccp_proto4); - if (ret < 0) - goto out_dccp4; - - ret = nf_ct_l4proto_register(&dccp_proto6); - if (ret < 0) - goto out_dccp6; - - return 0; -out_dccp6: - nf_ct_l4proto_unregister(&dccp_proto4); -out_dccp4: - unregister_pernet_subsys(&dccp_net_ops); -out_pernet: - return ret; -} - -static void __exit nf_conntrack_proto_dccp_fini(void) -{ - nf_ct_l4proto_unregister(&dccp_proto6); - nf_ct_l4proto_unregister(&dccp_proto4); - unregister_pernet_subsys(&dccp_net_ops); -} - -module_init(nf_conntrack_proto_dccp_init); -module_exit(nf_conntrack_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("DCCP connection tracking protocol helper"); -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9a715f88b2f1..87bb40a3feb5 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -53,7 +53,7 @@ static unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_REPLIED] = 180*HZ, }; -static int proto_gre_net_id __read_mostly; +static unsigned int proto_gre_net_id __read_mostly; struct netns_proto_gre { struct nf_proto_net nf; rwlock_t keymap_lock; @@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); + + ret = nf_ct_l4proto_pernet_register_one(net, + &nf_conntrack_l4proto_gre4); if (ret < 0) pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; @@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net) static void proto_gre_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void) ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); if (ret < 0) goto out_gre4; @@ -436,7 +437,7 @@ out_pernet: static void __exit nf_ct_proto_gre_fini(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 982ea62606c7..a0efde38da44 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -15,7 +15,6 @@ #include <linux/types.h> #include <linux/timer.h> #include <linux/netfilter.h> -#include <linux/module.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/sctp.h> @@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { } }; -static int sctp_net_id __read_mostly; -struct sctp_net { - struct nf_proto_net pn; - unsigned int timeouts[SCTP_CONNTRACK_MAX]; -}; - -static inline struct sctp_net *sctp_pernet(struct net *net) +static inline struct nf_sctp_net *sctp_pernet(struct net *net) { - return net_generic(net, sctp_net_id); + return &net->ct.nf_ct_proto.sctp; } static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = sctp_pernet(net); int i; /* set default SCTP timeouts. */ @@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = { #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) + struct nf_sctp_net *sn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int sctp_init_net(struct net *net, u_int16_t proto) { - struct sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; if (!pn->users) { @@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) return sctp_kmemdup_sysctl_table(pn, sn); } -static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &sctp_net_id, .init_net = sctp_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); -static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -812,81 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #endif - .net_id = &sctp_net_id, .init_net = sctp_init_net, }; - -static int sctp_net_init(struct net *net) -{ - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); - if (ret < 0) { - pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); - if (ret < 0) { - pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); - goto cleanup_sctp4; - } - return 0; - -cleanup_sctp4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); -out: - return ret; -} - -static void sctp_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); -} - -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, - .id = &sctp_net_id, - .size = sizeof(struct sctp_net), -}; - -static int __init nf_conntrack_proto_sctp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); - if (ret < 0) - goto out_sctp4; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); - if (ret < 0) - goto out_sctp6; - - return 0; -out_sctp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); -out_sctp4: - unregister_pernet_subsys(&sctp_net_ops); -out_pernet: - return ret; -} - -static void __exit nf_conntrack_proto_sctp_fini(void) -{ - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); - unregister_pernet_subsys(&sctp_net_ops); -} - -module_init(nf_conntrack_proto_sctp_init); -module_exit(nf_conntrack_proto_sctp_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kiran Kumar Immidi"); -MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); -MODULE_ALIAS("ip_conntrack_proto_sctp"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 029206e8dec4..c35f7bf05d8c 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/timer.h> -#include <linux/module.h> #include <linux/udp.h> #include <linux/seq_file.h> #include <linux/skbuff.h> @@ -24,26 +23,14 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { [UDPLITE_CT_UNREPLIED] = 30*HZ, [UDPLITE_CT_REPLIED] = 180*HZ, }; -static int udplite_net_id __read_mostly; -struct udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; - -static inline struct udplite_net *udplite_pernet(struct net *net) +static inline struct nf_udplite_net *udplite_pernet(struct net *net) { - return net_generic(net, udplite_net_id); + return &net->ct.nf_ct_proto.udplite; } static bool udplite_pkt_to_tuple(const struct sk_buff *skb, @@ -178,7 +165,7 @@ static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct udplite_net *un = udplite_pernet(net); + struct nf_udplite_net *un = udplite_pernet(net); /* set default timeouts for UDPlite. */ timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; @@ -237,7 +224,7 @@ static struct ctl_table udplite_sysctl_table[] = { #endif /* CONFIG_SYSCTL */ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct udplite_net *un) + struct nf_udplite_net *un) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -257,7 +244,7 @@ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, static int udplite_init_net(struct net *net, u_int16_t proto) { - struct udplite_net *un = udplite_pernet(net); + struct nf_udplite_net *un = udplite_pernet(net); struct nf_proto_net *pn = &un->pn; if (!pn->users) { @@ -270,7 +257,7 @@ static int udplite_init_net(struct net *net, u_int16_t proto) return udplite_kmemdup_sysctl_table(pn, un); } -static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, @@ -299,11 +286,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &udplite_net_id, .init_net = udplite_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); -static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, @@ -332,78 +319,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &udplite_net_id, .init_net = udplite_init_net, }; - -static int udplite_net_init(struct net *net) -{ - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4); - if (ret < 0) { - pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); - if (ret < 0) { - pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); - goto cleanup_udplite4; - } - return 0; - -cleanup_udplite4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); -out: - return ret; -} - -static void udplite_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); -} - -static struct pernet_operations udplite_net_ops = { - .init = udplite_net_init, - .exit = udplite_net_exit, - .id = &udplite_net_id, - .size = sizeof(struct udplite_net), -}; - -static int __init nf_conntrack_proto_udplite_init(void) -{ - int ret; - - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); - if (ret < 0) - goto out_udplite4; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); - if (ret < 0) - goto out_udplite6; - - return 0; -out_udplite6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); -out_udplite4: - unregister_pernet_subsys(&udplite_net_ops); -out_pernet: - return ret; -} - -static void __exit nf_conntrack_proto_udplite_exit(void) -{ - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); - unregister_pernet_subsys(&udplite_net_ops); -} - -module_init(nf_conntrack_proto_udplite_init); -module_exit(nf_conntrack_proto_udplite_exit); - -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5f446cd9f3fd..d009ae663453 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -452,6 +452,9 @@ static int log_invalid_proto_max __read_mostly = 255; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; +extern unsigned int nf_conntrack_default_on; +unsigned int nf_conntrack_default_on __read_mostly = 1; + static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -517,6 +520,13 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "nf_conntrack_default_on", + .data = &nf_conntrack_default_on, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 7ec69723940f..c9d7f95768ab 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -14,24 +14,41 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) +{ + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + + skb->dev = dev; + dev_queue_xmit(skb); +} + +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif) +{ + struct net_device *dev; + + dev = dev_get_by_index_rcu(nft_net(pkt), oif); + if (!dev) { + kfree_skb(pkt->skb); + return; + } + + nf_do_netdev_egress(pkt->skb, dev); +} +EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress); + void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) { struct net_device *dev; struct sk_buff *skb; - dev = dev_get_by_index_rcu(pkt->net, oif); + dev = dev_get_by_index_rcu(nft_net(pkt), oif); if (dev == NULL) return; skb = skb_clone(pkt->skb, GFP_ATOMIC); - if (skb == NULL) - return; - - if (skb_mac_header_was_set(skb)) - skb_push(skb, skb->mac_len); - - skb->dev = dev; - dev_queue_xmit(skb); + if (skb) + nf_do_netdev_egress(skb, dev); } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 9fdb655f85bc..c46d214d5323 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -11,11 +11,6 @@ #define NFDEBUG(format, args...) #endif - -/* core.c */ -unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp); - /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry **entryp, unsigned int verdict); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index 119fe1cb1ea9..dc61399e30be 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -175,6 +175,34 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); +/* bridge and netdev logging families share this code. */ +void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (protocol) { + case htons(ETH_P_IP): + nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_IPV6): + nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + } +} +EXPORT_SYMBOL_GPL(nf_log_l2packet); + static int __init nf_log_common_init(void) { return 0; diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c new file mode 100644 index 000000000000..350eb147754d --- /dev/null +++ b/net/netfilter/nf_log_netdev.c @@ -0,0 +1,81 @@ +/* + * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_log.h> + +static void nf_log_netdev_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out, + loginfo, prefix); +} + +static struct nf_logger nf_netdev_logger __read_mostly = { + .name = "nf_log_netdev", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_netdev_net_init(struct net *net) +{ + return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); +} + +static void __net_exit nf_log_netdev_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_netdev_logger); +} + +static struct pernet_operations nf_log_netdev_net_ops = { + .init = nf_log_netdev_net_init, + .exit = nf_log_netdev_net_exit, +}; + +static int __init nf_log_netdev_init(void) +{ + int ret; + + /* Request to load the real packet loggers. */ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_netdev_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); + return 0; +} + +static void __exit nf_log_netdev_exit(void) +{ + unregister_pernet_subsys(&nf_log_netdev_net_ops); + nf_log_unregister(&nf_netdev_logger); +} + +module_init(nf_log_netdev_init); +module_exit(nf_log_netdev_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter netdev packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5b9c884a452e..94b14c5a8b17 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -682,6 +682,18 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) &nf_nat_l4proto_tcp); RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], &nf_nat_l4proto_udp); +#ifdef CONFIG_NF_NAT_PROTO_DCCP + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP], + &nf_nat_l4proto_dccp); +#endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP], + &nf_nat_l4proto_sctp); +#endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE], + &nf_nat_l4proto_udplite); +#endif mutex_unlock(&nf_nat_proto_mutex); RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index 15c47b246d0d..269fcd5dc34c 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -10,8 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> #include <linux/skbuff.h> #include <linux/dccp.h> @@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { +const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .l4proto = IPPROTO_DCCP, .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_dccp_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); -err1: - return err; -} - -static void __exit nf_nat_proto_dccp_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); - -} - -module_init(nf_nat_proto_dccp_init); -module_exit(nf_nat_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("DCCP NAT protocol helper"); -MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index cbc7ade1487b..31d358691af0 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -7,9 +7,7 @@ */ #include <linux/types.h> -#include <linux/init.h> #include <linux/sctp.h> -#include <linux/module.h> #include <net/sctp/checksum.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -49,12 +47,15 @@ sctp_manip_pkt(struct sk_buff *skb, hdr->dest = tuple->dst.u.sctp.port; } - hdr->checksum = sctp_compute_cksum(skb, hdroff); + if (skb->ip_summed != CHECKSUM_PARTIAL) { + hdr->checksum = sctp_compute_cksum(skb, hdroff); + skb->ip_summed = CHECKSUM_NONE; + } return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { +const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .l4proto = IPPROTO_SCTP, .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -63,34 +64,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_sctp_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); -err1: - return err; -} - -static void __exit nf_nat_proto_sctp_exit(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); -} - -module_init(nf_nat_proto_sctp_init); -module_exit(nf_nat_proto_sctp_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SCTP NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 58340c97bd83..366bfbfd82a1 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -8,11 +8,9 @@ */ #include <linux/types.h> -#include <linux/init.h> #include <linux/udp.h> #include <linux/netfilter.h> -#include <linux/module.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_l3proto.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { +const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .l4proto = IPPROTO_UDPLITE, .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_udplite_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); -err1: - return err; -} - -static void __exit nf_nat_proto_udplite_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); -} - -module_init(nf_nat_proto_udplite_init); -module_exit(nf_nat_proto_udplite_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8f08d759844a..4a7662486f44 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int queuenum) + struct nf_hook_entry *hook_entry, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, + .hook = hook_entry, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry *entry = *entryp; int ret; - RCU_INIT_POINTER(state->hook_entries, entry); - ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { @@ -177,22 +177,38 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } +static unsigned int nf_iterate(struct sk_buff *skb, + struct nf_hook_state *state, + struct nf_hook_entry **entryp) +{ + unsigned int verdict; + + do { +repeat: + verdict = nf_hook_entry_hookfn((*entryp), skb, state); + if (verdict != NF_ACCEPT) { + if (verdict != NF_REPEAT) + return verdict; + goto repeat; + } + *entryp = rcu_dereference((*entryp)->next); + } while (*entryp); + + return NF_ACCEPT; +} + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_hook_entry *hook_entry; + struct nf_hook_entry *hook_entry = entry->hook; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; - struct nf_hook_ops *elem; int err; - hook_entry = rcu_dereference(entry->state.hook_entries); - elem = &hook_entry->ops; - nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) - verdict = elem->hook(elem->priv, skb, &entry->state); + verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->state.pf); @@ -200,8 +216,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict = NF_DROP; } - entry->state.thresh = INT_MIN; - if (verdict == NF_ACCEPT) { hook_entry = rcu_dereference(hook_entry->next); if (hook_entry) diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index c8a4a48bced9..7c6d1fbe38b9 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -24,7 +24,7 @@ #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_conntrack_zones.h> -int synproxy_net_id; +unsigned int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); bool diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e5194f6f906c..a019a87e58ee 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -22,6 +22,7 @@ #include <net/sock.h> static LIST_HEAD(nf_tables_expressions); +static LIST_HEAD(nf_tables_objects); /** * nft_register_afinfo - register nf_tables address family info @@ -110,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->seq = nlh->nlmsg_seq; } -static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, - u32 size) +static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, + int msg_type, u32 size, gfp_t gfp) { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); + trans = kzalloc(sizeof(struct nft_trans) + size, gfp); if (trans == NULL) return NULL; @@ -125,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, return trans; } +static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, + int msg_type, u32 size) +{ + return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); +} + static void nft_trans_destroy(struct nft_trans *trans) { list_del(&trans->list); @@ -304,6 +311,38 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) return err; } +static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, + struct nft_object *obj) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWOBJ) + nft_activate_next(ctx->net, obj); + + nft_trans_obj(trans) = obj; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) +{ + int err; + + err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj); + if (err < 0) + return err; + + nft_deactivate_next(ctx->net, obj); + ctx->table->use--; + + return err; +} + /* * Tables */ @@ -688,6 +727,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); table->flags = flags; nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); @@ -709,6 +749,7 @@ static int nft_flush_table(struct nft_ctx *ctx) { int err; struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; struct nft_set *set, *ns; list_for_each_entry(chain, &ctx->table->chains, list) { @@ -735,6 +776,12 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { + err = nft_delobj(ctx, obj); + if (err < 0) + goto out; + } + list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) continue; @@ -2411,6 +2458,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, [NFTA_SET_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, + [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2462,6 +2510,7 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, } return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL_GPL(nf_tables_set_lookup); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla, @@ -2480,6 +2529,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net, } return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) @@ -2568,6 +2618,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) goto nla_put_failure; } + if (set->flags & NFT_SET_OBJECT && + nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) + goto nla_put_failure; if (set->timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, @@ -2797,7 +2850,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, unsigned int size; bool create; u64 timeout; - u32 ktype, dtype, flags, policy, gc_int; + u32 ktype, dtype, flags, policy, gc_int, objtype; struct nft_set_desc desc; unsigned char *udata; u16 udlen; @@ -2827,11 +2880,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | - NFT_SET_MAP | NFT_SET_EVAL)) + NFT_SET_MAP | NFT_SET_EVAL | + NFT_SET_OBJECT)) return -EINVAL; - /* Only one of both operations is supported */ - if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == - (NFT_SET_MAP | NFT_SET_EVAL)) + /* Only one of these operations is supported */ + if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) == + (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; } @@ -2856,6 +2910,19 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } else if (flags & NFT_SET_MAP) return -EINVAL; + if (nla[NFTA_SET_OBJ_TYPE] != NULL) { + if (!(flags & NFT_SET_OBJECT)) + return -EINVAL; + + objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); + if (objtype == NFT_OBJECT_UNSPEC || + objtype > NFT_OBJECT_MAX) + return -EINVAL; + } else if (flags & NFT_SET_OBJECT) + return -EINVAL; + else + objtype = NFT_OBJECT_UNSPEC; + timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) @@ -2943,6 +3010,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->ktype = ktype; set->klen = desc.klen; set->dtype = dtype; + set->objtype = objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; @@ -3064,6 +3132,7 @@ bind: list_add_tail_rcu(&binding->list, &set->bindings); return 0; } +EXPORT_SYMBOL_GPL(nf_tables_bind_set); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) @@ -3074,6 +3143,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } +EXPORT_SYMBOL_GPL(nf_tables_unbind_set); const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { @@ -3085,6 +3155,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_EXPR] = { .align = __alignof__(struct nft_expr), }, + [NFT_SET_EXT_OBJREF] = { + .len = sizeof(struct nft_object *), + .align = __alignof__(struct nft_object *), + }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), .align = __alignof__(u8), @@ -3173,6 +3247,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && + nla_put_string(skb, NFTA_SET_ELEM_OBJREF, + (*nft_set_ext_obj(ext))->name) < 0) + goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(*nft_set_ext_flags(ext)))) @@ -3467,7 +3546,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_data_uninit(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); - + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -3492,11 +3572,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + u8 genmask = nft_genmask_next(ctx->net); struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_object *obj = NULL; struct nft_userdata *udata; struct nft_data data; enum nft_registers dreg; @@ -3559,6 +3641,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); } + if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { + if (!(set->flags & NFT_SET_OBJECT)) { + err = -EINVAL; + goto err2; + } + obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + set->objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err2; + } + nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + } + if (nla[NFTA_SET_ELEM_DATA] != NULL) { err = nft_data_init(ctx, &data, sizeof(data), &d2, nla[NFTA_SET_ELEM_DATA]); @@ -3617,6 +3713,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } + if (obj) { + *nft_set_ext_obj(ext) = obj; + obj->use++; + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) @@ -3626,10 +3726,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && - nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && - memcmp(nft_set_ext_data(ext), - nft_set_ext_data(ext2), set->dlen) != 0) + if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) || + (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) && + *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2))) err = -EBUSY; else if (!(nlmsg_flags & NLM_F_EXCL)) err = 0; @@ -3779,6 +3882,34 @@ err1: return err; } +static int nft_flush_set(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_trans *trans; + int err; + + trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, + sizeof(struct nft_trans_elem), GFP_ATOMIC); + if (!trans) + return -ENOMEM; + + if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) { + err = -ENOENT; + goto err1; + } + + nft_trans_elem_set(trans) = (struct nft_set *)set; + nft_trans_elem(trans) = *((struct nft_set_elem *)elem); + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +err1: + kfree(trans); + return err; +} + static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -3789,9 +3920,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int rem, err = 0; - if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) - return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -3803,6 +3931,18 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { + struct nft_set_dump_args args = { + .iter = { + .genmask = genmask, + .fn = nft_flush_set, + }, + }; + set->ops->walk(&ctx, set, &args.iter); + + return args.iter.err; + } + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err < 0) @@ -3838,6 +3978,500 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, } EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); +/* + * Stateful objects + */ + +/** + * nft_register_obj- register nf_tables stateful object type + * @obj: object type + * + * Registers the object type for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_obj(struct nft_object_type *obj_type) +{ + if (obj_type->type == NFT_OBJECT_UNSPEC) + return -EINVAL; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_rcu(&obj_type->list, &nf_tables_objects); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_obj); + +/** + * nft_unregister_obj - unregister nf_tables object type + * @obj: object type + * + * Unregisters the object type for use with nf_tables. + */ +void nft_unregister_obj(struct nft_object_type *obj_type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del_rcu(&obj_type->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_obj); + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) +{ + struct nft_object *obj; + + list_for_each_entry(obj, &table->objects, list) { + if (!nla_strcmp(nla, obj->name) && + objtype == obj->type->type && + nft_active_genmask(obj, genmask)) + return obj; + } + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); + +static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { + [NFTA_OBJ_TABLE] = { .type = NLA_STRING }, + [NFTA_OBJ_NAME] = { .type = NLA_STRING }, + [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, + [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, +}; + +static struct nft_object *nft_obj_init(const struct nft_object_type *type, + const struct nlattr *attr) +{ + struct nlattr *tb[type->maxattr + 1]; + struct nft_object *obj; + int err; + + if (attr) { + err = nla_parse_nested(tb, type->maxattr, attr, type->policy); + if (err < 0) + goto err1; + } else { + memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1)); + } + + err = -ENOMEM; + obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL); + if (obj == NULL) + goto err1; + + err = type->init((const struct nlattr * const *)tb, obj); + if (err < 0) + goto err2; + + obj->type = type; + return obj; +err2: + kfree(obj); +err1: + return ERR_PTR(err); +} + +static int nft_object_dump(struct sk_buff *skb, unsigned int attr, + struct nft_object *obj, bool reset) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr); + if (!nest) + goto nla_put_failure; + if (obj->type->dump(skb, obj, reset) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +{ + const struct nft_object_type *type; + + list_for_each_entry(type, &nf_tables_objects, list) { + if (objtype == type->type) + return type; + } + return NULL; +} + +static const struct nft_object_type *nft_obj_type_get(u32 objtype) +{ + const struct nft_object_type *type; + + type = __nft_obj_type_get(objtype); + if (type != NULL && try_module_get(type->owner)) + return type; + +#ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-obj-%u", objtype); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_obj_type_get(objtype)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static int nf_tables_newobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_object_type *type; + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; + u32 objtype; + int err; + + if (!nla[NFTA_OBJ_TYPE] || + !nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_DATA]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err != -ENOENT) + return err; + + obj = NULL; + } + + if (obj != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + return 0; + } + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + type = nft_obj_type_get(objtype); + if (IS_ERR(type)) + return PTR_ERR(type); + + obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err1; + } + obj->table = table; + nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN); + + err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); + if (err < 0) + goto err2; + + list_add_tail_rcu(&obj->list, &table->objects); + table->use++; + return 0; +err2: + if (obj->type->destroy) + obj->type->destroy(obj); + kfree(obj); +err1: + module_put(type->owner); + return err; +} + +static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table, + struct nft_object *obj, bool reset) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) || + nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || + nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +struct nft_obj_filter { + char table[NFT_OBJ_MAXNAMELEN]; + u32 type; +}; + +static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_obj_filter *filter = cb->data; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nft_object *obj; + bool reset = false; + + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(obj, &table->objects, list) { + if (!nft_is_active(net, obj)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; + if (filter->type != NFT_OBJECT_UNSPEC && + obj->type->type != filter->type) + goto cont; + + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, obj, reset) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } + } +done: + rcu_read_unlock(); + + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_dump_obj_done(struct netlink_callback *cb) +{ + kfree(cb->data); + + return 0; +} + +static struct nft_obj_filter * +nft_obj_filter_alloc(const struct nlattr * const nla[]) +{ + struct nft_obj_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); + + if (nla[NFTA_OBJ_TABLE]) + nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE], + NFT_TABLE_MAXNAMELEN); + if (nla[NFTA_OBJ_TYPE]) + filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + + return filter; +} + +static int nf_tables_getobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_object *obj; + struct sk_buff *skb2; + bool reset = false; + u32 objtype; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_obj, + .done = nf_tables_dump_obj_done, + }; + + if (nla[NFTA_OBJ_TABLE] || + nla[NFTA_OBJ_TYPE]) { + struct nft_obj_filter *filter; + + filter = nft_obj_filter_alloc(nla); + if (IS_ERR(filter)) + return -ENOMEM; + + c.data = filter; + } + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_TYPE]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; + + return 0; +} + +static void nft_obj_destroy(struct nft_object *obj) +{ + if (obj->type->destroy) + obj->type->destroy(obj); + + module_put(obj->type->owner); + kfree(obj); +} + +static int nf_tables_delobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; + u32 objtype; + + if (!nla[NFTA_OBJ_TYPE] || + !nla[NFTA_OBJ_NAME]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + if (obj->use > 0) + return -EBUSY; + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + return nft_delobj(&ctx, obj); +} + +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + int family, int report, gfp_t gfp) +{ + struct sk_buff *skb; + int err; + + if (!report && + !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (skb == NULL) + goto err; + + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, + table, obj, false); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); +err: + if (err < 0) { + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + } + return err; +} +EXPORT_SYMBOL_GPL(nft_obj_notify); + +static int nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) +{ + return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, + ctx->seq, event, ctx->afi->family, ctx->report, + GFP_KERNEL); +} + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { @@ -3998,6 +4632,26 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_GETGEN] = { .call = nf_tables_getgen, }, + [NFT_MSG_NEWOBJ] = { + .call_batch = nf_tables_newobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_GETOBJ] = { + .call = nf_tables_getobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_DELOBJ] = { + .call_batch = nf_tables_delobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_GETOBJ_RESET] = { + .call = nf_tables_getobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -4040,6 +4694,9 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem(trans).priv, true); break; + case NFT_MSG_DELOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; } kfree(trans); } @@ -4147,6 +4804,17 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) atomic_dec(&te->set->nelems); te->set->ndeact--; break; + case NFT_MSG_NEWOBJ: + nft_clear(net, nft_trans_obj(trans)); + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELOBJ: + list_del_rcu(&nft_trans_obj(trans)->list); + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_DELOBJ); + break; } } @@ -4181,6 +4849,9 @@ static void nf_tables_abort_release(struct nft_trans *trans) nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem(trans).priv, true); break; + case NFT_MSG_NEWOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; } kfree(trans); } @@ -4261,6 +4932,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; + case NFT_MSG_NEWOBJ: + trans->ctx.table->use--; + list_del_rcu(&nft_trans_obj(trans)->list); + break; + case NFT_MSG_DELOBJ: + trans->ctx.table->use++; + nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_trans_destroy(trans); + break; } } @@ -4807,6 +5487,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { struct nft_table *table, *nt; struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; struct nft_rule *rule, *nr; struct nft_set *set, *ns; struct nft_ctx ctx = { @@ -4833,6 +5514,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) table->use--; nft_set_destroy(set); } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + list_del(&obj->list); + table->use--; + nft_obj_destroy(obj); + } list_for_each_entry_safe(chain, nc, &table->chains, list) { list_del(&chain->list); table->use--; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 0dd5c695482f..65dbeadcb118 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -53,10 +53,10 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, nft_trace_notify(info); - nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, + nft_in(pkt), nft_out(pkt), &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], rulenum); } static inline void nft_trace_packet(struct nft_traceinfo *info, @@ -124,7 +124,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; - const struct net *net = pkt->net; + const struct net *net = nft_net(pkt); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; @@ -178,6 +178,7 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + case NF_STOLEN: nft_trace_packet(&info, chain, rule, rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; @@ -231,68 +232,40 @@ next_rule: } EXPORT_SYMBOL_GPL(nft_do_chain); +static struct nft_expr_type *nft_basic_types[] = { + &nft_imm_type, + &nft_cmp_type, + &nft_lookup_type, + &nft_bitwise_type, + &nft_byteorder_type, + &nft_payload_type, + &nft_dynset_type, + &nft_range_type, +}; + int __init nf_tables_core_module_init(void) { - int err; - - err = nft_immediate_module_init(); - if (err < 0) - goto err1; - - err = nft_cmp_module_init(); - if (err < 0) - goto err2; - - err = nft_lookup_module_init(); - if (err < 0) - goto err3; - - err = nft_bitwise_module_init(); - if (err < 0) - goto err4; + int err, i; - err = nft_byteorder_module_init(); - if (err < 0) - goto err5; - - err = nft_payload_module_init(); - if (err < 0) - goto err6; - - err = nft_dynset_module_init(); - if (err < 0) - goto err7; - - err = nft_range_module_init(); - if (err < 0) - goto err8; + for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) { + err = nft_register_expr(nft_basic_types[i]); + if (err) + goto err; + } return 0; -err8: - nft_dynset_module_exit(); -err7: - nft_payload_module_exit(); -err6: - nft_byteorder_module_exit(); -err5: - nft_bitwise_module_exit(); -err4: - nft_lookup_module_exit(); -err3: - nft_cmp_module_exit(); -err2: - nft_immediate_module_exit(); -err1: + +err: + while (i-- > 0) + nft_unregister_expr(nft_basic_types[i]); return err; } void nf_tables_core_module_exit(void) { - nft_dynset_module_exit(); - nft_payload_module_exit(); - nft_byteorder_module_exit(); - nft_bitwise_module_exit(); - nft_lookup_module_exit(); - nft_cmp_module_exit(); - nft_immediate_module_exit(); + int i; + + i = ARRAY_SIZE(nft_basic_types); + while (i-- > 0) + nft_unregister_expr(nft_basic_types[i]); } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index ab695f8e2d29..12eb9041dca2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -171,7 +171,7 @@ void nft_trace_notify(struct nft_traceinfo *info) unsigned int size; int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; - if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + @@ -207,7 +207,7 @@ void nft_trace_notify(struct nft_traceinfo *info) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) @@ -249,7 +249,7 @@ void nft_trace_notify(struct nft_traceinfo *info) goto nla_put_failure; if (!info->packet_dumped) { - if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt))) goto nla_put_failure; if (nf_trace_fill_pkt_info(skb, pkt)) @@ -258,7 +258,7 @@ void nft_trace_notify(struct nft_traceinfo *info) } nlmsg_end(skb, nlh); - nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index eb086a192c5a..200922bb2036 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -80,7 +80,7 @@ struct nfulnl_instance { #define INSTANCE_BUCKETS 16 -static int nfnl_log_net_id __read_mostly; +static unsigned int nfnl_log_net_id __read_mostly; struct nfnl_log_net { spinlock_t instances_lock; @@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current @@ -1152,6 +1152,7 @@ MODULE_ALIAS_NF_LOGGER(AF_INET, 1); MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */ +MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */ module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index af832c526048..be7627b80400 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -69,7 +69,7 @@ struct nfqnl_instance { * Following fields are dirtied for each queued packet, * keep them in same cache line if possible. */ - spinlock_t lock; + spinlock_t lock ____cacheline_aligned_in_smp; unsigned int queue_total; unsigned int id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ @@ -77,7 +77,7 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static int nfnl_queue_net_id __read_mostly; +static unsigned int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 struct nfnl_queue_net { @@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = { static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return rcu_access_pointer(entry->state.hook_entries) == + return rcu_access_pointer(entry->hook) == (struct nf_hook_entry *)entry_ptr; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 31c15ed2e5fc..877d9acd91ef 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -121,7 +121,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_bitwise_type; static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), @@ -130,20 +129,10 @@ static const struct nft_expr_ops nft_bitwise_ops = { .dump = nft_bitwise_dump, }; -static struct nft_expr_type nft_bitwise_type __read_mostly = { +struct nft_expr_type nft_bitwise_type __read_mostly = { .name = "bitwise", .ops = &nft_bitwise_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; - -int __init nft_bitwise_module_init(void) -{ - return nft_register_expr(&nft_bitwise_type); -} - -void nft_bitwise_module_exit(void) -{ - nft_unregister_expr(&nft_bitwise_type); -} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index ee63d981268d..13d4e421a6b3 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -169,7 +169,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_byteorder_type; static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), @@ -178,20 +177,10 @@ static const struct nft_expr_ops nft_byteorder_ops = { .dump = nft_byteorder_dump, }; -static struct nft_expr_type nft_byteorder_type __read_mostly = { +struct nft_expr_type nft_byteorder_type __read_mostly = { .name = "byteorder", .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, .owner = THIS_MODULE, }; - -int __init nft_byteorder_module_init(void) -{ - return nft_register_expr(&nft_byteorder_type); -} - -void nft_byteorder_module_exit(void) -{ - nft_unregister_expr(&nft_byteorder_type); -} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2e53739812b1..2b96effeadc1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; @@ -110,7 +107,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_cmp_type; static const struct nft_expr_ops nft_cmp_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), @@ -211,20 +207,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_cmp_ops; } -static struct nft_expr_type nft_cmp_type __read_mostly = { +struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, }; - -int __init nft_cmp_module_init(void) -{ - return nft_register_expr(&nft_cmp_type); -} - -void nft_cmp_module_exit(void) -{ - nft_unregister_expr(&nft_cmp_type); -} diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 77db8358ab14..7f8422213341 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -18,105 +18,197 @@ #include <net/netfilter/nf_tables.h> struct nft_counter { - u64 bytes; - u64 packets; -}; - -struct nft_counter_percpu { - struct nft_counter counter; - struct u64_stats_sync syncp; + s64 bytes; + s64 packets; }; struct nft_counter_percpu_priv { - struct nft_counter_percpu __percpu *counter; + struct nft_counter __percpu *counter; }; -static void nft_counter_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static DEFINE_PER_CPU(seqcount_t, nft_counter_seq); + +static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - struct nft_counter_percpu *this_cpu; + struct nft_counter *this_cpu; + seqcount_t *myseq; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); - u64_stats_update_begin(&this_cpu->syncp); - this_cpu->counter.bytes += pkt->skb->len; - this_cpu->counter.packets++; - u64_stats_update_end(&this_cpu->syncp); + myseq = this_cpu_ptr(&nft_counter_seq); + + write_seqcount_begin(myseq); + + this_cpu->bytes += pkt->skb->len; + this_cpu->packets++; + + write_seqcount_end(myseq); local_bh_enable(); } -static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter, +static inline void nft_counter_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + nft_counter_do_eval(priv, regs, pkt); +} + +static int nft_counter_do_init(const struct nlattr * const tb[], + struct nft_counter_percpu_priv *priv) +{ + struct nft_counter __percpu *cpu_stats; + struct nft_counter *this_cpu; + + cpu_stats = alloc_percpu(struct nft_counter); + if (cpu_stats == NULL) + return -ENOMEM; + + preempt_disable(); + this_cpu = this_cpu_ptr(cpu_stats); + if (tb[NFTA_COUNTER_PACKETS]) { + this_cpu->packets = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + } + if (tb[NFTA_COUNTER_BYTES]) { + this_cpu->bytes = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + } + preempt_enable(); + priv->counter = cpu_stats; + return 0; +} + +static int nft_counter_obj_init(const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + return nft_counter_do_init(tb, priv); +} + +static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv) +{ + free_percpu(priv->counter); +} + +static void nft_counter_obj_destroy(struct nft_object *obj) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + nft_counter_do_destroy(priv); +} + +static void nft_counter_reset(struct nft_counter_percpu_priv __percpu *priv, struct nft_counter *total) { - const struct nft_counter_percpu *cpu_stats; + struct nft_counter *this_cpu; + + local_bh_disable(); + this_cpu = this_cpu_ptr(priv->counter); + this_cpu->packets -= total->packets; + this_cpu->bytes -= total->bytes; + local_bh_enable(); +} + +static void nft_counter_fetch(struct nft_counter_percpu_priv *priv, + struct nft_counter *total) +{ + struct nft_counter *this_cpu; + const seqcount_t *myseq; u64 bytes, packets; unsigned int seq; int cpu; memset(total, 0, sizeof(*total)); for_each_possible_cpu(cpu) { - cpu_stats = per_cpu_ptr(counter, cpu); + myseq = per_cpu_ptr(&nft_counter_seq, cpu); + this_cpu = per_cpu_ptr(priv->counter, cpu); do { - seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - bytes = cpu_stats->counter.bytes; - packets = cpu_stats->counter.packets; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + seq = read_seqcount_begin(myseq); + bytes = this_cpu->bytes; + packets = this_cpu->packets; + } while (read_seqcount_retry(myseq, seq)); - total->packets += packets; - total->bytes += bytes; + total->bytes += bytes; + total->packets += packets; } } -static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_counter_do_dump(struct sk_buff *skb, + struct nft_counter_percpu_priv *priv, + bool reset) { - struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); struct nft_counter total; - nft_counter_fetch(priv->counter, &total); + nft_counter_fetch(priv, &total); if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), NFTA_COUNTER_PAD) || nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), NFTA_COUNTER_PAD)) goto nla_put_failure; + + if (reset) + nft_counter_reset(priv, &total); + return 0; nla_put_failure: return -1; } +static int nft_counter_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + return nft_counter_do_dump(skb, priv, reset); +} + static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; +static struct nft_object_type nft_counter_obj __read_mostly = { + .type = NFT_OBJECT_COUNTER, + .size = sizeof(struct nft_counter_percpu_priv), + .maxattr = NFTA_COUNTER_MAX, + .policy = nft_counter_policy, + .eval = nft_counter_obj_eval, + .init = nft_counter_obj_init, + .destroy = nft_counter_obj_destroy, + .dump = nft_counter_obj_dump, + .owner = THIS_MODULE, +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + + nft_counter_do_eval(priv, regs, pkt); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + + return nft_counter_do_dump(skb, priv, false); +} + static int nft_counter_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - struct nft_counter_percpu __percpu *cpu_stats; - struct nft_counter_percpu *this_cpu; - - cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); - if (cpu_stats == NULL) - return -ENOMEM; - preempt_disable(); - this_cpu = this_cpu_ptr(cpu_stats); - if (tb[NFTA_COUNTER_PACKETS]) { - this_cpu->counter.packets = - be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); - } - if (tb[NFTA_COUNTER_BYTES]) { - this_cpu->counter.bytes = - be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); - } - preempt_enable(); - priv->counter = cpu_stats; - return 0; + return nft_counter_do_init(tb, priv); } static void nft_counter_destroy(const struct nft_ctx *ctx, @@ -124,28 +216,27 @@ static void nft_counter_destroy(const struct nft_ctx *ctx, { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - free_percpu(priv->counter); + nft_counter_do_destroy(priv); } static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) { struct nft_counter_percpu_priv *priv = nft_expr_priv(src); struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); - struct nft_counter_percpu __percpu *cpu_stats; - struct nft_counter_percpu *this_cpu; + struct nft_counter __percpu *cpu_stats; + struct nft_counter *this_cpu; struct nft_counter total; - nft_counter_fetch(priv->counter, &total); + nft_counter_fetch(priv, &total); - cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu, - GFP_ATOMIC); + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC); if (cpu_stats == NULL) return -ENOMEM; preempt_disable(); this_cpu = this_cpu_ptr(cpu_stats); - this_cpu->counter.packets = total.packets; - this_cpu->counter.bytes = total.bytes; + this_cpu->packets = total.packets; + this_cpu->bytes = total.bytes; preempt_enable(); priv_clone->counter = cpu_stats; @@ -174,12 +265,29 @@ static struct nft_expr_type nft_counter_type __read_mostly = { static int __init nft_counter_module_init(void) { - return nft_register_expr(&nft_counter_type); + int cpu, err; + + for_each_possible_cpu(cpu) + seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); + + err = nft_register_obj(&nft_counter_obj); + if (err < 0) + return err; + + err = nft_register_expr(&nft_counter_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_counter_obj); + return err; } static void __exit nft_counter_module_exit(void) { nft_unregister_expr(&nft_counter_type); + nft_unregister_obj(&nft_counter_obj); } module_init(nft_counter_module_init); @@ -188,3 +296,4 @@ module_exit(nft_counter_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("counter"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d7b0d171172a..e6baeaebe653 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -207,37 +208,37 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_l3proto_try_module_get(uint8_t family) +static int nft_ct_netns_get(struct net *net, uint8_t family) { int err; if (family == NFPROTO_INET) { - err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4); + err = nf_ct_netns_get(net, NFPROTO_IPV4); if (err < 0) goto err1; - err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6); + err = nf_ct_netns_get(net, NFPROTO_IPV6); if (err < 0) goto err2; } else { - err = nf_ct_l3proto_try_module_get(family); + err = nf_ct_netns_get(net, family); if (err < 0) goto err1; } return 0; err2: - nf_ct_l3proto_module_put(NFPROTO_IPV4); + nf_ct_netns_put(net, NFPROTO_IPV4); err1: return err; } -static void nft_ct_l3proto_module_put(uint8_t family) +static void nft_ct_netns_put(struct net *net, uint8_t family) { if (family == NFPROTO_INET) { - nf_ct_l3proto_module_put(NFPROTO_IPV4); - nf_ct_l3proto_module_put(NFPROTO_IPV6); + nf_ct_netns_put(net, NFPROTO_IPV4); + nf_ct_netns_put(net, NFPROTO_IPV6); } else - nf_ct_l3proto_module_put(family); + nf_ct_netns_put(net, family); } static int nft_ct_get_init(const struct nft_ctx *ctx, @@ -341,7 +342,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_l3proto_try_module_get(ctx->afi->family); + err = nft_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) return err; @@ -389,7 +390,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_l3proto_try_module_get(ctx->afi->family); + err = nft_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -404,7 +405,7 @@ err1: static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { - nft_ct_l3proto_module_put(ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } static void nft_ct_set_destroy(const struct nft_ctx *ctx, @@ -422,7 +423,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, break; } - nft_ct_l3proto_module_put(ctx->afi->family); + nft_ct_netns_put(ctx->net, ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = { .owner = THIS_MODULE, }; +static void nft_notrack_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct sk_buff *skb = pkt->skb; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(pkt->skb, &ctinfo); + /* Previously seen (loopback or untracked)? Ignore. */ + if (ct) + return; + + ct = nf_ct_untracked_get(); + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; +} + +static struct nft_expr_type nft_notrack_type; +static const struct nft_expr_ops nft_notrack_ops = { + .type = &nft_notrack_type, + .size = NFT_EXPR_SIZE(0), + .eval = nft_notrack_eval, +}; + +static struct nft_expr_type nft_notrack_type __read_mostly = { + .name = "notrack", + .ops = &nft_notrack_ops, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { + int err; + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); - return nft_register_expr(&nft_ct_type); + err = nft_register_expr(&nft_ct_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_notrack_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_expr(&nft_ct_type); + return err; } static void __exit nft_ct_module_exit(void) { + nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("ct"); +MODULE_ALIAS_NFT_EXPR("notrack"); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 31ca94793aa9..7de2f46734a4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -268,7 +268,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_dynset_type; static const struct nft_expr_ops nft_dynset_ops = { .type = &nft_dynset_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), @@ -278,20 +277,10 @@ static const struct nft_expr_ops nft_dynset_ops = { .dump = nft_dynset_dump, }; -static struct nft_expr_type nft_dynset_type __read_mostly = { +struct nft_expr_type nft_dynset_type __read_mostly = { .name = "dynset", .ops = &nft_dynset_ops, .policy = nft_dynset_policy, .maxattr = NFTA_DYNSET_MAX, .owner = THIS_MODULE, }; - -int __init nft_dynset_module_init(void) -{ - return nft_register_expr(&nft_dynset_type); -} - -void nft_dynset_module_exit(void) -{ - nft_unregister_expr(&nft_dynset_type); -} diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c new file mode 100644 index 000000000000..29a4906adc27 --- /dev/null +++ b/net/netfilter/nft_fib.c @@ -0,0 +1,159 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Generic part shared by ipv4 and ipv6 backends. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { + [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_RESULT] = { .type = NLA_U32 }, + [NFTA_FIB_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL(nft_fib_policy); + +#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: /* fallthrough */ + case NFT_FIB_RESULT_OIFNAME: + hooks = (1 << NF_INET_PRE_ROUTING); + break; + case NFT_FIB_RESULT_ADDRTYPE: + if (priv->flags & NFTA_FIB_F_IIF) + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + else if (priv->flags & NFTA_FIB_F_OIF) + hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_FORWARD); + else + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING); + + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} +EXPORT_SYMBOL_GPL(nft_fib_validate); + +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fib *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS]) + return -EINVAL; + + priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS])); + + if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL)) + return -EINVAL; + + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == + (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) == + (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) + return -EINVAL; + + priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); + priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = sizeof(int); + break; + case NFT_FIB_RESULT_OIFNAME: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = IFNAMSIZ; + break; + case NFT_FIB_RESULT_ADDRTYPE: + len = sizeof(u32); + break; + default: + return -EINVAL; + } + + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); + if (err < 0) + return err; + + return nft_fib_validate(ctx, expr, NULL); +} +EXPORT_SYMBOL_GPL(nft_fib_init); + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg)) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result))) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags))) + return -1; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fib_dump); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index) +{ + struct net_device *dev; + u32 *dreg = reg; + + switch (r) { + case NFT_FIB_RESULT_OIF: + *dreg = index; + break; + case NFT_FIB_RESULT_OIFNAME: + dev = dev_get_by_index_rcu(nft_net(pkt), index); + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + *dreg = 0; + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib_store_result); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c new file mode 100644 index 000000000000..9120fc7228f4 --- /dev/null +++ b/net/netfilter/nft_fib_inet.c @@ -0,0 +1,82 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +#include <net/netfilter/nft_fib.h> + +static void nft_fib_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case NFPROTO_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_fib_inet_type; +static const struct nft_expr_ops nft_fib_inet_ops = { + .type = &nft_fib_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_inet_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "fib", + .ops = &nft_fib_inet_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_inet_module_init(void) +{ + return nft_register_expr(&nft_fib_inet_type); +} + +static void __exit nft_fib_inet_module_exit(void) +{ + nft_unregister_expr(&nft_fib_inet_type); +} + +module_init(nft_fib_inet_module_init); +module_exit(nft_fib_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 763ebc3e0b2b..ce13a50b9189 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -26,8 +26,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; - nf_dup_netdev_egress(pkt, oif); - regs->verdict.code = NF_DROP; + nf_fwd_netdev_egress(pkt, oif); + regs->verdict.code = NF_STOLEN; } static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index d5447a22275c..eb2721af898d 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -58,7 +58,6 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (!tb[NFTA_HASH_SREG] || !tb[NFTA_HASH_DREG] || !tb[NFTA_HASH_LEN] || - !tb[NFTA_HASH_SEED] || !tb[NFTA_HASH_MODULUS]) return -EINVAL; @@ -83,7 +82,10 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + if (tb[NFTA_HASH_SEED]) + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + else + get_random_bytes(&priv->seed, sizeof(priv->seed)); return nft_validate_register_load(priv->sreg, len) && nft_validate_register_store(ctx, priv->dreg, NULL, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index d17018ff54e6..728baf88295a 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); @@ -105,7 +102,6 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, return 0; } -static struct nft_expr_type nft_imm_type; static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -116,20 +112,10 @@ static const struct nft_expr_ops nft_imm_ops = { .validate = nft_immediate_validate, }; -static struct nft_expr_type nft_imm_type __read_mostly = { +struct nft_expr_type nft_imm_type __read_mostly = { .name = "immediate", .ops = &nft_imm_ops, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, .owner = THIS_MODULE, }; - -int __init nft_immediate_module_init(void) -{ - return nft_register_expr(&nft_imm_type); -} - -void nft_immediate_module_exit(void) -{ - nft_unregister_expr(&nft_imm_type); -} diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 1b01404bb33f..6271e40a3dd6 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -32,8 +32,9 @@ static void nft_log_eval(const struct nft_expr *expr, { const struct nft_log *priv = nft_expr_priv(expr); - nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, - pkt->out, &priv->loginfo, "%s", priv->prefix); + nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, + nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", + priv->prefix); } static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 8166b6994cc7..d4f97fa7e21d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -35,9 +35,8 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_set_ext *ext; bool found; - found = set->ops->lookup(pkt->net, set, ®s->data[priv->sreg], &ext) ^ - priv->invert; - + found = set->ops->lookup(nft_net(pkt), set, ®s->data[priv->sreg], + &ext) ^ priv->invert; if (!found) { regs->verdict.code = NFT_BREAK; return; @@ -155,7 +154,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_lookup_type; static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), @@ -165,20 +163,10 @@ static const struct nft_expr_ops nft_lookup_ops = { .dump = nft_lookup_dump, }; -static struct nft_expr_type nft_lookup_type __read_mostly = { +struct nft_expr_type nft_lookup_type __read_mostly = { .name = "lookup", .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, .owner = THIS_MODULE, }; - -int __init nft_lookup_module_init(void) -{ - return nft_register_expr(&nft_lookup_type); -} - -void nft_lookup_module_exit(void) -{ - nft_unregister_expr(&nft_lookup_type); -} diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 81b5ad6165ac..11ce016cd479 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -77,7 +77,7 @@ int nft_masq_init(const struct nft_ctx *ctx, } } - return 0; + return nf_ct_netns_get(ctx->net, ctx->afi->family); } EXPORT_SYMBOL_GPL(nft_masq_init); @@ -105,4 +105,4 @@ nla_put_failure: EXPORT_SYMBOL_GPL(nft_masq_dump); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6c1e0246706e..66c7f4b4c49b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -36,7 +36,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - const struct net_device *in = pkt->in, *out = pkt->out; + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); struct sock *sk; u32 *dest = ®s->data[priv->dreg]; @@ -49,7 +49,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - *dest = pkt->pf; + *dest = nft_pf(pkt); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) @@ -146,7 +146,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) *dest = PACKET_MULTICAST; @@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = 1 << NF_INET_PRE_ROUTING; + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index ee2d71753746..19a7bf3236f9 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -209,7 +209,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - return 0; + return nf_ct_netns_get(ctx->net, family); } static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -257,12 +257,21 @@ nla_put_failure: return -1; } +static void +nft_nat_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + nf_ct_netns_put(ctx->net, priv->family); +} + static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), .eval = nft_nat_eval, .init = nft_nat_init, + .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, }; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 55bc5ab78d4a..a66b36097b8f 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, return -EOVERFLOW; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - atomic_set(&priv->counter, 0); + atomic_set(&priv->counter, priv->modulus - 1); return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c new file mode 100644 index 000000000000..415a65ba2b85 --- /dev/null +++ b/net/netfilter/nft_objref.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr)) + +static void nft_objref_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->type->eval(obj, regs, pkt); +} + +static int nft_objref_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_object *obj = nft_objref_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + u32 objtype; + + if (!tb[NFTA_OBJREF_IMM_NAME] || + !tb[NFTA_OBJREF_IMM_TYPE]) + return -EINVAL; + + objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); + obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, + genmask); + if (IS_ERR(obj)) + return -ENOENT; + + nft_objref_priv(expr) = obj; + obj->use++; + + return 0; +} + +static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_object *obj = nft_objref_priv(expr); + + if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || + nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static void nft_objref_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->use--; +} + +static struct nft_expr_type nft_objref_type; +static const struct nft_expr_ops nft_objref_ops = { + .type = &nft_objref_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), + .eval = nft_objref_eval, + .init = nft_objref_init, + .destroy = nft_objref_destroy, + .dump = nft_objref_dump, +}; + +struct nft_objref_map { + struct nft_set *set; + enum nft_registers sreg:8; + struct nft_set_binding binding; +}; + +static void nft_objref_map_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; + struct nft_object *obj; + bool found; + + found = set->ops->lookup(nft_net(pkt), set, ®s->data[priv->sreg], + &ext); + if (!found) { + regs->verdict.code = NFT_BREAK; + return; + } + obj = *nft_set_ext_obj(ext); + obj->type->eval(obj, regs, pkt); +} + +static int nft_objref_map_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set *set; + int err; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask); + if (IS_ERR(set)) { + if (tb[NFTA_OBJREF_SET_ID]) { + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_OBJREF_SET_ID], + genmask); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } + + if (!(set->flags & NFT_SET_OBJECT)) + return -EINVAL; + + priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]); + err = nft_validate_register_load(priv->sreg, set->klen); + if (err < 0) + return err; + + priv->binding.flags = set->flags & NFT_SET_OBJECT; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + +static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_objref_map *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_OBJREF_SET_SREG, priv->sreg) || + nla_put_string(skb, NFTA_OBJREF_SET_NAME, priv->set->name)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static void nft_objref_map_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(ctx, priv->set, &priv->binding); +} + +static struct nft_expr_type nft_objref_type; +static const struct nft_expr_ops nft_objref_map_ops = { + .type = &nft_objref_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), + .eval = nft_objref_map_eval, + .init = nft_objref_map_init, + .destroy = nft_objref_map_destroy, + .dump = nft_objref_map_dump, +}; + +static const struct nft_expr_ops * +nft_objref_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_OBJREF_SET_SREG] && + (tb[NFTA_OBJREF_SET_NAME] || + tb[NFTA_OBJREF_SET_ID])) + return &nft_objref_map_ops; + else if (tb[NFTA_OBJREF_IMM_NAME] && + tb[NFTA_OBJREF_IMM_TYPE]) + return &nft_objref_ops; + + return ERR_PTR(-EOPNOTSUPP); +} + +static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { + [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING }, + [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 }, + [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 }, + [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING }, + [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_objref_type __read_mostly = { + .name = "objref", + .select_ops = nft_objref_select_ops, + .policy = nft_objref_policy, + .maxattr = NFTA_OBJREF_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_objref_module_init(void) +{ + return nft_register_expr(&nft_objref_type); +} + +static void __exit nft_objref_module_exit(void) +{ + nft_unregister_expr(&nft_objref_type); +} + +module_init(nft_objref_module_init); +module_exit(nft_objref_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_EXPR("objref"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index b2f88617611a..36d2b1096546 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,6 +18,10 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +/* For layer 4 checksum field offset. */ +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmpv6.h> /* add vlan header into the user buffer for if tag was removed by offloads */ static bool @@ -148,7 +153,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_payload_type; static const struct nft_expr_ops nft_payload_ops = { .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), @@ -165,6 +169,87 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum) +{ + *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum)); + if (*sum == 0) + *sum = CSUM_MANGLED_0; +} + +static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff) +{ + struct udphdr *uh, _uh; + + uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh); + if (!uh) + return false; + + return uh->check; +} + +static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, + struct sk_buff *skb, + unsigned int *l4csum_offset) +{ + switch (pkt->tprot) { + case IPPROTO_TCP: + *l4csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + if (!nft_payload_udp_checksum(skb, pkt->xt.thoff)) + return -1; + /* Fall through. */ + case IPPROTO_UDPLITE: + *l4csum_offset = offsetof(struct udphdr, check); + break; + case IPPROTO_ICMPV6: + *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum); + break; + default: + return -1; + } + + *l4csum_offset += pkt->xt.thoff; + return 0; +} + +static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt, + struct sk_buff *skb, + __wsum fsum, __wsum tsum) +{ + int l4csum_offset; + __sum16 sum; + + /* If we cannot determine layer 4 checksum offset or this packet doesn't + * require layer 4 checksum recalculation, skip this packet. + */ + if (nft_payload_l4csum_offset(pkt, skb, &l4csum_offset) < 0) + return 0; + + if (skb_copy_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) + return -1; + + /* Checksum mangling for an arbitrary amount of bytes, based on + * inet_proto_csum_replace*() functions. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + nft_csum_replace(&sum, fsum, tsum); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + skb->csum = ~csum_add(csum_sub(~(skb->csum), fsum), + tsum); + } + } else { + sum = ~csum_fold(csum_add(csum_sub(csum_unfold(sum), fsum), + tsum)); + } + + if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) || + skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) + return -1; + + return 0; +} + static void nft_payload_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -205,14 +290,15 @@ static void nft_payload_set_eval(const struct nft_expr *expr, fsum = skb_checksum(skb, offset, priv->len, 0); tsum = csum_partial(src, priv->len, 0); - sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), - tsum)); - if (sum == 0) - sum = CSUM_MANGLED_0; + nft_csum_replace(&sum, fsum, tsum); if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) goto err; + + if (priv->csum_flags && + nft_payload_l4csum_update(pkt, skb, fsum, tsum) < 0) + goto err; } if (!skb_make_writable(skb, max(offset + priv->len, 0)) || @@ -241,6 +327,15 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) priv->csum_offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) { + u32 flags; + + flags = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_FLAGS])); + if (flags & ~NFT_PAYLOAD_L4CSUM_PSEUDOHDR) + return -EINVAL; + + priv->csum_flags = flags; + } switch (priv->csum_type) { case NFT_PAYLOAD_CSUM_NONE: @@ -263,7 +358,8 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, - htonl(priv->csum_offset))) + htonl(priv->csum_offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_FLAGS, htonl(priv->csum_flags))) goto nla_put_failure; return 0; @@ -320,20 +416,10 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return &nft_payload_ops; } -static struct nft_expr_type nft_payload_type __read_mostly = { +struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", .select_ops = nft_payload_select_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, }; - -int __init nft_payload_module_init(void) -{ - return nft_register_expr(&nft_payload_type); -} - -void nft_payload_module_exit(void) -{ - nft_unregister_expr(&nft_payload_type); -} diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 393d359a1889..3e19fa1230dc 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -43,7 +43,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, pkt->pf, + priv->queues_total, nft_pf(pkt), jhash_initval); } } diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index c00104c07095..bd6efc53f26d 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -17,38 +17,59 @@ struct nft_quota { u64 quota; - bool invert; - atomic64_t remain; + unsigned long flags; + atomic64_t consumed; }; static inline bool nft_overquota(struct nft_quota *priv, - const struct nft_pktinfo *pkt) + const struct sk_buff *skb) { - return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0; + return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota; } -static void nft_quota_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static inline bool nft_quota_invert(struct nft_quota *priv) { - struct nft_quota *priv = nft_expr_priv(expr); + return priv->flags & NFT_QUOTA_F_INV; +} - if (nft_overquota(priv, pkt) ^ priv->invert) +static inline void nft_quota_do_eval(struct nft_quota *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, + [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 }, }; -static int nft_quota_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +#define NFT_QUOTA_DEPLETED_BIT 1 /* From NFT_QUOTA_F_DEPLETED. */ + +static void nft_quota_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_quota *priv = nft_expr_priv(expr); - u32 flags = 0; - u64 quota; + struct nft_quota *priv = nft_obj_data(obj); + bool overquota; + + overquota = nft_overquota(priv, pkt->skb); + if (overquota ^ nft_quota_invert(priv)) + regs->verdict.code = NFT_BREAK; + + if (overquota && + !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) + nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0, + NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); +} + +static int nft_quota_do_init(const struct nlattr * const tb[], + struct nft_quota *priv) +{ + unsigned long flags = 0; + u64 quota, consumed = 0; if (!tb[NFTA_QUOTA_BYTES]) return -EINVAL; @@ -57,26 +78,60 @@ static int nft_quota_init(const struct nft_ctx *ctx, if (quota > S64_MAX) return -EOVERFLOW; + if (tb[NFTA_QUOTA_CONSUMED]) { + consumed = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_CONSUMED])); + if (consumed > quota) + return -EINVAL; + } + if (tb[NFTA_QUOTA_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); if (flags & ~NFT_QUOTA_F_INV) return -EINVAL; + if (flags & NFT_QUOTA_F_DEPLETED) + return -EOPNOTSUPP; } priv->quota = quota; - priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; - atomic64_set(&priv->remain, quota); + priv->flags = flags; + atomic64_set(&priv->consumed, consumed); return 0; } -static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_quota_obj_init(const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_quota *priv = nft_obj_data(obj); + + return nft_quota_do_init(tb, priv); +} + +static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, + bool reset) { - const struct nft_quota *priv = nft_expr_priv(expr); - u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; + u32 flags = priv->flags; + u64 consumed; + + if (reset) { + consumed = atomic64_xchg(&priv->consumed, 0); + if (test_and_clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) + flags |= NFT_QUOTA_F_DEPLETED; + } else { + consumed = atomic64_read(&priv->consumed); + } + + /* Since we inconditionally increment consumed quota for each packet + * that we see, don't go over the quota boundary in what we send to + * userspace. + */ + if (consumed > priv->quota) + consumed = priv->quota; if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), NFTA_QUOTA_PAD) || + nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed), + NFTA_QUOTA_PAD) || nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) goto nla_put_failure; return 0; @@ -85,6 +140,50 @@ nla_put_failure: return -1; } +static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj, + bool reset) +{ + struct nft_quota *priv = nft_obj_data(obj); + + return nft_quota_do_dump(skb, priv, reset); +} + +static struct nft_object_type nft_quota_obj __read_mostly = { + .type = NFT_OBJECT_QUOTA, + .size = sizeof(struct nft_quota), + .maxattr = NFTA_QUOTA_MAX, + .policy = nft_quota_policy, + .init = nft_quota_obj_init, + .eval = nft_quota_obj_eval, + .dump = nft_quota_obj_dump, + .owner = THIS_MODULE, +}; + +static void nft_quota_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + nft_quota_do_eval(priv, regs, pkt); +} + +static int nft_quota_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + return nft_quota_do_init(tb, priv); +} + +static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + return nft_quota_do_dump(skb, priv, false); +} + static struct nft_expr_type nft_quota_type; static const struct nft_expr_ops nft_quota_ops = { .type = &nft_quota_type, @@ -105,12 +204,26 @@ static struct nft_expr_type nft_quota_type __read_mostly = { static int __init nft_quota_module_init(void) { - return nft_register_expr(&nft_quota_type); + int err; + + err = nft_register_obj(&nft_quota_obj); + if (err < 0) + return err; + + err = nft_register_expr(&nft_quota_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_quota_obj); + return err; } static void __exit nft_quota_module_exit(void) { - nft_unregister_expr(&nft_quota_type); + nft_unregister_expr(&nft_quota_type); + nft_unregister_obj(&nft_quota_obj); } module_init(nft_quota_module_init); @@ -119,3 +232,4 @@ module_exit(nft_quota_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("quota"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA); diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 8f0aaaea1376..9edc74eedc10 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -128,7 +128,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_range_type; static const struct nft_expr_ops nft_range_ops = { .type = &nft_range_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), @@ -137,20 +136,10 @@ static const struct nft_expr_ops nft_range_ops = { .dump = nft_range_dump, }; -static struct nft_expr_type nft_range_type __read_mostly = { +struct nft_expr_type nft_range_type __read_mostly = { .name = "range", .ops = &nft_range_ops, .policy = nft_range_policy, .maxattr = NFTA_RANGE_MAX, .owner = THIS_MODULE, }; - -int __init nft_range_module_init(void) -{ - return nft_register_expr(&nft_range_type); -} - -void nft_range_module_exit(void) -{ - nft_unregister_expr(&nft_range_type); -} diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 03f7bf40ae75..40dcd05146d5 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -79,7 +79,7 @@ int nft_redir_init(const struct nft_ctx *ctx, return -EINVAL; } - return 0; + return nf_ct_netns_get(ctx->net, ctx->afi->family); } EXPORT_SYMBOL_GPL(nft_redir_init); @@ -108,4 +108,4 @@ nla_put_failure: EXPORT_SYMBOL_GPL(nft_redir_dump); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index e79d9ca2ffee..9e90a02cb104 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -23,36 +23,36 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->hook); + nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->net, pkt->skb, pkt->hook); + nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), - pkt->hook); + nft_hook(pkt)); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, - pkt->hook); + nf_send_unreach6(nft_net(pkt), pkt->skb, + priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(pkt->net, pkt->skb, pkt->hook); + nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, + nf_send_unreach6(nft_net(pkt), pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), - pkt->hook); + nft_hook(pkt)); break; } break; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c new file mode 100644 index 000000000000..d3eb640bc784 --- /dev/null +++ b/net/netfilter/nft_rt.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/dst.h> +#include <net/ip6_route.h> +#include <net/route.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> + +struct nft_rt { + enum nft_rt_keys key:8; + enum nft_registers dreg:8; +}; + +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + u32 *dest = ®s->data[priv->dreg]; + const struct dst_entry *dst; + + dst = skb_dst(skb); + if (!dst) + goto err; + + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: + *dest = dst->tclassid; + break; +#endif + case NFT_RT_NEXTHOP4: + if (nft_pf(pkt) != NFPROTO_IPV4) + goto err; + + *dest = rt_nexthop((const struct rtable *)dst, + ip_hdr(skb)->daddr); + break; + case NFT_RT_NEXTHOP6: + if (nft_pf(pkt) != NFPROTO_IPV6) + goto err; + + memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + &ipv6_hdr(skb)->daddr), + sizeof(struct in6_addr)); + break; + default: + WARN_ON(1); + goto err; + } + return; + +err: + regs->verdict.code = NFT_BREAK; +} + +const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { + [NFTA_RT_DREG] = { .type = NLA_U32 }, + [NFTA_RT_KEY] = { .type = NLA_U32 }, +}; + +int nft_rt_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_rt *priv = nft_expr_priv(expr); + unsigned int len; + + if (tb[NFTA_RT_KEY] == NULL || + tb[NFTA_RT_DREG] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY])); + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: +#endif + case NFT_RT_NEXTHOP4: + len = sizeof(u32); + break; + case NFT_RT_NEXTHOP6: + len = sizeof(struct in6_addr); + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); +} + +int nft_rt_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_rt_type; +static const struct nft_expr_ops nft_rt_get_ops = { + .type = &nft_rt_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)), + .eval = nft_rt_get_eval, + .init = nft_rt_get_init, + .dump = nft_rt_get_dump, +}; + +static struct nft_expr_type nft_rt_type __read_mostly = { + .name = "rt", + .ops = &nft_rt_get_ops, + .policy = nft_rt_policy, + .maxattr = NFTA_RT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_rt_module_init(void) +{ + return nft_register_expr(&nft_rt_type); +} + +static void __exit nft_rt_module_exit(void) +{ + nft_unregister_expr(&nft_rt_type); +} + +module_init(nft_rt_module_init); +module_exit(nft_rt_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>"); +MODULE_ALIAS_NFT_EXPR("rt"); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index a3dface3e6e6..1e20e2bbb6d9 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -167,6 +167,19 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_clear_busy(&he->ext); } +static bool nft_hash_deactivate_one(const struct net *net, + const struct nft_set *set, void *priv) +{ + struct nft_hash_elem *he = priv; + + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) { + nft_set_elem_change_active(net, set, &he->ext); + return true; + } + return false; +} + static void *nft_hash_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -181,13 +194,10 @@ static void *nft_hash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) - nft_set_elem_change_active(net, set, &he->ext); - else - he = NULL; - } + if (he != NULL && + !nft_hash_deactivate_one(net, set, he)) + he = NULL; + rcu_read_unlock(); return he; @@ -387,6 +397,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, + .deactivate_one = nft_hash_deactivate_one, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .update = nft_hash_update, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 36493a7cae88..08376e50f6cd 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -171,6 +171,15 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } +static bool nft_rbtree_deactivate_one(const struct net *net, + const struct nft_set *set, void *priv) +{ + struct nft_rbtree_elem *rbe = priv; + + nft_set_elem_change_active(net, set, &rbe->ext); + return true; +} + static void *nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -204,7 +213,7 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; continue; } - nft_set_elem_change_active(net, set, &rbe->ext); + nft_rbtree_deactivate_one(net, set, rbe); return rbe; } } @@ -295,6 +304,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, + .deactivate_one = nft_rbtree_deactivate_one, .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fc4977456c30..2ff499680cc6 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +#define XT_PCPU_BLOCK_SIZE 4096 struct compat_delta { unsigned int offset; /* offset in kernel */ @@ -958,7 +959,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!info) { - info = vmalloc(sz); + info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | + __GFP_NORETRY | __GFP_HIGHMEM, + PAGE_KERNEL); if (!info) return NULL; } @@ -982,7 +985,7 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ +/* Find table by name, grabs mutex & ref. Returns NULL on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { @@ -1615,6 +1618,59 @@ void xt_proto_fini(struct net *net, u_int8_t af) } EXPORT_SYMBOL_GPL(xt_proto_fini); +/** + * xt_percpu_counter_alloc - allocate x_tables rule counter + * + * @state: pointer to xt_percpu allocation state + * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct + * + * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then + * contain the address of the real (percpu) counter. + * + * Rule evaluation needs to use xt_get_this_cpu_counter() helper + * to fetch the real percpu counter. + * + * To speed up allocation and improve data locality, a 4kb block is + * allocated. + * + * xt_percpu_counter_alloc_state contains the base address of the + * allocated page and the current sub-offset. + * + * returns false on error. + */ +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter) +{ + BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2)); + + if (nr_cpu_ids <= 1) + return true; + + if (!state->mem) { + state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE, + XT_PCPU_BLOCK_SIZE); + if (!state->mem) + return false; + } + counter->pcnt = (__force unsigned long)(state->mem + state->off); + state->off += sizeof(*counter); + if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) { + state->mem = NULL; + state->off = 0; + } + return true; +} +EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc); + +void xt_percpu_counter_free(struct xt_counters *counters) +{ + unsigned long pcnt = counters->pcnt; + + if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0) + free_percpu((void __percpu *)pcnt); +} +EXPORT_SYMBOL_GPL(xt_percpu_counter_free); + static int __net_init xt_net_init(struct net *net) { int i; diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 4973cbddc446..19247a17e511 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) goto errout; audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", - info->type, par->hooknum, skb->len, - par->in ? par->in->name : "?", - par->out ? par->out->name : "?"); + info->type, xt_hooknum(par), skb->len, + xt_in(par) ? xt_inname(par) : "?", + xt_out(par) ? xt_outname(par) : "?"); if (skb->mark) audit_log_format(ab, " mark=%#x", skb->mark); @@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, ntohs(eth_hdr(skb)->h_proto)); - if (par->family == NFPROTO_BRIDGE) { + if (xt_family(par) == NFPROTO_BRIDGE) { switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): audit_ip4(ab, skb); @@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) } } - switch (par->family) { + switch (xt_family(par)) { case NFPROTO_IPV4: audit_ip4(ab, skb); break; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index e04dc282e3bb..da56c06a443c 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -106,7 +106,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -115,7 +115,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target connsecmark_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 6669e68d589e..95c750358747 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -216,7 +216,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err1; #endif - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) goto err1; @@ -260,7 +260,7 @@ out: err3: nf_ct_tmpl_free(ct); err2: - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); err1: return ret; } @@ -341,7 +341,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (help) module_put(help->helper->me); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); xt_ct_destroy_timeout(ct); nf_ct_put(info->ct); diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 1763ab82bcd7..c3b2017ebe41 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -32,15 +32,15 @@ static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, - &li, "%s", loginfo->prefix); + nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", loginfo->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c index b253e07cb1c5..e45a01255e70 100644 --- a/net/netfilter/xt_NETMAP.c +++ b/net/netfilter/xt_NETMAP.c @@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_addr.in6 = ipv6_hdr(skb)->daddr; else new_addr.in6 = ipv6_hdr(skb)->saddr; @@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) @@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) return -EINVAL; - return 0; + return nf_ct_netns_get(par->net, par->family); +} + +static void netmap_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static unsigned int @@ -72,16 +77,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); + NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_POST_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT || + xt_hooknum(par) == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -96,7 +101,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg4_check(const struct xt_tgchk_param *par) @@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param *par) pr_debug("bad rangesize %u.\n", mr->rangesize); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); } static struct xt_target netmap_tg_reg[] __read_mostly = { @@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = { (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg6_checkentry, + .destroy = netmap_tg_destroy, .me = THIS_MODULE, }, { @@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = { (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg4_check, + .destroy = netmap_tg_destroy, .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 8668a5c18dc3..c7f8958cea4a 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -25,8 +25,8 @@ static unsigned int nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; @@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; - nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 8f1779ff7e30..a360b99a958a 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) if (info->queues_total > 1) { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } return NF_QUEUE_NR(queue); } @@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = info->queuenum + cpu % info->queues_total; } else { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index dbd6c4a12b97..91a373a3f534 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -63,7 +63,7 @@ void xt_rateest_put(struct xt_rateest *est) mutex_lock(&xt_rateest_mutex); if (--est->refcnt == 0) { hlist_del(&est->list); - gen_kill_estimator(&est->bstats, &est->rstats); + gen_kill_estimator(&est->rate_est); /* * gen_estimator est_timer() might access est->lock or bstats, * wait a RCU grace period before freeing 'est' @@ -132,7 +132,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; - ret = gen_new_estimator(&est->bstats, NULL, &est->rstats, + ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, &est->lock, NULL, &cfg.opt); if (ret < 0) goto err2; diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 03f0b370e178..98a4c6d4f1cb 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -31,7 +31,7 @@ static unsigned int redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par)); } static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) @@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) if (range->flags & NF_NAT_RANGE_MAP_IPS) return -EINVAL; - return 0; + + return nf_ct_netns_get(par->net, par->family); +} + +static void redirect_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } /* FIXME: Take multiple ranges --RR */ @@ -56,13 +62,13 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) pr_debug("bad rangesize %u.\n", mr->rangesize); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); } static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { @@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = { .revision = 0, .table = "nat", .checkentry = redirect_tg6_checkentry, + .destroy = redirect_tg_destroy, .target = redirect_tg6, .targetsize = sizeof(struct nf_nat_range), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -85,6 +92,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = { .table = "nat", .target = redirect_tg4, .checkentry = redirect_tg4_check, + .destroy = redirect_tg_destroy, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 872db2d0e2a9..27241a767f17 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = par->net; + struct net *net = xt_net(par); unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); @@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, * length IPv6 header of 60, ergo the default MSS value is 1220 * Since no MSS was provided, we must use the default values */ - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) newmss = min(newmss, (u16)536); else newmss = min(newmss, (u16)1220); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 0471db4032c5..1c57ace75ae6 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif); + nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } @@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif); + nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 663c4c3c9072..80cb7babeb64 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, + tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, + tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, - par->in, NFT_LOOKUP_ESTABLISHED); + xt_in(par), NFT_LOOKUP_ESTABLISHED); laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? tgi->lport : hp->dest; @@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, laddr, hp->source, lport, - par->in, NFT_LOOKUP_LISTENER); + xt_in(par), NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && tproxy_sk_is_transparent(sk)) { @@ -529,6 +531,11 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; + int err; + + err = nf_defrag_ipv6_enable(par->net); + if (err) + return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IP6T_INV_PROTO)) @@ -543,6 +550,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; + int err; + + err = nf_defrag_ipv4_enable(par->net); + if (err) + return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) @@ -594,11 +606,6 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = { static int __init tproxy_tg_init(void) { - nf_defrag_ipv4_enable(); -#ifdef XT_TPROXY_HAVE_IPV6 - nf_defrag_ipv6_enable(); -#endif - return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 11d6091991a4..e329dabde35f 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; + dev = xt_in(par); else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; + dev = xt_out(par); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - if (par->family == NFPROTO_IPV6) + if (xt_family(par) == NFPROTO_IPV6) return addrtype_mt6(net, dev, skb, info); #endif iph = ip_hdr(skb); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index dffee9d47ec4..2dedaa23ab0a 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/filter.h> +#include <linux/bpf.h> #include <linux/netfilter/xt_bpf.h> #include <linux/netfilter/x_tables.h> @@ -20,15 +21,15 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_bpf"); MODULE_ALIAS("ip6t_bpf"); -static int bpf_mt_check(const struct xt_mtchk_param *par) +static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, + struct bpf_prog **ret) { - struct xt_bpf_info *info = par->matchinfo; struct sock_fprog_kern program; - program.len = info->bpf_program_num_elem; - program.filter = info->bpf_program; + program.len = len; + program.filter = insns; - if (bpf_prog_create(&info->filter, &program)) { + if (bpf_prog_create(ret, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) return 0; } +static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) +{ + struct bpf_prog *prog; + + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + *ret = prog; + return 0; +} + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + + return __bpf_mt_check_bytecode(info->bpf_program, + info->bpf_program_num_elem, + &info->filter); +} + +static int bpf_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info_v1 *info = par->matchinfo; + + if (info->mode == XT_BPF_MODE_BYTECODE) + return __bpf_mt_check_bytecode(info->bpf_program, + info->bpf_program_num_elem, + &info->filter); + else if (info->mode == XT_BPF_MODE_FD_PINNED || + info->mode == XT_BPF_MODE_FD_ELF) + return __bpf_mt_check_fd(info->fd, &info->filter); + else + return -EINVAL; +} + static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; @@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) return BPF_PROG_RUN(info->filter, skb); } +static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info_v1 *info = par->matchinfo; + + return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb); +} + static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; + + bpf_prog_destroy(info->filter); +} + +static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info_v1 *info = par->matchinfo; + bpf_prog_destroy(info->filter); } -static struct xt_match bpf_mt_reg __read_mostly = { - .name = "bpf", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = bpf_mt_check, - .match = bpf_mt, - .destroy = bpf_mt_destroy, - .matchsize = sizeof(struct xt_bpf_info), - .me = THIS_MODULE, +static struct xt_match bpf_mt_reg[] __read_mostly = { + { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, + }, + { + .name = "bpf", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check_v1, + .match = bpf_mt_v1, + .destroy = bpf_mt_destroy_v1, + .matchsize = sizeof(struct xt_bpf_info_v1), + .me = THIS_MODULE, + }, }; static int __init bpf_mt_init(void) { - return xt_register_match(&bpf_mt_reg); + return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } static void __exit bpf_mt_exit(void) { - xt_unregister_match(&bpf_mt_reg); + xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } module_init(bpf_mt_init); diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 96fa26b20b67..9a9884a39c0e 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. */ - if (!xt_cluster_is_multicast_addr(skb, par->family) && + if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d4bec261e74e..cad0b7b5eb35 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -110,7 +110,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) sinfo->direction != XT_CONNBYTES_DIR_BOTH) return -EINVAL; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -129,7 +129,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connbytes_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 03d66f1c5e69..7827128d5a95 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -61,7 +61,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -70,14 +70,14 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) ret = nf_connlabels_get(par->net, info->bit); if (ret < 0) - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); return ret; } static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) { nf_connlabels_put(par->net); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connlabels_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b6dc322593a3..2aff2b7c4689 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, net, &tuple)) { + xt_family(par), net, &tuple)) { goto hotdrop; } - if (par->family == NFPROTO_IPV6) { + if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? &iph->daddr : &iph->saddr, sizeof(addr.ip6)); @@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family, zone); + &info->mask, xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; @@ -368,7 +368,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for " "address family %u\n", par->family); @@ -378,7 +378,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); return -ENOMEM; } @@ -414,7 +414,7 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) const struct xt_connlimit_info *info = par->matchinfo; unsigned int i; - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) destroy_tree(&info->data->climit_root4[i]); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index b83e158e116a..9935d5029b0e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -77,7 +77,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -86,7 +86,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) static void connmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static bool @@ -107,7 +107,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -116,7 +116,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) static void connmark_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target connmark_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index a3b8f697cfc5..c0fb217bc649 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) - if (conntrack_mt_origsrc(ct, info, par->family) ^ + if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & XT_CONNTRACK_ORIGDST) - if (conntrack_mt_origdst(ct, info, par->family) ^ + if (conntrack_mt_origdst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) - if (conntrack_mt_replsrc(ct, info, par->family) ^ + if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) - if (conntrack_mt_repldst(ct, info, par->family) ^ + if (conntrack_mt_repldst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; @@ -271,7 +271,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -280,7 +280,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match conntrack_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index d9202cdd25c9..96ebe1cdefec 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_devgroup_info *info = par->matchinfo; if (info->flags & XT_DEVGROUP_MATCH_SRC && - (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ + (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) return false; if (info->flags & XT_DEVGROUP_MATCH_DST && - (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ + (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) return false; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 64670fc5d0e1..236ac8008909 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index b89b688e9d01..10063408141d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -49,7 +49,7 @@ struct hashlimit_net { struct proc_dir_entry *ip6t_hashlimit; }; -static int hashlimit_net_id; +static unsigned int hashlimit_net_id; static inline struct hashlimit_net *hashlimit_pernet(struct net *net) { return net_generic(net, hashlimit_net_id); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index f679dd4c272a..38a78151c0e9 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -59,7 +59,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) struct xt_helper_info *info = par->matchinfo; int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -71,7 +71,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) static void helper_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match helper_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 71a9d95e0a81..0fdc89064488 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -48,9 +48,9 @@ static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; - struct netns_ipvs *ipvs = net_ipvs(par->net); + struct netns_ipvs *ipvs = net_ipvs(xt_net(par)); /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ - const u_int8_t family = par->family; + const u_int8_t family = xt_family(par); struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index ac1d3c3d09e7..1cde0e4985b7 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -42,29 +42,43 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, e = minfo->ports[++i]; pr_debug("src or dst matches with %d-%d?\n", s, e); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src >= s && src <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst >= s && dst <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && ((dst >= s && dst <= e) - || (src >= s && src <= e))) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + if (src >= s && src <= e) + return true ^ minfo->invert; + break; + case XT_MULTIPORT_DESTINATION: + if (dst >= s && dst <= e) + return true ^ minfo->invert; + break; + case XT_MULTIPORT_EITHER: + if ((dst >= s && dst <= e) || + (src >= s && src <= e)) + return true ^ minfo->invert; + break; + default: + break; + } } else { /* exact port matching */ pr_debug("src or dst matches with %d?\n", s); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && (src == s || dst == s)) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + if (src == s) + return true ^ minfo->invert; + break; + case XT_MULTIPORT_DESTINATION: + if (dst == s) + return true ^ minfo->invert; + break; + case XT_MULTIPORT_EITHER: + if (src == s || dst == s) + return true ^ minfo->invert; + break; + default: + break; + } } } diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index bea7464cc43f..8107b3eb865f 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -23,7 +23,17 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) par->target->name); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); +} + +static int xt_nat_checkentry(const struct xt_tgchk_param *par) +{ + return nf_ct_netns_get(par->net, par->family); +} + +static void xt_nat_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static void xt_nat_convert_range(struct nf_nat_range *dst, @@ -106,6 +116,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { .name = "SNAT", .revision = 0, .checkentry = xt_nat_checkentry_v0, + .destroy = xt_nat_destroy, .target = xt_snat_target_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .family = NFPROTO_IPV4, @@ -118,6 +129,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { .name = "DNAT", .revision = 0, .checkentry = xt_nat_checkentry_v0, + .destroy = xt_nat_destroy, .target = xt_dnat_target_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .family = NFPROTO_IPV4, @@ -129,6 +141,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { { .name = "SNAT", .revision = 1, + .checkentry = xt_nat_checkentry, + .destroy = xt_nat_destroy, .target = xt_snat_target_v1, .targetsize = sizeof(struct nf_nat_range), .table = "nat", @@ -139,6 +153,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { { .name = "DNAT", .revision = 1, + .checkentry = xt_nat_checkentry, + .destroy = xt_nat_destroy, .target = xt_dnat_target_v1, .targetsize = sizeof(struct nf_nat_range), .table = "nat", diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index cf327593852a..cc0518fe598e 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); + overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct); return overquota == NFACCT_UNDERQUOTA ? false : true; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 2455b69b5810..c05fefcec238 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = p->net; + struct net *net = xt_net(p); if (!info) return false; @@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, + xt_in(p), xt_out(p), NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), @@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(net, p->family, p->hooknum, skb, p->in, - p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p), + xt_out(p), NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index a20e731b5b6c..16477df45b3b 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); - struct net *net = par->net; + struct net *net = xt_net(par); if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 5b645cb598fc..57efb703ff18 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->pkt_type != PACKET_LOOPBACK) type = skb->pkt_type; - else if (par->family == NFPROTO_IPV4 && + else if (xt_family(par) == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (par->family == NFPROTO_IPV6 && + else if (xt_family(par) == NFPROTO_IPV6 && ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) type = PACKET_MULTICAST; else diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index f23e97bb42d7..2b4ab189bba7 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, par->family); + ret = match_policy_in(skb, info, xt_family(par)); else - ret = match_policy_out(skb, info, par->family); + ret = match_policy_out(skb, info, xt_family(par)); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 7720b036d76a..1db02f6fca54 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,35 +18,33 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est64 *r; + struct gnet_stats_rate_est64 sample = {0}; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; - spin_lock_bh(&info->est1->lock); - r = &info->est1->rstats; + gen_estimator_read(&info->est1->rate_est, &sample); + if (info->flags & XT_RATEEST_MATCH_DELTA) { - bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0; - pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0; + bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0; + pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0; } else { - bps1 = r->bps; - pps1 = r->pps; + bps1 = sample.bps; + pps1 = sample.pps; } - spin_unlock_bh(&info->est1->lock); if (info->flags & XT_RATEEST_MATCH_ABS) { bps2 = info->bps2; pps2 = info->pps2; } else { - spin_lock_bh(&info->est2->lock); - r = &info->est2->rstats; + gen_estimator_read(&info->est2->rate_est, &sample); + if (info->flags & XT_RATEEST_MATCH_DELTA) { - bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0; - pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0; + bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0; + pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0; } else { - bps2 = r->bps; - pps2 = r->pps; + bps2 = sample.bps; + pps2 = sample.pps; } - spin_unlock_bh(&info->est2->lock); } switch (info->mode) { diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index e3b7a09b103e..1d89a4eaf841 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -95,7 +95,7 @@ struct recent_net { #endif }; -static int recent_net_id __read_mostly; +static unsigned int recent_net_id __read_mostly; static inline struct recent_net *recent_pernet(struct net *net) { @@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; @@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int8_t ttl; bool ret = info->invert; - if (par->family == NFPROTO_IPV4) { + if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) @@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* use TTL as seen before forwarding */ - if (par->out != NULL && skb->sk == NULL) + if (xt_out(par) != NULL && skb->sk == NULL) ttl++; spin_lock_bh(&recent_lock); @@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) nf_inet_addr_mask(&addr, &addr_mask, &t->mask); - e = recent_entry_lookup(t, &addr_mask, par->family, + e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, &addr_mask, par->family, ttl); + e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 5669e5b453f4..64285702afd5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.u.compat.dim, + ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); return match_set(info->match_set.index, skb, par, &opt, @@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, 0, UINT_MAX); if (opt.flags & IPSET_RETURN_NOMATCH) @@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v3 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v4 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim, info->del_set.u.compat.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ @@ -423,17 +423,19 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Revision 3 target */ +#define MOPT(opt, member) ((opt).ext.skbinfo.member) + static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; int ret; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); - ADT_OPT(map_opt, par->family, info->map_set.dim, + ADT_OPT(map_opt, xt_family(par), info->map_set.dim, info->map_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ @@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) if (!ret) return XT_CONTINUE; if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK) - skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask)) - ^ (map_opt.ext.skbmark); + skb->mark = (skb->mark & ~MOPT(map_opt,skbmarkmask)) + ^ MOPT(map_opt, skbmark); if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO) - skb->priority = map_opt.ext.skbprio; + skb->priority = MOPT(map_opt, skbprio); if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) && skb->dev && - skb->dev->real_num_tx_queues > map_opt.ext.skbqueue) - skb_set_queue_mapping(skb, map_opt.ext.skbqueue); + skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue)) + skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue)); } return XT_CONTINUE; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index b10ade272b50..770bbec878f1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,76 +22,14 @@ #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -#define XT_SOCKET_HAVE_IPV6 1 #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/inet6_hashtables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif +#include <net/netfilter/nf_socket.h> #include <linux/netfilter/xt_socket.h> -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#define XT_SOCKET_HAVE_CONNTRACK 1 -#include <net/netfilter/nf_conntrack.h> -#endif - -static int -extract_icmp4_fields(const struct sk_buff *skb, - u8 *protocol, - __be32 *raddr, - __be32 *laddr, - __be16 *rport, - __be16 *lport) -{ - unsigned int outside_hdrlen = ip_hdrlen(skb); - struct iphdr *inside_iph, _inside_iph; - struct icmphdr *icmph, _icmph; - __be16 *ports, _ports[2]; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - switch (icmph->type) { - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_REDIRECT: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - break; - default: - return 1; - } - - inside_iph = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr), - sizeof(_inside_iph), &_inside_iph); - if (inside_iph == NULL) - return 1; - - if (inside_iph->protocol != IPPROTO_TCP && - inside_iph->protocol != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr) + - (inside_iph->ihl << 2), - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_iph->protocol; - *laddr = inside_iph->saddr; - *lport = ports[0]; - *raddr = inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - /* "socket" match based redirection (no specific rule) * =================================================== * @@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb, * then local services could intercept traffic going through the * box. */ -static struct sock * -xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, - const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - return NULL; -} - -static bool xt_socket_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - - default: - return inet_sk(sk)->transparent; - } -} - -static struct sock *xt_socket_lookup_slow_v4(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - const struct iphdr *iph = ip_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 uninitialized_var(dport), uninitialized_var(sport); - u8 uninitialized_var(protocol); -#ifdef XT_SOCKET_HAVE_CONNTRACK - struct nf_conn const *ct; - enum ip_conntrack_info ctinfo; -#endif - - if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - protocol = iph->protocol; - saddr = iph->saddr; - sport = hp->source; - daddr = iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = iph->protocol == IPPROTO_TCP ? - ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : - ip_hdrlen(skb) + sizeof(*hp); - - } else if (iph->protocol == IPPROTO_ICMP) { - if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return NULL; - } else { - return NULL; - } - -#ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in - * case this is a reply packet of an established - * SNAT-ted connection. - */ - ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && - ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_ESTABLISHED_REPLY) || - (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_RELATED_REPLY)) && - (ct->status & IPS_SRC_NAT_DONE)) { - - daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; - dport = (iph->protocol == IPPROTO_TCP) ? - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - } -#endif - - return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, - daddr, sport, dport, indev); -} - static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; @@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) return socket_match(skb, par, par->matchinfo); } -#ifdef XT_SOCKET_HAVE_IPV6 - -static int -extract_icmp6_fields(const struct sk_buff *skb, - unsigned int outside_hdrlen, - int *protocol, - const struct in6_addr **raddr, - const struct in6_addr **laddr, - __be16 *rport, - __be16 *lport, - struct ipv6hdr *ipv6_var) -{ - const struct ipv6hdr *inside_iph; - struct icmp6hdr *icmph, _icmph; - __be16 *ports, _ports[2]; - u8 inside_nexthdr; - __be16 inside_fragoff; - int inside_hdrlen; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) - return 1; - - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), - sizeof(*ipv6_var), ipv6_var); - if (inside_iph == NULL) - return 1; - inside_nexthdr = inside_iph->nexthdr; - - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + - sizeof(*ipv6_var), - &inside_nexthdr, &inside_fragoff); - if (inside_hdrlen < 0) - return 1; /* hjm: Packet has no/incomplete transport layer headers. */ - - if (inside_nexthdr != IPPROTO_TCP && - inside_nexthdr != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, inside_hdrlen, - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_nexthdr; - *laddr = &inside_iph->saddr; - *lport = ports[0]; - *raddr = &inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - -static struct sock * -xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, - const u8 protocol, - const struct in6_addr *saddr, const struct in6_addr *daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp6_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - - return NULL; -} - -static struct sock *xt_socket_lookup_slow_v6(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - __be16 uninitialized_var(dport), uninitialized_var(sport); - const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - int thoff = 0, tproto; - - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { - pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NULL; - } - - if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - saddr = &iph->saddr; - sport = hp->source; - daddr = &iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = tproto == IPPROTO_TCP ? - thoff + __tcp_hdrlen((struct tcphdr *)hp) : - thoff + sizeof(*hp); - - } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport, &ipv6_var)) - return NULL; - } else { - return NULL; - } - - return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, - sport, dport, indev); -} - +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { @@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; @@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -432,9 +147,28 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_enable_defrag(struct net *net, int family) +{ + switch (family) { + case NFPROTO_IPV4: + return nf_defrag_ipv4_enable(net); +#ifdef XT_SOCKET_HAVE_IPV6 + case NFPROTO_IPV6: + return nf_defrag_ipv6_enable(net); +#endif + } + WARN_ONCE(1, "Unknown family %d\n", family); + return 0; +} + static int socket_mt_v1_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + int err; + + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); @@ -446,6 +180,11 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par) static int socket_mt_v2_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + int err; + + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); @@ -458,7 +197,11 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo3 *info = (struct xt_socket_mtinfo3 *)par->matchinfo; + int err; + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V3); @@ -488,7 +231,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 1, @@ -512,7 +255,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 2, @@ -536,7 +279,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 3, @@ -553,11 +296,6 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { - nf_defrag_ipv4_enable(); -#ifdef XT_SOCKET_HAVE_IPV6 - nf_defrag_ipv6_enable(); -#endif - return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index a507922d80cd..5746a33789a5 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -43,7 +43,7 @@ static int state_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -52,7 +52,7 @@ static int state_mt_check(const struct xt_mtchk_param *par) static void state_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match state_mt_reg __read_mostly = { |