diff options
Diffstat (limited to 'net/netfilter')
66 files changed, 1338 insertions, 502 deletions
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a135b1a46014..238b6a620e88 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +ifeq ($(CONFIG_NF_CONNTRACK),m) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o +else ifeq ($(CONFIG_NF_CONNTRACK),y) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o +endif obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 77ae3e8d344c..dcf752b55a52 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -622,7 +622,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, case NF_ACCEPT: break; case NF_DROP: - kfree_skb(skb); + kfree_skb_reason(skb, + SKB_DROP_REASON_NETFILTER_DROP); ret = NF_DROP_GETERR(verdict); if (ret == 0) ret = -EPERM; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index d2e5a8f644b8..029171379884 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -610,7 +610,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, nf_reset_ct(skb); skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); } return ret; } @@ -652,7 +652,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -674,7 +674,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 91bc8df3e4b0..385a5f458aba 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -22,26 +22,7 @@ static bool nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); -static const struct nf_ct_ext_type acct_extend = { - .len = sizeof(struct nf_conn_acct), - .align = __alignof__(struct nf_conn_acct), - .id = NF_CT_EXT_ACCT, -}; - void nf_conntrack_acct_pernet_init(struct net *net) { net->ct.sysctl_acct = nf_ct_acct; } - -int nf_conntrack_acct_init(void) -{ - int ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) - pr_err("Unable to register extension\n"); - return ret; -} - -void nf_conntrack_acct_fini(void) -{ - nf_ct_extend_unregister(&acct_extend); -} diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c new file mode 100644 index 000000000000..fe98673dd5ac --- /dev/null +++ b/net/netfilter/nf_conntrack_bpf.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Conntrack Helpers for XDP and TC-BPF hook + * + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/types.h> +#include <linux/btf_ids.h> +#include <linux/net_namespace.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_bpf.h> +#include <net/netfilter/nf_conntrack_core.h> + +/* bpf_ct_opts - Options for CT lookup helpers + * + * Members: + * @netns_id - Specify the network namespace for lookup + * Values: + * BPF_F_CURRENT_NETNS (-1) + * Use namespace associated with ctx (xdp_md, __sk_buff) + * [0, S32_MAX] + * Network Namespace ID + * @error - Out parameter, set for any errors encountered + * Values: + * -EINVAL - Passed NULL for bpf_tuple pointer + * -EINVAL - opts->reserved is not 0 + * -EINVAL - netns_id is less than -1 + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP + * -ENONET - No network namespace found for netns_id + * -ENOENT - Conntrack lookup could not find entry for tuple + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4) + * or sizeof(tuple->ipv6) + * @l4proto - Layer 4 protocol + * Values: + * IPPROTO_TCP, IPPROTO_UDP + * @reserved - Reserved member, will be reused for more options in future + * Values: + * 0 + */ +struct bpf_ct_opts { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +}; + +enum { + NF_BPF_CT_OPTS_SZ = 12, +}; + +static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, + struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, + s32 netns_id) +{ + struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple tuple; + + if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) + return ERR_PTR(-EPROTO); + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + memset(&tuple, 0, sizeof(tuple)); + switch (tuple_len) { + case sizeof(bpf_tuple->ipv4): + tuple.src.l3num = AF_INET; + tuple.src.u3.ip = bpf_tuple->ipv4.saddr; + tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; + tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; + tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; + break; + case sizeof(bpf_tuple->ipv6): + tuple.src.l3num = AF_INET6; + memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); + tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; + memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); + tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } + + tuple.dst.protonum = protonum; + + if (netns_id >= 0) { + net = get_net_ns_by_id(net, netns_id); + if (unlikely(!net)) + return ERR_PTR(-ENONET); + } + + hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); + if (netns_id >= 0) + put_net(net); + if (!hash) + return ERR_PTR(-ENOENT); + return nf_ct_tuplehash_to_ctrack(hash); +} + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_conntrack BTF"); + +/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = dev_net(ctx->rxq->dev); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_ct_release - Release acquired nf_conn object + * + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects + * the program if any references remain in the program in all of the explored + * states. + * + * Parameters: + * @nf_conn - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. + */ +void bpf_ct_release(struct nf_conn *nfct) +{ + if (!nfct) + return; + nf_ct_put(nfct); +} + +__diag_pop() + +BTF_SET_START(nf_ct_xdp_check_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_xdp_check_kfunc_ids) + +BTF_SET_START(nf_ct_tc_check_kfunc_ids) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_tc_check_kfunc_ids) + +BTF_SET_START(nf_ct_acquire_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_SET_END(nf_ct_acquire_kfunc_ids) + +BTF_SET_START(nf_ct_release_kfunc_ids) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_release_kfunc_ids) + +/* Both sets are identical */ +#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids + +static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_xdp_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_tc_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +int register_nf_conntrack_bpf(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bf1e17c678f1..0164e5f522e8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -34,10 +34,10 @@ #include <linux/rculist_nulls.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> -#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> @@ -47,7 +47,6 @@ #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> -#include <net/netfilter/nf_conntrack_act_ct.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netns/hash.h> @@ -67,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; u32 next_bucket; + u32 avg_timeout; + u32 start_time; bool exiting; bool early_drop; }; @@ -78,8 +79,19 @@ static __read_mostly bool nf_conntrack_locks_all; /* serialize hash resizes and nf_ct_iterate_cleanup */ static DEFINE_MUTEX(nf_conntrack_mutex); -#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_INTERVAL_MAX (60ul * HZ) +#define GC_SCAN_INTERVAL_MIN (1ul * HZ) + +/* clamp timeouts to this value (TCP unacked) */ +#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ) + +/* large initial bias so that we don't scan often just because we have + * three entries with a 1s timeout. + */ +#define GC_SCAN_INTERVAL_INIT INT_MAX + #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) +#define GC_SCAN_EXPIRED_MAX (64000u / HZ) #define MIN_CHAINLEN 8u #define MAX_CHAINLEN (32u - MIN_CHAINLEN) @@ -594,7 +606,7 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); void nf_ct_tmpl_free(struct nf_conn *tmpl) { - nf_ct_ext_destroy(tmpl); + kfree(tmpl->ext); if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) kfree((char *)tmpl - tmpl->proto.tmpl_padto); @@ -1421,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; unsigned int i, hashsz, nf_conntrack_max95 = 0; - unsigned long next_run = GC_SCAN_INTERVAL; + u32 end_time, start_time = nfct_time_stamp; struct conntrack_gc_work *gc_work; + unsigned int expired_count = 0; + unsigned long next_run; + s32 delta_time; + gc_work = container_of(work, struct conntrack_gc_work, dwork.work); i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; + if (i == 0) { + gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; + gc_work->start_time = start_time; + } + + next_run = gc_work->avg_timeout; + + end_time = start_time + GC_SCAN_MAX_DURATION; + do { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; @@ -1447,6 +1471,7 @@ static void gc_worker(struct work_struct *work) hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; + unsigned long expires; struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); @@ -1456,11 +1481,29 @@ static void gc_worker(struct work_struct *work) continue; } + if (expired_count > GC_SCAN_EXPIRED_MAX) { + rcu_read_unlock(); + + gc_work->next_bucket = i; + gc_work->avg_timeout = next_run; + + delta_time = nfct_time_stamp - gc_work->start_time; + + /* re-sched immediately if total cycle time is exceeded */ + next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX; + goto early_exit; + } + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); + expired_count++; continue; } + expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); + next_run += expires; + next_run /= 2u; + if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; @@ -1478,8 +1521,10 @@ static void gc_worker(struct work_struct *work) continue; } - if (gc_worker_can_early_drop(tmp)) + if (gc_worker_can_early_drop(tmp)) { nf_ct_kill(tmp); + expired_count++; + } nf_ct_put(tmp); } @@ -1492,33 +1537,38 @@ static void gc_worker(struct work_struct *work) cond_resched(); i++; - if (time_after(jiffies, end_time) && i < hashsz) { + delta_time = nfct_time_stamp - end_time; + if (delta_time > 0 && i < hashsz) { + gc_work->avg_timeout = next_run; gc_work->next_bucket = i; next_run = 0; - break; + goto early_exit; } } while (i < hashsz); + gc_work->next_bucket = 0; + + next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX); + + delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1); + if (next_run > (unsigned long)delta_time) + next_run -= delta_time; + else + next_run = 1; + +early_exit: if (gc_work->exiting) return; - /* - * Eviction will normally happen from the packet path, and not - * from this gc worker. - * - * This worker is only here to reap expired entries when system went - * idle after a busy period. - */ - if (next_run) { + if (next_run) gc_work->early_drop = false; - gc_work->next_bucket = 0; - } + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { - INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); + INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); gc_work->exiting = false; } @@ -1597,7 +1647,17 @@ void nf_conntrack_free(struct nf_conn *ct) */ WARN_ON(refcount_read(&ct->ct_general.use) != 0); - nf_ct_ext_destroy(ct); + if (ct->status & IPS_SRC_NAT_DONE) { + const struct nf_nat_hook *nat_hook; + + rcu_read_lock(); + nat_hook = rcu_dereference(nf_nat_hook); + if (nat_hook) + nat_hook->remove_nat_bysrc(ct); + rcu_read_unlock(); + } + + kfree(ct->ext); kmem_cache_free(nf_conntrack_cachep, ct); cnet = nf_ct_pernet(net); @@ -2464,13 +2524,7 @@ void nf_conntrack_cleanup_end(void) kvfree(nf_conntrack_hash); nf_conntrack_proto_fini(); - nf_conntrack_seqadj_fini(); - nf_conntrack_labels_fini(); nf_conntrack_helper_fini(); - nf_conntrack_timeout_fini(); - nf_conntrack_ecache_fini(); - nf_conntrack_tstamp_fini(); - nf_conntrack_acct_fini(); nf_conntrack_expect_fini(); kmem_cache_destroy(nf_conntrack_cachep); @@ -2625,39 +2679,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) return nf_conntrack_hash_resize(hashsize); } -static __always_inline unsigned int total_extension_size(void) -{ - /* remember to add new extensions below */ - BUILD_BUG_ON(NF_CT_EXT_NUM > 10); - - return sizeof(struct nf_ct_ext) + - sizeof(struct nf_conn_help) -#if IS_ENABLED(CONFIG_NF_NAT) - + sizeof(struct nf_conn_nat) -#endif - + sizeof(struct nf_conn_seqadj) - + sizeof(struct nf_conn_acct) -#ifdef CONFIG_NF_CONNTRACK_EVENTS - + sizeof(struct nf_conntrack_ecache) -#endif -#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP - + sizeof(struct nf_conn_tstamp) -#endif -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - + sizeof(struct nf_conn_timeout) -#endif -#ifdef CONFIG_NF_CONNTRACK_LABELS - + sizeof(struct nf_conn_labels) -#endif -#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) - + sizeof(struct nf_conn_synproxy) -#endif -#if IS_ENABLED(CONFIG_NET_ACT_CT) - + sizeof(struct nf_conn_act_ct_ext) -#endif - ; -}; - int nf_conntrack_init_start(void) { unsigned long nr_pages = totalram_pages(); @@ -2665,9 +2686,6 @@ int nf_conntrack_init_start(void) int ret = -ENOMEM; int i; - /* struct nf_ct_ext uses u8 to store offsets/size */ - BUILD_BUG_ON(total_extension_size() > 255u); - seqcount_spinlock_init(&nf_conntrack_generation, &nf_conntrack_locks_all_lock); @@ -2712,34 +2730,10 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_init(); - if (ret < 0) - goto err_acct; - - ret = nf_conntrack_tstamp_init(); - if (ret < 0) - goto err_tstamp; - - ret = nf_conntrack_ecache_init(); - if (ret < 0) - goto err_ecache; - - ret = nf_conntrack_timeout_init(); - if (ret < 0) - goto err_timeout; - ret = nf_conntrack_helper_init(); if (ret < 0) goto err_helper; - ret = nf_conntrack_labels_init(); - if (ret < 0) - goto err_labels; - - ret = nf_conntrack_seqadj_init(); - if (ret < 0) - goto err_seqadj; - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -2747,23 +2741,18 @@ int nf_conntrack_init_start(void) conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); + ret = register_nf_conntrack_bpf(); + if (ret < 0) + goto err_kfunc; + return 0; +err_kfunc: + cancel_delayed_work_sync(&conntrack_gc_work.dwork); + nf_conntrack_proto_fini(); err_proto: - nf_conntrack_seqadj_fini(); -err_seqadj: - nf_conntrack_labels_fini(); -err_labels: nf_conntrack_helper_fini(); err_helper: - nf_conntrack_timeout_fini(); -err_timeout: - nf_conntrack_ecache_fini(); -err_ecache: - nf_conntrack_tstamp_fini(); -err_tstamp: - nf_conntrack_acct_fini(); -err_acct: nf_conntrack_expect_fini(); err_expect: kmem_cache_destroy(nf_conntrack_cachep); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 41768ff19464..07e65b4e92f8 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -131,13 +131,13 @@ static void ecache_work(struct work_struct *work) } static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, - const unsigned int events, - const unsigned long missed, + const u32 events, + const u32 missed, const struct nf_ct_event *item) { - struct nf_conn *ct = item->ct; struct net *net = nf_ct_net(item->ct); struct nf_ct_event_notifier *notify; + u32 old, want; int ret; if (!((events | missed) & e->ctmask)) @@ -157,12 +157,13 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, if (likely(ret >= 0 && missed == 0)) return 0; - spin_lock_bh(&ct->lock); - if (ret < 0) - e->missed |= events; - else - e->missed &= ~missed; - spin_unlock_bh(&ct->lock); + do { + old = READ_ONCE(e->missed); + if (ret < 0) + want = old | events; + else + want = old & ~missed; + } while (cmpxchg(&e->missed, old, want) != old); return ret; } @@ -172,7 +173,7 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, { struct nf_conntrack_ecache *e; struct nf_ct_event item; - unsigned long missed; + unsigned int missed; int ret; if (!nf_ct_is_confirmed(ct)) @@ -211,7 +212,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) { struct nf_conntrack_ecache *e; struct nf_ct_event item; - unsigned long events; + unsigned int events; if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) return; @@ -304,12 +305,6 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) #define NF_CT_EVENTS_DEFAULT 1 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; -static const struct nf_ct_ext_type event_extend = { - .len = sizeof(struct nf_conntrack_ecache), - .align = __alignof__(struct nf_conntrack_ecache), - .id = NF_CT_EXT_ECACHE, -}; - void nf_conntrack_ecache_pernet_init(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); @@ -317,6 +312,8 @@ void nf_conntrack_ecache_pernet_init(struct net *net) net->ct.sysctl_events = nf_ct_events; cnet->ct_net = &net->ct; INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work); + + BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } void nf_conntrack_ecache_pernet_fini(struct net *net) @@ -325,19 +322,3 @@ void nf_conntrack_ecache_pernet_fini(struct net *net) cancel_delayed_work_sync(&cnet->ecache_dwork); } - -int nf_conntrack_ecache_init(void) -{ - int ret = nf_ct_extend_register(&event_extend); - if (ret < 0) - pr_err("Unable to register event extension\n"); - - BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ - - return ret; -} - -void nf_conntrack_ecache_fini(void) -{ - nf_ct_extend_unregister(&event_extend); -} diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 3dbe2329c3f1..1296fda54ac6 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -13,40 +13,90 @@ #include <linux/skbuff.h> #include <net/netfilter/nf_conntrack_extend.h> -static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; -static DEFINE_MUTEX(nf_ct_ext_type_mutex); +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_conntrack_timestamp.h> +#include <net/netfilter/nf_conntrack_timeout.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_synproxy.h> +#include <net/netfilter/nf_conntrack_act_ct.h> +#include <net/netfilter/nf_nat.h> + #define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */ -void nf_ct_ext_destroy(struct nf_conn *ct) +static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = { + [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help), +#if IS_ENABLED(CONFIG_NF_NAT) + [NF_CT_EXT_NAT] = sizeof(struct nf_conn_nat), +#endif + [NF_CT_EXT_SEQADJ] = sizeof(struct nf_conn_seqadj), + [NF_CT_EXT_ACCT] = sizeof(struct nf_conn_acct), +#ifdef CONFIG_NF_CONNTRACK_EVENTS + [NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache), +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct), +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp), +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + [NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels), +#endif +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + [NF_CT_EXT_SYNPROXY] = sizeof(struct nf_conn_synproxy), +#endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + [NF_CT_EXT_ACT_CT] = sizeof(struct nf_conn_act_ct_ext), +#endif +}; + +static __always_inline unsigned int total_extension_size(void) { - unsigned int i; - struct nf_ct_ext_type *t; - - for (i = 0; i < NF_CT_EXT_NUM; i++) { - rcu_read_lock(); - t = rcu_dereference(nf_ct_ext_types[i]); - - /* Here the nf_ct_ext_type might have been unregisterd. - * I.e., it has responsible to cleanup private - * area in all conntracks when it is unregisterd. - */ - if (t && t->destroy) - t->destroy(ct); - rcu_read_unlock(); - } - - kfree(ct->ext); + /* remember to add new extensions below */ + BUILD_BUG_ON(NF_CT_EXT_NUM > 10); + + return sizeof(struct nf_ct_ext) + + sizeof(struct nf_conn_help) +#if IS_ENABLED(CONFIG_NF_NAT) + + sizeof(struct nf_conn_nat) +#endif + + sizeof(struct nf_conn_seqadj) + + sizeof(struct nf_conn_acct) +#ifdef CONFIG_NF_CONNTRACK_EVENTS + + sizeof(struct nf_conntrack_ecache) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + + sizeof(struct nf_conn_tstamp) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + + sizeof(struct nf_conn_timeout) +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + + sizeof(struct nf_conn_labels) +#endif +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + + sizeof(struct nf_conn_synproxy) +#endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + + sizeof(struct nf_conn_act_ct_ext) +#endif + ; } void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { unsigned int newlen, newoff, oldlen, alloc; - struct nf_ct_ext_type *t; struct nf_ct_ext *new; /* Conntrack must not be confirmed to avoid races on reallocation. */ WARN_ON(nf_ct_is_confirmed(ct)); + /* struct nf_ct_ext uses u8 to store offsets/size */ + BUILD_BUG_ON(total_extension_size() > 255u); if (ct->ext) { const struct nf_ct_ext *old = ct->ext; @@ -58,16 +108,8 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) oldlen = sizeof(*new); } - rcu_read_lock(); - t = rcu_dereference(nf_ct_ext_types[id]); - if (!t) { - rcu_read_unlock(); - return NULL; - } - - newoff = ALIGN(oldlen, t->align); - newlen = newoff + t->len; - rcu_read_unlock(); + newoff = ALIGN(oldlen, __alignof__(struct nf_ct_ext)); + newlen = newoff + nf_ct_ext_type_len[id]; alloc = max(newlen, NF_CT_EXT_PREALLOC); new = krealloc(ct->ext, alloc, gfp); @@ -85,31 +127,3 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) return (void *)new + newoff; } EXPORT_SYMBOL(nf_ct_ext_add); - -/* This MUST be called in process context. */ -int nf_ct_extend_register(const struct nf_ct_ext_type *type) -{ - int ret = 0; - - mutex_lock(&nf_ct_ext_type_mutex); - if (nf_ct_ext_types[type->id]) { - ret = -EBUSY; - goto out; - } - - rcu_assign_pointer(nf_ct_ext_types[type->id], type); -out: - mutex_unlock(&nf_ct_ext_type_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_extend_register); - -/* This MUST be called in process context. */ -void nf_ct_extend_unregister(const struct nf_ct_ext_type *type) -{ - mutex_lock(&nf_ct_ext_type_mutex); - RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); - mutex_unlock(&nf_ct_ext_type_mutex); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ae4488a13c70..8dec42ec603e 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -550,11 +550,11 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) } EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); -static const struct nf_ct_ext_type helper_extend = { - .len = sizeof(struct nf_conn_help), - .align = __alignof__(struct nf_conn_help), - .id = NF_CT_EXT_HELPER, -}; +void nf_ct_set_auto_assign_helper_warned(struct net *net) +{ + nf_ct_pernet(net)->auto_assign_helper_warned = true; +} +EXPORT_SYMBOL_GPL(nf_ct_set_auto_assign_helper_warned); void nf_conntrack_helper_pernet_init(struct net *net) { @@ -565,28 +565,17 @@ void nf_conntrack_helper_pernet_init(struct net *net) int nf_conntrack_helper_init(void) { - int ret; nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); if (!nf_ct_helper_hash) return -ENOMEM; - ret = nf_ct_extend_register(&helper_extend); - if (ret < 0) { - pr_err("nf_ct_helper: Unable to register helper extension.\n"); - goto out_extend; - } - INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; -out_extend: - kvfree(nf_ct_helper_hash); - return ret; } void nf_conntrack_helper_fini(void) { - nf_ct_extend_unregister(&helper_extend); kvfree(nf_ct_helper_hash); } diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 522792556632..6e70e137a0a6 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -67,6 +67,8 @@ int nf_connlabels_get(struct net *net, unsigned int bits) net->ct.labels_used++; spin_unlock(&nf_connlabels_lock); + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); + return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_get); @@ -78,21 +80,3 @@ void nf_connlabels_put(struct net *net) spin_unlock(&nf_connlabels_lock); } EXPORT_SYMBOL_GPL(nf_connlabels_put); - -static const struct nf_ct_ext_type labels_extend = { - .len = sizeof(struct nf_conn_labels), - .align = __alignof__(struct nf_conn_labels), - .id = NF_CT_EXT_LABELS, -}; - -int nf_conntrack_labels_init(void) -{ - BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); - - return nf_ct_extend_register(&labels_extend); -} - -void nf_conntrack_labels_fini(void) -{ - nf_ct_extend_unregister(&labels_extend); -} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7032402ffd33..1ea2ad732d57 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -58,6 +58,12 @@ MODULE_LICENSE("GPL"); +struct ctnetlink_list_dump_ctx { + struct nf_conn *last; + unsigned int cpu; + bool done; +}; + static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *l4proto) @@ -1694,14 +1700,18 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb, static int ctnetlink_done_list(struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + + if (ctx->last) + nf_ct_put(ctx->last); + return 0; } static int ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; @@ -1712,12 +1722,12 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying struct hlist_nulls_head *list; struct net *net = sock_net(skb->sk); - if (cb->args[2]) + if (ctx->done) return 0; - last = (struct nf_conn *)cb->args[1]; + last = ctx->last; - for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { + for (cpu = ctx->cpu; cpu < nr_cpu_ids; cpu++) { struct ct_pcpu *pcpu; if (!cpu_possible(cpu)) @@ -1731,10 +1741,10 @@ restart: ct = nf_ct_tuplehash_to_ctrack(h); if (l3proto && nf_ct_l3num(ct) != l3proto) continue; - if (cb->args[1]) { + if (ctx->last) { if (ct != last) continue; - cb->args[1] = 0; + ctx->last = NULL; } /* We can't dump extension info for the unconfirmed @@ -1751,19 +1761,19 @@ restart: if (res < 0) { if (!refcount_inc_not_zero(&ct->ct_general.use)) continue; - cb->args[0] = cpu; - cb->args[1] = (unsigned long)ct; + ctx->cpu = cpu; + ctx->last = ct; spin_unlock_bh(&pcpu->lock); goto out; } } - if (cb->args[1]) { - cb->args[1] = 0; + if (ctx->last) { + ctx->last = NULL; goto restart; } spin_unlock_bh(&pcpu->lock); } - cb->args[2] = 1; + ctx->done = true; out: if (last) nf_ct_put(last); @@ -3878,6 +3888,8 @@ static int __init ctnetlink_init(void) { int ret; + BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx)); + ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { pr_err("ctnetlink_init: cannot register with nfnetlink.\n"); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 7d5708b92138..f3fa367b455f 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -45,30 +45,8 @@ MODULE_ALIAS_NFCT_HELPER("pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); -int -(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, - struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int protoff, struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); - -int -(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, - struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int protoff, struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound); - -void -(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *expect_orig, - struct nf_conntrack_expect *expect_reply) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_exp_gre); - -void -(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct, - struct nf_conntrack_expect *exp) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); +const struct nf_nat_pptp_hook *nf_nat_pptp_hook; +EXPORT_SYMBOL_GPL(nf_nat_pptp_hook); #if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) /* PptpControlMessageType names */ @@ -111,8 +89,8 @@ EXPORT_SYMBOL(pptp_msg_name); static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + const struct nf_nat_pptp_hook *hook; struct net *net = nf_ct_net(ct); - typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; pr_debug("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ @@ -122,9 +100,9 @@ static void pptp_expectfn(struct nf_conn *ct, /* Can you see how rusty this code is, compared with the pre-2.6.11 * one? That's what happened to my shiny newnat of 2002 ;( -HW */ - nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); - if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) - nf_nat_pptp_expectfn(ct, exp); + hook = rcu_dereference(nf_nat_pptp_hook); + if (hook && ct->master->status & IPS_NAT_MASK) + hook->expectfn(ct, exp); else { struct nf_conntrack_tuple inv_t; struct nf_conntrack_expect *exp_other; @@ -209,9 +187,9 @@ static void pptp_destroy_siblings(struct nf_conn *ct) static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) { struct nf_conntrack_expect *exp_orig, *exp_reply; + const struct nf_nat_pptp_hook *hook; enum ip_conntrack_dir dir; int ret = 1; - typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre; exp_orig = nf_ct_expect_alloc(ct); if (exp_orig == NULL) @@ -239,9 +217,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) IPPROTO_GRE, &callid, &peer_callid); exp_reply->expectfn = pptp_expectfn; - nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre); - if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK) - nf_nat_pptp_exp_gre(exp_orig, exp_reply); + hook = rcu_dereference(nf_nat_pptp_hook); + if (hook && ct->status & IPS_NAT_MASK) + hook->exp_gre(exp_orig, exp_reply); if (nf_ct_expect_related(exp_orig, 0) != 0) goto out_put_both; if (nf_ct_expect_related(exp_reply, 0) != 0) @@ -279,9 +257,9 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo) { struct nf_ct_pptp_master *info = nfct_help_data(ct); + const struct nf_nat_pptp_hook *hook; u_int16_t msg; __be16 cid = 0, pcid = 0; - typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; msg = ntohs(ctlh->messageType); pr_debug("inbound control message %s\n", pptp_msg_name(msg)); @@ -383,10 +361,9 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, goto invalid; } - nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); - if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(skb, ct, ctinfo, - protoff, ctlh, pptpReq); + hook = rcu_dereference(nf_nat_pptp_hook); + if (hook && ct->status & IPS_NAT_MASK) + return hook->inbound(skb, ct, ctinfo, protoff, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -407,9 +384,9 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo) { struct nf_ct_pptp_master *info = nfct_help_data(ct); + const struct nf_nat_pptp_hook *hook; u_int16_t msg; __be16 cid = 0, pcid = 0; - typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; msg = ntohs(ctlh->messageType); pr_debug("outbound control message %s\n", pptp_msg_name(msg)); @@ -479,10 +456,9 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, goto invalid; } - nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); - if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(skb, ct, ctinfo, - protoff, ctlh, pptpReq); + hook = rcu_dereference(nf_nat_pptp_hook); + if (hook && ct->status & IPS_NAT_MASK) + return hook->outbound(skb, ct, ctinfo, protoff, ctlh, pptpReq); return NF_ACCEPT; invalid: diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 3066449f8bd8..7ab2b25b57bc 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -232,19 +232,3 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct, this_way->offset_after : this_way->offset_before; } EXPORT_SYMBOL_GPL(nf_ct_seq_offset); - -static const struct nf_ct_ext_type nf_ct_seqadj_extend = { - .len = sizeof(struct nf_conn_seqadj), - .align = __alignof__(struct nf_conn_seqadj), - .id = NF_CT_EXT_SEQADJ, -}; - -int nf_conntrack_seqadj_init(void) -{ - return nf_ct_extend_register(&nf_ct_seqadj_extend); -} - -void nf_conntrack_seqadj_fini(void) -{ - nf_ct_extend_unregister(&nf_ct_seqadj_extend); -} diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 14387e0b8008..cec166ecba77 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -22,12 +22,8 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> -struct nf_ct_timeout * -(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; -EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); - -void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout) __read_mostly; -EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook); +const struct nf_ct_timeout_hooks *nf_ct_timeout_hook __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_timeout_hook); static int untimeout(struct nf_conn *ct, void *timeout) { @@ -48,31 +44,30 @@ EXPORT_SYMBOL_GPL(nf_ct_untimeout); static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout) { - typeof(nf_ct_timeout_put_hook) timeout_put; + const struct nf_ct_timeout_hooks *h = rcu_dereference(nf_ct_timeout_hook); - timeout_put = rcu_dereference(nf_ct_timeout_put_hook); - if (timeout_put) - timeout_put(timeout); + if (h) + h->timeout_put(timeout); } int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) { - typeof(nf_ct_timeout_find_get_hook) timeout_find_get; + const struct nf_ct_timeout_hooks *h; struct nf_ct_timeout *timeout; struct nf_conn_timeout *timeout_ext; const char *errmsg = NULL; int ret = 0; rcu_read_lock(); - timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); - if (!timeout_find_get) { + h = rcu_dereference(nf_ct_timeout_hook); + if (!h) { ret = -ENOENT; errmsg = "Timeout policy base is empty"; goto out; } - timeout = timeout_find_get(net, timeout_name); + timeout = h->timeout_find_get(net, timeout_name); if (!timeout) { ret = -ENOENT; pr_info_ratelimited("No such timeout policy \"%s\"\n", @@ -119,37 +114,18 @@ EXPORT_SYMBOL_GPL(nf_ct_set_timeout); void nf_ct_destroy_timeout(struct nf_conn *ct) { struct nf_conn_timeout *timeout_ext; - typeof(nf_ct_timeout_put_hook) timeout_put; + const struct nf_ct_timeout_hooks *h; rcu_read_lock(); - timeout_put = rcu_dereference(nf_ct_timeout_put_hook); + h = rcu_dereference(nf_ct_timeout_hook); - if (timeout_put) { + if (h) { timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) { - timeout_put(timeout_ext->timeout); + h->timeout_put(timeout_ext->timeout); RCU_INIT_POINTER(timeout_ext->timeout, NULL); } } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout); - -static const struct nf_ct_ext_type timeout_extend = { - .len = sizeof(struct nf_conn_timeout), - .align = __alignof__(struct nf_conn_timeout), - .id = NF_CT_EXT_TIMEOUT, -}; - -int nf_conntrack_timeout_init(void) -{ - int ret = nf_ct_extend_register(&timeout_extend); - if (ret < 0) - pr_err("nf_ct_timeout: Unable to register timeout extension.\n"); - return ret; -} - -void nf_conntrack_timeout_fini(void) -{ - nf_ct_extend_unregister(&timeout_extend); -} diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index f656d393fa92..9e43a0a59e73 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -19,27 +19,7 @@ static bool nf_ct_tstamp __read_mostly; module_param_named(tstamp, nf_ct_tstamp, bool, 0644); MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); -static const struct nf_ct_ext_type tstamp_extend = { - .len = sizeof(struct nf_conn_tstamp), - .align = __alignof__(struct nf_conn_tstamp), - .id = NF_CT_EXT_TSTAMP, -}; - void nf_conntrack_tstamp_pernet_init(struct net *net) { net->ct.sysctl_tstamp = nf_ct_tstamp; } - -int nf_conntrack_tstamp_init(void) -{ - int ret; - ret = nf_ct_extend_register(&tstamp_extend); - if (ret < 0) - pr_err("Unable to register extension\n"); - return ret; -} - -void nf_conntrack_tstamp_fini(void) -{ - nf_ct_extend_unregister(&tstamp_extend); -} diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index a579e59ee5c5..7873bd1389c3 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,7 +19,7 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) skb_push(skb, skb->mac_len); skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); dev_queue_xmit(skb); } diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b90eca7a2f22..3db256da919b 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -39,8 +39,14 @@ flow_offload_fill_dir(struct flow_offload *flow, ft->l3proto = ctt->src.l3num; ft->l4proto = ctt->dst.protonum; - ft->src_port = ctt->src.u.tcp.port; - ft->dst_port = ctt->dst.u.tcp.port; + + switch (ctt->dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ft->src_port = ctt->src.u.tcp.port; + ft->dst_port = ctt->dst.u.tcp.port; + break; + } } struct flow_offload *flow_offload_alloc(struct nf_conn *ct) @@ -399,7 +405,8 @@ EXPORT_SYMBOL_GPL(flow_offload_lookup); static int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), + void (*iter)(struct nf_flowtable *flowtable, + struct flow_offload *flow, void *data), void *data) { struct flow_offload_tuple_rhash *tuplehash; @@ -423,7 +430,7 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); - iter(flow, data); + iter(flow_table, flow, data); } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); @@ -451,10 +458,9 @@ static bool nf_flow_has_stale_dst(struct flow_offload *flow) flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); } -static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) +static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, + struct flow_offload *flow, void *data) { - struct nf_flowtable *flow_table = data; - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_has_stale_dst(flow)) @@ -479,7 +485,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work) struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -595,7 +601,8 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) } EXPORT_SYMBOL_GPL(nf_flow_table_init); -static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) +static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table, + struct flow_offload *flow, void *data) { struct net_device *dev = data; @@ -637,11 +644,10 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); nf_flow_table_offload_flush(flow_table); if (nf_flowtable_hw_offload(flow_table)) - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, - flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 6257d87c3a56..32c0eb1b4821 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -170,6 +170,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; + u8 ipproto; if (!pskb_may_pull(skb, sizeof(*iph) + offset)) return -1; @@ -183,13 +184,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, thoff += offset; - switch (iph->protocol) { + ipproto = iph->protocol; + switch (ipproto) { case IPPROTO_TCP: *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: *hdrsize = sizeof(struct udphdr); break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + *hdrsize = sizeof(struct gre_base_hdr); + break; +#endif default: return -1; } @@ -200,15 +207,29 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; + switch (ipproto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return -1; + break; + } + } + iph = (struct iphdr *)(skb_network_header(skb) + offset); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET; - tuple->l4proto = iph->protocol; + tuple->l4proto = ipproto; tuple->iifidx = dev->ifindex; nf_flow_tuple_encap(skb, tuple); @@ -358,7 +379,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, nf_flow_nat_ip(flow, skb, thoff, dir, iph); ip_decrease_ttl(iph); - skb->tstamp = 0; + skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); @@ -503,6 +524,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; + u8 nexthdr; thoff = sizeof(*ip6h) + offset; if (!pskb_may_pull(skb, thoff)) @@ -510,13 +532,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - switch (ip6h->nexthdr) { + nexthdr = ip6h->nexthdr; + switch (nexthdr) { case IPPROTO_TCP: *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: *hdrsize = sizeof(struct udphdr); break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + *hdrsize = sizeof(struct gre_base_hdr); + break; +#endif default: return -1; } @@ -527,15 +555,29 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; + switch (nexthdr) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return -1; + break; + } + } + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET6; - tuple->l4proto = ip6h->nexthdr; + tuple->l4proto = nexthdr; tuple->iifidx = dev->ifindex; nf_flow_tuple_encap(skb, tuple); @@ -593,7 +635,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, nf_flow_nat_ipv6(flow, skb, dir, ip6h); ip6h->hop_limit--; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index fc4265acd9c4..11b6e1942092 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -20,7 +20,6 @@ static struct workqueue_struct *nf_flow_offload_stats_wq; struct flow_offload_work { struct list_head list; enum flow_cls_command cmd; - int priority; struct nf_flowtable *flowtable; struct flow_offload *flow; struct work_struct work; @@ -174,6 +173,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); break; case IPPROTO_UDP: + case IPPROTO_GRE: break; default: return -EOPNOTSUPP; @@ -182,15 +182,22 @@ static int nf_flow_rule_match(struct nf_flow_match *match, key->basic.ip_proto = tuple->l4proto; mask->basic.ip_proto = 0xff; - key->tp.src = tuple->src_port; - mask->tp.src = 0xffff; - key->tp.dst = tuple->dst_port; - mask->tp.dst = 0xffff; - match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | BIT(FLOW_DISSECTOR_KEY_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_PORTS); + BIT(FLOW_DISSECTOR_KEY_BASIC); + + switch (tuple->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + key->tp.src = tuple->src_port; + mask->tp.src = 0xffff; + key->tp.dst = tuple->dst_port; + mask->tp.dst = 0xffff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS); + break; + } + return 0; } @@ -866,7 +873,8 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { return nf_flow_offload_tuple(offload->flowtable, offload->flow, - flow_rule, dir, offload->priority, + flow_rule, dir, + offload->flowtable->priority, FLOW_CLS_REPLACE, NULL, &offload->flowtable->flow_block.cb_list); } @@ -875,7 +883,8 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, - offload->priority, FLOW_CLS_DESTROY, NULL, + offload->flowtable->priority, + FLOW_CLS_DESTROY, NULL, &offload->flowtable->flow_block.cb_list); } @@ -926,7 +935,8 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, struct flow_stats *stats) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, - offload->priority, FLOW_CLS_STATS, stats, + offload->flowtable->priority, + FLOW_CLS_STATS, stats, &offload->flowtable->flow_block.cb_list); } @@ -1004,7 +1014,6 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, offload->cmd = cmd; offload->flow = flow; - offload->priority = flowtable->priority; offload->flowtable = flowtable; INIT_WORK(&offload->work, flow_offload_work_handler); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ffcf6529afc3..7981be526f26 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -801,7 +801,7 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) return i->status & IPS_NAT_MASK ? 1 : 0; } -static void __nf_nat_cleanup_conntrack(struct nf_conn *ct) +static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { unsigned int h; @@ -823,7 +823,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * will delete entry from already-freed table. */ if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status)) - __nf_nat_cleanup_conntrack(ct); + nf_nat_cleanup_conntrack(ct); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. @@ -831,20 +831,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) return 0; } -/* No one using conntrack by the time this called. */ -static void nf_nat_cleanup_conntrack(struct nf_conn *ct) -{ - if (ct->status & IPS_SRC_NAT_DONE) - __nf_nat_cleanup_conntrack(ct); -} - -static struct nf_ct_ext_type nat_extend __read_mostly = { - .len = sizeof(struct nf_conn_nat), - .align = __alignof__(struct nf_conn_nat), - .destroy = nf_nat_cleanup_conntrack, - .id = NF_CT_EXT_NAT, -}; - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> @@ -1136,6 +1122,7 @@ static const struct nf_nat_hook nat_hook = { .decode_session = __nf_nat_decode_session, #endif .manip_pkt = nf_nat_manip_pkt, + .remove_nat_bysrc = nf_nat_cleanup_conntrack, }; static int __init nf_nat_init(void) @@ -1151,19 +1138,11 @@ static int __init nf_nat_init(void) if (!nf_nat_bysource) return -ENOMEM; - ret = nf_ct_extend_register(&nat_extend); - if (ret < 0) { - kvfree(nf_nat_bysource); - pr_err("Unable to register extension\n"); - return ret; - } - for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_nat_locks[i]); ret = register_pernet_subsys(&nat_net_ops); if (ret < 0) { - nf_ct_extend_unregister(&nat_extend); kvfree(nf_nat_bysource); return ret; } @@ -1182,7 +1161,6 @@ static void __exit nf_nat_cleanup(void) nf_ct_iterate_destroy(nf_nat_proto_clean, &clean); - nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nf_nat_hook, NULL); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 2dfc5dae0656..e479dd0561c5 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -236,12 +236,6 @@ synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff, return 1; } -static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = { - .len = sizeof(struct nf_conn_synproxy), - .align = __alignof__(struct nf_conn_synproxy), - .id = NF_CT_EXT_SYNPROXY, -}; - #ifdef CONFIG_PROC_FS static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos) { @@ -387,28 +381,12 @@ static struct pernet_operations synproxy_net_ops = { static int __init synproxy_core_init(void) { - int err; - - err = nf_ct_extend_register(&nf_ct_synproxy_extend); - if (err < 0) - goto err1; - - err = register_pernet_subsys(&synproxy_net_ops); - if (err < 0) - goto err2; - - return 0; - -err2: - nf_ct_extend_unregister(&nf_ct_synproxy_extend); -err1: - return err; + return register_pernet_subsys(&synproxy_net_ops); } static void __exit synproxy_core_exit(void) { unregister_pernet_subsys(&synproxy_net_ops); - nf_ct_extend_unregister(&nf_ct_synproxy_extend); } module_init(synproxy_core_init); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a312dc961cab..5ddfdb2adaf1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -550,6 +550,58 @@ static int nft_delflowtable(struct nft_ctx *ctx, return err; } +static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) +{ + int i; + + for (i = track->regs[dreg].num_reg; i > 0; i--) + __nft_reg_track_cancel(track, dreg - i); +} + +static void __nft_reg_track_update(struct nft_regs_track *track, + const struct nft_expr *expr, + u8 dreg, u8 num_reg) +{ + track->regs[dreg].selector = expr; + track->regs[dreg].bitwise = NULL; + track->regs[dreg].num_reg = num_reg; +} + +void nft_reg_track_update(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg, u8 len) +{ + unsigned int regcount; + int i; + + __nft_reg_track_clobber(track, dreg); + + regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); + for (i = 0; i < regcount; i++, dreg++) + __nft_reg_track_update(track, expr, dreg, i); +} +EXPORT_SYMBOL_GPL(nft_reg_track_update); + +void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len) +{ + unsigned int regcount; + int i; + + __nft_reg_track_clobber(track, dreg); + + regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); + for (i = 0; i < regcount; i++, dreg++) + __nft_reg_track_cancel(track, dreg); +} +EXPORT_SYMBOL_GPL(nft_reg_track_cancel); + +void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg) +{ + track->regs[dreg].selector = NULL; + track->regs[dreg].bitwise = NULL; + track->regs[dreg].num_reg = 0; +} +EXPORT_SYMBOL_GPL(__nft_reg_track_cancel); + /* * Tables */ @@ -1072,6 +1124,30 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(obj->key.name, k->name); } +static bool nft_supported_family(u8 family) +{ + return false +#ifdef CONFIG_NF_TABLES_INET + || family == NFPROTO_INET +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + || family == NFPROTO_IPV4 +#endif +#ifdef CONFIG_NF_TABLES_ARP + || family == NFPROTO_ARP +#endif +#ifdef CONFIG_NF_TABLES_NETDEV + || family == NFPROTO_NETDEV +#endif +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) + || family == NFPROTO_BRIDGE +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + || family == NFPROTO_IPV6 +#endif + ; +} + static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -1086,6 +1162,9 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, u32 flags = 0; int err; + if (!nft_supported_family(family)) + return -EOPNOTSUPP; + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask, @@ -8263,7 +8342,16 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); static bool nft_expr_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { - return false; + if (!expr->ops->reduce) { + pr_warn_once("missing reduce for expression %s ", + expr->ops->type->name); + return false; + } + + if (nft_reduce_is_readonly(expr)) + return false; + + return expr->ops->reduce(track, expr); } static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 8af98239655d..53f40e473855 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -67,6 +67,20 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +static void nft_cmp16_fast_eval(const struct nft_expr *expr, + struct nft_regs *regs) +{ + const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); + const u64 *reg_data = (const u64 *)®s->data[priv->sreg]; + const u64 *mask = (const u64 *)&priv->mask; + const u64 *data = (const u64 *)&priv->data; + + if (((reg_data[0] & mask[0]) == data[0] && + ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv) + return; + regs->verdict.code = NFT_BREAK; +} + static noinline void __nft_trace_verdict(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_regs *regs) @@ -225,6 +239,8 @@ next_rule: nft_rule_dp_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, ®s); + else if (expr->ops == &nft_cmp16_fast_ops) + nft_cmp16_fast_eval(expr, ®s); else if (expr->ops == &nft_bitwise_fast_ops) nft_bitwise_fast_eval(expr, ®s); else if (expr->ops != &nft_payload_fast_ops || diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c57673d499be..b0d8888a539b 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -605,6 +605,11 @@ static struct pernet_operations cttimeout_ops = { .size = sizeof(struct nfct_timeout_pernet), }; +static const struct nf_ct_timeout_hooks hooks = { + .timeout_find_get = ctnl_timeout_find_get, + .timeout_put = ctnl_timeout_put, +}; + static int __init cttimeout_init(void) { int ret; @@ -619,8 +624,7 @@ static int __init cttimeout_init(void) "nfnetlink.\n"); goto err_out; } - RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get); - RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put); + RCU_INIT_POINTER(nf_ct_timeout_hook, &hooks); return 0; err_out: @@ -633,8 +637,7 @@ static void __exit cttimeout_exit(void) nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); - RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); - RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + RCU_INIT_POINTER(nf_ct_timeout_hook, NULL); synchronize_rcu(); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ae9c0756bba5..d97eb280cb2e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -460,6 +460,7 @@ __build_packet_message(struct nfnl_log_net *log, sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; const unsigned char *hwhdrp; + ktime_t tstamp; nlh = nfnl_msg_put(inst->skb, 0, 0, nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), @@ -588,9 +589,10 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; } - if (hooknum <= NF_INET_FORWARD && skb->tstamp) { + tstamp = skb_tstamp_cond(skb, false); + if (hooknum <= NF_INET_FORWARD && tstamp) { struct nfulnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + struct timespec64 kts = ktime_to_timespec64(tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 64a6acb6aeae..a364f8e5e698 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -392,6 +392,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, bool csum_verify; char *secdata = NULL; u32 seclen = 0; + ktime_t tstamp; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -402,11 +403,13 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(u_int32_t)) /* priority */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ + nla_total_size(sizeof(u_int32_t)); /* cap_len */ - if (entskb->tstamp) + tstamp = skb_tstamp_cond(entskb, false); + if (tstamp) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); size += nfqnl_get_bridge_size(entry); @@ -559,6 +562,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark))) goto nla_put_failure; + if (entskb->priority && + nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority))) + goto nla_put_failure; + if (indev && entskb->dev && skb_mac_header_was_set(entskb) && skb_mac_header_len(entskb) != 0) { @@ -577,9 +584,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_bridge(entry, skb) < 0) goto nla_put_failure; - if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) { + if (entry->state.hook <= NF_INET_FORWARD && tstamp) { struct nfqnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); + struct timespec64 kts = ktime_to_timespec64(tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); @@ -1020,11 +1027,13 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { [NFQA_CT] = { .type = NLA_UNSPEC }, [NFQA_EXP] = { .type = NLA_UNSPEC }, [NFQA_VLAN] = { .type = NLA_NESTED }, + [NFQA_PRIORITY] = { .type = NLA_U32 }, }; static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PRIORITY] = { .type = NLA_U32 }, }; static struct nfqnl_instance * @@ -1105,6 +1114,9 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb, if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + if (nfqa[NFQA_PRIORITY]) + entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY])); + nfqnl_reinject(entry, verdict); } return 0; @@ -1231,6 +1243,9 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + if (nfqa[NFQA_PRIORITY]) + entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY])); + nfqnl_reinject(entry, verdict); return 0; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 7b727d3ebf9d..38caa66632b4 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -283,12 +283,16 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, { const struct nft_bitwise *priv = nft_expr_priv(expr); const struct nft_bitwise *bitwise; + unsigned int regcount; + u8 dreg; + int i; if (!track->regs[priv->sreg].selector) return false; bitwise = nft_expr_priv(expr); if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && + track->regs[priv->sreg].num_reg == 0 && track->regs[priv->dreg].bitwise && track->regs[priv->dreg].bitwise->ops == expr->ops && priv->sreg == bitwise->sreg && @@ -302,17 +306,21 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, return true; } - if (track->regs[priv->sreg].bitwise) { - track->regs[priv->dreg].selector = NULL; - track->regs[priv->dreg].bitwise = NULL; + if (track->regs[priv->sreg].bitwise || + track->regs[priv->sreg].num_reg != 0) { + nft_reg_track_cancel(track, priv->dreg, priv->len); return false; } if (priv->sreg != priv->dreg) { - track->regs[priv->dreg].selector = - track->regs[priv->sreg].selector; + nft_reg_track_update(track, track->regs[priv->sreg].selector, + priv->dreg, priv->len); } - track->regs[priv->dreg].bitwise = expr; + + dreg = priv->dreg; + regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE); + for (i = 0; i < regcount; i++, dreg++) + track->regs[priv->dreg].bitwise = expr; return false; } @@ -447,8 +455,7 @@ static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, } if (track->regs[priv->sreg].bitwise) { - track->regs[priv->dreg].selector = NULL; - track->regs[priv->dreg].bitwise = NULL; + nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); return false; } @@ -522,3 +529,4 @@ bool nft_expr_reduce_bitwise(struct nft_regs_track *track, return false; } +EXPORT_SYMBOL_GPL(nft_expr_reduce_bitwise); diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e646e9ee4a98..d77609144b26 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -172,8 +172,7 @@ static bool nft_byteorder_reduce(struct nft_regs_track *track, { struct nft_byteorder *priv = nft_expr_priv(expr); - track->regs[priv->dreg].selector = NULL; - track->regs[priv->dreg].bitwise = NULL; + nft_reg_track_cancel(track, priv->dreg, priv->len); return false; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 47b6d05f1ae6..6528f76ca29e 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -193,6 +193,7 @@ static const struct nft_expr_ops nft_cmp_ops = { .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, + .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_offload, }; @@ -269,15 +270,108 @@ const struct nft_expr_ops nft_cmp_fast_ops = { .eval = NULL, /* inlined */ .init = nft_cmp_fast_init, .dump = nft_cmp_fast_dump, + .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_fast_offload, }; +static u32 nft_cmp_mask(u32 bitlen) +{ + return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen)); +} + +static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen) +{ + int len = bitlen / BITS_PER_BYTE; + int i, words = len / sizeof(u32); + + for (i = 0; i < words; i++) { + data->data[i] = 0xffffffff; + bitlen -= sizeof(u32) * BITS_PER_BYTE; + } + + if (len % sizeof(u32)) + data->data[i++] = nft_cmp_mask(bitlen); + + for (; i < 4; i++) + data->data[i] = 0; +} + +static int nft_cmp16_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc, + tb[NFTA_CMP_DATA]); + if (err < 0) + return err; + + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + if (err < 0) + return err; + + nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE); + priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; + priv->len = desc.len; + + return 0; +} + +static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); + struct nft_cmp_expr cmp = { + .data = priv->data, + .sreg = priv->sreg, + .len = priv->len, + .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ, + }; + + return __nft_cmp_offload(ctx, flow, &cmp); +} + +static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); + enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; + + if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + + +const struct nft_expr_ops nft_cmp16_fast_ops = { + .type = &nft_cmp_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_cmp16_fast_init, + .dump = nft_cmp16_fast_dump, + .reduce = NFT_REDUCE_READONLY, + .offload = nft_cmp16_fast_offload, +}; + static const struct nft_expr_ops * nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data_desc desc; struct nft_data data; enum nft_cmp_ops op; + u8 sreg; int err; if (tb[NFTA_CMP_SREG] == NULL || @@ -306,9 +400,16 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) if (desc.type != NFT_DATA_VALUE) goto err1; - if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ)) - return &nft_cmp_fast_ops; + sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) { + if (desc.len <= sizeof(u32)) + return &nft_cmp_fast_ops; + else if (desc.len <= sizeof(data) && + ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) || + (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0))) + return &nft_cmp16_fast_ops; + } return &nft_cmp_ops; err1: nft_data_release(&data, desc.type); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f69cc73c5813..c16172427622 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -731,6 +731,14 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static struct nft_expr_type nft_match_type; +static bool nft_match_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct xt_match *match = expr->ops->data; + + return strcmp(match->name, "comment") == 0; +} + static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -773,6 +781,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, ops->dump = nft_match_dump; ops->validate = nft_match_validate; ops->data = match; + ops->reduce = nft_match_reduce; matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); if (matchsize > NFT_MATCH_LARGE_THRESH) { @@ -862,6 +871,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, ops->dump = nft_target_dump; ops->validate = nft_target_validate; ops->data = target; + ops->reduce = NFT_REDUCE_READONLY; if (family == NFPROTO_BRIDGE) ops->eval = nft_target_eval_bridge; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 3362417ebfdb..9de1462e4ac4 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -257,6 +257,7 @@ static const struct nft_expr_ops nft_connlimit_ops = { .destroy_clone = nft_connlimit_destroy_clone, .dump = nft_connlimit_dump, .gc = nft_connlimit_gc, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_connlimit_type __read_mostly = { diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index f179e8c3b0ca..da9083605a61 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -293,6 +293,7 @@ static const struct nft_expr_ops nft_counter_ops = { .destroy_clone = nft_counter_destroy, .dump = nft_counter_dump, .clone = nft_counter_clone, + .reduce = NFT_REDUCE_READONLY, .offload = nft_counter_offload, .offload_stats = nft_counter_offload_stats, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 5adf8bb628a8..d8e1614918a1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -26,6 +26,7 @@ struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; + u8 len; union { u8 dreg; u8 sreg; @@ -500,6 +501,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } + priv->len = len; err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) @@ -608,6 +610,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } } + priv->len = len; err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; @@ -677,6 +680,29 @@ nla_put_failure: return -1; } +static bool nft_ct_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + const struct nft_ct *ct; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + ct = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->key != ct->key) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return nft_expr_reduce_bitwise(track, expr); +} + static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); @@ -710,8 +736,27 @@ static const struct nft_expr_ops nft_ct_get_ops = { .init = nft_ct_get_init, .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, + .reduce = nft_ct_get_reduce, }; +static bool nft_ct_set_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + int i; + + for (i = 0; i < NFT_REG32_NUM; i++) { + if (!track->regs[i].selector) + continue; + + if (track->regs[i].selector->ops != &nft_ct_get_ops) + continue; + + __nft_reg_track_cancel(track, i); + } + + return false; +} + static const struct nft_expr_ops nft_ct_set_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), @@ -719,6 +764,7 @@ static const struct nft_expr_ops nft_ct_set_ops = { .init = nft_ct_set_init, .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, + .reduce = nft_ct_set_reduce, }; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -729,6 +775,7 @@ static const struct nft_expr_ops nft_ct_set_zone_ops = { .init = nft_ct_set_init, .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, + .reduce = nft_ct_set_reduce, }; #endif @@ -785,6 +832,7 @@ static const struct nft_expr_ops nft_notrack_ops = { .type = &nft_notrack_type, .size = NFT_EXPR_SIZE(0), .eval = nft_notrack_eval, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_notrack_type __read_mostly = { @@ -1041,6 +1089,9 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, if (err < 0) goto err_put_helper; + /* Avoid the bogus warning, helper will be assigned after CT init */ + nf_ct_set_auto_assign_helper_warned(ctx->net); + return 0; err_put_helper: diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 5b5c607fbf83..63507402716d 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -79,6 +79,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .eval = nft_dup_netdev_eval, .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, + .reduce = NFT_REDUCE_READONLY, .offload = nft_dup_netdev_offload, .offload_action = nft_dup_netdev_offload_action, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 87f3af4645d9..22f70b543fa2 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -413,6 +413,7 @@ static const struct nft_expr_ops nft_dynset_ops = { .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, + .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_dynset_type __read_mostly = { diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 9e927ab4df15..22c3e05b52db 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -308,6 +308,63 @@ err: regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int i, tcphdr_len, optl; + struct tcphdr *tcph; + u8 *opt; + + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); + if (!tcph) + goto err; + + if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len)) + goto drop; + + opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); + if (!opt) + goto err; + for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { + unsigned int j; + + optl = optlen(opt, i); + if (priv->type != opt[i]) + continue; + + if (i + optl > tcphdr_len) + goto drop; + + for (j = 0; j < optl; ++j) { + u16 n = TCPOPT_NOP; + u16 o = opt[i+j]; + + if ((i + j) % 2 == 0) { + o <<= 8; + n <<= 8; + } + inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o), + htons(n), false); + } + memset(opt + i, TCPOPT_NOP, optl); + return; + } + + /* option not found, continue. This allows to do multiple + * option removals per rule. + */ + return; +err: + regs->verdict.code = NFT_BREAK; + return; +drop: + /* can't remove, no choice but to drop */ + regs->verdict.code = NF_DROP; +} + static void nft_exthdr_sctp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -457,6 +514,28 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->len); } +static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + + if (tb[NFTA_EXTHDR_SREG] || + tb[NFTA_EXTHDR_DREG] || + tb[NFTA_EXTHDR_FLAGS] || + tb[NFTA_EXTHDR_OFFSET] || + tb[NFTA_EXTHDR_LEN]) + return -EINVAL; + + if (!tb[NFTA_EXTHDR_TYPE]) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->op = NFT_EXTHDR_OP_TCPOPT; + + return 0; +} + static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -517,12 +596,47 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr) return nft_exthdr_dump_common(skb, priv); } +static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + return nft_exthdr_dump_common(skb, priv); +} + +static bool nft_exthdr_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + const struct nft_exthdr *exthdr; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + exthdr = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->type != exthdr->type || + priv->op != exthdr->op || + priv->flags != exthdr->flags || + priv->offset != exthdr->offset || + priv->len != exthdr->len) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return nft_expr_reduce_bitwise(track, expr); +} + static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, + .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_ipv4_ops = { @@ -531,6 +645,7 @@ static const struct nft_expr_ops nft_exthdr_ipv4_ops = { .eval = nft_exthdr_ipv4_eval, .init = nft_exthdr_ipv4_init, .dump = nft_exthdr_dump, + .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_ops = { @@ -539,6 +654,7 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = { .eval = nft_exthdr_tcp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, + .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { @@ -547,6 +663,16 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { .eval = nft_exthdr_tcp_set_eval, .init = nft_exthdr_tcp_set_init, .dump = nft_exthdr_dump_set, + .reduce = NFT_REDUCE_READONLY, +}; + +static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_tcp_strip_eval, + .init = nft_exthdr_tcp_strip_init, + .dump = nft_exthdr_dump_strip, + .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_sctp_ops = { @@ -555,6 +681,7 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = { .eval = nft_exthdr_sctp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, + .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops * @@ -576,7 +703,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, return &nft_exthdr_tcp_set_ops; if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_tcp_ops; - break; + return &nft_exthdr_tcp_strip_ops; case NFT_EXTHDR_OP_IPV6: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_ipv6_ops; diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index b10ce732b337..f198f2d9ef90 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -156,5 +156,47 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, } EXPORT_SYMBOL_GPL(nft_fib_store_result); +bool nft_fib_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + unsigned int len = NFT_REG32_SIZE; + const struct nft_fib *fib; + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + break; + case NFT_FIB_RESULT_OIFNAME: + if (priv->flags & NFTA_FIB_F_PRESENT) + len = NFT_REG32_SIZE; + else + len = IFNAMSIZ; + break; + case NFT_FIB_RESULT_ADDRTYPE: + break; + default: + WARN_ON_ONCE(1); + break; + } + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, len); + return false; + } + + fib = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->result != fib->result || + priv->flags != fib->flags) { + nft_reg_track_update(track, expr, priv->dreg, len); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(nft_fib_reduce); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c index a88d44e163d1..666a3741d20b 100644 --- a/net/netfilter/nft_fib_inet.c +++ b/net/netfilter/nft_fib_inet.c @@ -49,6 +49,7 @@ static const struct nft_expr_ops nft_fib_inet_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, + .reduce = nft_fib_reduce, }; static struct nft_expr_type nft_fib_inet_type __read_mostly = { diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index 3f3478abd845..9121ec64e918 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -58,6 +58,7 @@ static const struct nft_expr_ops nft_fib_netdev_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, + .reduce = nft_fib_reduce, }; static struct nft_expr_type nft_fib_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 0af34ad41479..900d48c810a1 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, break; case IPPROTO_UDP: break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: { + struct nf_conntrack_tuple *tuple; + + if (ct->status & IPS_NAT_MASK) + goto out; + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + /* No support for GRE v1 */ + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) + goto out; + break; + } +#endif default: goto out; } @@ -428,6 +441,7 @@ static const struct nft_expr_ops nft_flow_offload_ops = { .destroy = nft_flow_offload_destroy, .validate = nft_flow_offload_validate, .dump = nft_flow_offload_dump, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_flow_offload_type __read_mostly = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 619e394a91de..7c5876dc9ff2 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -145,7 +145,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, return; skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); neigh_xmit(neigh_table, dev, addr, skb); out: regs->verdict.code = verdict; @@ -217,6 +217,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = { .init = nft_fwd_neigh_init, .dump = nft_fwd_neigh_dump, .validate = nft_fwd_validate, + .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_fwd_netdev_ops = { @@ -226,6 +227,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, + .reduce = NFT_REDUCE_READONLY, .offload = nft_fwd_netdev_offload, .offload_action = nft_fwd_netdev_offload_action, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index f829f5289e16..e5631e88b285 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -165,6 +165,16 @@ nla_put_failure: return -1; } +static bool nft_jhash_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_jhash *priv = nft_expr_priv(expr); + + nft_reg_track_cancel(track, priv->dreg, sizeof(u32)); + + return false; +} + static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -185,6 +195,30 @@ nla_put_failure: return -1; } +static bool nft_symhash_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + struct nft_symhash *symhash; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); + return false; + } + + symhash = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->offset != symhash->offset || + priv->modulus != symhash->modulus) { + nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return false; +} + static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, @@ -192,6 +226,7 @@ static const struct nft_expr_ops nft_jhash_ops = { .eval = nft_jhash_eval, .init = nft_jhash_init, .dump = nft_jhash_dump, + .reduce = nft_jhash_reduce, }; static const struct nft_expr_ops nft_symhash_ops = { @@ -200,6 +235,7 @@ static const struct nft_expr_ops nft_symhash_ops = { .eval = nft_symhash_eval, .init = nft_symhash_init, .dump = nft_symhash_dump, + .reduce = nft_symhash_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index d0f67d325bdf..b80f7b507349 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -223,6 +223,17 @@ static bool nft_immediate_offload_action(const struct nft_expr *expr) return false; } +static bool nft_immediate_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg != NFT_REG_VERDICT) + nft_reg_track_cancel(track, priv->dreg, priv->dlen); + + return false; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -233,6 +244,7 @@ static const struct nft_expr_ops nft_imm_ops = { .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .validate = nft_immediate_validate, + .reduce = nft_immediate_reduce, .offload = nft_immediate_offload, .offload_action = nft_immediate_offload_action, }; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 4f745a409d34..43d0d4aadb1f 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -120,6 +120,7 @@ static const struct nft_expr_ops nft_last_ops = { .destroy = nft_last_destroy, .clone = nft_last_clone, .dump = nft_last_dump, + .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_last_type __read_mostly = { diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index a726b623963d..d4a6cf3cd697 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -225,6 +225,7 @@ static const struct nft_expr_ops nft_limit_pkts_ops = { .destroy = nft_limit_pkts_destroy, .clone = nft_limit_pkts_clone, .dump = nft_limit_pkts_dump, + .reduce = NFT_REDUCE_READONLY, }; static void nft_limit_bytes_eval(const struct nft_expr *expr, @@ -279,6 +280,7 @@ static const struct nft_expr_ops nft_limit_bytes_ops = { .dump = nft_limit_bytes_dump, .clone = nft_limit_bytes_clone, .destroy = nft_limit_bytes_destroy, + .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 54f6c2035e84..0e13c003f0c1 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -290,6 +290,7 @@ static const struct nft_expr_ops nft_log_ops = { .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_log_type __read_mostly = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 90becbf5bff3..dfae12759c7c 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -253,6 +253,17 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, return 0; } +static bool nft_lookup_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + + if (priv->set->flags & NFT_SET_MAP) + nft_reg_track_cancel(track, priv->dreg, priv->set->dlen); + + return false; +} + static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), @@ -263,6 +274,7 @@ static const struct nft_expr_ops nft_lookup_ops = { .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, .validate = nft_lookup_validate, + .reduce = nft_lookup_reduce, }; struct nft_expr_type nft_lookup_type __read_mostly = { diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9953e8053753..2a0adc497bbb 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -129,6 +129,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = { .destroy = nft_masq_ipv4_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { @@ -175,6 +176,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { @@ -230,6 +232,7 @@ static const struct nft_expr_ops nft_masq_inet_ops = { .destroy = nft_masq_inet_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_inet_type __read_mostly = { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5ab4df56c945..ac4859241e17 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -539,6 +539,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } + priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -664,6 +665,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } + priv->len = len; err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; @@ -750,24 +752,21 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_meta_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) +bool nft_meta_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); const struct nft_meta *meta; - if (!track->regs[priv->dreg].selector || - track->regs[priv->dreg].selector->ops != expr->ops) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } meta = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != meta->key || priv->dreg != meta->dreg) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; + nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } @@ -776,6 +775,7 @@ static bool nft_meta_get_reduce(struct nft_regs_track *track, return nft_expr_reduce_bitwise(track, expr); } +EXPORT_SYMBOL_GPL(nft_meta_get_reduce); static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -800,8 +800,7 @@ static bool nft_meta_set_reduce(struct nft_regs_track *track, if (track->regs[i].selector->ops != &nft_meta_get_ops) continue; - track->regs[i].selector = NULL; - track->regs[i].bitwise = NULL; + __nft_reg_track_cancel(track, i); } return false; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index be1595d6979d..4394df4bc99b 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -317,6 +317,7 @@ static const struct nft_expr_ops nft_nat_ops = { .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_nat_type __read_mostly = { @@ -346,6 +347,7 @@ static const struct nft_expr_ops nft_nat_inet_ops = { .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_inet_nat_type __read_mostly = { diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 1d378efd8823..81b40c663d86 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -85,6 +85,16 @@ err: return err; } +static bool nft_ng_inc_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_ng_inc *priv = nft_expr_priv(expr); + + nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); + + return false; +} + static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, u32 modulus, enum nft_ng_types type, u32 offset) { @@ -172,6 +182,16 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } +static bool nft_ng_random_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_ng_random *priv = nft_expr_priv(expr); + + nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); + + return false; +} + static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -180,6 +200,7 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .init = nft_ng_inc_init, .destroy = nft_ng_inc_destroy, .dump = nft_ng_inc_dump, + .reduce = nft_ng_inc_reduce, }; static const struct nft_expr_ops nft_ng_random_ops = { @@ -188,6 +209,7 @@ static const struct nft_expr_ops nft_ng_random_ops = { .eval = nft_ng_random_eval, .init = nft_ng_random_init, .dump = nft_ng_random_dump, + .reduce = nft_ng_random_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 94b2327e71dc..5d8d91b3904d 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -91,6 +91,7 @@ static const struct nft_expr_ops nft_objref_ops = { .activate = nft_objref_activate, .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, + .reduce = NFT_REDUCE_READONLY, }; struct nft_objref_map { @@ -204,6 +205,7 @@ static const struct nft_expr_ops nft_objref_map_ops = { .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, + .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index d82677e83400..5eed18f90b02 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -120,6 +120,30 @@ static int nft_osf_validate(const struct nft_ctx *ctx, (1 << NF_INET_FORWARD)); } +static bool nft_osf_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + struct nft_osf *priv = nft_expr_priv(expr); + struct nft_osf *osf; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); + return false; + } + + osf = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->flags != osf->flags || + priv->ttl != osf->ttl) { + nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return false; +} + static struct nft_expr_type nft_osf_type; static const struct nft_expr_ops nft_osf_op = { .eval = nft_osf_eval, @@ -128,6 +152,7 @@ static const struct nft_expr_ops nft_osf_op = { .dump = nft_osf_dump, .type = &nft_osf_type, .validate = nft_osf_validate, + .reduce = nft_osf_reduce, }; static struct nft_expr_type nft_osf_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 5cc06aef4345..2e7ac007cb30 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -216,10 +216,8 @@ static bool nft_payload_reduce(struct nft_regs_track *track, const struct nft_payload *priv = nft_expr_priv(expr); const struct nft_payload *payload; - if (!track->regs[priv->dreg].selector || - track->regs[priv->dreg].selector->ops != expr->ops) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } @@ -227,8 +225,7 @@ static bool nft_payload_reduce(struct nft_regs_track *track, if (priv->base != payload->base || priv->offset != payload->offset || priv->len != payload->len) { - track->regs[priv->dreg].selector = expr; - track->regs[priv->dreg].bitwise = NULL; + nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } @@ -815,8 +812,7 @@ static bool nft_payload_set_reduce(struct nft_regs_track *track, track->regs[i].selector->ops != &nft_payload_fast_ops) continue; - track->regs[i].selector = NULL; - track->regs[i].bitwise = NULL; + __nft_reg_track_cancel(track, i); } return false; diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 9ba1de51ac07..15e4b7640dc0 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -164,6 +164,7 @@ static const struct nft_expr_ops nft_queue_ops = { .eval = nft_queue_eval, .init = nft_queue_init, .dump = nft_queue_dump, + .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_queue_sreg_ops = { @@ -172,6 +173,7 @@ static const struct nft_expr_ops nft_queue_sreg_ops = { .eval = nft_queue_sreg_eval, .init = nft_queue_sreg_init, .dump = nft_queue_sreg_dump, + .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index f394a0b562f6..d7db57ed3bc1 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -254,6 +254,7 @@ static const struct nft_expr_ops nft_quota_ops = { .destroy = nft_quota_destroy, .clone = nft_quota_clone, .dump = nft_quota_dump, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_quota_type __read_mostly = { diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index e4a1c44d7f51..66f77484c227 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -140,6 +140,7 @@ static const struct nft_expr_ops nft_range_ops = { .eval = nft_range_eval, .init = nft_range_init, .dump = nft_range_dump, + .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_range_type __read_mostly = { diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index ba09890dddb5..5086adfe731c 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -134,6 +134,7 @@ static const struct nft_expr_ops nft_redir_ipv4_ops = { .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { @@ -183,6 +184,7 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = { .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { @@ -225,6 +227,7 @@ static const struct nft_expr_ops nft_redir_inet_ops = { .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 554caf967baa..973fa31a9dd6 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -80,6 +80,7 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_inet_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c index 61cd8c4ac385..7865cd8b11bb 100644 --- a/net/netfilter/nft_reject_netdev.c +++ b/net/netfilter/nft_reject_netdev.c @@ -159,6 +159,7 @@ static const struct nft_expr_ops nft_reject_netdev_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_netdev_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index bcd01a63e38f..71931ec91721 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -191,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = { .init = nft_rt_get_init, .dump = nft_rt_get_dump, .validate = nft_rt_validate, + .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_rt_type __read_mostly = { diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index d601974c9d2e..bd3792f080ed 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -10,6 +10,7 @@ struct nft_socket { enum nft_socket_keys key:8; u8 level; + u8 len; union { u8 dreg; }; @@ -179,6 +180,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } + priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -198,6 +200,31 @@ static int nft_socket_dump(struct sk_buff *skb, return 0; } +static bool nft_socket_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_socket *priv = nft_expr_priv(expr); + const struct nft_socket *socket; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + socket = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->key != socket->key || + priv->dreg != socket->dreg || + priv->level != socket->level) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return nft_expr_reduce_bitwise(track, expr); +} + static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, @@ -205,6 +232,7 @@ static const struct nft_expr_ops nft_socket_ops = { .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, + .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 1133e06f3c40..6cf9a04fbfe2 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -288,6 +288,7 @@ static const struct nft_expr_ops nft_synproxy_ops = { .dump = nft_synproxy_dump, .type = &nft_synproxy_type, .validate = nft_synproxy_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_synproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index b5b09a902c7a..801f013971df 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -320,6 +320,7 @@ static const struct nft_expr_ops nft_tproxy_ops = { .init = nft_tproxy_init, .destroy = nft_tproxy_destroy, .dump = nft_tproxy_dump, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_tproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3b27926d5382..d0f9b1d51b0e 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -17,6 +17,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; u8 dreg; enum nft_tunnel_mode mode:8; + u8 len; }; static void nft_tunnel_get_eval(const struct nft_expr *expr, @@ -101,6 +102,7 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->mode = NFT_TUNNEL_MODE_NONE; } + priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -122,6 +124,31 @@ nla_put_failure: return -1; } +static bool nft_tunnel_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_tunnel *priv = nft_expr_priv(expr); + const struct nft_tunnel *tunnel; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + tunnel = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->key != tunnel->key || + priv->dreg != tunnel->dreg || + priv->mode != tunnel->mode) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return false; +} + static struct nft_expr_type nft_tunnel_type; static const struct nft_expr_ops nft_tunnel_get_ops = { .type = &nft_tunnel_type, @@ -129,6 +156,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = { .eval = nft_tunnel_get_eval, .init = nft_tunnel_get_init, .dump = nft_tunnel_get_dump, + .reduce = nft_tunnel_get_reduce, }; static struct nft_expr_type nft_tunnel_type __read_mostly = { diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index cbbbc4ecad3a..becb88fa4e9b 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -27,6 +27,7 @@ struct nft_xfrm { u8 dreg; u8 dir; u8 spnum; + u8 len; }; static int nft_xfrm_get_init(const struct nft_ctx *ctx, @@ -86,6 +87,7 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx, priv->spnum = spnum; + priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -252,6 +254,31 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e return nft_chain_validate_hooks(ctx->chain, hooks); } +static bool nft_xfrm_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_xfrm *priv = nft_expr_priv(expr); + const struct nft_xfrm *xfrm; + + if (!nft_reg_track_cmp(track, expr, priv->dreg)) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + xfrm = nft_expr_priv(track->regs[priv->dreg].selector); + if (priv->key != xfrm->key || + priv->dreg != xfrm->dreg || + priv->dir != xfrm->dir || + priv->spnum != xfrm->spnum) { + nft_reg_track_update(track, expr, priv->dreg, priv->len); + return false; + } + + if (!track->regs[priv->dreg].bitwise) + return true; + + return nft_expr_reduce_bitwise(track, expr); +} static struct nft_expr_type nft_xfrm_type; static const struct nft_expr_ops nft_xfrm_get_ops = { @@ -261,6 +288,7 @@ static const struct nft_expr_ops nft_xfrm_get_ops = { .init = nft_xfrm_get_init, .dump = nft_xfrm_get_dump, .validate = nft_xfrm_validate, + .reduce = nft_xfrm_reduce, }; static struct nft_expr_type nft_xfrm_type __read_mostly = { |