summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/dt-bindings/net/microchip-lan78xx.h21
-rw-r--r--include/linux/avf/virtchnl.h4
-rw-r--r--include/linux/binfmts.h1
-rw-r--r--include/linux/bpf-cgroup.h24
-rw-r--r--include/linux/bpf.h86
-rw-r--r--include/linux/bpf_lirc.h29
-rw-r--r--include/linux/bpf_trace.h1
-rw-r--r--include/linux/bpf_types.h12
-rw-r--r--include/linux/bpf_verifier.h13
-rw-r--r--include/linux/bpfilter.h15
-rw-r--r--include/linux/brcmphy.h4
-rw-r--r--include/linux/btf.h50
-rw-r--r--include/linux/crash_dump.h18
-rw-r--r--include/linux/ethtool.h5
-rw-r--r--include/linux/filter.h81
-rw-r--r--include/linux/fsl/ptp_qoriq.h141
-rw-r--r--include/linux/if_bridge.h29
-rw-r--r--include/linux/if_macvlan.h29
-rw-r--r--include/linux/if_tun.h4
-rw-r--r--include/linux/if_vlan.h4
-rw-r--r--include/linux/inetdevice.h1
-rw-r--r--include/linux/kcore.h6
-rw-r--r--include/linux/mdio-bitbang.h2
-rw-r--r--include/linux/mdio-gpio.h9
-rw-r--r--include/linux/mdio.h1
-rw-r--r--include/linux/microchipphy.h3
-rw-r--r--include/linux/mlx4/device.h1
-rw-r--r--include/linux/mlx5/device.h11
-rw-r--r--include/linux/mlx5/driver.h18
-rw-r--r--include/linux/mlx5/fs.h6
-rw-r--r--include/linux/mlx5/mlx5_ifc.h84
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h93
-rw-r--r--include/linux/mmc/sdio_ids.h1
-rw-r--r--include/linux/mroute_base.h10
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/net_dim.h69
-rw-r--r--include/linux/netdev_features.h6
-rw-r--r--include/linux/netdevice.h96
-rw-r--r--include/linux/netfilter.h34
-rw-r--r--include/linux/netfilter/nf_osf.h33
-rw-r--r--include/linux/netfilter/nfnetlink.h1
-rw-r--r--include/linux/netfilter_bridge/ebtables.h4
-rw-r--r--include/linux/perf_event.h5
-rw-r--r--include/linux/phy.h69
-rw-r--r--include/linux/phy/phy.h1
-rw-r--r--include/linux/platform_data/b53.h4
-rw-r--r--include/linux/platform_data/mdio-gpio.h33
-rw-r--r--include/linux/platform_data/mv88e6xxx.h18
-rw-r--r--include/linux/qed/common_hsi.h4
-rw-r--r--include/linux/qed/iscsi_common.h8
-rw-r--r--include/linux/qed/qed_eth_if.h5
-rw-r--r--include/linux/qed/qed_if.h284
-rw-r--r--include/linux/qed/qed_ll2_if.h10
-rw-r--r--include/linux/qed/qed_rdma_if.h16
-rw-r--r--include/linux/qed/roce_common.h1
-rw-r--r--include/linux/rhashtable.h38
-rw-r--r--include/linux/skb_array.h5
-rw-r--r--include/linux/skbuff.h28
-rw-r--r--include/linux/soc/ti/knav_dma.h12
-rw-r--r--include/linux/soc/ti/knav_qmss.h1
-rw-r--r--include/linux/socket.h5
-rw-r--r--include/linux/tcp.h5
-rw-r--r--include/linux/tnum.h4
-rw-r--r--include/linux/trace_events.h17
-rw-r--r--include/linux/u64_stats_sync.h14
-rw-r--r--include/linux/udp.h3
-rw-r--r--include/linux/umh.h12
-rw-r--r--include/net/addrconf.h43
-rw-r--r--include/net/ax88796.h14
-rw-r--r--include/net/bluetooth/hci_core.h2
-rw-r--r--include/net/bonding.h11
-rw-r--r--include/net/cfg80211.h131
-rw-r--r--include/net/dcbnl.h4
-rw-r--r--include/net/devlink.h39
-rw-r--r--include/net/dsa.h37
-rw-r--r--include/net/erspan.h28
-rw-r--r--include/net/failover.h36
-rw-r--r--include/net/fib_rules.h3
-rw-r--r--include/net/flow_dissector.h7
-rw-r--r--include/net/if_inet6.h6
-rw-r--r--include/net/inet_connection_sock.h24
-rw-r--r--include/net/inet_sock.h1
-rw-r--r--include/net/inet_timewait_sock.h2
-rw-r--r--include/net/ip.h9
-rw-r--r--include/net/ip6_fib.h211
-rw-r--r--include/net/ip6_route.h85
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ip_tunnels.h15
-rw-r--r--include/net/ip_vs.h11
-rw-r--r--include/net/ipv6.h11
-rw-r--r--include/net/mac80211.h18
-rw-r--r--include/net/neighbour.h19
-rw-r--r--include/net/net_failover.h40
-rw-r--r--include/net/netfilter/ipv4/nf_nat_masquerade.h2
-rw-r--r--include/net/netfilter/ipv6/nf_nat_masquerade.h2
-rw-r--r--include/net/netfilter/nf_conntrack_count.h11
-rw-r--r--include/net/netfilter/nf_flow_table.h24
-rw-r--r--include/net/netfilter/nf_nat.h6
-rw-r--r--include/net/netfilter/nf_nat_core.h11
-rw-r--r--include/net/netfilter/nf_nat_l3proto.h64
-rw-r--r--include/net/netfilter/nf_nat_l4proto.h8
-rw-r--r--include/net/netfilter/nf_nat_redirect.h2
-rw-r--r--include/net/netfilter/nf_socket.h17
-rw-r--r--include/net/netfilter/nf_tables.h88
-rw-r--r--include/net/netfilter/nf_tables_core.h11
-rw-r--r--include/net/netfilter/nf_tproxy.h113
-rw-r--r--include/net/netfilter/nfnetlink_log.h17
-rw-r--r--include/net/netfilter/nft_meta.h44
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/netns/ipv6.h4
-rw-r--r--include/net/netns/nftables.h3
-rw-r--r--include/net/page_pool.h144
-rw-r--r--include/net/pkt_cls.h20
-rw-r--r--include/net/route.h1
-rw-r--r--include/net/rtnetlink.h4
-rw-r--r--include/net/sch_generic.h22
-rw-r--r--include/net/sctp/constants.h5
-rw-r--r--include/net/sctp/sctp.h52
-rw-r--r--include/net/sctp/sm.h4
-rw-r--r--include/net/sctp/structs.h6
-rw-r--r--include/net/seg6.h7
-rw-r--r--include/net/seg6_local.h32
-rw-r--r--include/net/sock.h25
-rw-r--r--include/net/strparser.h2
-rw-r--r--include/net/switchdev.h1
-rw-r--r--include/net/tcp.h30
-rw-r--r--include/net/tipc.h4
-rw-r--r--include/net/tls.h127
-rw-r--r--include/net/udp.h4
-rw-r--r--include/net/vxlan.h1
-rw-r--r--include/net/xdp.h99
-rw-r--r--include/net/xdp_sock.h99
-rw-r--r--include/trace/events/bpf.h355
-rw-r--r--include/trace/events/fib.h107
-rw-r--r--include/trace/events/fib6.h43
-rw-r--r--include/trace/events/rxrpc.h32
-rw-r--r--include/trace/events/tcp.h78
-rw-r--r--include/trace/events/xdp.h50
-rw-r--r--include/uapi/linux/bpf.h2220
-rw-r--r--include/uapi/linux/bpfilter.h21
-rw-r--r--include/uapi/linux/btf.h113
-rw-r--r--include/uapi/linux/cn_proc.h4
-rw-r--r--include/uapi/linux/dcbnl.h11
-rw-r--r--include/uapi/linux/devlink.h14
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--include/uapi/linux/if_addr.h1
-rw-r--r--include/uapi/linux/if_link.h2
-rw-r--r--include/uapi/linux/if_xdp.h78
-rw-r--r--include/uapi/linux/netfilter/nf_nat.h12
-rw-r--r--include/uapi/linux/netfilter/nf_osf.h86
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h67
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_conntrack.h1
-rw-r--r--include/uapi/linux/netfilter/xt_osf.h106
-rw-r--r--include/uapi/linux/netfilter_bridge/ebtables.h6
-rw-r--r--include/uapi/linux/netfilter_ipv6/ip6t_srh.h43
-rw-r--r--include/uapi/linux/nl80211.h99
-rw-r--r--include/uapi/linux/openvswitch.h28
-rw-r--r--include/uapi/linux/pci_regs.h2
-rw-r--r--include/uapi/linux/pkt_cls.h1
-rw-r--r--include/uapi/linux/rtnetlink.h8
-rw-r--r--include/uapi/linux/seg6_local.h12
-rw-r--r--include/uapi/linux/snmp.h3
-rw-r--r--include/uapi/linux/tcp.h16
-rw-r--r--include/uapi/linux/tipc.h12
-rw-r--r--include/uapi/linux/tipc_config.h5
-rw-r--r--include/uapi/linux/tipc_netlink.h1
-rw-r--r--include/uapi/linux/udp.h1
-rw-r--r--include/uapi/linux/virtio_net.h3
-rw-r--r--include/uapi/linux/vmcore.h18
169 files changed, 5538 insertions, 1712 deletions
diff --git a/include/dt-bindings/net/microchip-lan78xx.h b/include/dt-bindings/net/microchip-lan78xx.h
new file mode 100644
index 0000000000000..0742ff075307f
--- /dev/null
+++ b/include/dt-bindings/net/microchip-lan78xx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DT_BINDINGS_MICROCHIP_LAN78XX_H
+#define _DT_BINDINGS_MICROCHIP_LAN78XX_H
+
+/* LED modes for LAN7800/LAN7850 embedded PHY */
+
+#define LAN78XX_LINK_ACTIVITY 0
+#define LAN78XX_LINK_1000_ACTIVITY 1
+#define LAN78XX_LINK_100_ACTIVITY 2
+#define LAN78XX_LINK_10_ACTIVITY 3
+#define LAN78XX_LINK_100_1000_ACTIVITY 4
+#define LAN78XX_LINK_10_1000_ACTIVITY 5
+#define LAN78XX_LINK_10_100_ACTIVITY 6
+#define LAN78XX_DUPLEX_COLLISION 8
+#define LAN78XX_COLLISION 9
+#define LAN78XX_ACTIVITY 10
+#define LAN78XX_AUTONEG_FAULT 12
+#define LAN78XX_FORCE_LED_OFF 14
+#define LAN78XX_FORCE_LED_ON 15
+
+#endif
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index b0a7f315bfbed..212b3822d1804 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -485,7 +485,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
struct virtchnl_rss_lut {
u16 vsi_id;
u16 lut_entries;
- u8 lut[1]; /* RSS lookup table*/
+ u8 lut[1]; /* RSS lookup table */
};
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
@@ -819,7 +819,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
return VIRTCHNL_ERR_PARAM;
}
/* few more checks */
- if ((valid_len != msglen) || (err_msg_format))
+ if (err_msg_format || valid_len != msglen)
return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH;
return 0;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 4955e0863b83d..c05f24fac4f62 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -150,5 +150,6 @@ extern int do_execveat(int, struct filename *,
const char __user * const __user *,
const char __user * const __user *,
int);
+int do_execve_file(struct file *file, void *__argv, void *__envp);
#endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 30d15e64b9933..975fb4cf1bb74 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -66,7 +66,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type);
+ enum bpf_attach_type type,
+ void *t_ctx);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
@@ -120,16 +121,18 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ NULL); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ t_ctx); \
release_sock(sk); \
} \
__ret; \
@@ -151,10 +154,16 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
@@ -185,6 +194,7 @@ struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+#define cgroup_bpf_enabled (0)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
@@ -197,6 +207,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 469b20e1dd7e4..995c3b1e59bfa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -22,6 +22,8 @@ struct perf_event;
struct bpf_prog;
struct bpf_map;
struct sock;
+struct seq_file;
+struct btf;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
@@ -44,10 +46,14 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
+ void (*map_seq_show_elem)(struct bpf_map *map, void *key,
+ struct seq_file *m);
+ int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
+ u32 key_type_id, u32 value_type_id);
};
struct bpf_map {
- /* 1st cacheline with read-mostly members of which some
+ /* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
@@ -63,10 +69,13 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
+ u32 btf_key_type_id;
+ u32 btf_value_type_id;
+ struct btf *btf;
bool unpriv_array;
- /* 7 bytes hole */
+ /* 55 bytes hole */
- /* 2nd cacheline with misc members to avoid false sharing
+ /* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
@@ -101,6 +110,16 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
return container_of(map, struct bpf_offloaded_map, map);
}
+static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
+{
+ return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
+static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
+{
+ return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+}
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* function argument constraints */
@@ -221,6 +240,8 @@ struct bpf_verifier_ops {
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
+ int (*gen_ld_abs)(const struct bpf_insn *orig,
+ struct bpf_insn *insn_buf);
u32 (*convert_ctx_access)(enum bpf_access_type type,
const struct bpf_insn *src,
struct bpf_insn *dst,
@@ -442,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags);
+int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
+ size_t actual_size);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
@@ -464,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
-struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct xdp_buff;
+
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map);
-struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -550,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{
}
+struct xdp_buff;
+struct bpf_dtab_netdev;
+
+static inline
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx)
+{
+ return 0;
+}
+
static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
@@ -564,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{
}
-struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp,
struct net_device *dev_rx)
@@ -606,7 +641,7 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
-static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
return aux->offload_requested;
}
@@ -647,6 +682,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
+struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
#else
static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
@@ -654,6 +690,12 @@ static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
return NULL;
}
+static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map,
+ void *key)
+{
+ return NULL;
+}
+
static inline int sock_map_prog(struct bpf_map *map,
struct bpf_prog *prog,
u32 type)
@@ -662,6 +704,31 @@ static inline int sock_map_prog(struct bpf_map *map,
}
#endif
+#if defined(CONFIG_XDP_SOCKETS)
+struct xdp_sock;
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+#else
+struct xdp_sock;
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ return NULL;
+}
+
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+#endif
+
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -675,10 +742,11 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
-extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
-extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
+extern const struct bpf_func_proto bpf_sock_hash_update_proto;
+extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h
new file mode 100644
index 0000000000000..5f8a4283092d0
--- /dev/null
+++ b/include/linux/bpf_lirc.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_LIRC_H
+#define _BPF_LIRC_H
+
+#include <uapi/linux/bpf.h>
+
+#ifdef CONFIG_BPF_LIRC_MODE2
+int lirc_prog_attach(const union bpf_attr *attr);
+int lirc_prog_detach(const union bpf_attr *attr);
+int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
+#else
+static inline int lirc_prog_attach(const union bpf_attr *attr)
+{
+ return -EINVAL;
+}
+
+static inline int lirc_prog_detach(const union bpf_attr *attr)
+{
+ return -EINVAL;
+}
+
+static inline int lirc_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
+#endif
+
+#endif /* _BPF_LIRC_H */
diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h
index e6fe98ae37943..ddf896abcfb6e 100644
--- a/include/linux/bpf_trace.h
+++ b/include/linux/bpf_trace.h
@@ -2,7 +2,6 @@
#ifndef __LINUX_BPF_TRACE_H__
#define __LINUX_BPF_TRACE_H__
-#include <trace/events/bpf.h>
#include <trace/events/xdp.h>
#endif /* __LINUX_BPF_TRACE_H__ */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2b28fcf6f6ae8..c5700c2d55490 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -9,9 +9,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
@@ -25,6 +26,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
#endif
+#ifdef CONFIG_BPF_LIRC_MODE2
+BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
@@ -47,6 +51,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
+#if defined(CONFIG_XDP_SOCKETS)
+BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
+#endif
#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index df36b1b08af05..38b04f559ad37 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -174,6 +174,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
#define BPF_MAX_SUBPROGS 256
+struct bpf_subprog_info {
+ u32 start; /* insn idx of function entry point */
+ u16 stack_depth; /* max. stack depth used by this function */
+};
+
/* single container for all structs
* one verifier_env per bpf_check() call
*/
@@ -192,14 +197,12 @@ struct bpf_verifier_env {
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
struct bpf_verifier_log log;
- u32 subprog_starts[BPF_MAX_SUBPROGS];
- /* computes the stack depth of each bpf function */
- u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
+ struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
u32 subprog_cnt;
};
-void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
- va_list args);
+__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+ const char *fmt, va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
new file mode 100644
index 0000000000000..687b1760bb9f8
--- /dev/null
+++ b/include/linux/bpfilter.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BPFILTER_H
+#define _LINUX_BPFILTER_H
+
+#include <uapi/linux/bpfilter.h>
+
+struct sock;
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
+ unsigned int optlen);
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
+ int *optlen);
+extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set);
+#endif
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index b324e01ccf2da..daa9234a9baf8 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -85,6 +85,7 @@
#define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */
#define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */
#define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */
+#define MII_BCM54XX_EXP_SEL_ETC 0x0d00 /* Expansion register spare + 2k mem */
#define MII_BCM54XX_AUX_CTL 0x18 /* Auxiliary control register */
#define MII_BCM54XX_ISR 0x1a /* BCM54xx interrupt status register */
@@ -219,6 +220,9 @@
#define BCM54810_SHD_CLK_CTL 0x3
#define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9)
+/* BCM54612E Registers */
+#define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34)
+#define BCM54612E_LED4_CLK125OUT_EN (1 << 1)
/*****************************************************************************/
/* Fast Ethernet Transceiver definitions. */
diff --git a/include/linux/btf.h b/include/linux/btf.h
new file mode 100644
index 0000000000000..e076c4697049d
--- /dev/null
+++ b/include/linux/btf.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef _LINUX_BTF_H
+#define _LINUX_BTF_H 1
+
+#include <linux/types.h>
+
+struct btf;
+struct btf_type;
+union bpf_attr;
+
+extern const struct file_operations btf_fops;
+
+void btf_put(struct btf *btf);
+int btf_new_fd(const union bpf_attr *attr);
+struct btf *btf_get_by_fd(int fd);
+int btf_get_info_by_fd(const struct btf *btf,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+/* Figure out the size of a type_id. If type_id is a modifier
+ * (e.g. const), it will be resolved to find out the type with size.
+ *
+ * For example:
+ * In describing "const void *", type_id is "const" and "const"
+ * refers to "void *". The return type will be "void *".
+ *
+ * If type_id is a simple "int", then return type will be "int".
+ *
+ * @btf: struct btf object
+ * @type_id: Find out the size of type_id. The type_id of the return
+ * type is set to *type_id.
+ * @ret_size: It can be NULL. If not NULL, the size of the return
+ * type is set to *ret_size.
+ * Return: The btf_type (resolved to another type with size info if needed).
+ * NULL is returned if type_id itself does not have size info
+ * (e.g. void) or it cannot be resolved to another type that
+ * has size info.
+ * *type_id and *ret_size will not be changed in the
+ * NULL return case.
+ */
+const struct btf_type *btf_type_id_size(const struct btf *btf,
+ u32 *type_id,
+ u32 *ret_size);
+void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
+ struct seq_file *m);
+int btf_get_fd_by_id(u32 id);
+u32 btf_id(const struct btf *btf);
+
+#endif
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index f7ac2aa932699..3e4ba9d753c88 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -5,6 +5,7 @@
#include <linux/kexec.h>
#include <linux/proc_fs.h>
#include <linux/elf.h>
+#include <uapi/linux/vmcore.h>
#include <asm/pgtable.h> /* for pgprot_t */
@@ -93,4 +94,21 @@ static inline bool is_kdump_kernel(void) { return 0; }
#endif /* CONFIG_CRASH_DUMP */
extern unsigned long saved_max_pfn;
+
+/* Device Dump information to be filled by drivers */
+struct vmcoredd_data {
+ char dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Unique name of the dump */
+ unsigned int size; /* Size of the dump */
+ /* Driver's registered callback to be invoked to collect dump */
+ int (*vmcoredd_callback)(struct vmcoredd_data *data, void *buf);
+};
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+int vmcore_add_device_dump(struct vmcoredd_data *data);
+#else
+static inline int vmcore_add_device_dump(struct vmcoredd_data *data)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
#endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b32cd2062f187..f8a2245b70ac3 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -312,6 +312,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
* by kernel. Returns a negative error code or zero.
* @get_fecparam: Get the network device Forward Error Correction parameters.
* @set_fecparam: Set the network device Forward Error Correction parameters.
+ * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
+ * This is only useful if the device maintains PHY statistics and
+ * cannot use the standard PHY library helpers.
*
* All operations are optional (i.e. the function pointer may be set
* to %NULL) and callers must take this into account. Callers must
@@ -407,5 +410,7 @@ struct ethtool_ops {
struct ethtool_fecparam *);
int (*set_fecparam)(struct net_device *,
struct ethtool_fecparam *);
+ void (*get_ethtool_phy_stats)(struct net_device *,
+ struct ethtool_stats *, u64 *);
};
#endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fc4e8f91b03dc..45fc0f5000d88 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -30,6 +30,7 @@ struct sock;
struct seccomp_data;
struct bpf_prog_aux;
struct xdp_rxq_info;
+struct xdp_buff;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -46,7 +47,9 @@ struct xdp_rxq_info;
/* Additional register mappings for converted user programs. */
#define BPF_REG_A BPF_REG_0
#define BPF_REG_X BPF_REG_7
-#define BPF_REG_TMP BPF_REG_8
+#define BPF_REG_TMP BPF_REG_2 /* scratch reg */
+#define BPF_REG_D BPF_REG_8 /* data, callee-saved */
+#define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */
/* Kernel hidden auxiliary/helper register for hardening step.
* Only used by eBPF JITs. It's nothing more than a temporary
@@ -286,8 +289,21 @@ struct xdp_rxq_info;
.off = OFF, \
.imm = 0 })
+/* Relative call */
+
+#define BPF_CALL_REL(TGT) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = BPF_PSEUDO_CALL, \
+ .off = 0, \
+ .imm = TGT })
+
/* Function call */
+#define BPF_CAST_CALL(x) \
+ ((u64 (*)(u64, u64, u64, u64, u64))(x))
+
#define BPF_EMIT_CALL(FUNC) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_CALL, \
@@ -467,7 +483,8 @@ struct bpf_prog {
dst_needed:1, /* Do we need dst entry? */
blinded:1, /* Was blinded */
is_func:1, /* program is a bpf function */
- kprobe_override:1; /* Do we override a kprobe? */
+ kprobe_override:1, /* Do we override a kprobe? */
+ has_callchain_buf:1; /* callchain buffer allocated? */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@@ -500,14 +517,6 @@ struct bpf_skb_data_end {
void *data_end;
};
-struct xdp_buff {
- void *data;
- void *data_end;
- void *data_meta;
- void *data_hard_start;
- struct xdp_rxq_info *rxq;
-};
-
struct sk_msg_buff {
void *data;
void *data_end;
@@ -519,9 +528,9 @@ struct sk_msg_buff {
int sg_end;
struct scatterlist sg_data[MAX_SKB_FRAGS];
bool sg_copy[MAX_SKB_FRAGS];
- __u32 key;
__u32 flags;
- struct bpf_map *map;
+ struct sock *sk_redir;
+ struct sock *sk;
struct sk_buff *skb;
struct list_head list;
};
@@ -630,16 +639,34 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
return prog->type == BPF_PROG_TYPE_UNSPEC;
}
-static inline bool
-bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default)
+static inline u32 bpf_ctx_off_adjust_machine(u32 size)
{
- bool off_ok;
+ const u32 size_machine = sizeof(unsigned long);
+
+ if (size > size_machine && size % size_machine == 0)
+ size = size_machine;
+
+ return size;
+}
+
+static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access,
+ u32 size_default)
+{
+ size_default = bpf_ctx_off_adjust_machine(size_default);
+ size_access = bpf_ctx_off_adjust_machine(size_access);
+
#ifdef __LITTLE_ENDIAN
- off_ok = (off & (size_default - 1)) == 0;
+ return (off & (size_default - 1)) == 0;
#else
- off_ok = (off & (size_default - 1)) + size == size_default;
+ return (off & (size_default - 1)) + size_access == size_default;
#endif
- return off_ok && size <= size_default && (size & (size - 1)) == 0;
+}
+
+static inline bool
+bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
+{
+ return bpf_ctx_narrow_align_ok(off, size, size_default) &&
+ size <= size_default && (size & (size - 1)) == 0;
}
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
@@ -766,27 +793,12 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
* This does not appear to be a real limitation for existing software.
*/
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *prog);
+ struct xdp_buff *xdp, struct bpf_prog *prog);
int xdp_do_redirect(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *prog);
void xdp_do_flush_map(void);
-/* Drivers not supporting XDP metadata can use this helper, which
- * rejects any room expansion for metadata as a result.
- */
-static __always_inline void
-xdp_set_data_meta_invalid(struct xdp_buff *xdp)
-{
- xdp->data_meta = xdp->data + 1;
-}
-
-static __always_inline bool
-xdp_data_meta_unsupported(const struct xdp_buff *xdp)
-{
- return unlikely(xdp->data_meta > xdp->data);
-}
-
void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
@@ -1029,6 +1041,7 @@ struct bpf_sock_addr_kern {
* only two (src and dst) are available at convert_ctx_access time
*/
u64 tmp_reg;
+ void *t_ctx; /* Attach type specific context. */
};
struct bpf_sock_ops_kern {
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
new file mode 100644
index 0000000000000..b462d9ea80078
--- /dev/null
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ * Copyright 2018 NXP
+ */
+#ifndef __PTP_QORIQ_H__
+#define __PTP_QORIQ_H__
+
+#include <linux/io.h>
+#include <linux/ptp_clock_kernel.h>
+
+/*
+ * qoriq ptp registers
+ * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
+ */
+struct qoriq_ptp_registers {
+ u32 tmr_ctrl; /* Timer control register */
+ u32 tmr_tevent; /* Timestamp event register */
+ u32 tmr_temask; /* Timer event mask register */
+ u32 tmr_pevent; /* Timestamp event register */
+ u32 tmr_pemask; /* Timer event mask register */
+ u32 tmr_stat; /* Timestamp status register */
+ u32 tmr_cnt_h; /* Timer counter high register */
+ u32 tmr_cnt_l; /* Timer counter low register */
+ u32 tmr_add; /* Timer drift compensation addend register */
+ u32 tmr_acc; /* Timer accumulator register */
+ u32 tmr_prsc; /* Timer prescale */
+ u8 res1[4];
+ u32 tmroff_h; /* Timer offset high */
+ u32 tmroff_l; /* Timer offset low */
+ u8 res2[8];
+ u32 tmr_alarm1_h; /* Timer alarm 1 high register */
+ u32 tmr_alarm1_l; /* Timer alarm 1 high register */
+ u32 tmr_alarm2_h; /* Timer alarm 2 high register */
+ u32 tmr_alarm2_l; /* Timer alarm 2 high register */
+ u8 res3[48];
+ u32 tmr_fiper1; /* Timer fixed period interval */
+ u32 tmr_fiper2; /* Timer fixed period interval */
+ u32 tmr_fiper3; /* Timer fixed period interval */
+ u8 res4[20];
+ u32 tmr_etts1_h; /* Timestamp of general purpose external trigger */
+ u32 tmr_etts1_l; /* Timestamp of general purpose external trigger */
+ u32 tmr_etts2_h; /* Timestamp of general purpose external trigger */
+ u32 tmr_etts2_l; /* Timestamp of general purpose external trigger */
+};
+
+/* Bit definitions for the TMR_CTRL register */
+#define ALM1P (1<<31) /* Alarm1 output polarity */
+#define ALM2P (1<<30) /* Alarm2 output polarity */
+#define FIPERST (1<<28) /* FIPER start indication */
+#define PP1L (1<<27) /* Fiper1 pulse loopback mode enabled. */
+#define PP2L (1<<26) /* Fiper2 pulse loopback mode enabled. */
+#define TCLK_PERIOD_SHIFT (16) /* 1588 timer reference clock period. */
+#define TCLK_PERIOD_MASK (0x3ff)
+#define RTPE (1<<15) /* Record Tx Timestamp to PAL Enable. */
+#define FRD (1<<14) /* FIPER Realignment Disable */
+#define ESFDP (1<<11) /* External Tx/Rx SFD Polarity. */
+#define ESFDE (1<<10) /* External Tx/Rx SFD Enable. */
+#define ETEP2 (1<<9) /* External trigger 2 edge polarity */
+#define ETEP1 (1<<8) /* External trigger 1 edge polarity */
+#define COPH (1<<7) /* Generated clock output phase. */
+#define CIPH (1<<6) /* External oscillator input clock phase */
+#define TMSR (1<<5) /* Timer soft reset. */
+#define BYP (1<<3) /* Bypass drift compensated clock */
+#define TE (1<<2) /* 1588 timer enable. */
+#define CKSEL_SHIFT (0) /* 1588 Timer reference clock source */
+#define CKSEL_MASK (0x3)
+
+/* Bit definitions for the TMR_TEVENT register */
+#define ETS2 (1<<25) /* External trigger 2 timestamp sampled */
+#define ETS1 (1<<24) /* External trigger 1 timestamp sampled */
+#define ALM2 (1<<17) /* Current time = alarm time register 2 */
+#define ALM1 (1<<16) /* Current time = alarm time register 1 */
+#define PP1 (1<<7) /* periodic pulse generated on FIPER1 */
+#define PP2 (1<<6) /* periodic pulse generated on FIPER2 */
+#define PP3 (1<<5) /* periodic pulse generated on FIPER3 */
+
+/* Bit definitions for the TMR_TEMASK register */
+#define ETS2EN (1<<25) /* External trigger 2 timestamp enable */
+#define ETS1EN (1<<24) /* External trigger 1 timestamp enable */
+#define ALM2EN (1<<17) /* Timer ALM2 event enable */
+#define ALM1EN (1<<16) /* Timer ALM1 event enable */
+#define PP1EN (1<<7) /* Periodic pulse event 1 enable */
+#define PP2EN (1<<6) /* Periodic pulse event 2 enable */
+
+/* Bit definitions for the TMR_PEVENT register */
+#define TXP2 (1<<9) /* PTP transmitted timestamp im TXTS2 */
+#define TXP1 (1<<8) /* PTP transmitted timestamp in TXTS1 */
+#define RXP (1<<0) /* PTP frame has been received */
+
+/* Bit definitions for the TMR_PEMASK register */
+#define TXP2EN (1<<9) /* Transmit PTP packet event 2 enable */
+#define TXP1EN (1<<8) /* Transmit PTP packet event 1 enable */
+#define RXPEN (1<<0) /* Receive PTP packet event enable */
+
+/* Bit definitions for the TMR_STAT register */
+#define STAT_VEC_SHIFT (0) /* Timer general purpose status vector */
+#define STAT_VEC_MASK (0x3f)
+
+/* Bit definitions for the TMR_PRSC register */
+#define PRSC_OCK_SHIFT (0) /* Output clock division/prescale factor. */
+#define PRSC_OCK_MASK (0xffff)
+
+
+#define DRIVER "ptp_qoriq"
+#define DEFAULT_CKSEL 1
+#define N_EXT_TS 2
+#define REG_SIZE sizeof(struct qoriq_ptp_registers)
+
+struct qoriq_ptp {
+ struct qoriq_ptp_registers __iomem *regs;
+ spinlock_t lock; /* protects regs */
+ struct ptp_clock *clock;
+ struct ptp_clock_info caps;
+ struct resource *rsrc;
+ int irq;
+ int phc_index;
+ u64 alarm_interval; /* for periodic alarm */
+ u64 alarm_value;
+ u32 tclk_period; /* nanoseconds */
+ u32 tmr_prsc;
+ u32 tmr_add;
+ u32 cksel;
+ u32 tmr_fiper1;
+ u32 tmr_fiper2;
+};
+
+static inline u32 qoriq_read(unsigned __iomem *addr)
+{
+ u32 val;
+
+ val = ioread32be(addr);
+ return val;
+}
+
+static inline void qoriq_write(unsigned __iomem *addr, u32 val)
+{
+ iowrite32be(val, addr);
+}
+
+#endif
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 02639ebea2f06..7843b98e1c6ea 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -50,6 +50,7 @@ struct br_ip_list {
#define BR_VLAN_TUNNEL BIT(13)
#define BR_BCAST_FLOOD BIT(14)
#define BR_NEIGH_SUPPRESS BIT(15)
+#define BR_ISOLATED BIT(16)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
@@ -93,11 +94,39 @@ static inline bool br_multicast_router(const struct net_device *dev)
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
bool br_vlan_enabled(const struct net_device *dev);
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid);
+int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo);
#else
static inline bool br_vlan_enabled(const struct net_device *dev)
{
return false;
}
+
+static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+{
+ return -1;
+}
+
+static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
+{
+ return -1;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_BRIDGE)
+struct net_device *br_fdb_find_port(const struct net_device *br_dev,
+ const unsigned char *addr,
+ __u16 vid);
+#else
+static inline struct net_device *
+br_fdb_find_port(const struct net_device *br_dev,
+ const unsigned char *addr,
+ __u16 vid)
+{
+ return NULL;
+}
#endif
#endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 4cb7aeeafce03..2e55e4cdbd8aa 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -21,7 +21,7 @@ struct macvlan_dev {
struct hlist_node hlist;
struct macvlan_port *port;
struct net_device *lowerdev;
- void *fwd_priv;
+ void *accel_priv;
struct vlan_pcpu_stats __percpu *pcpu_stats;
DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
@@ -61,10 +61,6 @@ extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack);
-extern void macvlan_count_rx(const struct macvlan_dev *vlan,
- unsigned int len, bool success,
- bool multicast);
-
extern void macvlan_dellink(struct net_device *dev, struct list_head *head);
extern int macvlan_link_register(struct rtnl_link_ops *ops);
@@ -86,4 +82,27 @@ macvlan_dev_real_dev(const struct net_device *dev)
}
#endif
+static inline void *macvlan_accel_priv(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->accel_priv;
+}
+
+static inline bool macvlan_supports_dest_filter(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->mode == MACVLAN_MODE_PRIVATE ||
+ macvlan->mode == MACVLAN_MODE_VEPA ||
+ macvlan->mode == MACVLAN_MODE_BRIDGE;
+}
+
+static inline int macvlan_release_l2fw_offload(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ macvlan->accel_priv = NULL;
+ return dev_uc_add(macvlan->lowerdev, dev->dev_addr);
+}
#endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index fd00170b494f7..3d2996dc7d85c 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,7 +22,7 @@
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
-bool tun_is_xdp_buff(void *ptr);
+bool tun_is_xdp_frame(void *ptr);
void *tun_xdp_to_ptr(void *ptr);
void *tun_ptr_to_xdp(void *ptr);
void tun_ptr_free(void *ptr);
@@ -39,7 +39,7 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
{
return ERR_PTR(-EINVAL);
}
-static inline bool tun_is_xdp_buff(void *ptr)
+static inline bool tun_is_xdp_frame(void *ptr)
{
return false;
}
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 78a5a90b42672..83ea4df6ab816 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -331,7 +331,7 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features,
* @mac_len: MAC header length including outer vlan headers
*
* Inserts the VLAN tag into @skb as part of the payload at offset mac_len
- * Returns error if skb_cow_head failes.
+ * Returns error if skb_cow_head fails.
*
* Does not change skb->protocol so this function can be used during receive.
*/
@@ -379,7 +379,7 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
* @vlan_tci: VLAN TCI to insert
*
* Inserts the VLAN tag into @skb as part of the payload
- * Returns error if skb_cow_head failes.
+ * Returns error if skb_cow_head fails.
*
* Does not change skb->protocol so this function can be used during receive.
*/
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index e16fe7d44a712..27650f1bff3d7 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -139,6 +139,7 @@ struct in_ifaddr {
__be32 ifa_local;
__be32 ifa_address;
__be32 ifa_mask;
+ __u32 ifa_rt_priority;
__be32 ifa_broadcast;
unsigned char ifa_scope;
unsigned char ifa_prefixlen;
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 80db19d3a5054..8de55e4b5ee93 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -28,6 +28,12 @@ struct vmcore {
loff_t offset;
};
+struct vmcoredd_node {
+ struct list_head list; /* List of dumps */
+ void *buf; /* Buffer containing device's dump */
+ unsigned int size; /* Size of the buffer */
+};
+
#ifdef CONFIG_PROC_KCORE
extern void kclist_add(struct kcore_list *, void *, size_t, int type);
#else
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index a8ac9cfa014cc..5d71e8a8500f5 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -33,8 +33,6 @@ struct mdiobb_ops {
struct mdiobb_ctrl {
const struct mdiobb_ops *ops;
- /* reset callback */
- int (*reset)(struct mii_bus *bus);
};
/* The returned bus is not yet registered with the phy layer. */
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
new file mode 100644
index 0000000000000..cea443a672cb4
--- /dev/null
+++ b/include/linux/mdio-gpio.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_MDIO_GPIO_H
+#define __LINUX_MDIO_GPIO_H
+
+#define MDIO_GPIO_MDC 0
+#define MDIO_GPIO_MDIO 1
+#define MDIO_GPIO_MDO 2
+
+#endif
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 2cfffe586885c..bfa7114167d71 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -29,7 +29,6 @@ enum mdio_mutex_lock_class {
struct mdio_device {
struct device dev;
- const struct dev_pm_ops *pm_ops;
struct mii_bus *bus;
char modalias[MDIO_NAME_SIZE];
diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
index 8f9c90379732b..8c40128af2402 100644
--- a/include/linux/microchipphy.h
+++ b/include/linux/microchipphy.h
@@ -70,6 +70,9 @@
#define LAN88XX_MMD3_CHIP_ID (32877)
#define LAN88XX_MMD3_CHIP_REV (32878)
+/* Registers specific to the LAN7800/LAN7850 embedded phy */
+#define LAN78XX_PHY_LED_MODE_SELECT (0x1D)
+
/* DSP registers */
#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG (0x806A)
#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_ (0x2000)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 81d0799b60916..122e7e9d3091b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -225,6 +225,7 @@ enum {
MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36,
MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
MLX4_DEV_CAP_FLAG2_USER_MAC_EN = 1ULL << 38,
+ MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1ULL << 39,
};
enum {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 2bc27f8c5b877..0f006cf8343df 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -314,6 +314,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_PORT_CHANGE = 0x09,
MLX5_EVENT_TYPE_GPIO_EVENT = 0x15,
MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16,
+ MLX5_EVENT_TYPE_TEMP_WARN_EVENT = 0x17,
MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19,
MLX5_EVENT_TYPE_GENERAL_EVENT = 0x22,
MLX5_EVENT_TYPE_PPS_EVENT = 0x25,
@@ -330,6 +331,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c,
MLX5_EVENT_TYPE_FPGA_ERROR = 0x20,
+ MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21,
};
enum {
@@ -626,6 +628,11 @@ struct mlx5_eqe_dct {
__be32 dctn;
};
+struct mlx5_eqe_temp_warning {
+ __be64 sensor_warning_msb;
+ __be64 sensor_warning_lsb;
+} __packed;
+
union ev_data {
__be32 raw[7];
struct mlx5_eqe_cmd cmd;
@@ -642,6 +649,7 @@ union ev_data {
struct mlx5_eqe_port_module port_module;
struct mlx5_eqe_pps pps;
struct mlx5_eqe_dct dct;
+ struct mlx5_eqe_temp_warning temp_warning;
} __packed;
struct mlx5_eqe {
@@ -1152,6 +1160,9 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
+#define MLX5_CAP_PCAM_REG(mdev, reg) \
+ MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg)
+
#define MLX5_CAP_MCAM_REG(mdev, reg) \
MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d703774982cad..80cbb7fdce4a1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -124,6 +124,8 @@ enum {
MLX5_REG_PAOS = 0x5006,
MLX5_REG_PFCC = 0x5007,
MLX5_REG_PPCNT = 0x5008,
+ MLX5_REG_PPTB = 0x500b,
+ MLX5_REG_PBMC = 0x500c,
MLX5_REG_PMAOS = 0x5012,
MLX5_REG_PUDE = 0x5009,
MLX5_REG_PMPE = 0x5010,
@@ -981,16 +983,24 @@ static inline u32 mlx5_base_mkey(const u32 key)
return key & 0xffffff00u;
}
-static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
- void *cqc)
+static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+ struct mlx5_frag_buf_ctrl *fbc)
{
- fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
- fbc->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
+ fbc->log_stride = log_stride;
+ fbc->log_sz = log_sz;
fbc->sz_m1 = (1 << fbc->log_sz) - 1;
fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
}
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+ void *cqc)
+{
+ mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz),
+ MLX5_GET(cqc, cqc, log_cq_size),
+ fbc);
+}
+
static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
u32 ix)
{
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 47aecc4fa8c2b..9f4d32e41c069 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -90,8 +90,12 @@ struct mlx5_flow_destination {
union {
u32 tir_num;
struct mlx5_flow_table *ft;
- u32 vport_num;
struct mlx5_fc *counter;
+ struct {
+ u16 num;
+ u16 vhca_id;
+ bool vhca_id_valid;
+ } vport;
};
};
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1aad455538f40..8e0b8865f91e1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -60,6 +60,7 @@ enum {
MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION = 0xa,
MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb,
MLX5_EVENT_TYPE_CODING_FPGA_ERROR = 0x20,
+ MLX5_EVENT_TYPE_CODING_FPGA_QP_ERROR = 0x21
};
enum {
@@ -356,22 +357,6 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 reserved_at_6[0x1a];
};
-struct mlx5_ifc_ipv4_layout_bits {
- u8 reserved_at_0[0x60];
-
- u8 ipv4[0x20];
-};
-
-struct mlx5_ifc_ipv6_layout_bits {
- u8 ipv6[16][0x8];
-};
-
-union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
- struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
- struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
- u8 reserved_at_0[0x80];
-};
-
struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 smac_47_16[0x20];
@@ -412,7 +397,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 reserved_at_0[0x8];
u8 source_sqn[0x18];
- u8 reserved_at_20[0x10];
+ u8 source_eswitch_owner_vhca_id[0x10];
u8 source_port[0x10];
u8 outer_second_prio[0x3];
@@ -540,7 +525,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
};
struct mlx5_ifc_flow_table_eswitch_cap_bits {
- u8 reserved_at_0[0x200];
+ u8 reserved_at_0[0x1c];
+ u8 fdb_multi_path_to_table[0x1];
+ u8 reserved_at_1d[0x1e3];
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
@@ -557,7 +544,8 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert_if_not_exist[0x1];
u8 vport_cvlan_insert_overwrite[0x1];
- u8 reserved_at_5[0x19];
+ u8 reserved_at_5[0x18];
+ u8 merged_eswitch[0x1];
u8 nic_vport_node_guid_modify[0x1];
u8 nic_vport_port_guid_modify[0x1];
@@ -925,7 +913,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_msg[0x5];
u8 reserved_at_1c8[0x4];
u8 max_tc[0x4];
- u8 reserved_at_1d0[0x1];
+ u8 temp_warn_event[0x1];
u8 dcbx[0x1];
u8 general_notification_event[0x1];
u8 reserved_at_1d3[0x2];
@@ -1147,8 +1135,9 @@ enum mlx5_flow_destination_type {
struct mlx5_ifc_dest_format_struct_bits {
u8 destination_type[0x8];
u8 destination_id[0x18];
-
- u8 reserved_at_20[0x20];
+ u8 destination_eswitch_owner_vhca_id_valid[0x1];
+ u8 reserved_at_21[0xf];
+ u8 destination_eswitch_owner_vhca_id[0x10];
};
struct mlx5_ifc_flow_counter_list_bits {
@@ -6993,7 +6982,9 @@ struct mlx5_ifc_create_flow_group_in_bits {
u8 reserved_at_a0[0x8];
u8 table_id[0x18];
- u8 reserved_at_c0[0x20];
+ u8 source_eswitch_owner_vhca_id_valid[0x1];
+
+ u8 reserved_at_c1[0x1f];
u8 start_flow_index[0x20];
@@ -8015,6 +8006,17 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
u8 ppcnt_statistical_group[0x1];
};
+struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
+ u8 port_access_reg_cap_mask_127_to_96[0x20];
+ u8 port_access_reg_cap_mask_95_to_64[0x20];
+ u8 port_access_reg_cap_mask_63_to_32[0x20];
+
+ u8 port_access_reg_cap_mask_31_to_13[0x13];
+ u8 pbmc[0x1];
+ u8 pptb[0x1];
+ u8 port_access_reg_cap_mask_10_to_0[0xb];
+};
+
struct mlx5_ifc_pcam_reg_bits {
u8 reserved_at_0[0x8];
u8 feature_group[0x8];
@@ -8024,6 +8026,7 @@ struct mlx5_ifc_pcam_reg_bits {
u8 reserved_at_20[0x20];
union {
+ struct mlx5_ifc_pcam_regs_5000_to_507f_bits regs_5000_to_507f;
u8 reserved_at_0[0x80];
} port_access_reg_cap_mask;
@@ -8788,6 +8791,41 @@ struct mlx5_ifc_qpts_reg_bits {
u8 trust_state[0x3];
};
+struct mlx5_ifc_pptb_reg_bits {
+ u8 reserved_at_0[0x2];
+ u8 mm[0x2];
+ u8 reserved_at_4[0x4];
+ u8 local_port[0x8];
+ u8 reserved_at_10[0x6];
+ u8 cm[0x1];
+ u8 um[0x1];
+ u8 pm[0x8];
+
+ u8 prio_x_buff[0x20];
+
+ u8 pm_msb[0x8];
+ u8 reserved_at_48[0x10];
+ u8 ctrl_buff[0x4];
+ u8 untagged_buff[0x4];
+};
+
+struct mlx5_ifc_pbmc_reg_bits {
+ u8 reserved_at_0[0x8];
+ u8 local_port[0x8];
+ u8 reserved_at_10[0x10];
+
+ u8 xoff_timer_value[0x10];
+ u8 xoff_refresh[0x10];
+
+ u8 reserved_at_40[0x9];
+ u8 fullness_threshold[0x7];
+ u8 port_buffer_size[0x10];
+
+ struct mlx5_ifc_bufferx_reg_bits buffer[10];
+
+ u8 reserved_at_2e0[0x40];
+};
+
struct mlx5_ifc_qtct_reg_bits {
u8 reserved_at_0[0x8];
u8 port_number[0x8];
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index ec052491ba3d7..64d0f40d4cc36 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,12 +32,29 @@
#ifndef MLX5_IFC_FPGA_H
#define MLX5_IFC_FPGA_H
+struct mlx5_ifc_ipv4_layout_bits {
+ u8 reserved_at_0[0x60];
+
+ u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+ u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+ struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+ struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+ u8 reserved_at_0[0x80];
+};
+
enum {
MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
};
enum {
MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2,
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3,
};
struct mlx5_ifc_fpga_shell_caps_bits {
@@ -370,6 +387,27 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_tls_extended_cap_bits {
+ u8 aes_gcm_128[0x1];
+ u8 aes_gcm_256[0x1];
+ u8 reserved_at_2[0x1e];
+ u8 reserved_at_20[0x20];
+ u8 context_capacity_total[0x20];
+ u8 context_capacity_rx[0x20];
+ u8 context_capacity_tx[0x20];
+ u8 reserved_at_a0[0x10];
+ u8 tls_counter_size[0x10];
+ u8 tls_counters_addr_low[0x20];
+ u8 tls_counters_addr_high[0x20];
+ u8 rx[0x1];
+ u8 tx[0x1];
+ u8 tls_v12[0x1];
+ u8 tls_v13[0x1];
+ u8 lro[0x1];
+ u8 ipv6[0x1];
+ u8 reserved_at_106[0x1a];
+};
+
struct mlx5_ifc_ipsec_extended_cap_bits {
u8 encapsulation[0x20];
@@ -432,6 +470,22 @@ struct mlx5_ifc_ipsec_counters_bits {
u8 dropped_cmd[0x40];
};
+enum {
+ MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1,
+ MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2,
+};
+
+struct mlx5_ifc_fpga_qp_error_event_bits {
+ u8 reserved_at_0[0x40];
+
+ u8 reserved_at_40[0x18];
+ u8 syndrome[0x8];
+
+ u8 reserved_at_60[0x60];
+
+ u8 reserved_at_c0[0x8];
+ u8 fpga_qpn[0x18];
+};
enum mlx5_ifc_fpga_ipsec_response_syndrome {
MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
@@ -519,4 +573,43 @@ struct mlx5_ifc_fpga_ipsec_sa {
__be16 reserved2;
} __packed;
+enum fpga_tls_cmds {
+ CMD_SETUP_STREAM = 0x1001,
+ CMD_TEARDOWN_STREAM = 0x1002,
+};
+
+#define MLX5_TLS_1_2 (0)
+
+#define MLX5_TLS_ALG_AES_GCM_128 (0)
+#define MLX5_TLS_ALG_AES_GCM_256 (1)
+
+struct mlx5_ifc_tls_cmd_bits {
+ u8 command_type[0x20];
+ u8 ipv6[0x1];
+ u8 direction_sx[0x1];
+ u8 tls_version[0x2];
+ u8 reserved[0x1c];
+ u8 swid[0x20];
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+ u8 tls_rcd_sn[0x40];
+ u8 tcp_sn[0x20];
+ u8 tls_implicit_iv[0x20];
+ u8 tls_xor_iv[0x40];
+ u8 encryption_key[0x100];
+ u8 alg[4];
+ u8 reserved2[0x1c];
+ u8 reserved3[0x4a0];
+};
+
+struct mlx5_ifc_tls_resp_bits {
+ u8 syndrome[0x20];
+ u8 stream_id[0x20];
+ u8 reserverd[0x40];
+};
+
+#define MLX5_TLS_COMMAND_SIZE (0x100)
+
#endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 2836a96e014ac..4224902a8e22a 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -35,6 +35,7 @@
#define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335
#define SDIO_DEVICE_ID_BROADCOM_4339 0x4339
#define SDIO_DEVICE_ID_BROADCOM_43362 0xa962
+#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4
#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6
#define SDIO_DEVICE_ID_BROADCOM_4345 0x4345
#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index d617fe45543e0..d633f737b3c63 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -307,16 +307,6 @@ static inline void vif_device_init(struct vif_device *v,
{
}
-static inline void *
-mr_table_alloc(struct net *net, u32 id,
- struct mr_table_ops *ops,
- void (*expire_func)(struct timer_list *t),
- void (*table_set)(struct mr_table *mrt,
- struct net *net))
-{
- return NULL;
-}
-
static inline void *mr_mfc_find_parent(struct mr_table *mrt,
void *hasharg, int parent)
{
diff --git a/include/linux/net.h b/include/linux/net.h
index 3fd9d8c165813..08b6eb964dd68 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -198,6 +198,7 @@ struct proto_ops {
int offset, size_t size, int flags);
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
size_t size);
+ int (*set_rcvlowat)(struct sock *sk, int val);
};
#define DECLARE_SOCKADDR(type, dst, src) \
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index 29ed8fd6379a7..db99240d00bdf 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -103,11 +103,12 @@ enum {
#define NET_DIM_PARAMS_NUM_PROFILES 5
/* Adaptive moderation profiles */
#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
-#define NET_DIM_EQE_PROFILES { \
+#define NET_DIM_RX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
@@ -115,7 +116,7 @@ enum {
{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
}
-#define NET_DIM_CQE_PROFILES { \
+#define NET_DIM_RX_CQE_PROFILES { \
{2, 256}, \
{8, 128}, \
{16, 64}, \
@@ -123,32 +124,68 @@ enum {
{64, 64} \
}
+#define NET_DIM_TX_EQE_PROFILES { \
+ {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
+}
+
+#define NET_DIM_TX_CQE_PROFILES { \
+ {5, 128}, \
+ {8, 64}, \
+ {16, 32}, \
+ {32, 32}, \
+ {64, 32} \
+}
+
static const struct net_dim_cq_moder
-profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
- NET_DIM_EQE_PROFILES,
- NET_DIM_CQE_PROFILES,
+rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_RX_EQE_PROFILES,
+ NET_DIM_RX_CQE_PROFILES,
};
-static inline struct net_dim_cq_moder net_dim_get_profile(u8 cq_period_mode,
- int ix)
+static const struct net_dim_cq_moder
+tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_TX_EQE_PROFILES,
+ NET_DIM_TX_CQE_PROFILES,
+};
+
+static inline struct net_dim_cq_moder
+net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
- struct net_dim_cq_moder cq_moder;
+ struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
- cq_moder = profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
-static inline struct net_dim_cq_moder net_dim_get_def_profile(u8 rx_cq_period_mode)
+static inline struct net_dim_cq_moder
+net_dim_get_def_rx_moderation(u8 cq_period_mode)
+{
+ u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
+
+ return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
+}
+
+static inline struct net_dim_cq_moder
+net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
- int default_profile_ix;
+ struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
+
+ cq_moder.cq_period_mode = cq_period_mode;
+ return cq_moder;
+}
- if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE)
- default_profile_ix = NET_DIM_DEF_PROFILE_CQE;
- else /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */
- default_profile_ix = NET_DIM_DEF_PROFILE_EQE;
+static inline struct net_dim_cq_moder
+net_dim_get_def_tx_moderation(u8 cq_period_mode)
+{
+ u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
- return net_dim_get_profile(rx_cq_period_mode, default_profile_ix);
+ return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
static inline bool net_dim_on_top(struct net_dim *dim)
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 35b79f47a13de..623bb8ced0600 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -55,8 +55,9 @@ enum {
NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */
NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */
+ NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
- NETIF_F_GSO_UDP_BIT,
+ NETIF_F_GSO_UDP_L4_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
@@ -77,6 +78,7 @@ enum {
NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */
NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */
NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
+ NETIF_F_HW_TLS_TX_BIT, /* Hardware TLS TX offload */
NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */
NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */
@@ -147,6 +149,8 @@ enum {
#define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM)
#define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT)
#define NETIF_F_HW_TLS_RECORD __NETIF_F(HW_TLS_RECORD)
+#define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4)
+#define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX)
#define for_each_netdev_feature(mask_addr, bit) \
for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf44503ea81ab..3ec9850c7936f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -791,6 +791,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_CBS,
TC_SETUP_QDISC_RED,
TC_SETUP_QDISC_PRIO,
+ TC_SETUP_QDISC_MQ,
};
/* These structures hold the attributes of bpf state that are being passed
@@ -817,10 +818,13 @@ enum bpf_netdev_command {
BPF_OFFLOAD_DESTROY,
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
+ XDP_QUERY_XSK_UMEM,
+ XDP_SETUP_XSK_UMEM,
};
struct bpf_prog_offload_ops;
struct netlink_ext_ack;
+struct xdp_umem;
struct netdev_bpf {
enum bpf_netdev_command command;
@@ -851,6 +855,11 @@ struct netdev_bpf {
struct {
struct bpf_offloaded_map *offmap;
};
+ /* XDP_SETUP_XSK_UMEM */
+ struct {
+ struct xdp_umem *umem;
+ u16 queue_id;
+ } xsk;
};
};
@@ -865,6 +874,26 @@ struct xfrmdev_ops {
};
#endif
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+enum tls_offload_ctx_dir {
+ TLS_OFFLOAD_CTX_DIR_RX,
+ TLS_OFFLOAD_CTX_DIR_TX,
+};
+
+struct tls_crypto_info;
+struct tls_context;
+
+struct tlsdev_ops {
+ int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn);
+ void (*tls_dev_del)(struct net_device *netdev,
+ struct tls_context *ctx,
+ enum tls_offload_ctx_dir direction);
+};
+#endif
+
struct dev_ifalias {
struct rcu_head rcuhead;
char ifalias[];
@@ -1165,12 +1194,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
- * This function is used to submit a XDP packet for transmit on a
- * netdevice.
- * void (*ndo_xdp_flush)(struct net_device *dev);
- * This function is used to inform the driver to flush a particular
- * xdp tx queue. Must be called on same CPU as xdp_xmit.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp,
+ * u32 flags);
+ * This function is used to submit @n XDP packets for transmit on a
+ * netdevice. Returns number of frames successfully transmitted, frames
+ * that got dropped are freed/returned via xdp_return_frame().
+ * Returns negative number, means general error invoking ndo, meaning
+ * no frames were xmit'ed and core-caller will free all frames.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1355,9 +1385,11 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
- int (*ndo_xdp_xmit)(struct net_device *dev,
- struct xdp_buff *xdp);
- void (*ndo_xdp_flush)(struct net_device *dev);
+ int (*ndo_xdp_xmit)(struct net_device *dev, int n,
+ struct xdp_frame **xdp,
+ u32 flags);
+ int (*ndo_xsk_async_xmit)(struct net_device *dev,
+ u32 queue_id);
};
/**
@@ -1401,6 +1433,8 @@ struct net_device_ops {
* entity (i.e. the master device for bridged veth)
* @IFF_MACSEC: device is a MACsec device
* @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
+ * @IFF_FAILOVER: device is a failover master device
+ * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1430,6 +1464,8 @@ enum netdev_priv_flags {
IFF_PHONY_HEADROOM = 1<<24,
IFF_MACSEC = 1<<25,
IFF_NO_RX_HANDLER = 1<<26,
+ IFF_FAILOVER = 1<<27,
+ IFF_FAILOVER_SLAVE = 1<<28,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1458,6 +1494,8 @@ enum netdev_priv_flags {
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
#define IFF_MACSEC IFF_MACSEC
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
+#define IFF_FAILOVER IFF_FAILOVER
+#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
/**
* struct net_device - The DEVICE structure.
@@ -1750,6 +1788,10 @@ struct net_device {
const struct xfrmdev_ops *xfrmdev_ops;
#endif
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+ const struct tlsdev_ops *tlsdev_ops;
+#endif
+
const struct header_ops *header_ops;
unsigned int flags;
@@ -2304,8 +2346,19 @@ enum netdev_lag_tx_type {
NETDEV_LAG_TX_TYPE_HASH,
};
+enum netdev_lag_hash {
+ NETDEV_LAG_HASH_NONE,
+ NETDEV_LAG_HASH_L2,
+ NETDEV_LAG_HASH_L34,
+ NETDEV_LAG_HASH_L23,
+ NETDEV_LAG_HASH_E23,
+ NETDEV_LAG_HASH_E34,
+ NETDEV_LAG_HASH_UNKNOWN,
+};
+
struct netdev_lag_upper_info {
enum netdev_lag_tx_type tx_type;
+ enum netdev_lag_hash hash_type;
};
struct netdev_lag_lower_state_info {
@@ -2486,6 +2539,7 @@ void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
int dev_queue_xmit(struct sk_buff *skb);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
int register_netdevice(struct net_device *dev);
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
void unregister_netdevice_many(struct list_head *head);
@@ -3213,19 +3267,6 @@ static inline int netif_set_xps_queue(struct net_device *dev,
}
#endif
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues);
-
-/*
- * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
- * as a distribution range limit for the returned value.
- */
-static inline u16 skb_tx_hash(const struct net_device *dev,
- struct sk_buff *skb)
-{
- return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
-}
-
/**
* netif_is_multiqueue - test if device has multiple transmit queues
* @dev: network device
@@ -4186,6 +4227,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
@@ -4308,6 +4350,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev)
return dev->priv_flags & IFF_RXFH_CONFIGURED;
}
+static inline bool netif_is_failover(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_FAILOVER;
+}
+
+static inline bool netif_is_failover_slave(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_FAILOVER_SLAVE;
+}
+
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev)
{
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 85a1a0b32c665..04551af2ff230 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -67,7 +67,6 @@ struct nf_hook_ops {
struct net_device *dev;
void *priv;
u_int8_t pf;
- bool nat_hook;
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
int priority;
@@ -321,18 +320,33 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
#include <net/flow.h>
-extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
+
+struct nf_conn;
+enum nf_nat_manip_type;
+struct nlattr;
+enum ip_conntrack_dir;
+
+struct nf_nat_hook {
+ int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip,
+ const struct nlattr *attr);
+ void (*decode_session)(struct sk_buff *skb, struct flowi *fl);
+ unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct,
+ enum nf_nat_manip_type mtype,
+ enum ip_conntrack_dir dir);
+};
+
+extern struct nf_nat_hook __rcu *nf_nat_hook;
static inline void
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
{
#ifdef CONFIG_NF_NAT_NEEDED
- void (*decodefn)(struct sk_buff *, struct flowi *);
+ struct nf_nat_hook *nat_hook;
rcu_read_lock();
- decodefn = rcu_dereference(nf_nat_decode_session_hook);
- if (decodefn)
- decodefn(skb, fl);
+ nat_hook = rcu_dereference(nf_nat_hook);
+ if (nat_hook->decode_session)
+ nat_hook->decode_session(skb, fl);
rcu_read_unlock();
#endif
}
@@ -374,13 +388,19 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
-extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
#else
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
struct nf_conn;
enum ip_conntrack_info;
+
+struct nf_ct_hook {
+ int (*update)(struct net *net, struct sk_buff *skb);
+ void (*destroy)(struct nf_conntrack *);
+};
+extern struct nf_ct_hook __rcu *nf_ct_hook;
+
struct nlattr;
struct nfnl_ct_hook {
diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
new file mode 100644
index 0000000000000..0e114c492fb8f
--- /dev/null
+++ b/include/linux/netfilter/nf_osf.h
@@ -0,0 +1,33 @@
+#include <uapi/linux/netfilter/nf_osf.h>
+
+/* Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum nf_osf_window_size_options {
+ OSF_WSS_PLAIN = 0,
+ OSF_WSS_MSS,
+ OSF_WSS_MTU,
+ OSF_WSS_MODULO,
+ OSF_WSS_MAX,
+};
+
+enum osf_fmatch_states {
+ /* Packet does not match the fingerprint */
+ FMATCH_WRONG = 0,
+ /* Packet matches the fingerprint */
+ FMATCH_OK,
+ /* Options do not match the fingerprint, but header does */
+ FMATCH_OPT_WRONG,
+};
+
+struct nf_osf_finger {
+ struct rcu_head rcu_head;
+ struct list_head finger_entry;
+ struct nf_osf_user_finger finger;
+};
+
+bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+ int hooknum, struct net_device *in, struct net_device *out,
+ const struct nf_osf_info *info, struct net *net,
+ const struct list_head *nf_osf_fingers);
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 34551f8aaf9d4..3ecc3050be0ec 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -31,6 +31,7 @@ struct nfnetlink_subsystem {
const struct nfnl_callback *cb; /* callback for individual types */
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb);
+ void (*cleanup)(struct net *net);
bool (*valid_genid)(struct net *net, u32 genid);
};
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 0773b5a032f1f..c6935be7c6ca3 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -17,10 +17,6 @@
#include <linux/if_ether.h>
#include <uapi/linux/netfilter_bridge/ebtables.h>
-/* return values for match() functions */
-#define EBT_MATCH 0
-#define EBT_NOMATCH 1
-
struct ebt_match {
struct list_head list;
const char name[EBT_FUNCTION_MAXNAMELEN];
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index bea0b0cd4bf7e..1fa12887ec020 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd);
+extern const struct perf_event *perf_get_event(struct file *file);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
@@ -1297,6 +1298,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { }
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
+static inline const struct perf_event *perf_get_event(struct file *file)
+{
+ return ERR_PTR(-EINVAL);
+}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
return ERR_PTR(-EINVAL);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index f0b5870a6d40b..6cd09098427c5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -406,13 +406,17 @@ struct phy_device {
u32 phy_id;
struct phy_c45_device_ids c45_ids;
- bool is_c45;
- bool is_internal;
- bool is_pseudo_fixed_link;
- bool has_fixups;
- bool suspended;
- bool sysfs_links;
- bool loopback_enabled;
+ unsigned is_c45:1;
+ unsigned is_internal:1;
+ unsigned is_pseudo_fixed_link:1;
+ unsigned has_fixups:1;
+ unsigned suspended:1;
+ unsigned sysfs_links:1;
+ unsigned loopback_enabled:1;
+
+ unsigned autoneg:1;
+ /* The most recently read link state */
+ unsigned link:1;
enum phy_state state;
@@ -429,9 +433,6 @@ struct phy_device {
int pause;
int asym_pause;
- /* The most recently read link state */
- int link;
-
/* Enabled Interrupts */
u32 interrupts;
@@ -444,8 +445,6 @@ struct phy_device {
/* Energy efficient ethernet modes which should be prohibited */
u32 eee_broken_modes;
- int autoneg;
-
int link_timeout;
#ifdef CONFIG_LED_TRIGGER_PHY
@@ -1068,6 +1067,52 @@ int __init mdio_bus_init(void);
void mdio_bus_exit(void);
#endif
+/* Inline function for use within net/core/ethtool.c (built-in) */
+static inline int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data)
+{
+ if (!phydev->drv)
+ return -EIO;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_strings(phydev, data);
+ mutex_unlock(&phydev->lock);
+
+ return 0;
+}
+
+static inline int phy_ethtool_get_sset_count(struct phy_device *phydev)
+{
+ int ret;
+
+ if (!phydev->drv)
+ return -EIO;
+
+ if (phydev->drv->get_sset_count &&
+ phydev->drv->get_strings &&
+ phydev->drv->get_stats) {
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->get_sset_count(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static inline int phy_ethtool_get_stats(struct phy_device *phydev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ if (!phydev->drv)
+ return -EIO;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_stats(phydev, stats, data);
+ mutex_unlock(&phydev->lock);
+
+ return 0;
+}
+
extern struct bus_type mdio_bus_type;
struct mdio_board_info {
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index c9d14eeee7f5a..9713aebdd348b 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -36,6 +36,7 @@ enum phy_mode {
PHY_MODE_USB_DEVICE_SS,
PHY_MODE_USB_OTG,
PHY_MODE_SGMII,
+ PHY_MODE_2500SGMII,
PHY_MODE_10GKR,
PHY_MODE_UFS_HS_A,
PHY_MODE_UFS_HS_B,
diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h
index 69d279c0da968..8eaef2f2b691e 100644
--- a/include/linux/platform_data/b53.h
+++ b/include/linux/platform_data/b53.h
@@ -20,8 +20,12 @@
#define __B53_H
#include <linux/kernel.h>
+#include <net/dsa.h>
struct b53_platform_data {
+ /* Must be first such that dsa_register_switch() can access it */
+ struct dsa_chip_data cd;
+
u32 chip_id;
u16 enabled_ports;
diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h
deleted file mode 100644
index 11f00cdabe3d4..0000000000000
--- a/include/linux/platform_data/mdio-gpio.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * MDIO-GPIO bus platform data structures
- *
- * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __LINUX_MDIO_GPIO_H
-#define __LINUX_MDIO_GPIO_H
-
-#include <linux/mdio-bitbang.h>
-
-struct mdio_gpio_platform_data {
- /* GPIO numbers for bus pins */
- unsigned int mdc;
- unsigned int mdio;
- unsigned int mdo;
-
- bool mdc_active_low;
- bool mdio_active_low;
- bool mdo_active_low;
-
- u32 phy_mask;
- u32 phy_ignore_ta_mask;
- int irqs[PHY_MAX_ADDR];
- /* reset callback */
- int (*reset)(struct mii_bus *bus);
-};
-
-#endif /* __LINUX_MDIO_GPIO_H */
diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h
new file mode 100644
index 0000000000000..f63af2955ea07
--- /dev/null
+++ b/include/linux/platform_data/mv88e6xxx.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DSA_MV88E6XXX_H
+#define __DSA_MV88E6XXX_H
+
+#include <net/dsa.h>
+
+struct dsa_mv88e6xxx_pdata {
+ /* Must be first, such that dsa_register_switch() can access this
+ * without gory pointer manipulations
+ */
+ struct dsa_chip_data cd;
+ const char *compatible;
+ unsigned int enabled_ports;
+ struct net_device *netdev;
+ u32 eeprom_len;
+};
+
+#endif
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 13c8ab171437b..0081fa6d12687 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -109,8 +109,8 @@
#define MAX_NUM_LL2_TX_STATS_COUNTERS 48
#define FW_MAJOR_VERSION 8
-#define FW_MINOR_VERSION 33
-#define FW_REVISION_VERSION 11
+#define FW_MINOR_VERSION 37
+#define FW_REVISION_VERSION 2
#define FW_ENGINEERING_VERSION 0
/***********************/
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index 938df614cb6a7..b34c573f2b30d 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -799,8 +799,8 @@ struct e4_mstorm_iscsi_task_ag_ctx {
#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0
#define E4_MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1
#define E4_MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5
+#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1
+#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5
#define E4_MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1
#define E4_MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6
#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK 0x1
@@ -849,8 +849,8 @@ struct e4_ustorm_iscsi_task_ag_ctx {
#define E4_USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0
#define E4_USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1
#define E4_USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4
-#define E4_USTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5
+#define E4_USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1
+#define E4_USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5
#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK 0x3
#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT 6
u8 flags1;
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 147d08ccf8135..2978fa4add420 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -66,6 +66,7 @@ enum qed_filter_config_mode {
QED_FILTER_CONFIG_MODE_5_TUPLE,
QED_FILTER_CONFIG_MODE_L4_PORT,
QED_FILTER_CONFIG_MODE_IP_DEST,
+ QED_FILTER_CONFIG_MODE_IP_SRC,
};
struct qed_ntuple_filter_params {
@@ -88,6 +89,9 @@ struct qed_ntuple_filter_params {
/* true iff this filter is to be added. Else to be removed */
bool b_is_add;
+
+ /* If flow needs to be dropped */
+ bool b_is_drop;
};
struct qed_dev_eth_info {
@@ -352,6 +356,7 @@ struct qed_eth_ops {
int (*configure_arfs_searcher)(struct qed_dev *cdev,
enum qed_filter_config_mode mode);
int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle);
+ int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac);
};
const struct qed_eth_ops *qed_get_eth_ops(void);
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b5b2bc9eacca2..b4040023cbfba 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -159,6 +159,9 @@ struct qed_dcbx_get {
enum qed_nvm_images {
QED_NVM_IMAGE_ISCSI_CFG,
QED_NVM_IMAGE_FCOE_CFG,
+ QED_NVM_IMAGE_NVM_CFG1,
+ QED_NVM_IMAGE_DEFAULT_CFG,
+ QED_NVM_IMAGE_NVM_META,
};
struct qed_link_eee_params {
@@ -179,6 +182,272 @@ enum qed_led_mode {
QED_LED_MODE_RESTORE
};
+struct qed_mfw_tlv_eth {
+ u16 lso_maxoff_size;
+ bool lso_maxoff_size_set;
+ u16 lso_minseg_size;
+ bool lso_minseg_size_set;
+ u8 prom_mode;
+ bool prom_mode_set;
+ u16 tx_descr_size;
+ bool tx_descr_size_set;
+ u16 rx_descr_size;
+ bool rx_descr_size_set;
+ u16 netq_count;
+ bool netq_count_set;
+ u32 tcp4_offloads;
+ bool tcp4_offloads_set;
+ u32 tcp6_offloads;
+ bool tcp6_offloads_set;
+ u16 tx_descr_qdepth;
+ bool tx_descr_qdepth_set;
+ u16 rx_descr_qdepth;
+ bool rx_descr_qdepth_set;
+ u8 iov_offload;
+#define QED_MFW_TLV_IOV_OFFLOAD_NONE (0)
+#define QED_MFW_TLV_IOV_OFFLOAD_MULTIQUEUE (1)
+#define QED_MFW_TLV_IOV_OFFLOAD_VEB (2)
+#define QED_MFW_TLV_IOV_OFFLOAD_VEPA (3)
+ bool iov_offload_set;
+ u8 txqs_empty;
+ bool txqs_empty_set;
+ u8 rxqs_empty;
+ bool rxqs_empty_set;
+ u8 num_txqs_full;
+ bool num_txqs_full_set;
+ u8 num_rxqs_full;
+ bool num_rxqs_full_set;
+};
+
+#define QED_MFW_TLV_TIME_SIZE 14
+struct qed_mfw_tlv_time {
+ bool b_set;
+ u8 month;
+ u8 day;
+ u8 hour;
+ u8 min;
+ u16 msec;
+ u16 usec;
+};
+
+struct qed_mfw_tlv_fcoe {
+ u8 scsi_timeout;
+ bool scsi_timeout_set;
+ u32 rt_tov;
+ bool rt_tov_set;
+ u32 ra_tov;
+ bool ra_tov_set;
+ u32 ed_tov;
+ bool ed_tov_set;
+ u32 cr_tov;
+ bool cr_tov_set;
+ u8 boot_type;
+ bool boot_type_set;
+ u8 npiv_state;
+ bool npiv_state_set;
+ u32 num_npiv_ids;
+ bool num_npiv_ids_set;
+ u8 switch_name[8];
+ bool switch_name_set;
+ u16 switch_portnum;
+ bool switch_portnum_set;
+ u8 switch_portid[3];
+ bool switch_portid_set;
+ u8 vendor_name[8];
+ bool vendor_name_set;
+ u8 switch_model[8];
+ bool switch_model_set;
+ u8 switch_fw_version[8];
+ bool switch_fw_version_set;
+ u8 qos_pri;
+ bool qos_pri_set;
+ u8 port_alias[3];
+ bool port_alias_set;
+ u8 port_state;
+#define QED_MFW_TLV_PORT_STATE_OFFLINE (0)
+#define QED_MFW_TLV_PORT_STATE_LOOP (1)
+#define QED_MFW_TLV_PORT_STATE_P2P (2)
+#define QED_MFW_TLV_PORT_STATE_FABRIC (3)
+ bool port_state_set;
+ u16 fip_tx_descr_size;
+ bool fip_tx_descr_size_set;
+ u16 fip_rx_descr_size;
+ bool fip_rx_descr_size_set;
+ u16 link_failures;
+ bool link_failures_set;
+ u8 fcoe_boot_progress;
+ bool fcoe_boot_progress_set;
+ u64 rx_bcast;
+ bool rx_bcast_set;
+ u64 tx_bcast;
+ bool tx_bcast_set;
+ u16 fcoe_txq_depth;
+ bool fcoe_txq_depth_set;
+ u16 fcoe_rxq_depth;
+ bool fcoe_rxq_depth_set;
+ u64 fcoe_rx_frames;
+ bool fcoe_rx_frames_set;
+ u64 fcoe_rx_bytes;
+ bool fcoe_rx_bytes_set;
+ u64 fcoe_tx_frames;
+ bool fcoe_tx_frames_set;
+ u64 fcoe_tx_bytes;
+ bool fcoe_tx_bytes_set;
+ u16 crc_count;
+ bool crc_count_set;
+ u32 crc_err_src_fcid[5];
+ bool crc_err_src_fcid_set[5];
+ struct qed_mfw_tlv_time crc_err[5];
+ u16 losync_err;
+ bool losync_err_set;
+ u16 losig_err;
+ bool losig_err_set;
+ u16 primtive_err;
+ bool primtive_err_set;
+ u16 disparity_err;
+ bool disparity_err_set;
+ u16 code_violation_err;
+ bool code_violation_err_set;
+ u32 flogi_param[4];
+ bool flogi_param_set[4];
+ struct qed_mfw_tlv_time flogi_tstamp;
+ u32 flogi_acc_param[4];
+ bool flogi_acc_param_set[4];
+ struct qed_mfw_tlv_time flogi_acc_tstamp;
+ u32 flogi_rjt;
+ bool flogi_rjt_set;
+ struct qed_mfw_tlv_time flogi_rjt_tstamp;
+ u32 fdiscs;
+ bool fdiscs_set;
+ u8 fdisc_acc;
+ bool fdisc_acc_set;
+ u8 fdisc_rjt;
+ bool fdisc_rjt_set;
+ u8 plogi;
+ bool plogi_set;
+ u8 plogi_acc;
+ bool plogi_acc_set;
+ u8 plogi_rjt;
+ bool plogi_rjt_set;
+ u32 plogi_dst_fcid[5];
+ bool plogi_dst_fcid_set[5];
+ struct qed_mfw_tlv_time plogi_tstamp[5];
+ u32 plogi_acc_src_fcid[5];
+ bool plogi_acc_src_fcid_set[5];
+ struct qed_mfw_tlv_time plogi_acc_tstamp[5];
+ u8 tx_plogos;
+ bool tx_plogos_set;
+ u8 plogo_acc;
+ bool plogo_acc_set;
+ u8 plogo_rjt;
+ bool plogo_rjt_set;
+ u32 plogo_src_fcid[5];
+ bool plogo_src_fcid_set[5];
+ struct qed_mfw_tlv_time plogo_tstamp[5];
+ u8 rx_logos;
+ bool rx_logos_set;
+ u8 tx_accs;
+ bool tx_accs_set;
+ u8 tx_prlis;
+ bool tx_prlis_set;
+ u8 rx_accs;
+ bool rx_accs_set;
+ u8 tx_abts;
+ bool tx_abts_set;
+ u8 rx_abts_acc;
+ bool rx_abts_acc_set;
+ u8 rx_abts_rjt;
+ bool rx_abts_rjt_set;
+ u32 abts_dst_fcid[5];
+ bool abts_dst_fcid_set[5];
+ struct qed_mfw_tlv_time abts_tstamp[5];
+ u8 rx_rscn;
+ bool rx_rscn_set;
+ u32 rx_rscn_nport[4];
+ bool rx_rscn_nport_set[4];
+ u8 tx_lun_rst;
+ bool tx_lun_rst_set;
+ u8 abort_task_sets;
+ bool abort_task_sets_set;
+ u8 tx_tprlos;
+ bool tx_tprlos_set;
+ u8 tx_nos;
+ bool tx_nos_set;
+ u8 rx_nos;
+ bool rx_nos_set;
+ u8 ols;
+ bool ols_set;
+ u8 lr;
+ bool lr_set;
+ u8 lrr;
+ bool lrr_set;
+ u8 tx_lip;
+ bool tx_lip_set;
+ u8 rx_lip;
+ bool rx_lip_set;
+ u8 eofa;
+ bool eofa_set;
+ u8 eofni;
+ bool eofni_set;
+ u8 scsi_chks;
+ bool scsi_chks_set;
+ u8 scsi_cond_met;
+ bool scsi_cond_met_set;
+ u8 scsi_busy;
+ bool scsi_busy_set;
+ u8 scsi_inter;
+ bool scsi_inter_set;
+ u8 scsi_inter_cond_met;
+ bool scsi_inter_cond_met_set;
+ u8 scsi_rsv_conflicts;
+ bool scsi_rsv_conflicts_set;
+ u8 scsi_tsk_full;
+ bool scsi_tsk_full_set;
+ u8 scsi_aca_active;
+ bool scsi_aca_active_set;
+ u8 scsi_tsk_abort;
+ bool scsi_tsk_abort_set;
+ u32 scsi_rx_chk[5];
+ bool scsi_rx_chk_set[5];
+ struct qed_mfw_tlv_time scsi_chk_tstamp[5];
+};
+
+struct qed_mfw_tlv_iscsi {
+ u8 target_llmnr;
+ bool target_llmnr_set;
+ u8 header_digest;
+ bool header_digest_set;
+ u8 data_digest;
+ bool data_digest_set;
+ u8 auth_method;
+#define QED_MFW_TLV_AUTH_METHOD_NONE (1)
+#define QED_MFW_TLV_AUTH_METHOD_CHAP (2)
+#define QED_MFW_TLV_AUTH_METHOD_MUTUAL_CHAP (3)
+ bool auth_method_set;
+ u16 boot_taget_portal;
+ bool boot_taget_portal_set;
+ u16 frame_size;
+ bool frame_size_set;
+ u16 tx_desc_size;
+ bool tx_desc_size_set;
+ u16 rx_desc_size;
+ bool rx_desc_size_set;
+ u8 boot_progress;
+ bool boot_progress_set;
+ u16 tx_desc_qdepth;
+ bool tx_desc_qdepth_set;
+ u16 rx_desc_qdepth;
+ bool rx_desc_qdepth_set;
+ u64 rx_frames;
+ bool rx_frames_set;
+ u64 rx_bytes;
+ bool rx_bytes_set;
+ u64 tx_frames;
+ bool tx_frames_set;
+ u64 tx_bytes;
+ bool tx_bytes_set;
+};
+
#define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \
(void __iomem *)(reg_addr))
@@ -336,7 +605,6 @@ struct qed_dev_info {
u8 num_hwfns;
u8 hw_mac[ETH_ALEN];
- bool is_mf_default;
/* FW version */
u16 fw_major;
@@ -356,7 +624,7 @@ struct qed_dev_info {
#define QED_MFW_VERSION_3_OFFSET 24
u32 flash_size;
- u8 mf_mode;
+ bool b_inter_pf_switch;
bool tx_switching;
bool rdma_supported;
u16 mtu;
@@ -483,6 +751,14 @@ struct qed_int_info {
u8 used_cnt;
};
+struct qed_generic_tlvs {
+#define QED_TLV_IP_CSUM BIT(0)
+#define QED_TLV_LSO BIT(1)
+ u16 feat_flags;
+#define QED_TLV_MAC_COUNT 3
+ u8 mac[QED_TLV_MAC_COUNT][ETH_ALEN];
+};
+
#define QED_NVM_SIGNATURE 0x12435687
enum qed_nvm_flash_cmd {
@@ -497,6 +773,8 @@ struct qed_common_cb_ops {
void (*link_update)(void *dev,
struct qed_link_output *link);
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
+ void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
+ void (*get_protocol_tlv_data)(void *dev, void *data);
};
struct qed_selftest_ops {
@@ -851,6 +1129,7 @@ struct qed_eth_stats_common {
u64 rx_bcast_pkts;
u64 mftag_filter_discards;
u64 mac_filter_discards;
+ u64 gft_filter_drop;
u64 tx_ucast_bytes;
u64 tx_mcast_bytes;
u64 tx_bcast_bytes;
@@ -901,6 +1180,7 @@ struct qed_eth_stats_common {
u64 tx_mac_mc_packets;
u64 tx_mac_bc_packets;
u64 tx_mac_ctrl_frames;
+ u64 link_change_count;
};
struct qed_eth_stats_bb {
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 266c1fb453875..5eb022953acad 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -202,6 +202,7 @@ struct qed_ll2_tx_pkt_info {
bool enable_ip_cksum;
bool enable_l4_cksum;
bool calc_ip_len;
+ bool remove_stag;
};
#define QED_LL2_UNUSED_HANDLE (0xff)
@@ -220,6 +221,11 @@ struct qed_ll2_params {
u8 ll2_mac_address[ETH_ALEN];
};
+enum qed_ll2_xmit_flags {
+ /* FIP discovery packet */
+ QED_LL2_XMIT_FLAGS_FIP_DISCOVERY
+};
+
struct qed_ll2_ops {
/**
* @brief start - initializes ll2
@@ -245,10 +251,12 @@ struct qed_ll2_ops {
*
* @param cdev
* @param skb
+ * @param xmit_flags - Transmit options defined by the enum qed_ll2_xmit_flags.
*
* @return 0 on success, otherwise error value.
*/
- int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb);
+ int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb,
+ unsigned long xmit_flags);
/**
* @brief register_cb_ops - protocol driver register the callback for Rx/Tx
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index 4dd72ba210f5c..df4d13f7e1919 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -65,8 +65,7 @@ enum qed_roce_qp_state {
enum qed_rdma_tid_type {
QED_RDMA_TID_REGISTERED_MR,
QED_RDMA_TID_FMR,
- QED_RDMA_TID_MW_TYPE1,
- QED_RDMA_TID_MW_TYPE2A
+ QED_RDMA_TID_MW
};
struct qed_rdma_events {
@@ -280,7 +279,6 @@ struct qed_rdma_register_tid_in_params {
bool dif_enabled;
u64 dif_error_addr;
- u64 dif_runt_addr;
};
struct qed_rdma_create_cq_in_params {
@@ -485,7 +483,9 @@ enum qed_iwarp_event_type {
QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
QED_IWARP_EVENT_LOCAL_ACCESS_ERROR,
QED_IWARP_EVENT_REMOTE_OPERATION_ERROR,
- QED_IWARP_EVENT_TERMINATE_RECEIVED
+ QED_IWARP_EVENT_TERMINATE_RECEIVED,
+ QED_IWARP_EVENT_SRQ_LIMIT,
+ QED_IWARP_EVENT_SRQ_EMPTY,
};
enum qed_tcp_ip_version {
@@ -646,6 +646,14 @@ struct qed_rdma_ops {
int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid);
void (*rdma_free_tid)(void *rdma_cxt, u32 itid);
+ int (*rdma_create_srq)(void *rdma_cxt,
+ struct qed_rdma_create_srq_in_params *iparams,
+ struct qed_rdma_create_srq_out_params *oparams);
+ int (*rdma_destroy_srq)(void *rdma_cxt,
+ struct qed_rdma_destroy_srq_in_params *iparams);
+ int (*rdma_modify_srq)(void *rdma_cxt,
+ struct qed_rdma_modify_srq_in_params *iparams);
+
int (*ll2_acquire_connection)(void *rdma_cxt,
struct qed_ll2_acquire_data *data);
diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h
index 193bcef302e1d..473fba76aa775 100644
--- a/include/linux/qed/roce_common.h
+++ b/include/linux/qed/roce_common.h
@@ -43,6 +43,7 @@
#define ROCE_MAX_QPS (32 * 1024)
#define ROCE_DCQCN_NP_MAX_QPS (64)
#define ROCE_DCQCN_RP_MAX_QPS (64)
+#define ROCE_LKEY_MW_DIF_EN_BIT (28)
/* Affiliated asynchronous events / errors enumeration */
enum roce_async_events_type {
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 1f8ad121eb434..4e1f535c2034e 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -836,9 +836,8 @@ out:
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhashtable_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
@@ -866,9 +865,8 @@ static inline int rhashtable_insert_fast(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhltable_insert_key(
struct rhltable *hlt, const void *key, struct rhlist_head *list,
@@ -890,9 +888,8 @@ static inline int rhltable_insert_key(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhltable_insert(
struct rhltable *hlt, struct rhlist_head *list,
@@ -922,9 +919,8 @@ static inline int rhltable_insert(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhashtable_lookup_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
@@ -981,9 +977,8 @@ static inline void *rhashtable_lookup_get_insert_fast(
*
* Lookups may occur in parallel with hashtable mutations and resizing.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*
* Returns zero on success.
*/
@@ -1134,8 +1129,8 @@ static inline int __rhashtable_remove_fast(
* walk the bucket chain upon removal. The removal operation is thus
* considerable slow if the hash table is not correctly sized.
*
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
*
* Returns zero on success, -ENOENT if the entry could not be found.
*/
@@ -1156,8 +1151,8 @@ static inline int rhashtable_remove_fast(
* walk the bucket chain upon removal. The removal operation is thus
* considerable slow if the hash table is not correctly sized.
*
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%
*
* Returns zero on success, -ENOENT if the entry could not be found.
*/
@@ -1273,8 +1268,9 @@ static inline int rhashtable_walk_init(struct rhashtable *ht,
* For a completely stable walk you should construct your own data
* structure outside the hash table.
*
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
*
* You must call rhashtable_walk_exit after this function returns.
*/
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index a6b6e8bb3d7b8..62d9b0a6329f1 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -97,6 +97,11 @@ static inline bool skb_array_empty_any(struct skb_array *a)
return ptr_ring_empty_any(&a->ring);
}
+static inline struct sk_buff *__skb_array_consume(struct skb_array *a)
+{
+ return __ptr_ring_consume(&a->ring);
+}
+
static inline struct sk_buff *skb_array_consume(struct skb_array *a)
{
return ptr_ring_consume(&a->ring);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 89198379b39df..c868859549942 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -573,6 +573,8 @@ enum {
SKB_GSO_ESP = 1 << 15,
SKB_GSO_UDP = 1 << 16,
+
+ SKB_GSO_UDP_L4 = 1 << 17,
};
#if BITS_PER_LONG > 32
@@ -852,8 +854,6 @@ struct sk_buff {
/*
* Handling routines are only of interest to the kernel
*/
-#include <linux/slab.h>
-
#define SKB_ALLOC_FCLONE 0x01
#define SKB_ALLOC_RX 0x02
@@ -1032,6 +1032,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
+void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
gfp_t gfp_mask, bool fclone);
@@ -1168,7 +1169,7 @@ void __skb_get_hash(struct sk_buff *skb);
u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
u32 skb_get_poff(const struct sk_buff *skb);
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
- const struct flow_keys *keys, int hlen);
+ const struct flow_keys_basic *keys, int hlen);
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
void *data, int hlen_proto);
@@ -1205,13 +1206,14 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
NULL, 0, 0, 0, flags);
}
-static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
- void *data, __be16 proto,
- int nhoff, int hlen,
- unsigned int flags)
+static inline bool
+skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+ struct flow_keys_basic *flow, void *data,
+ __be16 proto, int nhoff, int hlen,
+ unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
- return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
+ return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
data, proto, nhoff, hlen, flags);
}
@@ -2347,11 +2349,12 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
static inline void skb_probe_transport_header(struct sk_buff *skb,
const int offset_hint)
{
- struct flow_keys keys;
+ struct flow_keys_basic keys;
if (skb_transport_header_was_set(skb))
return;
- else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+
+ if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
skb_set_transport_header(skb, keys.control.thoff);
else
skb_set_transport_header(skb, offset_hint);
@@ -3131,6 +3134,7 @@ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len)
return skb->data;
}
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
/**
* pskb_trim_rcsum - trim received skb and update checksum
* @skb: buffer to trim
@@ -3144,9 +3148,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
{
if (likely(len >= skb->len))
return 0;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- return __pskb_trim(skb, len);
+ return pskb_trim_rcsum_slow(skb, len);
}
static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 66693bc4c6ad6..7127ec3015376 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -167,6 +167,8 @@ struct knav_dma_desc {
void *knav_dma_open_channel(struct device *dev, const char *name,
struct knav_dma_cfg *config);
void knav_dma_close_channel(void *channel);
+int knav_dma_get_flow(void *channel);
+bool knav_dma_device_ready(void);
#else
static inline void *knav_dma_open_channel(struct device *dev, const char *name,
struct knav_dma_cfg *config)
@@ -176,6 +178,16 @@ static inline void *knav_dma_open_channel(struct device *dev, const char *name,
static inline void knav_dma_close_channel(void *channel)
{}
+static inline int knav_dma_get_flow(void *channel)
+{
+ return -EINVAL;
+}
+
+static inline bool knav_dma_device_ready(void)
+{
+ return false;
+}
+
#endif
#endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */
diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h
index 9f0ebb3bad278..9745df6ed9d33 100644
--- a/include/linux/soc/ti/knav_qmss.h
+++ b/include/linux/soc/ti/knav_qmss.h
@@ -86,5 +86,6 @@ int knav_pool_desc_map(void *ph, void *desc, unsigned size,
void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz);
dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt);
void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma);
+bool knav_qmss_device_ready(void);
#endif /* __SOC_TI_KNAV_QMSS_H__ */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index ea50f4a658162..7ed4713d53372 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -207,8 +207,9 @@ struct ucred {
* PF_SMC protocol family that
* reuses AF_INET address family
*/
+#define AF_XDP 44 /* XDP sockets */
-#define AF_MAX 44 /* For now.. */
+#define AF_MAX 45 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -257,6 +258,7 @@ struct ucred {
#define PF_KCM AF_KCM
#define PF_QIPCRTR AF_QIPCRTR
#define PF_SMC AF_SMC
+#define PF_XDP AF_XDP
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
@@ -338,6 +340,7 @@ struct ucred {
#define SOL_NFC 280
#define SOL_KCM 281
#define SOL_TLS 282
+#define SOL_XDP 283
/* IPX options */
#define IPX_TYPE 1
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8f4c54986f974..72705eaf4b840 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -218,6 +218,7 @@ struct tcp_sock {
reord:1; /* reordering detected */
} rack;
u16 advmss; /* Advertised MSS */
+ u8 compressed_ack;
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */
@@ -228,7 +229,7 @@ struct tcp_sock {
unused:2;
u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */
- unused1 : 1,
+ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
repair : 1,
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
u8 repair_queue;
@@ -281,6 +282,7 @@ struct tcp_sock {
* receiver in Recovery. */
u32 prr_out; /* Total number of pkts sent during Recovery. */
u32 delivered; /* Total data packets delivered incl. rexmits */
+ u32 delivered_ce; /* Like the above but only ECE marked packets */
u32 lost; /* Total data packets lost incl. rexmits */
u32 app_limited; /* limited until "delivered" reaches this val */
u64 first_tx_mstamp; /* start of window send phase */
@@ -296,6 +298,7 @@ struct tcp_sock {
u32 sacked_out; /* SACK'd packets */
struct hrtimer pacing_timer;
+ struct hrtimer compressed_ack_timer;
/* from STCP, retrans queue hinting */
struct sk_buff* lost_skb_hint;
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 0d2d3da461397..c7dc2b5902c05 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
/* Arithmetic and logical ops */
/* Shift a tnum left (by a fixed shift) */
struct tnum tnum_lshift(struct tnum a, u8 shift);
-/* Shift a tnum right (by a fixed shift) */
+/* Shift (rsh) a tnum right (by a fixed shift) */
struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Shift (arsh) a tnum right (by a fixed min_shift) */
+struct tnum tnum_arshift(struct tnum a, u8 min_shift);
/* Add two tnums, return @a + @b */
struct tnum tnum_add(struct tnum a, struct tnum b);
/* Subtract two tnums, return @a - @b */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d1c442d9bd853..78a010e19ed41 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -472,6 +472,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+ u32 *fd_type, const char **buf,
+ u64 *probe_offset, u64 *probe_addr);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -503,6 +506,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
{
return NULL;
}
+static inline int bpf_get_perf_event_info(const struct perf_event *event,
+ u32 *prog_id, u32 *fd_type,
+ const char **buf, u64 *probe_offset,
+ u64 *probe_addr)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
@@ -559,10 +569,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **symbol,
+ u64 *probe_offset, u64 *probe_addr,
+ bool perf_type_tracepoint);
#endif
#ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **filename,
+ u64 *probe_offset, bool perf_type_tracepoint);
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 07ee0f84a46ca..a27604f99ed04 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -112,20 +112,6 @@ u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
#endif
}
-static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- raw_write_seqcount_begin(&syncp->seq);
-#endif
-}
-
-static inline void u64_stats_update_end_raw(struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- raw_write_seqcount_end(&syncp->seq);
-#endif
-}
-
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index eaea63bc79bb2..ca840345571bf 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -55,6 +55,7 @@ struct udp_sock {
* when the socket is uncorked.
*/
__u16 len; /* total length of pending frames */
+ __u16 gso_size;
/*
* Fields specific to UDP-Lite.
*/
@@ -87,6 +88,8 @@ struct udp_sock {
int forward_deficit;
};
+#define UDP_MAX_SEGMENTS (1 << 6UL)
+
static inline struct udp_sock *udp_sk(const struct sock *sk)
{
return (struct udp_sock *)sk;
diff --git a/include/linux/umh.h b/include/linux/umh.h
index 244aff6382208..5c812acbb80ae 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -22,8 +22,10 @@ struct subprocess_info {
const char *path;
char **argv;
char **envp;
+ struct file *file;
int wait;
int retval;
+ pid_t pid;
int (*init)(struct subprocess_info *info, struct cred *new);
void (*cleanup)(struct subprocess_info *info);
void *data;
@@ -38,6 +40,16 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data);
+struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
+ int (*init)(struct subprocess_info *info, struct cred *new),
+ void (*cleanup)(struct subprocess_info *), void *data);
+struct umh_info {
+ struct file *pipe_to_umh;
+ struct file *pipe_from_umh;
+ pid_t pid;
+};
+int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
+
extern int
call_usermodehelper_exec(struct subprocess_info *info, int wait);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 378d601258bea..5f43f7a70fe69 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -59,6 +59,19 @@ struct in6_validator_info {
struct netlink_ext_ack *extack;
};
+struct ifa6_config {
+ const struct in6_addr *pfx;
+ unsigned int plen;
+
+ const struct in6_addr *peer_pfx;
+
+ u32 rt_priority;
+ u32 ifa_flags;
+ u32 preferred_lft;
+ u32 valid_lft;
+ u16 scope;
+};
+
int addrconf_init(void);
void addrconf_cleanup(void);
@@ -223,6 +236,22 @@ struct ipv6_stub {
const struct in6_addr *addr);
int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6);
+
+ struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+ struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
+ struct flowi6 *fl6, int flags);
+ struct fib6_info *(*fib6_table_lookup)(struct net *net,
+ struct fib6_table *table,
+ int oif, struct flowi6 *fl6,
+ int flags);
+ struct fib6_info *(*fib6_multipath_select)(const struct net *net,
+ struct fib6_info *f6i,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
+ int strict);
+ u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
+ struct in6_addr *saddr);
+
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
@@ -308,6 +337,20 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
}
/**
+ * __in6_dev_get_safely - get inet6_dev pointer from netdevice
+ * @dev: network device
+ *
+ * This is a safer version of __in6_dev_get
+ */
+static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev)
+{
+ if (likely(dev))
+ return rcu_dereference_rtnl(dev->ip6_ptr);
+ else
+ return NULL;
+}
+
+/**
* in6_dev_get - get inet6_dev pointer from netdevice
* @dev: network device
*
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index b9a3beca0ce41..84b3785d0e66a 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -12,6 +12,10 @@
#ifndef __NET_AX88796_PLAT_H
#define __NET_AX88796_PLAT_H
+struct sk_buff;
+struct net_device;
+struct platform_device;
+
#define AXFLG_HAS_EEPROM (1<<0)
#define AXFLG_MAC_FROMDEV (1<<1) /* device already has MAC */
#define AXFLG_HAS_93CX6 (1<<2) /* use eeprom_93cx6 driver */
@@ -26,6 +30,16 @@ struct ax_plat_data {
u32 *reg_offsets; /* register offsets */
u8 *mac_addr; /* MAC addr (only used when
AXFLG_MAC_FROMPLATFORM is used */
+
+ /* uses default ax88796 buffer if set to NULL */
+ void (*block_output)(struct net_device *dev, int count,
+ const unsigned char *buf, int star_page);
+ void (*block_input)(struct net_device *dev, int count,
+ struct sk_buff *skb, int ring_offset);
+ /* returns nonzero if a pending interrupt request might by caused by
+ * the ax88786. Handles all interrupts if set to NULL
+ */
+ int (*check_irq)(struct platform_device *pdev);
};
#endif /* __NET_AX88796_PLAT_H */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b619a190ff128..893bbbb5d2fa8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1393,6 +1393,8 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param, u32 timeout);
struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param, u8 event, u32 timeout);
+int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param);
int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
const void *param);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b522351588360..808f1d1673494 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -285,8 +285,15 @@ static inline bool bond_needs_speed_duplex(const struct bonding *bond)
static inline bool bond_is_nondyn_tlb(const struct bonding *bond)
{
- return (BOND_MODE(bond) == BOND_MODE_TLB) &&
- (bond->params.tlb_dynamic_lb == 0);
+ return (bond_is_lb(bond) && bond->params.tlb_dynamic_lb == 0);
+}
+
+static inline bool bond_mode_can_use_xmit_hash(const struct bonding *bond)
+{
+ return (BOND_MODE(bond) == BOND_MODE_8023AD ||
+ BOND_MODE(bond) == BOND_MODE_XOR ||
+ BOND_MODE(bond) == BOND_MODE_TLB ||
+ BOND_MODE(bond) == BOND_MODE_ALB);
}
static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 250dac390806f..5fbfe61f41c67 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1080,6 +1080,37 @@ struct sta_bss_parameters {
};
/**
+ * struct cfg80211_txq_stats - TXQ statistics for this TID
+ * @filled: bitmap of flags using the bits of &enum nl80211_txq_stats to
+ * indicate the relevant values in this struct are filled
+ * @backlog_bytes: total number of bytes currently backlogged
+ * @backlog_packets: total number of packets currently backlogged
+ * @flows: number of new flows seen
+ * @drops: total number of packets dropped
+ * @ecn_marks: total number of packets marked with ECN CE
+ * @overlimit: number of drops due to queue space overflow
+ * @overmemory: number of drops due to memory limit overflow
+ * @collisions: number of hash collisions
+ * @tx_bytes: total number of bytes dequeued
+ * @tx_packets: total number of packets dequeued
+ * @max_flows: maximum number of flows supported
+ */
+struct cfg80211_txq_stats {
+ u32 filled;
+ u32 backlog_bytes;
+ u32 backlog_packets;
+ u32 flows;
+ u32 drops;
+ u32 ecn_marks;
+ u32 overlimit;
+ u32 overmemory;
+ u32 collisions;
+ u32 tx_bytes;
+ u32 tx_packets;
+ u32 max_flows;
+};
+
+/**
* struct cfg80211_tid_stats - per-TID statistics
* @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to
* indicate the relevant values in this struct are filled
@@ -1088,6 +1119,7 @@ struct sta_bss_parameters {
* @tx_msdu_retries: number of retries (not counting the first) for
* transmitted MSDUs
* @tx_msdu_failed: number of failed transmitted MSDUs
+ * @txq_stats: TXQ statistics
*/
struct cfg80211_tid_stats {
u32 filled;
@@ -1095,6 +1127,7 @@ struct cfg80211_tid_stats {
u64 tx_msdu;
u64 tx_msdu_retries;
u64 tx_msdu_failed;
+ struct cfg80211_txq_stats txq_stats;
};
#define IEEE80211_MAX_CHAINS 4
@@ -1151,7 +1184,10 @@ struct cfg80211_tid_stats {
* @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
* @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
* (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
+ * Note that this doesn't use the @filled bit, but is used if non-NULL.
* @ack_signal: signal strength (in dBm) of the last ACK frame.
+ * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has
+ * been sent.
*/
struct station_info {
u64 filled;
@@ -1195,8 +1231,9 @@ struct station_info {
u64 rx_beacon;
u64 rx_duration;
u8 rx_beacon_signal_avg;
- struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
+ struct cfg80211_tid_stats *pertid;
s8 ack_signal;
+ s8 avg_ack_signal;
};
#if IS_ENABLED(CONFIG_CFG80211)
@@ -2188,9 +2225,14 @@ struct cfg80211_connect_params {
* have to be updated as part of update_connect_params() call.
*
* @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated
+ * @UPDATE_FILS_ERP_INFO: Indicates that FILS connection parameters (realm,
+ * username, erp sequence number and rrk) are updated
+ * @UPDATE_AUTH_TYPE: Indicates that authentication type is updated
*/
enum cfg80211_connect_params_changed {
UPDATE_ASSOC_IES = BIT(0),
+ UPDATE_FILS_ERP_INFO = BIT(1),
+ UPDATE_AUTH_TYPE = BIT(2),
};
/**
@@ -2201,6 +2243,9 @@ enum cfg80211_connect_params_changed {
* @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed
* @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed
* @WIPHY_PARAM_DYN_ACK: dynack has been enabled
+ * @WIPHY_PARAM_TXQ_LIMIT: TXQ packet limit has been changed
+ * @WIPHY_PARAM_TXQ_MEMORY_LIMIT: TXQ memory limit has been changed
+ * @WIPHY_PARAM_TXQ_QUANTUM: TXQ scheduler quantum
*/
enum wiphy_params_flags {
WIPHY_PARAM_RETRY_SHORT = 1 << 0,
@@ -2209,6 +2254,9 @@ enum wiphy_params_flags {
WIPHY_PARAM_RTS_THRESHOLD = 1 << 3,
WIPHY_PARAM_COVERAGE_CLASS = 1 << 4,
WIPHY_PARAM_DYN_ACK = 1 << 5,
+ WIPHY_PARAM_TXQ_LIMIT = 1 << 6,
+ WIPHY_PARAM_TXQ_MEMORY_LIMIT = 1 << 7,
+ WIPHY_PARAM_TXQ_QUANTUM = 1 << 8,
};
/**
@@ -2961,6 +3009,9 @@ struct cfg80211_external_auth_params {
*
* @set_multicast_to_unicast: configure multicast to unicast conversion for BSS
*
+ * @get_txq_stats: Get TXQ stats for interface or phy. If wdev is %NULL, this
+ * function should return phy stats, and interface stats otherwise.
+ *
* @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake.
* If not deleted through @del_pmk the PMK remains valid until disconnect
* upon which the driver should clear it.
@@ -3262,6 +3313,10 @@ struct cfg80211_ops {
struct net_device *dev,
const bool enabled);
+ int (*get_txq_stats)(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct cfg80211_txq_stats *txqstats);
+
int (*set_pmk)(struct wiphy *wiphy, struct net_device *dev,
const struct cfg80211_pmk_conf *conf);
int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
@@ -3806,6 +3861,10 @@ struct wiphy_iftype_ext_capab {
* bitmap of &enum nl80211_band values. For instance, for
* NL80211_BAND_2GHZ, bit 0 would be set
* (i.e. BIT(NL80211_BAND_2GHZ)).
+ *
+ * @txq_limit: configuration of internal TX queue frame limit
+ * @txq_memory_limit: configuration internal TX queue memory limit
+ * @txq_quantum: configuration of internal TX queue scheduler quantum
*/
struct wiphy {
/* assign these fields before you register the wiphy */
@@ -3940,6 +3999,10 @@ struct wiphy {
u8 nan_supported_bands;
+ u32 txq_limit;
+ u32 txq_memory_limit;
+ u32 txq_quantum;
+
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -5363,6 +5426,30 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
#endif
/**
+ * struct cfg80211_fils_resp_params - FILS connection response params
+ * @kek: KEK derived from a successful FILS connection (may be %NULL)
+ * @kek_len: Length of @fils_kek in octets
+ * @update_erp_next_seq_num: Boolean value to specify whether the value in
+ * @erp_next_seq_num is valid.
+ * @erp_next_seq_num: The next sequence number to use in ERP message in
+ * FILS Authentication. This value should be specified irrespective of the
+ * status for a FILS connection.
+ * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL).
+ * @pmk_len: Length of @pmk in octets
+ * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID
+ * used for this FILS connection (may be %NULL).
+ */
+struct cfg80211_fils_resp_params {
+ const u8 *kek;
+ size_t kek_len;
+ bool update_erp_next_seq_num;
+ u16 erp_next_seq_num;
+ const u8 *pmk;
+ size_t pmk_len;
+ const u8 *pmkid;
+};
+
+/**
* struct cfg80211_connect_resp_params - Connection response params
* @status: Status code, %WLAN_STATUS_SUCCESS for successful connection, use
* %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you
@@ -5380,17 +5467,7 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
* @req_ie_len: Association request IEs length
* @resp_ie: Association response IEs (may be %NULL)
* @resp_ie_len: Association response IEs length
- * @fils_kek: KEK derived from a successful FILS connection (may be %NULL)
- * @fils_kek_len: Length of @fils_kek in octets
- * @update_erp_next_seq_num: Boolean value to specify whether the value in
- * @fils_erp_next_seq_num is valid.
- * @fils_erp_next_seq_num: The next sequence number to use in ERP message in
- * FILS Authentication. This value should be specified irrespective of the
- * status for a FILS connection.
- * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL).
- * @pmk_len: Length of @pmk in octets
- * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID
- * used for this FILS connection (may be %NULL).
+ * @fils: FILS connection response parameters.
* @timeout_reason: Reason for connection timeout. This is used when the
* connection fails due to a timeout instead of an explicit rejection from
* the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is
@@ -5406,13 +5483,7 @@ struct cfg80211_connect_resp_params {
size_t req_ie_len;
const u8 *resp_ie;
size_t resp_ie_len;
- const u8 *fils_kek;
- size_t fils_kek_len;
- bool update_erp_next_seq_num;
- u16 fils_erp_next_seq_num;
- const u8 *pmk;
- size_t pmk_len;
- const u8 *pmkid;
+ struct cfg80211_fils_resp_params fils;
enum nl80211_timeout_reason timeout_reason;
};
@@ -5558,6 +5629,7 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid,
* @req_ie_len: association request IEs length
* @resp_ie: association response IEs (may be %NULL)
* @resp_ie_len: assoc response IEs length
+ * @fils: FILS related roaming information.
*/
struct cfg80211_roam_info {
struct ieee80211_channel *channel;
@@ -5567,6 +5639,7 @@ struct cfg80211_roam_info {
size_t req_ie_len;
const u8 *resp_ie;
size_t resp_ie_len;
+ struct cfg80211_fils_resp_params fils;
};
/**
@@ -5648,6 +5721,26 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
struct ieee80211_channel *chan,
gfp_t gfp);
+/**
+ * cfg80211_sinfo_alloc_tid_stats - allocate per-tid statistics.
+ *
+ * @sinfo: the station information
+ * @gfp: allocation flags
+ */
+int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp);
+
+/**
+ * cfg80211_sinfo_release_content - release contents of station info
+ * @sinfo: the station information
+ *
+ * Releases any potentially allocated sub-information of the station
+ * information, but not the struct itself (since it's typically on
+ * the stack.)
+ */
+static inline void cfg80211_sinfo_release_content(struct station_info *sinfo)
+{
+ kfree(sinfo->pertid);
+}
/**
* cfg80211_new_sta - notify userspace about station
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 207d9ba1f92c8..0e5e91be2d30c 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -101,6 +101,10 @@ struct dcbnl_rtnl_ops {
/* CEE peer */
int (*cee_peer_getpg) (struct net_device *, struct cee_pg *);
int (*cee_peer_getpfc) (struct net_device *, struct cee_pfc *);
+
+ /* buffer settings */
+ int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *);
+ int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *);
};
#endif /* __NET_DCBNL_H__ */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 2e4f71e16e956..e336ea9c73df3 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -35,6 +35,14 @@ struct devlink {
char priv[0] __aligned(NETDEV_ALIGN);
};
+struct devlink_port_attrs {
+ bool set;
+ enum devlink_port_flavour flavour;
+ u32 port_number; /* same value as "split group" */
+ bool split;
+ u32 split_subport_number;
+};
+
struct devlink_port {
struct list_head list;
struct devlink *devlink;
@@ -43,8 +51,7 @@ struct devlink_port {
enum devlink_port_type type;
enum devlink_port_type desired_type;
void *type_dev;
- bool split;
- u32 split_group;
+ struct devlink_port_attrs attrs;
};
struct devlink_sb_pool_info {
@@ -289,12 +296,13 @@ struct devlink_resource {
#define DEVLINK_RESOURCE_ID_PARENT_TOP 0
struct devlink_ops {
- int (*reload)(struct devlink *devlink);
+ int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
int (*port_type_set)(struct devlink_port *devlink_port,
enum devlink_port_type port_type);
int (*port_split)(struct devlink *devlink, unsigned int port_index,
- unsigned int count);
- int (*port_unsplit)(struct devlink *devlink, unsigned int port_index);
+ unsigned int count, struct netlink_ext_ack *extack);
+ int (*port_unsplit)(struct devlink *devlink, unsigned int port_index,
+ struct netlink_ext_ack *extack);
int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index,
struct devlink_sb_pool_info *pool_info);
@@ -367,8 +375,12 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port,
void devlink_port_type_ib_set(struct devlink_port *devlink_port,
struct ib_device *ibdev);
void devlink_port_type_clear(struct devlink_port *devlink_port);
-void devlink_port_split_set(struct devlink_port *devlink_port,
- u32 split_group);
+void devlink_port_attrs_set(struct devlink_port *devlink_port,
+ enum devlink_port_flavour flavour,
+ u32 port_number, bool split,
+ u32 split_subport_number);
+int devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
+ char *name, size_t len);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
@@ -466,11 +478,20 @@ static inline void devlink_port_type_clear(struct devlink_port *devlink_port)
{
}
-static inline void devlink_port_split_set(struct devlink_port *devlink_port,
- u32 split_group)
+static inline void devlink_port_attrs_set(struct devlink_port *devlink_port,
+ enum devlink_port_flavour flavour,
+ u32 port_number, bool split,
+ u32 split_subport_number)
{
}
+static inline int
+devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
+ char *name, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int devlink_sb_register(struct devlink *devlink,
unsigned int sb_index, u32 size,
u16 ingress_pools_count,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 60fb4ec8ba616..fdbd6082945d6 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -20,12 +20,14 @@
#include <linux/of.h>
#include <linux/ethtool.h>
#include <linux/net_tstamp.h>
+#include <linux/phy.h>
#include <net/devlink.h>
#include <net/switchdev.h>
struct tc_action;
struct phy_device;
struct fixed_phy_status;
+struct phylink_link_state;
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = 0,
@@ -199,6 +201,7 @@ struct dsa_port {
u8 stp_state;
struct net_device *bridge_dev;
struct devlink_port devlink_port;
+ struct phylink *pl;
/*
* Original copy of the master netdev ethtool_ops
*/
@@ -354,12 +357,36 @@ struct dsa_switch_ops {
struct fixed_phy_status *st);
/*
+ * PHYLINK integration
+ */
+ void (*phylink_validate)(struct dsa_switch *ds, int port,
+ unsigned long *supported,
+ struct phylink_link_state *state);
+ int (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
+ struct phylink_link_state *state);
+ void (*phylink_mac_config)(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ const struct phylink_link_state *state);
+ void (*phylink_mac_an_restart)(struct dsa_switch *ds, int port);
+ void (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface);
+ void (*phylink_mac_link_up)(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev);
+ void (*phylink_fixed_state)(struct dsa_switch *ds, int port,
+ struct phylink_link_state *state);
+ /*
* ethtool hardware statistics.
*/
- void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
+ void (*get_strings)(struct dsa_switch *ds, int port,
+ u32 stringset, uint8_t *data);
void (*get_ethtool_stats)(struct dsa_switch *ds,
int port, uint64_t *data);
- int (*get_sset_count)(struct dsa_switch *ds, int port);
+ int (*get_sset_count)(struct dsa_switch *ds, int port, int sset);
+ void (*get_ethtool_phy_stats)(struct dsa_switch *ds,
+ int port, uint64_t *data);
/*
* ethtool Wake-on-LAN
@@ -588,4 +615,10 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
#define BRCM_TAG_GET_PORT(v) ((v) >> 8)
#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+
+int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
+int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data);
+int dsa_port_get_phy_sset_count(struct dsa_port *dp);
+void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
+
#endif
diff --git a/include/net/erspan.h b/include/net/erspan.h
index d044aa60cc76f..b39643ef4c95f 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -219,6 +219,33 @@ static inline __be32 erspan_get_timestamp(void)
return htonl((u32)h_usecs);
}
+/* ERSPAN BSO (Bad/Short/Oversized), see RFC1757
+ * 00b --> Good frame with no error, or unknown integrity
+ * 01b --> Payload is a Short Frame
+ * 10b --> Payload is an Oversized Frame
+ * 11b --> Payload is a Bad Frame with CRC or Alignment Error
+ */
+enum erspan_bso {
+ BSO_NOERROR = 0x0,
+ BSO_SHORT = 0x1,
+ BSO_OVERSIZED = 0x2,
+ BSO_BAD = 0x3,
+};
+
+static inline u8 erspan_detect_bso(struct sk_buff *skb)
+{
+ /* BSO_BAD is not handled because the frame CRC
+ * or alignment error information is in FCS.
+ */
+ if (skb->len < ETH_ZLEN)
+ return BSO_SHORT;
+
+ if (skb->len > ETH_FRAME_LEN)
+ return BSO_OVERSIZED;
+
+ return BSO_NOERROR;
+}
+
static inline void erspan_build_header_v2(struct sk_buff *skb,
u32 id, u8 direction, u16 hwid,
bool truncate, bool is_ipv4)
@@ -248,6 +275,7 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
vlan_tci = ntohs(qp->tci);
}
+ bso = erspan_detect_bso(skb);
skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
ershdr = (struct erspan_base_hdr *)skb->data;
memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
diff --git a/include/net/failover.h b/include/net/failover.h
new file mode 100644
index 0000000000000..bb15438f39c7d
--- /dev/null
+++ b/include/net/failover.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _FAILOVER_H
+#define _FAILOVER_H
+
+#include <linux/netdevice.h>
+
+struct failover_ops {
+ int (*slave_pre_register)(struct net_device *slave_dev,
+ struct net_device *failover_dev);
+ int (*slave_register)(struct net_device *slave_dev,
+ struct net_device *failover_dev);
+ int (*slave_pre_unregister)(struct net_device *slave_dev,
+ struct net_device *failover_dev);
+ int (*slave_unregister)(struct net_device *slave_dev,
+ struct net_device *failover_dev);
+ int (*slave_link_change)(struct net_device *slave_dev,
+ struct net_device *failover_dev);
+ int (*slave_name_change)(struct net_device *slave_dev,
+ struct net_device *failover_dev);
+ rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
+};
+
+struct failover {
+ struct list_head list;
+ struct net_device __rcu *failover_dev;
+ struct failover_ops __rcu *ops;
+};
+
+struct failover *failover_register(struct net_device *dev,
+ struct failover_ops *ops);
+void failover_unregister(struct failover *failover);
+int failover_slave_unregister(struct net_device *slave_dev);
+
+#endif /* _FAILOVER_H */
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e5cfcfc7dd93c..b473df5b95121 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -75,7 +75,8 @@ struct fib_rules_ops {
int (*configure)(struct fib_rule *,
struct sk_buff *,
struct fib_rule_hdr *,
- struct nlattr **);
+ struct nlattr **,
+ struct netlink_ext_ack *);
int (*delete)(struct fib_rule *);
int (*compare)(struct fib_rule *,
struct fib_rule_hdr *,
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index d1fcf2442a423..adc24df56b907 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -226,6 +226,11 @@ struct flow_dissector {
unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
};
+struct flow_keys_basic {
+ struct flow_dissector_key_control control;
+ struct flow_dissector_key_basic basic;
+};
+
struct flow_keys {
struct flow_dissector_key_control control;
#define FLOW_KEYS_HASH_START_FIELD basic
@@ -244,7 +249,7 @@ __be32 flow_get_u32_src(const struct flow_keys *flow);
__be32 flow_get_u32_dst(const struct flow_keys *flow);
extern struct flow_dissector flow_keys_dissector;
-extern struct flow_dissector flow_keys_buf_dissector;
+extern struct flow_dissector flow_keys_basic_dissector;
/* struct flow_keys_digest:
*
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d4088d1a688d4..d7578cf49c3af 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -42,6 +42,7 @@ enum {
struct inet6_ifaddr {
struct in6_addr addr;
__u32 prefix_len;
+ __u32 rt_priority;
/* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */
__u32 valid_lft;
@@ -64,7 +65,7 @@ struct inet6_ifaddr {
struct delayed_work dad_work;
struct inet6_dev *idev;
- struct rt6_info *rt;
+ struct fib6_info *rt;
struct hlist_node addr_lst;
struct list_head if_list;
@@ -143,8 +144,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
- struct inet6_dev *aca_idev;
- struct rt6_info *aca_rt;
+ struct fib6_info *aca_rt;
struct ifacaddr6 *aca_next;
int aca_users;
refcount_t aca_refcnt;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b68fea022a82e..0a6c9e0f2b5a3 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -23,8 +23,6 @@
#include <net/inet_sock.h>
#include <net/request_sock.h>
-#define INET_CSK_DEBUG 1
-
/* Cancel timers, when they are not required. */
#undef INET_CSK_CLEAR_TIMERS
@@ -77,6 +75,7 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
+ * @icsk_clean_acked Clean acked data hook
* @icsk_listen_portaddr_node hash to the portaddr listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
@@ -102,6 +101,7 @@ struct inet_connection_sock {
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void *icsk_ulp_data;
+ void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:6,
@@ -194,10 +194,6 @@ static inline void inet_csk_delack_init(struct sock *sk)
void inet_csk_delete_keepalive_timer(struct sock *sk);
void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
-#ifdef INET_CSK_DEBUG
-extern const char inet_csk_timer_bug_msg[];
-#endif
-
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -212,12 +208,9 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
#ifdef INET_CSK_CLEAR_TIMERS
sk_stop_timer(sk, &icsk->icsk_delack_timer);
#endif
+ } else {
+ pr_debug("inet_csk BUG: unknown timer value\n");
}
-#ifdef INET_CSK_DEBUG
- else {
- pr_debug("%s", inet_csk_timer_bug_msg);
- }
-#endif
}
/*
@@ -230,10 +223,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
struct inet_connection_sock *icsk = inet_csk(sk);
if (when > max_when) {
-#ifdef INET_CSK_DEBUG
pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
sk, what, when, current_text_addr());
-#endif
when = max_when;
}
@@ -247,12 +238,9 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
icsk->icsk_ack.timeout = jiffies + when;
sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
+ } else {
+ pr_debug("inet_csk BUG: unknown timer value\n");
}
-#ifdef INET_CSK_DEBUG
- else {
- pr_debug("%s", inet_csk_timer_bug_msg);
- }
-#endif
}
static inline unsigned long
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 0a671c32d6b96..83d5b3c2ac421 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -147,6 +147,7 @@ struct inet_cork {
__u8 ttl;
__s16 tos;
char priority;
+ __u16 gso_size;
};
struct inet_cork_full {
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index c7be1ca8e562f..78775038f011f 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -61,7 +61,7 @@ struct inet_timewait_sock {
#define tw_cookie __tw_common.skc_cookie
#define tw_dr __tw_common.skc_tw_dr
- int tw_timeout;
+ __u32 tw_mark;
volatile unsigned char tw_substate;
unsigned char tw_rcv_wscale;
diff --git a/include/net/ip.h b/include/net/ip.h
index ecffd843e7b89..0d2281b4b27ac 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__u8 ttl;
__s16 tos;
char priority;
+ __u16 gso_size;
};
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
@@ -171,7 +172,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable **rtp,
- unsigned int flags);
+ struct inet_cork *cork, unsigned int flags);
static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
{
@@ -396,6 +397,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
}
+int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
+ u32 *metrics);
+
u32 ip_idents_reserve(u32 hash, int segs);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
@@ -660,4 +664,7 @@ extern int sysctl_icmp_msgs_burst;
int ip_misc_proc_init(void);
#endif
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+ struct netlink_ext_ack *extack);
+
#endif /* _IP_H */
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0e79c3408569a..5cba71d2dc44b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -38,6 +38,7 @@
#endif
struct rt6_info;
+struct fib6_info;
struct fib6_config {
u32 fc_table;
@@ -74,12 +75,12 @@ struct fib6_node {
#ifdef CONFIG_IPV6_SUBTREES
struct fib6_node __rcu *subtree;
#endif
- struct rt6_info __rcu *leaf;
+ struct fib6_info __rcu *leaf;
__u16 fn_bit; /* bit key */
__u16 fn_flags;
int fn_sernum;
- struct rt6_info __rcu *rr_ptr;
+ struct fib6_info __rcu *rr_ptr;
struct rcu_head rcu;
};
@@ -94,11 +95,6 @@ struct fib6_gc_args {
#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
-struct mx6_config {
- const u32 *mx;
- DECLARE_BITMAP(mx_valid, RTAX_MAX);
-};
-
/*
* routing information
*
@@ -127,92 +123,104 @@ struct rt6_exception {
#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
#define FIB6_MAX_DEPTH 5
-struct rt6_info {
- struct dst_entry dst;
- struct rt6_info __rcu *rt6_next;
- struct rt6_info *from;
+struct fib6_nh {
+ struct in6_addr nh_gw;
+ struct net_device *nh_dev;
+ struct lwtunnel_state *nh_lwtstate;
- /*
- * Tail elements of dst_entry (__refcnt etc.)
- * and these elements (rarely used in hot path) are in
- * the same cache line.
- */
- struct fib6_table *rt6i_table;
- struct fib6_node __rcu *rt6i_node;
+ unsigned int nh_flags;
+ atomic_t nh_upper_bound;
+ int nh_weight;
+};
- struct in6_addr rt6i_gateway;
+struct fib6_info {
+ struct fib6_table *fib6_table;
+ struct fib6_info __rcu *fib6_next;
+ struct fib6_node __rcu *fib6_node;
/* Multipath routes:
- * siblings is a list of rt6_info that have the the same metric/weight,
+ * siblings is a list of fib6_info that have the the same metric/weight,
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
- struct list_head rt6i_siblings;
- unsigned int rt6i_nsiblings;
- atomic_t rt6i_nh_upper_bound;
+ struct list_head fib6_siblings;
+ unsigned int fib6_nsiblings;
- atomic_t rt6i_ref;
+ atomic_t fib6_ref;
+ unsigned long expires;
+ struct dst_metrics *fib6_metrics;
+#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
- unsigned int rt6i_nh_flags;
+ struct rt6key fib6_dst;
+ u32 fib6_flags;
+ struct rt6key fib6_src;
+ struct rt6key fib6_prefsrc;
- /* These are in a separate cache line. */
- struct rt6key rt6i_dst ____cacheline_aligned_in_smp;
- u32 rt6i_flags;
+ struct rt6_info * __percpu *rt6i_pcpu;
+ struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
+
+ u32 fib6_metric;
+ u8 fib6_protocol;
+ u8 fib6_type;
+ u8 exception_bucket_flushed:1,
+ should_flush:1,
+ dst_nocount:1,
+ dst_nopolicy:1,
+ dst_host:1,
+ unused:3;
+
+ struct fib6_nh fib6_nh;
+};
+
+struct rt6_info {
+ struct dst_entry dst;
+ struct fib6_info __rcu *from;
+
+ struct rt6key rt6i_dst;
struct rt6key rt6i_src;
+ struct in6_addr rt6i_gateway;
+ struct inet6_dev *rt6i_idev;
+ u32 rt6i_flags;
struct rt6key rt6i_prefsrc;
struct list_head rt6i_uncached;
struct uncached_list *rt6i_uncached_list;
- struct inet6_dev *rt6i_idev;
- struct rt6_info * __percpu *rt6i_pcpu;
- struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
- u32 rt6i_metric;
- u32 rt6i_pmtu;
/* more non-fragment space at head required */
- int rt6i_nh_weight;
unsigned short rt6i_nfheader_len;
- u8 rt6i_protocol;
- u8 exception_bucket_flushed:1,
- should_flush:1,
- unused:6;
};
#define for_each_fib6_node_rt_rcu(fn) \
for (rt = rcu_dereference((fn)->leaf); rt; \
- rt = rcu_dereference(rt->rt6_next))
+ rt = rcu_dereference(rt->fib6_next))
#define for_each_fib6_walker_rt(w) \
for (rt = (w)->leaf; rt; \
- rt = rcu_dereference_protected(rt->rt6_next, 1))
+ rt = rcu_dereference_protected(rt->fib6_next, 1))
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
return ((struct rt6_info *)dst)->rt6i_idev;
}
-static inline void rt6_clean_expires(struct rt6_info *rt)
+static inline void fib6_clean_expires(struct fib6_info *f6i)
{
- rt->rt6i_flags &= ~RTF_EXPIRES;
- rt->dst.expires = 0;
+ f6i->fib6_flags &= ~RTF_EXPIRES;
+ f6i->expires = 0;
}
-static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
+static inline void fib6_set_expires(struct fib6_info *f6i,
+ unsigned long expires)
{
- rt->dst.expires = expires;
- rt->rt6i_flags |= RTF_EXPIRES;
+ f6i->expires = expires;
+ f6i->fib6_flags |= RTF_EXPIRES;
}
-static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
+static inline bool fib6_check_expired(const struct fib6_info *f6i)
{
- struct rt6_info *rt;
-
- for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
- if (rt && rt != rt0)
- rt0->dst.expires = rt->dst.expires;
- dst_set_expires(&rt0->dst, timeout);
- rt0->rt6i_flags |= RTF_EXPIRES;
+ if (f6i->fib6_flags & RTF_EXPIRES)
+ return time_after(jiffies, f6i->expires);
+ return false;
}
/* Function to safely get fn->sernum for passed in rt
@@ -220,14 +228,13 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
* Return true if we can get cookie safely
* Return false if not
*/
-static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
- u32 *cookie)
+static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
+ u32 *cookie)
{
struct fib6_node *fn;
bool status = false;
- rcu_read_lock();
- fn = rcu_dereference(rt->rt6i_node);
+ fn = rcu_dereference(f6i->fib6_node);
if (fn) {
*cookie = fn->fn_sernum;
@@ -236,19 +243,22 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
status = true;
}
- rcu_read_unlock();
return status;
}
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
+ struct fib6_info *from;
u32 cookie = 0;
- if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
- rt = rt->from;
+ rcu_read_lock();
+
+ from = rcu_dereference(rt->from);
+ if (from && (rt->rt6i_flags & RTF_PCPU ||
+ unlikely(!list_empty(&rt->rt6i_uncached))))
+ fib6_get_cookie_safe(from, &cookie);
- rt6_get_cookie_safe(rt, &cookie);
+ rcu_read_unlock();
return cookie;
}
@@ -262,20 +272,18 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct fib6_info *f6i);
-static inline void rt6_hold(struct rt6_info *rt)
+static inline void fib6_info_hold(struct fib6_info *f6i)
{
- atomic_inc(&rt->rt6i_ref);
+ atomic_inc(&f6i->fib6_ref);
}
-static inline void rt6_release(struct rt6_info *rt)
+static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (atomic_dec_and_test(&rt->rt6i_ref)) {
- rt6_free_pcpu(rt);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
+ if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
+ fib6_info_destroy(f6i);
}
enum fib6_walk_state {
@@ -291,7 +299,7 @@ enum fib6_walk_state {
struct fib6_walker {
struct list_head lh;
struct fib6_node *root, *node;
- struct rt6_info *leaf;
+ struct fib6_info *leaf;
enum fib6_walk_state state;
unsigned int skip;
unsigned int count;
@@ -355,7 +363,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
- struct rt6_info *rt;
+ struct fib6_info *rt;
};
/*
@@ -368,24 +376,49 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
-struct fib6_node *fib6_lookup(struct fib6_node *root,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr);
+/* called with rcu lock held; can return error pointer
+ * caller needs to select path
+ */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ int flags);
+
+/* called with rcu lock held; caller needs to select path */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int strict);
+
+struct fib6_info *fib6_multipath_select(const struct net *net,
+ struct fib6_info *match,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb, int strict);
+
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
const struct in6_addr *saddr, int src_len,
bool exact_match);
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
void *arg);
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc,
- struct netlink_ext_ack *extack);
-int fib6_del(struct rt6_info *rt, struct nl_info *info);
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+ struct nl_info *info, struct netlink_ext_ack *extack);
+int fib6_del(struct fib6_info *rt, struct nl_info *info);
+
+static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
+{
+ return f6i->fib6_nh.nh_dev;
+}
+
+static inline
+struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
+{
+ return f6i->fib6_nh.nh_lwtstate;
+}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
void fib6_run_gc(unsigned long expires, struct net *net, bool force);
@@ -416,8 +449,14 @@ void __net_exit fib6_notifier_exit(struct net *net);
unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
-void fib6_update_sernum(struct rt6_info *rt);
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
+void fib6_update_sernum(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
+
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
+static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
+{
+ return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
+}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 08b132381984f..59656fc580df7 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,12 +66,6 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
-static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
-{
- return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
-}
-
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
@@ -100,29 +94,29 @@ void ip6_route_cleanup(void);
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
-int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
-int ip6_ins_rt(struct rt6_info *);
-int ip6_del_rt(struct rt6_info *);
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+int ip6_ins_rt(struct net *net, struct fib6_info *f6i);
+int ip6_del_rt(struct net *net, struct fib6_info *f6i);
-void rt6_flush_exceptions(struct rt6_info *rt);
-int rt6_remove_exception_rt(struct rt6_info *rt);
-void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+void rt6_flush_exceptions(struct fib6_info *f6i);
+void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
unsigned long now);
-static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
+static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i,
const struct in6_addr *daddr,
unsigned int prefs,
struct in6_addr *saddr)
{
- struct inet6_dev *idev =
- rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
int err = 0;
- if (rt && rt->rt6i_prefsrc.plen)
- *saddr = rt->rt6i_prefsrc.addr;
- else
- err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
- daddr, prefs, saddr);
+ if (f6i && f6i->fib6_prefsrc.plen) {
+ *saddr = f6i->fib6_prefsrc.addr;
+ } else {
+ struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL;
+
+ err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr);
+ }
return err;
}
@@ -137,8 +131,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
void fib6_force_start_gc(struct net *net);
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
- const struct in6_addr *addr, bool anycast);
+struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev,
+ const struct in6_addr *addr, bool anycast,
+ gfp_t gfp_flags);
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags);
@@ -147,9 +142,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
* support functions for ND
*
*/
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
+ const struct in6_addr *addr,
struct net_device *dev);
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
+ const struct in6_addr *gwaddr,
struct net_device *dev, unsigned int pref);
void rt6_purge_dflt_routers(struct net *net);
@@ -174,14 +171,14 @@ struct rt6_rtnl_dump_arg {
struct net *net;
};
-int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
void rt6_disable_ip(struct net_device *dev, unsigned long event);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
-void rt6_multipath_rebalance(struct rt6_info *rt);
+void rt6_multipath_rebalance(struct fib6_info *f6i);
void rt6_uncached_list_add(struct rt6_info *rt);
void rt6_uncached_list_del(struct rt6_info *rt);
@@ -269,12 +266,38 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
return daddr;
}
-static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
+static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
- return a->dst.dev == b->dst.dev &&
- a->rt6i_idev == b->rt6i_idev &&
- ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
- !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
+ return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
+ ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
+ !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
}
+static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+{
+ struct inet6_dev *idev;
+ unsigned int mtu;
+
+ if (dst_metric_locked(dst, RTAX_MTU)) {
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ return mtu;
+ }
+
+ mtu = IPV6_MIN_MTU;
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+ return mtu;
+}
+
+u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+ struct in6_addr *saddr);
+
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+ struct net_device *dev, struct sk_buff *skb,
+ const void *daddr);
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 81d0f2107ff1a..69c91d1934c15 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
}
#endif
+u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
+
#endif /* _NET_FIB_H */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 540a4b4417bfb..90ff430f5e9d0 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -379,6 +379,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0;
}
+static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return iph->ttl;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return ((const struct ipv6hdr *)iph)->hop_limit;
+ else
+ return 0;
+}
+
/* Propogate ECN bits out */
static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
const struct sk_buff *skb)
@@ -466,12 +477,12 @@ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstat
return (struct ip_tunnel_info *)lwtstate->data;
}
-extern struct static_key ip_tunnel_metadata_cnt;
+DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
/* Returns > 0 if metadata should be collected */
static inline int ip_tunnel_collect_metadata(void)
{
- return static_key_false(&ip_tunnel_metadata_cnt);
+ return static_branch_unlikely(&ip_tunnel_metadata_cnt);
}
void __init ip_tunnel_core_init(void);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index aea7a124e66b0..6d6e21dee4621 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -656,6 +656,7 @@ struct ip_vs_dest {
volatile unsigned int flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
+ atomic_t last_weight; /* server latest weight */
refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
@@ -750,14 +751,14 @@ struct ip_vs_app {
* 2=Mangled but checksum was not updated
*/
int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
- struct sk_buff *, int *diff);
+ struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);
/* input hook: Process packet in outin direction, diff set for TCP.
* Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
* 2=Mangled but checksum was not updated
*/
int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
- struct sk_buff *, int *diff);
+ struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);
/* ip_vs_app initializer */
int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
@@ -1315,8 +1316,10 @@ int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16
int ip_vs_app_inc_get(struct ip_vs_app *inc);
void ip_vs_app_inc_put(struct ip_vs_app *inc);
-int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
-int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
+int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb,
+ struct ip_vs_iphdr *ipvsh);
+int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb,
+ struct ip_vs_iphdr *ipvsh);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 836f31af1369b..16475c269749a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -298,6 +298,7 @@ struct ipcm6_cookie {
__s16 tclass;
__s8 dontfrag;
struct ipv6_txoptions *opt;
+ __u16 gso_size;
};
static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
@@ -906,6 +907,11 @@ static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
}
+static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6)
+{
+ return fl6->flowlabel & IPV6_FLOWLABEL_MASK;
+}
+
/*
* Prototypes exported by ipv6
*/
@@ -950,6 +956,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
void *from, int length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
+ struct inet_cork_full *cork,
const struct sockcm_cookie *sockc);
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
@@ -958,8 +965,6 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
-
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
@@ -1044,8 +1049,6 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
-int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b2f3a0c018e78..851a5e19ae320 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3378,6 +3378,8 @@ enum ieee80211_reconfig_type {
* frame in case that no beacon was heard from the AP/P2P GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
+ * If duration is greater than zero, mac80211 hints to the driver the
+ * duration for which the operation is requested.
* The callback is optional and can (should!) sleep.
*
* @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending
@@ -3697,7 +3699,8 @@ struct ieee80211_ops {
u32 sset, u8 *data);
void (*mgd_prepare_tx)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ u16 duration);
void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif);
@@ -4450,6 +4453,19 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
/**
+ * ieee80211_csa_set_counter - request mac80211 to set csa counter
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @counter: the new value for the counter
+ *
+ * The csa counter can be changed by the device, this API should be
+ * used by the device driver to update csa counter in mac80211.
+ *
+ * It should never be used together with ieee80211_csa_update_counter(),
+ * as it will cause a race condition around the counter value.
+ */
+void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
+
+/**
* ieee80211_csa_finish - notify mac80211 about channel switch
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e421f86af043b..6c1eecd56a4d0 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -246,6 +246,7 @@ static inline void *neighbour_priv(const struct neighbour *n)
#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
+#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
#define NEIGH_UPDATE_F_ISROUTER 0x40000000
#define NEIGH_UPDATE_F_ADMIN 0x80000000
@@ -526,5 +527,21 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
} while (read_seqretry(&n->ha_lock, seq));
}
-
+static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+ int *notify)
+{
+ u8 ndm_flags = 0;
+
+ if (!(flags & NEIGH_UPDATE_F_ADMIN))
+ return;
+
+ ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if (ndm_flags & NTF_EXT_LEARNED)
+ neigh->flags |= NTF_EXT_LEARNED;
+ else
+ neigh->flags &= ~NTF_EXT_LEARNED;
+ *notify = 1;
+ }
+}
#endif
diff --git a/include/net/net_failover.h b/include/net/net_failover.h
new file mode 100644
index 0000000000000..b12a1c469d1ce
--- /dev/null
+++ b/include/net/net_failover.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _NET_FAILOVER_H
+#define _NET_FAILOVER_H
+
+#include <net/failover.h>
+
+/* failover state */
+struct net_failover_info {
+ /* primary netdev with same MAC */
+ struct net_device __rcu *primary_dev;
+
+ /* standby netdev */
+ struct net_device __rcu *standby_dev;
+
+ /* primary netdev stats */
+ struct rtnl_link_stats64 primary_stats;
+
+ /* standby netdev stats */
+ struct rtnl_link_stats64 standby_stats;
+
+ /* aggregated stats */
+ struct rtnl_link_stats64 failover_stats;
+
+ /* spinlock while updating stats */
+ spinlock_t stats_lock;
+};
+
+struct failover *net_failover_create(struct net_device *standby_dev);
+void net_failover_destroy(struct failover *failover);
+
+#define FAILOVER_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
+ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+ NETIF_F_HIGHDMA | NETIF_F_LRO)
+
+#define FAILOVER_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
+ NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
+
+#endif /* _NET_FAILOVER_H */
diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
index ebd869473603a..cd24be4c4a99b 100644
--- a/include/net/netfilter/ipv4/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h
@@ -6,7 +6,7 @@
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
- const struct nf_nat_range *range,
+ const struct nf_nat_range2 *range,
const struct net_device *out);
void nf_nat_masquerade_ipv4_register_notifier(void);
diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
index 1ed4f2631ed6d..0c3b5ebf0bb8d 100644
--- a/include/net/netfilter/ipv6/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h
@@ -3,7 +3,7 @@
#define _NF_NAT_MASQUERADE_IPV6_H_
unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
const struct net_device *out);
void nf_nat_masquerade_ipv6_register_notifier(void);
void nf_nat_masquerade_ipv6_unregister_notifier(void);
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index e61184fbfb710..1910b65724302 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -13,4 +13,15 @@ unsigned int nf_conncount_count(struct net *net,
const u32 *key,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
+
+unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone,
+ bool *addit);
+
+bool nf_conncount_add(struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple);
+
+void nf_conncount_cache_free(struct hlist_head *hhead);
+
#endif
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 833752dd0c583..ba9fa4592f2b2 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -6,6 +6,7 @@
#include <linux/netdevice.h>
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/dst.h>
struct nf_flowtable;
@@ -13,25 +14,24 @@ struct nf_flowtable;
struct nf_flowtable_type {
struct list_head list;
int family;
- void (*gc)(struct work_struct *work);
+ int (*init)(struct nf_flowtable *ft);
void (*free)(struct nf_flowtable *ft);
- const struct rhashtable_params *params;
nf_hookfn *hook;
struct module *owner;
};
struct nf_flowtable {
+ struct list_head list;
struct rhashtable rhashtable;
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
};
enum flow_offload_tuple_dir {
- FLOW_OFFLOAD_DIR_ORIGINAL,
- FLOW_OFFLOAD_DIR_REPLY,
- __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
+ FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
+ FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
+ FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
-#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
struct flow_offload_tuple {
union {
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
int oifidx;
+ u16 mtu;
+
struct dst_entry *dst_cache;
};
@@ -66,6 +68,7 @@ struct flow_offload_tuple_rhash {
#define FLOW_OFFLOAD_SNAT 0x1
#define FLOW_OFFLOAD_DNAT 0x2
#define FLOW_OFFLOAD_DYING 0x4
+#define FLOW_OFFLOAD_TEARDOWN 0x8
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -98,11 +101,14 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
-void nf_flow_offload_work_gc(struct work_struct *work);
-extern const struct rhashtable_params nf_flow_offload_rhash_params;
-void flow_offload_dead(struct flow_offload *flow);
+void flow_offload_teardown(struct flow_offload *flow);
+static inline void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
int nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 207a467e7ca60..a17eb2f8d40e8 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -39,7 +39,7 @@ struct nf_conn_nat {
/* Set up the info structure to map into this range. */
unsigned int nf_nat_setup_info(struct nf_conn *ct,
- const struct nf_nat_range *range,
+ const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype);
extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
@@ -75,4 +75,8 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
#endif
}
+int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
+ const struct nf_hook_ops *nat_ops, unsigned int ops_count);
+void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
+ unsigned int ops_count);
#endif
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 235bd0e9a5aac..dc7cd0440229a 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -11,6 +11,10 @@
unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
unsigned int hooknum, struct sk_buff *skb);
+unsigned int
+nf_nat_inet_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+
int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family);
static inline int nf_nat_initialized(struct nf_conn *ct,
@@ -22,11 +26,4 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
return ct->status & IPS_DST_NAT_DONE;
}
-struct nlattr;
-
-extern int
-(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
- enum nf_nat_manip_type manip,
- const struct nlattr *attr);
-
#endif /* _NF_NAT_CORE_H */
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index ce7c2b4e64bb7..d300b8f03972c 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -7,7 +7,7 @@ struct nf_nat_l3proto {
u8 l3proto;
bool (*in_range)(const struct nf_conntrack_tuple *t,
- const struct nf_nat_range *range);
+ const struct nf_nat_range2 *range);
u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16);
@@ -33,7 +33,7 @@ struct nf_nat_l3proto {
struct flowi *fl);
int (*nlattr_to_range)(struct nlattr *tb[],
- struct nf_nat_range *range);
+ struct nf_nat_range2 *range);
};
int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
@@ -44,66 +44,14 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum);
-unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
-
-unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
-
-unsigned int nf_nat_ipv4_local_fn(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
-
-unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
-
int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum, unsigned int hdrlen);
-unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
-
-unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
-
-unsigned int nf_nat_ipv6_local_fn(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
+int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops);
+void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
-unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state,
- unsigned int (*do_chain)(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct nf_conn *ct));
+int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops);
+void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
#endif /* _NF_NAT_L3PROTO_H */
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 67835ff8a2d98..b4d6b29bca62a 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -34,12 +34,12 @@ struct nf_nat_l4proto {
*/
void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct);
int (*nlattr_to_range)(struct nlattr *tb[],
- struct nf_nat_range *range);
+ struct nf_nat_range2 *range);
};
/* Protocol registration. */
@@ -72,11 +72,11 @@ bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct, u16 *rover);
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_range *range);
+ struct nf_nat_range2 *range);
#endif /*_NF_NAT_L4PROTO_H*/
diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h
index 5ddabb08c472d..c129aacc8ae8f 100644
--- a/include/net/netfilter/nf_nat_redirect.h
+++ b/include/net/netfilter/nf_nat_redirect.h
@@ -7,7 +7,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
const struct nf_nat_ipv4_multi_range_compat *mr,
unsigned int hooknum);
unsigned int
-nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
unsigned int hooknum);
#endif /* _NF_NAT_REDIRECT_H_ */
diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h
index 8230fefff9f5e..f9d7bee9bd4e5 100644
--- a/include/net/netfilter/nf_socket.h
+++ b/include/net/netfilter/nf_socket.h
@@ -2,22 +2,7 @@
#ifndef _NF_SOCK_H_
#define _NF_SOCK_H_
-struct net_device;
-struct sk_buff;
-struct sock;
-struct net;
-
-static inline bool nf_sk_is_transparent(struct sock *sk)
-{
- switch (sk->sk_state) {
- case TCP_TIME_WAIT:
- return inet_twsk(sk)->tw_transparent;
- case TCP_NEW_SYN_RECV:
- return inet_rsk(inet_reqsk(sk))->no_srccheck;
- default:
- return inet_sk(sk)->transparent;
- }
-}
+#include <net/sock.h>
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
const struct net_device *indev);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a1e28dd5d0bff..08c005ce56e9c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -9,6 +9,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h>
+#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
@@ -276,23 +277,6 @@ struct nft_set_estimate {
enum nft_set_class space;
};
-/**
- * struct nft_set_type - nf_tables set type
- *
- * @select_ops: function to select nft_set_ops
- * @ops: default ops, used when no select_ops functions is present
- * @list: used internally
- * @owner: module reference
- */
-struct nft_set_type {
- const struct nft_set_ops *(*select_ops)(const struct nft_ctx *,
- const struct nft_set_desc *desc,
- u32 flags);
- const struct nft_set_ops *ops;
- struct list_head list;
- struct module *owner;
-};
-
struct nft_set_ext;
struct nft_expr;
@@ -311,7 +295,6 @@ struct nft_expr;
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
* @elemsize: element private size
- * @features: features supported by the implementation
*/
struct nft_set_ops {
bool (*lookup)(const struct net *net,
@@ -360,11 +343,26 @@ struct nft_set_ops {
const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
void (*destroy)(const struct nft_set *set);
+ void (*gc_init)(const struct nft_set *set);
unsigned int elemsize;
+};
+
+/**
+ * struct nft_set_type - nf_tables set type
+ *
+ * @ops: set ops for this type
+ * @list: used internally
+ * @owner: module reference
+ * @features: features supported by the implementation
+ */
+struct nft_set_type {
+ const struct nft_set_ops ops;
+ struct list_head list;
+ struct module *owner;
u32 features;
- const struct nft_set_type *type;
};
+#define to_set_type(o) container_of(o, struct nft_set_type, ops)
int nft_register_set(struct nft_set_type *type);
void nft_unregister_set(struct nft_set_type *type);
@@ -374,6 +372,8 @@ void nft_unregister_set(struct nft_set_type *type);
*
* @list: table set list node
* @bindings: list of set bindings
+ * @table: table this set belongs to
+ * @net: netnamespace this set belongs to
* @name: name of the set
* @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
@@ -397,6 +397,8 @@ void nft_unregister_set(struct nft_set_type *type);
struct nft_set {
struct list_head list;
struct list_head bindings;
+ struct nft_table *table;
+ possible_net_t net;
char *name;
u64 handle;
u32 ktype;
@@ -590,7 +592,7 @@ static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
}
-static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
+static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext)
{
return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
}
@@ -608,7 +610,7 @@ static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
{
return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
- time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
+ time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -712,6 +714,7 @@ struct nft_expr_type {
};
#define NFT_EXPR_STATEFUL 0x1
+#define NFT_EXPR_GC 0x2
/**
* struct nft_expr_ops - nf_tables expression operations
@@ -743,11 +746,15 @@ struct nft_expr_ops {
const struct nft_expr *expr);
void (*destroy)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
+ void (*destroy_clone)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb,
const struct nft_expr *expr);
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
+ bool (*gc)(struct net *net,
+ const struct nft_expr *expr);
const struct nft_expr_type *type;
void *data;
};
@@ -854,6 +861,7 @@ enum nft_chain_flags {
*
* @rules: list of rules in the chain
* @list: used internally
+ * @rhlhead: used internally
* @table: table that this chain belongs to
* @handle: chain handle
* @use: number of jump references to this chain
@@ -862,8 +870,11 @@ enum nft_chain_flags {
* @name: name of the chain
*/
struct nft_chain {
+ struct nft_rule *__rcu *rules_gen_0;
+ struct nft_rule *__rcu *rules_gen_1;
struct list_head rules;
struct list_head list;
+ struct rhlist_head rhlhead;
struct nft_table *table;
u64 handle;
u32 use;
@@ -871,8 +882,13 @@ struct nft_chain {
u8 flags:6,
genmask:2;
char *name;
+
+ /* Only used during control plane commit phase: */
+ struct nft_rule **rules_next;
};
+int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
+
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE,
@@ -889,8 +905,8 @@ enum nft_chain_types {
* @owner: module owner
* @hook_mask: mask of valid hooks
* @hooks: array of hook functions
- * @init: chain initialization function
- * @free: chain release function
+ * @ops_register: base chain register function
+ * @ops_unregister: base chain unregister function
*/
struct nft_chain_type {
const char *name;
@@ -899,8 +915,8 @@ struct nft_chain_type {
struct module *owner;
unsigned int hook_mask;
nf_hookfn *hooks[NF_MAX_HOOKS];
- int (*init)(struct nft_ctx *ctx);
- void (*free)(struct nft_ctx *ctx);
+ int (*ops_register)(struct net *net, const struct nf_hook_ops *ops);
+ void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -952,7 +968,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* struct nft_table - nf_tables table
*
* @list: used internally
- * @chains: chains in the table
+ * @chains_ht: chains in the table
+ * @chains: same, for stable walks
* @sets: sets in the table
* @objects: stateful objects in the table
* @flowtables: flow tables in the table
@@ -966,6 +983,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
*/
struct nft_table {
struct list_head list;
+ struct rhltable chains_ht;
struct list_head chains;
struct list_head sets;
struct list_head objects;
@@ -1020,9 +1038,9 @@ static inline void *nft_obj_data(const struct nft_object *obj)
#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
-struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
- const struct nlattr *nla, u32 objtype,
- u8 genmask);
+struct nft_object *nft_obj_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u32 objtype,
+ u8 genmask);
void nft_obj_notify(struct net *net, struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq,
@@ -1067,7 +1085,8 @@ struct nft_object_ops {
int (*init)(const struct nft_ctx *ctx,
const struct nlattr *const tb[],
struct nft_object *obj);
- void (*destroy)(struct nft_object *obj);
+ void (*destroy)(const struct nft_ctx *ctx,
+ struct nft_object *obj);
int (*dump)(struct sk_buff *skb,
struct nft_object *obj,
bool reset);
@@ -1111,12 +1130,9 @@ struct nft_flowtable {
struct nf_flowtable data;
};
-struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
- const struct nlattr *nla,
- u8 genmask);
-void nft_flow_table_iterate(struct net *net,
- void (*iter)(struct nf_flowtable *flowtable, void *data),
- void *data);
+struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+ const struct nlattr *nla,
+ u8 genmask);
void nft_register_flowtable_type(struct nf_flowtable_type *type);
void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index ea5aab568be83..e0c0c2558ec48 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -2,6 +2,8 @@
#ifndef _NET_NF_TABLES_CORE_H
#define _NET_NF_TABLES_CORE_H
+#include <net/netfilter/nf_tables.h>
+
extern struct nft_expr_type nft_imm_type;
extern struct nft_expr_type nft_cmp_type;
extern struct nft_expr_type nft_lookup_type;
@@ -10,6 +12,9 @@ extern struct nft_expr_type nft_byteorder_type;
extern struct nft_expr_type nft_payload_type;
extern struct nft_expr_type nft_dynset_type;
extern struct nft_expr_type nft_range_type;
+extern struct nft_expr_type nft_meta_type;
+extern struct nft_expr_type nft_rt_type;
+extern struct nft_expr_type nft_exthdr_type;
int nf_tables_core_module_init(void);
void nf_tables_core_module_exit(void);
@@ -20,6 +25,12 @@ struct nft_cmp_fast_expr {
u8 len;
};
+struct nft_immediate_expr {
+ struct nft_data data;
+ enum nft_registers dreg:8;
+ u8 dlen;
+};
+
/* Calculate the mask for the nft_cmp_fast expression. On big endian the
* mask needs to include the *upper* bytes when interpreting that data as
* something smaller than the full u32, therefore a cpu_to_le32 is done.
diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
new file mode 100644
index 0000000000000..9754a50ecde9c
--- /dev/null
+++ b/include/net/netfilter/nf_tproxy.h
@@ -0,0 +1,113 @@
+#ifndef _NF_TPROXY_H_
+#define _NF_TPROXY_H_
+
+#include <net/tcp.h>
+
+enum nf_tproxy_lookup_t {
+ NF_TPROXY_LOOKUP_LISTENER,
+ NF_TPROXY_LOOKUP_ESTABLISHED,
+};
+
+static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
+{
+ if (inet_sk_transparent(sk))
+ return true;
+
+ sock_gen_put(sk);
+ return false;
+}
+
+__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
+
+/**
+ * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
+ * @skb: The skb being processed.
+ * @laddr: IPv4 address to redirect to or zero.
+ * @lport: TCP port to redirect to or zero.
+ * @sk: The TIME_WAIT TCP socket found by the lookup.
+ *
+ * We have to handle SYN packets arriving to TIME_WAIT sockets
+ * differently: instead of reopening the connection we should rather
+ * redirect the new connection to the proxy if there's a listener
+ * socket present.
+ *
+ * nf_tproxy_handle_time_wait4() consumes the socket reference passed in.
+ *
+ * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * no such listener is found, or NULL if the TCP header is incomplete.
+ */
+struct sock *
+nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ __be32 laddr, __be16 lport, struct sock *sk);
+
+/*
+ * This is used when the user wants to intercept a connection matching
+ * an explicit iptables rule. In this case the sockets are assumed
+ * matching in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple, it is returned, assuming the redirection
+ * already took place and we process a packet belonging to an
+ * established connection
+ *
+ * - match: if there's a listening socket matching the redirection
+ * (e.g. on-port & on-ip of the connection), it is returned,
+ * regardless if it was bound to 0.0.0.0 or an explicit
+ * address. The reasoning is that if there's an explicit rule, it
+ * does not really matter if the listener is bound to an interface
+ * or to 0. The user already stated that he wants redirection
+ * (since he added the rule).
+ *
+ * Please note that there's an overlap between what a TPROXY target
+ * and a socket match will match. Normally if you have both rules the
+ * "socket" match will be the first one, effectively all packets
+ * belonging to established connections going through that one.
+ */
+struct sock *
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type);
+
+const struct in6_addr *
+nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
+ const struct in6_addr *daddr);
+
+/**
+ * nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
+ * @skb: The skb being processed.
+ * @tproto: Transport protocol.
+ * @thoff: Transport protocol header offset.
+ * @net: Network namespace.
+ * @laddr: IPv6 address to redirect to.
+ * @lport: TCP port to redirect to or zero.
+ * @sk: The TIME_WAIT TCP socket found by the lookup.
+ *
+ * We have to handle SYN packets arriving to TIME_WAIT sockets
+ * differently: instead of reopening the connection we should rather
+ * redirect the new connection to the proxy if there's a listener
+ * socket present.
+ *
+ * nf_tproxy_handle_time_wait6() consumes the socket reference passed in.
+ *
+ * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * no such listener is found, or NULL if the TCP header is incomplete.
+ */
+struct sock *
+nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
+ struct net *net,
+ const struct in6_addr *laddr,
+ const __be16 lport,
+ struct sock *sk);
+
+struct sock *
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type);
+
+#endif /* _NF_TPROXY_H_ */
diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
index 612cfb63ac682..ea32a7d3cf1bc 100644
--- a/include/net/netfilter/nfnetlink_log.h
+++ b/include/net/netfilter/nfnetlink_log.h
@@ -1,18 +1 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _KER_NFNETLINK_LOG_H
-#define _KER_NFNETLINK_LOG_H
-
-void
-nfulnl_log_packet(struct net *net,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *li_user,
- const char *prefix);
-
-#define NFULNL_COPY_DISABLED 0xff
-
-#endif /* _KER_NFNETLINK_LOG_H */
-
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
deleted file mode 100644
index 5c69e9b093887..0000000000000
--- a/include/net/netfilter/nft_meta.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NFT_META_H_
-#define _NFT_META_H_
-
-struct nft_meta {
- enum nft_meta_keys key:8;
- union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
- };
-};
-
-extern const struct nla_policy nft_meta_policy[];
-
-int nft_meta_get_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[]);
-
-int nft_meta_set_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[]);
-
-int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr);
-
-int nft_meta_set_dump(struct sk_buff *skb,
- const struct nft_expr *expr);
-
-void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt);
-
-void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt);
-
-void nft_meta_set_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr);
-
-int nft_meta_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data);
-
-#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8491bc9c86b15..661348f23ea5a 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -160,6 +160,8 @@ struct netns_ipv4 {
int sysctl_tcp_pacing_ca_ratio;
int sysctl_tcp_wmem[3];
int sysctl_tcp_rmem[3];
+ int sysctl_tcp_comp_sack_nr;
+ unsigned long sysctl_tcp_comp_sack_delay_ns;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c29f09cfc9d73..c978a31b0f846 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 {
int max_hbh_opts_cnt;
int max_dst_opts_len;
int max_hbh_opts_len;
+ int seg6_flowlabel;
};
struct netns_ipv6 {
@@ -60,7 +61,8 @@ struct netns_ipv6 {
#endif
struct xt_table *ip6table_nat;
#endif
- struct rt6_info *ip6_null_entry;
+ struct fib6_info *fib6_null_entry;
+ struct rt6_info *ip6_null_entry;
struct rt6_statistics *rt6_stats;
struct timer_list ip6_fib_timer;
struct hlist_head *fib_table_hash;
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 48134353411d1..94767ea3a4906 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -4,13 +4,12 @@
#include <linux/list.h>
-struct nft_af_info;
-
struct netns_nftables {
struct list_head tables;
struct list_head commit_list;
unsigned int base_seq;
u8 gencursor;
+ u8 validate_state;
};
#endif
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
new file mode 100644
index 0000000000000..694d055e01efe
--- /dev/null
+++ b/include/net/page_pool.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool.h
+ * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+/**
+ * DOC: page_pool allocator
+ *
+ * This page_pool allocator is optimized for the XDP mode that
+ * uses one-frame-per-page, but have fallbacks that act like the
+ * regular page allocator APIs.
+ *
+ * Basic use involve replacing alloc_pages() calls with the
+ * page_pool_alloc_pages() call. Drivers should likely use
+ * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
+ *
+ * If page_pool handles DMA mapping (use page->private), then API user
+ * is responsible for invoking page_pool_put_page() once. In-case of
+ * elevated refcnt, the DMA state is released, assuming other users of
+ * the page will eventually call put_page().
+ *
+ * If no DMA mapping is done, then it can act as shim-layer that
+ * fall-through to alloc_page. As no state is kept on the page, the
+ * regular put_page() call is sufficient.
+ */
+#ifndef _NET_PAGE_POOL_H
+#define _NET_PAGE_POOL_H
+
+#include <linux/mm.h> /* Needed by ptr_ring */
+#include <linux/ptr_ring.h>
+#include <linux/dma-direction.h>
+
+#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
+#define PP_FLAG_ALL PP_FLAG_DMA_MAP
+
+/*
+ * Fast allocation side cache array/stack
+ *
+ * The cache size and refill watermark is related to the network
+ * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
+ * ring is usually refilled and the max consumed elements will be 64,
+ * thus a natural max size of objects needed in the cache.
+ *
+ * Keeping room for more objects, is due to XDP_DROP use-case. As
+ * XDP_DROP allows the opportunity to recycle objects directly into
+ * this array, as it shares the same softirq/NAPI protection. If
+ * cache is already full (or partly full) then the XDP_DROP recycles
+ * would have to take a slower code path.
+ */
+#define PP_ALLOC_CACHE_SIZE 128
+#define PP_ALLOC_CACHE_REFILL 64
+struct pp_alloc_cache {
+ u32 count;
+ void *cache[PP_ALLOC_CACHE_SIZE];
+};
+
+struct page_pool_params {
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid; /* Numa node id to allocate from pages from */
+ struct device *dev; /* device, for DMA pre-mapping purposes */
+ enum dma_data_direction dma_dir; /* DMA mapping direction */
+};
+
+struct page_pool {
+ struct rcu_head rcu;
+ struct page_pool_params p;
+
+ /*
+ * Data structure for allocation side
+ *
+ * Drivers allocation side usually already perform some kind
+ * of resource protection. Piggyback on this protection, and
+ * require driver to protect allocation side.
+ *
+ * For NIC drivers this means, allocate a page_pool per
+ * RX-queue. As the RX-queue is already protected by
+ * Softirq/BH scheduling and napi_schedule. NAPI schedule
+ * guarantee that a single napi_struct will only be scheduled
+ * on a single CPU (see napi_schedule).
+ */
+ struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
+
+ /* Data structure for storing recycled pages.
+ *
+ * Returning/freeing pages is more complicated synchronization
+ * wise, because free's can happen on remote CPUs, with no
+ * association with allocation resource.
+ *
+ * Use ptr_ring, as it separates consumer and producer
+ * effeciently, it a way that doesn't bounce cache-lines.
+ *
+ * TODO: Implement bulk return pages into this structure.
+ */
+ struct ptr_ring ring;
+};
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_pages(pool, gfp);
+}
+
+struct page_pool *page_pool_create(const struct page_pool_params *params);
+
+void page_pool_destroy(struct page_pool *pool);
+
+/* Never call this directly, use helpers below */
+void __page_pool_put_page(struct page_pool *pool,
+ struct page *page, bool allow_direct);
+
+static inline void page_pool_put_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+#ifdef CONFIG_PAGE_POOL
+ __page_pool_put_page(pool, page, allow_direct);
+#endif
+}
+/* Very limited use-cases allow recycle direct */
+static inline void page_pool_recycle_direct(struct page_pool *pool,
+ struct page *page)
+{
+ __page_pool_put_page(pool, page, true);
+}
+
+static inline bool is_page_pool_compiled_in(void)
+{
+#ifdef CONFIG_PAGE_POOL
+ return true;
+#else
+ return false;
+#endif
+}
+
+#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e828d31be5dae..a3c1a2c47cd4b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -33,7 +33,7 @@ struct tcf_block_ext_info {
};
struct tcf_block_cb;
-bool tcf_queue_work(struct work_struct *work);
+bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
#ifdef CONFIG_NET_CLS
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -683,9 +683,11 @@ static inline bool tc_skip_sw(u32 flags)
/* SKIP_HW and SKIP_SW are mutually exclusive flags. */
static inline bool tc_flags_valid(u32 flags)
{
- if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW))
+ if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW |
+ TCA_CLS_FLAGS_VERBOSE))
return false;
+ flags &= TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW;
if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)))
return false;
@@ -705,7 +707,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
cls_common->chain_index = tp->chain->index;
cls_common->protocol = tp->protocol;
cls_common->prio = tp->prio;
- if (tc_skip_sw(flags))
+ if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
cls_common->extack = extack;
}
@@ -776,6 +778,18 @@ struct tc_qopt_offload_stats {
struct gnet_stats_queue *qstats;
};
+enum tc_mq_command {
+ TC_MQ_CREATE,
+ TC_MQ_DESTROY,
+ TC_MQ_STATS,
+};
+
+struct tc_mq_qopt_offload {
+ enum tc_mq_command command;
+ u32 handle;
+ struct tc_qopt_offload_stats stats;
+};
+
enum tc_red_command {
TC_RED_REPLACE,
TC_RED_DESTROY,
diff --git a/include/net/route.h b/include/net/route.h
index dbb032d5921b4..bb53cdba38dc2 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -225,6 +225,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);
void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
void rt_add_uncached_list(struct rtable *rt);
void rt_del_uncached_list(struct rtable *rt);
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 14b6b3af89188..0bbaa54884233 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -64,7 +64,7 @@ struct rtnl_link_ops {
size_t priv_size;
void (*setup)(struct net_device *dev);
- int maxtype;
+ unsigned int maxtype;
const struct nla_policy *policy;
int (*validate)(struct nlattr *tb[],
struct nlattr *data[],
@@ -92,7 +92,7 @@ struct rtnl_link_ops {
unsigned int (*get_num_tx_queues)(void);
unsigned int (*get_num_rx_queues)(void);
- int slave_maxtype;
+ unsigned int slave_maxtype;
const struct nla_policy *slave_policy;
int (*slave_changelink)(struct net_device *dev,
struct net_device *slave_dev,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5154c8300262a..6488daa32f829 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -30,7 +30,6 @@ struct qdisc_rate_table {
enum qdisc_state_t {
__QDISC_STATE_SCHED,
__QDISC_STATE_DEACTIVATED,
- __QDISC_STATE_RUNNING,
};
struct qdisc_size_table {
@@ -86,6 +85,8 @@ struct Qdisc {
struct net_rate_estimator __rcu *rate_est;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
+ int padded;
+ refcount_t refcnt;
/*
* For performance sake on SMP, we put highly modified fields at the end
@@ -98,10 +99,9 @@ struct Qdisc {
unsigned long state;
struct Qdisc *next_sched;
struct sk_buff_head skb_bad_txq;
- int padded;
- refcount_t refcnt;
spinlock_t busylock ____cacheline_aligned_in_smp;
+ spinlock_t seqlock;
};
static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
@@ -111,15 +111,21 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
refcount_inc(&qdisc->refcnt);
}
-static inline bool qdisc_is_running(const struct Qdisc *qdisc)
+static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
+ if (qdisc->flags & TCQ_F_NOLOCK)
+ return spin_is_locked(&qdisc->seqlock);
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
- if (qdisc_is_running(qdisc))
+ if (qdisc->flags & TCQ_F_NOLOCK) {
+ if (!spin_trylock(&qdisc->seqlock))
+ return false;
+ } else if (qdisc_is_running(qdisc)) {
return false;
+ }
/* Variant of write_seqcount_begin() telling lockdep a trylock
* was attempted.
*/
@@ -131,6 +137,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
write_seqcount_end(&qdisc->running);
+ if (qdisc->flags & TCQ_F_NOLOCK)
+ spin_unlock(&qdisc->seqlock);
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -342,14 +350,14 @@ static inline int qdisc_qlen(const struct Qdisc *q)
static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
- __u32 qlen = 0;
+ __u32 qlen = q->qstats.qlen;
int i;
if (q->flags & TCQ_F_NOLOCK) {
for_each_possible_cpu(i)
qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
} else {
- qlen = q->q.qlen;
+ qlen += q->q.qlen;
}
return qlen;
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 20ff237c5eb2f..86f034b524d46 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -254,11 +254,10 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
#define SCTP_TSN_MAP_SIZE 4096
/* We will not record more than this many duplicate TSNs between two
- * SACKs. The minimum PMTU is 576. Remove all the headers and there
- * is enough room for 131 duplicate reports. Round down to the
+ * SACKs. The minimum PMTU is 512. Remove all the headers and there
+ * is enough room for 117 duplicate reports. Round down to the
* nearest power of 2.
*/
-enum { SCTP_MIN_PMTU = 576 };
enum { SCTP_MAX_DUP_TSNS = 16 };
enum { SCTP_MAX_GABS = 16 };
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index e6d349b2a7915..30b3e2fe240a8 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -429,32 +429,6 @@ static inline int sctp_list_single_entry(struct list_head *head)
return (head->next != head) && (head->next == head->prev);
}
-/* Break down data chunks at this point. */
-static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
-{
- struct sctp_sock *sp = sctp_sk(asoc->base.sk);
- struct sctp_af *af = sp->pf->af;
- int frag = pmtu;
-
- frag -= af->ip_options_len(asoc->base.sk);
- frag -= af->net_header_len;
- frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
-
- if (asoc->user_frag)
- frag = min_t(int, frag, asoc->user_frag);
-
- frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
- sctp_datachk_len(&asoc->stream)));
-
- return frag;
-}
-
-static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
-{
- sctp_assoc_sync_pmtu(asoc);
- asoc->pmtu_pending = 0;
-}
-
static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
{
return !list_empty(&chunk->list);
@@ -608,17 +582,29 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *
return t->dst;
}
-static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+/* Calculate max payload size given a MTU, or the total overhead if
+ * given MTU is zero
+ */
+static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
+ __u32 mtu, __u32 extra)
{
- __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
- SCTP_DEFAULT_MINSEGMENT);
+ __u32 overhead = sizeof(struct sctphdr) + extra;
- if (t->pathmtu == pmtu)
- return true;
+ if (sp)
+ overhead += sp->pf->af->net_header_len;
+ else
+ overhead += sizeof(struct ipv6hdr);
- t->pathmtu = pmtu;
+ if (WARN_ON_ONCE(mtu && mtu <= overhead))
+ mtu = overhead;
- return false;
+ return mtu ? mtu - overhead : overhead;
+}
+
+static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
+{
+ return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
+ SCTP_DEFAULT_MINSEGMENT));
}
#endif /* __net_sctp_h__ */
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2d0e782c90551..5ef1bad81ef54 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -207,7 +207,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
int len, __u8 flags, gfp_t gfp);
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
const __u32 lowest_tsn);
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc);
struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
@@ -215,7 +215,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
struct sctp_chunk *sctp_make_shutdown_complete(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
+int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
const size_t hint);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a0ec462bc1a9f..ebf809eed33ad 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2091,16 +2091,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
enum sctp_transport_cmd command,
sctp_sn_error_t error);
struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
-struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
- struct net *,
- const union sctp_addr *,
- const union sctp_addr *);
void sctp_assoc_migrate(struct sctp_association *, struct sock *);
int sctp_assoc_update(struct sctp_association *old,
struct sctp_association *new);
__u32 sctp_association_get_next_tsn(struct sctp_association *);
+void sctp_assoc_update_frag_point(struct sctp_association *asoc);
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu);
void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 099bad59dc901..e029e301faa51 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -49,7 +49,11 @@ struct seg6_pernet_data {
static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
{
+#if IS_ENABLED(CONFIG_IPV6)
return net->ipv6.seg6_data;
+#else
+ return NULL;
+#endif
}
extern int seg6_init(void);
@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
-
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id);
#endif
diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
new file mode 100644
index 0000000000000..661fd5b4d3e0b
--- /dev/null
+++ b/include/net/seg6_local.h
@@ -0,0 +1,32 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Authors:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_LOCAL_H
+#define _NET_SEG6_LOCAL_H
+
+#include <linux/percpu.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id);
+
+struct seg6_bpf_srh_state {
+ bool valid;
+ u16 hdrlen;
+};
+
+DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
+#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 4d2e8ad98985d..b3b75419eafec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -481,6 +481,11 @@ struct sock {
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
struct rcu_head sk_rcu;
@@ -803,10 +808,10 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
}
#ifdef CONFIG_NET
-extern struct static_key memalloc_socks;
+DECLARE_STATIC_KEY_FALSE(memalloc_socks_key);
static inline int sk_memalloc_socks(void)
{
- return static_key_false(&memalloc_socks);
+ return static_branch_unlikely(&memalloc_socks_key);
}
#else
@@ -2330,6 +2335,22 @@ static inline bool sk_fullsock(const struct sock *sk)
return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
}
+/* Checks if this SKB belongs to an HW offloaded socket
+ * and whether any SW fallbacks are required based on dev.
+ */
+static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
+ struct net_device *dev)
+{
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sock *sk = skb->sk;
+
+ if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
+ skb = sk->sk_validate_xmit_skb(sk, dev, skb);
+#endif
+
+ return skb;
+}
+
/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV
* SYNACK messages can be attached to either ones (depending on SYNCOOKIE)
*/
diff --git a/include/net/strparser.h b/include/net/strparser.h
index d96b59f45ebaa..f177c87ce38bd 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -90,6 +90,8 @@ static inline void strp_pause(struct strparser *strp)
/* May be called without holding lock for attached socket */
void strp_unpause(struct strparser *strp);
+/* Must be called with process lock held (lock_sock) */
+void __strp_unpause(struct strparser *strp);
static inline void save_strp_stats(struct strparser *strp,
struct strp_aggr_stats *agg_stats)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 39bc855d7fee6..d574ce63bf220 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -155,6 +155,7 @@ struct switchdev_notifier_fdb_info {
struct switchdev_notifier_info info; /* must be first */
const unsigned char *addr;
u16 vid;
+ bool added_by_user;
};
static inline struct net_device *
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f88f8a2cab0dd..0448e7c5d2b40 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -245,6 +245,7 @@ extern long sysctl_tcp_mem[3];
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
+#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
@@ -333,8 +334,7 @@ void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th);
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
@@ -401,6 +401,10 @@ void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
+int tcp_set_rcvlowat(struct sock *sk, int val);
+void tcp_data_ready(struct sock *sk);
+int tcp_mmap(struct file *file, struct socket *sock,
+ struct vm_area_struct *vma);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
@@ -552,7 +556,12 @@ void tcp_fin(struct sock *sk);
void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
- hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
+ if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
+ __sock_put(sk);
+
+ if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1)
+ __sock_put(sk);
+
inet_csk_clear_xmit_timers(sk);
}
@@ -809,9 +818,8 @@ struct tcp_skb_cb {
#endif
} header; /* For incoming skbs */
struct {
- __u32 key;
__u32 flags;
- struct bpf_map *map;
+ struct sock *sk_redir;
void *data_end;
} bpf;
};
@@ -1865,6 +1873,10 @@ void tcp_v4_init(void);
void tcp_init(void);
/* tcp_recovery.c */
+void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb);
+void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
+extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
+ u32 reo_wnd);
extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
u64 xmit_time);
@@ -2095,4 +2107,12 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+void clean_acked_data_enable(struct inet_connection_sock *icsk,
+ void (*cad)(struct sock *sk, u32 ack_seq));
+void clean_acked_data_disable(struct inet_connection_sock *icsk);
+
+#endif
+
#endif /* _TCP_H */
diff --git a/include/net/tipc.h b/include/net/tipc.h
index 07670ec022a71..f0e7e6bc1befa 100644
--- a/include/net/tipc.h
+++ b/include/net/tipc.h
@@ -44,11 +44,11 @@ struct tipc_basic_hdr {
__be32 w[4];
};
-static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
+static inline __be32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
{
u32 w0 = ntohl(hdr->w[0]);
bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK;
- int key;
+ __be32 key;
/* Return source node identity as key */
if (likely(!keepalive_msg))
diff --git a/include/net/tls.h b/include/net/tls.h
index f5fb16da38606..70c273777fe9f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -83,24 +83,10 @@ struct tls_device {
void (*unhash)(struct tls_device *device, struct sock *sk);
};
-struct tls_sw_context {
+struct tls_sw_context_tx {
struct crypto_aead *aead_send;
- struct crypto_aead *aead_recv;
struct crypto_wait async_wait;
- /* Receive context */
- struct strparser strp;
- void (*saved_data_ready)(struct sock *sk);
- unsigned int (*sk_poll)(struct file *file, struct socket *sock,
- struct poll_table_struct *wait);
- struct sk_buff *recv_pkt;
- u8 control;
- bool decrypted;
-
- char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
- char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
-
- /* Sending context */
char aad_space[TLS_AAD_SPACE_SIZE];
unsigned int sg_plaintext_size;
@@ -117,6 +103,54 @@ struct tls_sw_context {
struct scatterlist sg_aead_out[2];
};
+struct tls_sw_context_rx {
+ struct crypto_aead *aead_recv;
+ struct crypto_wait async_wait;
+
+ struct strparser strp;
+ void (*saved_data_ready)(struct sock *sk);
+ unsigned int (*sk_poll)(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+ struct sk_buff *recv_pkt;
+ u8 control;
+ bool decrypted;
+
+ char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
+ char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
+
+};
+
+struct tls_record_info {
+ struct list_head list;
+ u32 end_seq;
+ int len;
+ int num_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct tls_offload_context {
+ struct crypto_aead *aead_send;
+ spinlock_t lock; /* protects records list */
+ struct list_head records_list;
+ struct tls_record_info *open_record;
+ struct tls_record_info *retransmit_hint;
+ u64 hint_record_sn;
+ u64 unacked_record_sn;
+
+ struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
+ void (*sk_destruct)(struct sock *sk);
+ u8 driver_state[];
+ /* The TLS layer reserves room for driver specific state
+ * Currently the belief is that there is not enough
+ * driver specific state to justify another layer of indirection
+ */
+#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE \
+ (ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) + \
+ TLS_DRIVER_STATE_SIZE)
+
enum {
TLS_PENDING_CLOSED_RECORD
};
@@ -141,9 +175,15 @@ struct tls_context {
struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
};
- void *priv_ctx;
+ struct list_head list;
+ struct net_device *netdev;
+ refcount_t refcount;
+
+ void *priv_ctx_tx;
+ void *priv_ctx_rx;
- u8 conf:3;
+ u8 tx_conf:3;
+ u8 rx_conf:3;
struct cipher_context tx;
struct cipher_context rx;
@@ -181,7 +221,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
-void tls_sw_free_resources(struct sock *sk);
+void tls_sw_free_resources_tx(struct sock *sk);
+void tls_sw_free_resources_rx(struct sock *sk);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
unsigned int tls_sw_poll(struct file *file, struct socket *sock,
@@ -190,9 +231,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
-void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
-void tls_icsk_clean_acked(struct sock *sk);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_device_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+void tls_device_sk_destruct(struct sock *sk);
+void tls_device_init(void);
+void tls_device_cleanup(void);
+
+struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+ u32 seq, u64 *p_record_sn);
+
+static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
+{
+ return rec->len == 0;
+}
+static inline u32 tls_record_start_seq(struct tls_record_info *rec)
+{
+ return rec->end_seq - rec->len;
+}
+
+void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
int tls_push_sg(struct sock *sk, struct tls_context *ctx,
struct scatterlist *sg, u16 first_offset,
int flags);
@@ -229,6 +289,13 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
return tls_ctx->pending_open_record_frags;
}
+static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
+{
+ return sk_fullsock(sk) &&
+ /* matches smp_store_release in tls_set_device_offload */
+ smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+}
+
static inline void tls_err_abort(struct sock *sk, int err)
{
sk->sk_err = err;
@@ -301,16 +368,22 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk)
return icsk->icsk_ulp_data;
}
-static inline struct tls_sw_context *tls_sw_ctx(
+static inline struct tls_sw_context_rx *tls_sw_ctx_rx(
const struct tls_context *tls_ctx)
{
- return (struct tls_sw_context *)tls_ctx->priv_ctx;
+ return (struct tls_sw_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
+ const struct tls_context *tls_ctx)
+{
+ return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
}
static inline struct tls_offload_context *tls_offload_ctx(
const struct tls_context *tls_ctx)
{
- return (struct tls_offload_context *)tls_ctx->priv_ctx;
+ return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
}
int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
@@ -318,4 +391,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
void tls_register_device(struct tls_device *device);
void tls_unregister_device(struct tls_device *device);
+struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+
+int tls_sw_fallback_init(struct sock *sk,
+ struct tls_offload_context *offload_ctx,
+ struct tls_crypto_info *crypto_info);
+
#endif /* _TLS_OFFLOAD_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index d8ca3b26964d9..7ba0ed252c52c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -174,6 +174,9 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
+struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features);
+
static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
{
struct udphdr *uh;
@@ -269,6 +272,7 @@ int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udp_push_pending_frames(struct sock *sk);
void udp_flush_pending_frames(struct sock *sk);
+int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index ad73d8b3fcc25..b99a02ae39343 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -262,6 +262,7 @@ struct vxlan_dev {
#define VXLAN_F_COLLECT_METADATA 0x2000
#define VXLAN_F_GPE 0x4000
#define VXLAN_F_IPV6_LINKLOCAL 0x8000
+#define VXLAN_F_TTL_INHERIT 0x10000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b2362ddfa694f..2deea7166a348 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -33,16 +33,115 @@
* also mandatory during RX-ring setup.
*/
+enum xdp_mem_type {
+ MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
+ MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */
+ MEM_TYPE_PAGE_POOL,
+ MEM_TYPE_ZERO_COPY,
+ MEM_TYPE_MAX,
+};
+
+/* XDP flags for ndo_xdp_xmit */
+#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
+#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
+
+struct xdp_mem_info {
+ u32 type; /* enum xdp_mem_type, but known size type */
+ u32 id;
+};
+
+struct page_pool;
+
+struct zero_copy_allocator {
+ void (*free)(struct zero_copy_allocator *zca, unsigned long handle);
+};
+
struct xdp_rxq_info {
struct net_device *dev;
u32 queue_index;
u32 reg_state;
+ struct xdp_mem_info mem;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+struct xdp_buff {
+ void *data;
+ void *data_end;
+ void *data_meta;
+ void *data_hard_start;
+ unsigned long handle;
+ struct xdp_rxq_info *rxq;
+};
+
+struct xdp_frame {
+ void *data;
+ u16 len;
+ u16 headroom;
+ u16 metasize;
+ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
+ * while mem info is valid on remote CPU.
+ */
+ struct xdp_mem_info mem;
+ struct net_device *dev_rx; /* used by cpumap */
+};
+
+/* Convert xdp_buff to xdp_frame */
+static inline
+struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdp_frame;
+ int metasize;
+ int headroom;
+
+ /* TODO: implement clone, copy, use "native" MEM_TYPE */
+ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
+ return NULL;
+
+ /* Assure headroom is available for storing info */
+ headroom = xdp->data - xdp->data_hard_start;
+ metasize = xdp->data - xdp->data_meta;
+ metasize = metasize > 0 ? metasize : 0;
+ if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
+ return NULL;
+
+ /* Store info in top of packet */
+ xdp_frame = xdp->data_hard_start;
+
+ xdp_frame->data = xdp->data;
+ xdp_frame->len = xdp->data_end - xdp->data;
+ xdp_frame->headroom = headroom - sizeof(*xdp_frame);
+ xdp_frame->metasize = metasize;
+
+ /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
+ xdp_frame->mem = xdp->rxq->mem;
+
+ return xdp_frame;
+}
+
+void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
+void xdp_return_buff(struct xdp_buff *xdp);
+
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ enum xdp_mem_type type, void *allocator);
+
+/* Drivers not supporting XDP metadata can use this helper, which
+ * rejects any room expansion for metadata as a result.
+ */
+static __always_inline void
+xdp_set_data_meta_invalid(struct xdp_buff *xdp)
+{
+ xdp->data_meta = xdp->data + 1;
+}
+
+static __always_inline bool
+xdp_data_meta_unsupported(const struct xdp_buff *xdp)
+{
+ return unlikely(xdp->data_meta > xdp->data);
+}
#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
new file mode 100644
index 0000000000000..9fe472f2ac950
--- /dev/null
+++ b/include/net/xdp_sock.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* AF_XDP internal functions
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _LINUX_XDP_SOCK_H
+#define _LINUX_XDP_SOCK_H
+
+#include <linux/workqueue.h>
+#include <linux/if_xdp.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <net/sock.h>
+
+struct net_device;
+struct xsk_queue;
+
+struct xdp_umem_props {
+ u64 chunk_mask;
+ u64 size;
+};
+
+struct xdp_umem_page {
+ void *addr;
+ dma_addr_t dma;
+};
+
+struct xdp_umem {
+ struct xsk_queue *fq;
+ struct xsk_queue *cq;
+ struct xdp_umem_page *pages;
+ struct xdp_umem_props props;
+ u32 headroom;
+ u32 chunk_size_nohr;
+ struct user_struct *user;
+ struct pid *pid;
+ unsigned long address;
+ refcount_t users;
+ struct work_struct work;
+ struct page **pgs;
+ u32 npgs;
+ struct net_device *dev;
+ u16 queue_id;
+ bool zc;
+ spinlock_t xsk_list_lock;
+ struct list_head xsk_list;
+};
+
+struct xdp_sock {
+ /* struct sock must be the first member of struct xdp_sock */
+ struct sock sk;
+ struct xsk_queue *rx;
+ struct net_device *dev;
+ struct xdp_umem *umem;
+ struct list_head flush_node;
+ u16 queue_id;
+ struct xsk_queue *tx ____cacheline_aligned_in_smp;
+ struct list_head list;
+ bool zc;
+ /* Protects multiple processes in the control path */
+ struct mutex mutex;
+ u64 rx_dropped;
+};
+
+struct xdp_buff;
+#ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+void xsk_flush(struct xdp_sock *xs);
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+/* Used from netdev driver */
+u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_discard_addr(struct xdp_umem *umem);
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
+void xsk_umem_consume_tx_done(struct xdp_umem *umem);
+#else
+static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ return -ENOTSUPP;
+}
+
+static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ return -ENOTSUPP;
+}
+
+static inline void xsk_flush(struct xdp_sock *xs)
+{
+}
+
+static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+{
+ return false;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
deleted file mode 100644
index 150185647e6b2..0000000000000
--- a/include/trace/events/bpf.h
+++ /dev/null
@@ -1,355 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM bpf
-
-#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_BPF_H
-
-/* These are only used within the BPF_SYSCALL code */
-#ifdef CONFIG_BPF_SYSCALL
-
-#include <linux/filter.h>
-#include <linux/bpf.h>
-#include <linux/fs.h>
-#include <linux/tracepoint.h>
-
-#define __PROG_TYPE_MAP(FN) \
- FN(SOCKET_FILTER) \
- FN(KPROBE) \
- FN(SCHED_CLS) \
- FN(SCHED_ACT) \
- FN(TRACEPOINT) \
- FN(XDP) \
- FN(PERF_EVENT) \
- FN(CGROUP_SKB) \
- FN(CGROUP_SOCK) \
- FN(LWT_IN) \
- FN(LWT_OUT) \
- FN(LWT_XMIT)
-
-#define __MAP_TYPE_MAP(FN) \
- FN(HASH) \
- FN(ARRAY) \
- FN(PROG_ARRAY) \
- FN(PERF_EVENT_ARRAY) \
- FN(PERCPU_HASH) \
- FN(PERCPU_ARRAY) \
- FN(STACK_TRACE) \
- FN(CGROUP_ARRAY) \
- FN(LRU_HASH) \
- FN(LRU_PERCPU_HASH) \
- FN(LPM_TRIE)
-
-#define __PROG_TYPE_TP_FN(x) \
- TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x);
-#define __PROG_TYPE_SYM_FN(x) \
- { BPF_PROG_TYPE_##x, #x },
-#define __PROG_TYPE_SYM_TAB \
- __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 }
-__PROG_TYPE_MAP(__PROG_TYPE_TP_FN)
-
-#define __MAP_TYPE_TP_FN(x) \
- TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x);
-#define __MAP_TYPE_SYM_FN(x) \
- { BPF_MAP_TYPE_##x, #x },
-#define __MAP_TYPE_SYM_TAB \
- __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 }
-__MAP_TYPE_MAP(__MAP_TYPE_TP_FN)
-
-DECLARE_EVENT_CLASS(bpf_prog_event,
-
- TP_PROTO(const struct bpf_prog *prg),
-
- TP_ARGS(prg),
-
- TP_STRUCT__entry(
- __array(u8, prog_tag, 8)
- __field(u32, type)
- ),
-
- TP_fast_assign(
- BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
- memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
- __entry->type = prg->type;
- ),
-
- TP_printk("prog=%s type=%s",
- __print_hex_str(__entry->prog_tag, 8),
- __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB))
-);
-
-DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type,
-
- TP_PROTO(const struct bpf_prog *prg),
-
- TP_ARGS(prg)
-);
-
-DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu,
-
- TP_PROTO(const struct bpf_prog *prg),
-
- TP_ARGS(prg)
-);
-
-TRACE_EVENT(bpf_prog_load,
-
- TP_PROTO(const struct bpf_prog *prg, int ufd),
-
- TP_ARGS(prg, ufd),
-
- TP_STRUCT__entry(
- __array(u8, prog_tag, 8)
- __field(u32, type)
- __field(int, ufd)
- ),
-
- TP_fast_assign(
- BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
- memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
- __entry->type = prg->type;
- __entry->ufd = ufd;
- ),
-
- TP_printk("prog=%s type=%s ufd=%d",
- __print_hex_str(__entry->prog_tag, 8),
- __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB),
- __entry->ufd)
-);
-
-TRACE_EVENT(bpf_map_create,
-
- TP_PROTO(const struct bpf_map *map, int ufd),
-
- TP_ARGS(map, ufd),
-
- TP_STRUCT__entry(
- __field(u32, type)
- __field(u32, size_key)
- __field(u32, size_value)
- __field(u32, max_entries)
- __field(u32, flags)
- __field(int, ufd)
- ),
-
- TP_fast_assign(
- __entry->type = map->map_type;
- __entry->size_key = map->key_size;
- __entry->size_value = map->value_size;
- __entry->max_entries = map->max_entries;
- __entry->flags = map->map_flags;
- __entry->ufd = ufd;
- ),
-
- TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x",
- __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
- __entry->ufd, __entry->size_key, __entry->size_value,
- __entry->max_entries, __entry->flags)
-);
-
-DECLARE_EVENT_CLASS(bpf_obj_prog,
-
- TP_PROTO(const struct bpf_prog *prg, int ufd,
- const struct filename *pname),
-
- TP_ARGS(prg, ufd, pname),
-
- TP_STRUCT__entry(
- __array(u8, prog_tag, 8)
- __field(int, ufd)
- __string(path, pname->name)
- ),
-
- TP_fast_assign(
- BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
- memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
- __assign_str(path, pname->name);
- __entry->ufd = ufd;
- ),
-
- TP_printk("prog=%s path=%s ufd=%d",
- __print_hex_str(__entry->prog_tag, 8),
- __get_str(path), __entry->ufd)
-);
-
-DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog,
-
- TP_PROTO(const struct bpf_prog *prg, int ufd,
- const struct filename *pname),
-
- TP_ARGS(prg, ufd, pname)
-);
-
-DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog,
-
- TP_PROTO(const struct bpf_prog *prg, int ufd,
- const struct filename *pname),
-
- TP_ARGS(prg, ufd, pname)
-);
-
-DECLARE_EVENT_CLASS(bpf_obj_map,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const struct filename *pname),
-
- TP_ARGS(map, ufd, pname),
-
- TP_STRUCT__entry(
- __field(u32, type)
- __field(int, ufd)
- __string(path, pname->name)
- ),
-
- TP_fast_assign(
- __assign_str(path, pname->name);
- __entry->type = map->map_type;
- __entry->ufd = ufd;
- ),
-
- TP_printk("map type=%s ufd=%d path=%s",
- __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
- __entry->ufd, __get_str(path))
-);
-
-DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const struct filename *pname),
-
- TP_ARGS(map, ufd, pname)
-);
-
-DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const struct filename *pname),
-
- TP_ARGS(map, ufd, pname)
-);
-
-DECLARE_EVENT_CLASS(bpf_map_keyval,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const void *key, const void *val),
-
- TP_ARGS(map, ufd, key, val),
-
- TP_STRUCT__entry(
- __field(u32, type)
- __field(u32, key_len)
- __dynamic_array(u8, key, map->key_size)
- __field(bool, key_trunc)
- __field(u32, val_len)
- __dynamic_array(u8, val, map->value_size)
- __field(bool, val_trunc)
- __field(int, ufd)
- ),
-
- TP_fast_assign(
- memcpy(__get_dynamic_array(key), key, map->key_size);
- memcpy(__get_dynamic_array(val), val, map->value_size);
- __entry->type = map->map_type;
- __entry->key_len = min(map->key_size, 16U);
- __entry->key_trunc = map->key_size != __entry->key_len;
- __entry->val_len = min(map->value_size, 16U);
- __entry->val_trunc = map->value_size != __entry->val_len;
- __entry->ufd = ufd;
- ),
-
- TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]",
- __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
- __entry->ufd,
- __print_hex(__get_dynamic_array(key), __entry->key_len),
- __entry->key_trunc ? " ..." : "",
- __print_hex(__get_dynamic_array(val), __entry->val_len),
- __entry->val_trunc ? " ..." : "")
-);
-
-DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const void *key, const void *val),
-
- TP_ARGS(map, ufd, key, val)
-);
-
-DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const void *key, const void *val),
-
- TP_ARGS(map, ufd, key, val)
-);
-
-TRACE_EVENT(bpf_map_delete_elem,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const void *key),
-
- TP_ARGS(map, ufd, key),
-
- TP_STRUCT__entry(
- __field(u32, type)
- __field(u32, key_len)
- __dynamic_array(u8, key, map->key_size)
- __field(bool, key_trunc)
- __field(int, ufd)
- ),
-
- TP_fast_assign(
- memcpy(__get_dynamic_array(key), key, map->key_size);
- __entry->type = map->map_type;
- __entry->key_len = min(map->key_size, 16U);
- __entry->key_trunc = map->key_size != __entry->key_len;
- __entry->ufd = ufd;
- ),
-
- TP_printk("map type=%s ufd=%d key=[%s%s]",
- __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
- __entry->ufd,
- __print_hex(__get_dynamic_array(key), __entry->key_len),
- __entry->key_trunc ? " ..." : "")
-);
-
-TRACE_EVENT(bpf_map_next_key,
-
- TP_PROTO(const struct bpf_map *map, int ufd,
- const void *key, const void *key_next),
-
- TP_ARGS(map, ufd, key, key_next),
-
- TP_STRUCT__entry(
- __field(u32, type)
- __field(u32, key_len)
- __dynamic_array(u8, key, map->key_size)
- __dynamic_array(u8, nxt, map->key_size)
- __field(bool, key_trunc)
- __field(bool, key_null)
- __field(int, ufd)
- ),
-
- TP_fast_assign(
- if (key)
- memcpy(__get_dynamic_array(key), key, map->key_size);
- __entry->key_null = !key;
- memcpy(__get_dynamic_array(nxt), key_next, map->key_size);
- __entry->type = map->map_type;
- __entry->key_len = min(map->key_size, 16U);
- __entry->key_trunc = map->key_size != __entry->key_len;
- __entry->ufd = ufd;
- ),
-
- TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
- __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
- __entry->ufd,
- __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),
- __entry->key_len),
- __entry->key_trunc && !__entry->key_null ? " ..." : "",
- __print_hex(__get_dynamic_array(nxt), __entry->key_len),
- __entry->key_trunc ? " ..." : "")
-);
-#endif /* CONFIG_BPF_SYSCALL */
-#endif /* _TRACE_BPF_H */
-
-#include <trace/define_trace.h>
diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 81b7e985bb45a..9763cddd05947 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -12,12 +12,14 @@
TRACE_EVENT(fib_table_lookup,
- TP_PROTO(u32 tb_id, const struct flowi4 *flp),
+ TP_PROTO(u32 tb_id, const struct flowi4 *flp,
+ const struct fib_nh *nh, int err),
- TP_ARGS(tb_id, flp),
+ TP_ARGS(tb_id, flp, nh, err),
TP_STRUCT__entry(
__field( u32, tb_id )
+ __field( int, err )
__field( int, oif )
__field( int, iif )
__field( __u8, tos )
@@ -25,12 +27,19 @@ TRACE_EVENT(fib_table_lookup,
__field( __u8, flags )
__array( __u8, src, 4 )
__array( __u8, dst, 4 )
+ __array( __u8, gw, 4 )
+ __array( __u8, saddr, 4 )
+ __field( u16, sport )
+ __field( u16, dport )
+ __field( u8, proto )
+ __dynamic_array(char, name, IFNAMSIZ )
),
TP_fast_assign(
__be32 *p32;
__entry->tb_id = tb_id;
+ __entry->err = err;
__entry->oif = flp->flowi4_oif;
__entry->iif = flp->flowi4_iif;
__entry->tos = flp->flowi4_tos;
@@ -42,71 +51,41 @@ TRACE_EVENT(fib_table_lookup,
p32 = (__be32 *) __entry->dst;
*p32 = flp->daddr;
- ),
-
- TP_printk("table %u oif %d iif %d src %pI4 dst %pI4 tos %d scope %d flags %x",
- __entry->tb_id, __entry->oif, __entry->iif,
- __entry->src, __entry->dst, __entry->tos, __entry->scope,
- __entry->flags)
-);
-
-TRACE_EVENT(fib_table_lookup_nh,
-
- TP_PROTO(const struct fib_nh *nh),
-
- TP_ARGS(nh),
-
- TP_STRUCT__entry(
- __string( name, nh->nh_dev->name)
- __field( int, oif )
- __array( __u8, src, 4 )
- ),
-
- TP_fast_assign(
- __be32 *p32 = (__be32 *) __entry->src;
-
- __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "not set");
- __entry->oif = nh->nh_oif;
- *p32 = nh->nh_saddr;
- ),
-
- TP_printk("nexthop dev %s oif %d src %pI4",
- __get_str(name), __entry->oif, __entry->src)
-);
-
-TRACE_EVENT(fib_validate_source,
-
- TP_PROTO(const struct net_device *dev, const struct flowi4 *flp),
-
- TP_ARGS(dev, flp),
- TP_STRUCT__entry(
- __string( name, dev->name )
- __field( int, oif )
- __field( int, iif )
- __field( __u8, tos )
- __array( __u8, src, 4 )
- __array( __u8, dst, 4 )
- ),
-
- TP_fast_assign(
- __be32 *p32;
-
- __assign_str(name, dev ? dev->name : "not set");
- __entry->oif = flp->flowi4_oif;
- __entry->iif = flp->flowi4_iif;
- __entry->tos = flp->flowi4_tos;
-
- p32 = (__be32 *) __entry->src;
- *p32 = flp->saddr;
-
- p32 = (__be32 *) __entry->dst;
- *p32 = flp->daddr;
+ __entry->proto = flp->flowi4_proto;
+ if (__entry->proto == IPPROTO_TCP ||
+ __entry->proto == IPPROTO_UDP) {
+ __entry->sport = ntohs(flp->fl4_sport);
+ __entry->dport = ntohs(flp->fl4_dport);
+ } else {
+ __entry->sport = 0;
+ __entry->dport = 0;
+ }
+
+ if (nh) {
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = nh->nh_saddr;
+
+ p32 = (__be32 *) __entry->gw;
+ *p32 = nh->nh_gw;
+
+ __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-");
+ } else {
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = 0;
+
+ p32 = (__be32 *) __entry->gw;
+ *p32 = 0;
+
+ __assign_str(name, "-");
+ }
),
- TP_printk("dev %s oif %d iif %d tos %d src %pI4 dst %pI4",
- __get_str(name), __entry->oif, __entry->iif, __entry->tos,
- __entry->src, __entry->dst)
+ TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d",
+ __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+ __entry->src, __entry->sport, __entry->dst, __entry->dport,
+ __entry->tos, __entry->scope, __entry->flags,
+ __get_str(name), __entry->gw, __entry->saddr, __entry->err)
);
#endif /* _TRACE_FIB_H */
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 7e8d48a81b919..b088b54d699ce 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -12,14 +12,14 @@
TRACE_EVENT(fib6_table_lookup,
- TP_PROTO(const struct net *net, const struct rt6_info *rt,
+ TP_PROTO(const struct net *net, const struct fib6_info *f6i,
struct fib6_table *table, const struct flowi6 *flp),
- TP_ARGS(net, rt, table, flp),
+ TP_ARGS(net, f6i, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
-
+ __field( int, err )
__field( int, oif )
__field( int, iif )
__field( __u8, tos )
@@ -27,7 +27,10 @@ TRACE_EVENT(fib6_table_lookup,
__field( __u8, flags )
__array( __u8, src, 16 )
__array( __u8, dst, 16 )
-
+ __field( u16, sport )
+ __field( u16, dport )
+ __field( u8, proto )
+ __field( u8, rt_type )
__dynamic_array( char, name, IFNAMSIZ )
__array( __u8, gw, 16 )
),
@@ -36,6 +39,7 @@ TRACE_EVENT(fib6_table_lookup,
struct in6_addr *in6;
__entry->tb_id = table->tb6_id;
+ __entry->err = ip6_rt_type_to_error(f6i->fib6_type);
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->tos = ip6_tclass(flp->flowlabel);
@@ -48,27 +52,38 @@ TRACE_EVENT(fib6_table_lookup,
in6 = (struct in6_addr *)__entry->dst;
*in6 = flp->daddr;
- if (rt->rt6i_idev) {
- __assign_str(name, rt->rt6i_idev->dev->name);
+ __entry->proto = flp->flowi6_proto;
+ if (__entry->proto == IPPROTO_TCP ||
+ __entry->proto == IPPROTO_UDP) {
+ __entry->sport = ntohs(flp->fl6_sport);
+ __entry->dport = ntohs(flp->fl6_dport);
+ } else {
+ __entry->sport = 0;
+ __entry->dport = 0;
+ }
+
+ if (f6i->fib6_nh.nh_dev) {
+ __assign_str(name, f6i->fib6_nh.nh_dev);
} else {
- __assign_str(name, "");
+ __assign_str(name, "-");
}
- if (rt == net->ipv6.ip6_null_entry) {
+ if (f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
in6 = (struct in6_addr *)__entry->gw;
*in6 = in6_zero;
- } else if (rt) {
+ } else if (f6i) {
in6 = (struct in6_addr *)__entry->gw;
- *in6 = rt->rt6i_gateway;
+ *in6 = f6i->fib6_nh.nh_gw;
}
),
- TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c",
- __entry->tb_id, __entry->oif, __entry->iif,
- __entry->src, __entry->dst, __entry->tos, __entry->scope,
- __entry->flags, __get_str(name), __entry->gw)
+ TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
+ __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+ __entry->src, __entry->sport, __entry->dst, __entry->dport,
+ __entry->tos, __entry->scope, __entry->flags,
+ __get_str(name), __entry->gw, __entry->err)
);
#endif /* _TRACE_FIB6_H */
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 077e664ac9a20..4fff00e9da8ac 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -1459,6 +1459,38 @@ TRACE_EVENT(rxrpc_tx_fail,
__print_symbolic(__entry->what, rxrpc_tx_fail_traces))
);
+TRACE_EVENT(rxrpc_call_reset,
+ TP_PROTO(struct rxrpc_call *call),
+
+ TP_ARGS(call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, debug_id )
+ __field(u32, cid )
+ __field(u32, call_id )
+ __field(rxrpc_serial_t, call_serial )
+ __field(rxrpc_serial_t, conn_serial )
+ __field(rxrpc_seq_t, tx_seq )
+ __field(rxrpc_seq_t, rx_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->debug_id = call->debug_id;
+ __entry->cid = call->cid;
+ __entry->call_id = call->call_id;
+ __entry->call_serial = call->rx_serial;
+ __entry->conn_serial = call->conn->hi_serial;
+ __entry->tx_seq = call->tx_hard_ack;
+ __entry->rx_seq = call->ackr_seen;
+ ),
+
+ TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x",
+ __entry->debug_id,
+ __entry->cid, __entry->call_id,
+ __entry->call_serial, __entry->conn_serial,
+ __entry->tx_seq, __entry->rx_seq)
+ );
+
#endif /* _TRACE_RXRPC_H */
/* This part must be outside protection */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 878b2be7ce770..ac55b328d61b2 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -10,6 +10,7 @@
#include <linux/tracepoint.h>
#include <net/ipv6.h>
#include <net/tcp.h>
+#include <linux/sock_diag.h>
#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
do { \
@@ -113,7 +114,7 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
*/
DECLARE_EVENT_CLASS(tcp_event_sk,
- TP_PROTO(const struct sock *sk),
+ TP_PROTO(struct sock *sk),
TP_ARGS(sk),
@@ -125,6 +126,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
__array(__u8, daddr_v6, 16)
+ __field(__u64, sock_cookie)
),
TP_fast_assign(
@@ -144,73 +146,36 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+
+ __entry->sock_cookie = sock_gen_cookie(sk);
),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+ TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx",
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
- __entry->saddr_v6, __entry->daddr_v6)
+ __entry->saddr_v6, __entry->daddr_v6,
+ __entry->sock_cookie)
);
DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
- TP_PROTO(const struct sock *sk),
+ TP_PROTO(struct sock *sk),
TP_ARGS(sk)
);
DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock,
- TP_PROTO(const struct sock *sk),
+ TP_PROTO(struct sock *sk),
TP_ARGS(sk)
);
-TRACE_EVENT(tcp_set_state,
-
- TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
-
- TP_ARGS(sk, oldstate, newstate),
-
- TP_STRUCT__entry(
- __field(const void *, skaddr)
- __field(int, oldstate)
- __field(int, newstate)
- __field(__u16, sport)
- __field(__u16, dport)
- __array(__u8, saddr, 4)
- __array(__u8, daddr, 4)
- __array(__u8, saddr_v6, 16)
- __array(__u8, daddr_v6, 16)
- ),
-
- TP_fast_assign(
- struct inet_sock *inet = inet_sk(sk);
- __be32 *p32;
-
- __entry->skaddr = sk;
- __entry->oldstate = oldstate;
- __entry->newstate = newstate;
-
- __entry->sport = ntohs(inet->inet_sport);
- __entry->dport = ntohs(inet->inet_dport);
-
- p32 = (__be32 *) __entry->saddr;
- *p32 = inet->inet_saddr;
-
- p32 = (__be32 *) __entry->daddr;
- *p32 = inet->inet_daddr;
+DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
- TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
- sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
- ),
+ TP_PROTO(struct sock *sk),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
- __entry->sport, __entry->dport,
- __entry->saddr, __entry->daddr,
- __entry->saddr_v6, __entry->daddr_v6,
- show_tcp_state_name(__entry->oldstate),
- show_tcp_state_name(__entry->newstate))
+ TP_ARGS(sk)
);
TRACE_EVENT(tcp_retransmit_synack,
@@ -271,7 +236,7 @@ TRACE_EVENT(tcp_probe,
__field(__u16, sport)
__field(__u16, dport)
__field(__u32, mark)
- __field(__u16, length)
+ __field(__u16, data_len)
__field(__u32, snd_nxt)
__field(__u32, snd_una)
__field(__u32, snd_cwnd)
@@ -279,11 +244,13 @@ TRACE_EVENT(tcp_probe,
__field(__u32, snd_wnd)
__field(__u32, srtt)
__field(__u32, rcv_wnd)
+ __field(__u64, sock_cookie)
),
TP_fast_assign(
- const struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
const struct inet_sock *inet = inet_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
@@ -295,7 +262,7 @@ TRACE_EVENT(tcp_probe,
__entry->dport = ntohs(inet->inet_dport);
__entry->mark = skb->mark;
- __entry->length = skb->len;
+ __entry->data_len = skb->len - __tcp_hdrlen(th);
__entry->snd_nxt = tp->snd_nxt;
__entry->snd_una = tp->snd_una;
__entry->snd_cwnd = tp->snd_cwnd;
@@ -303,15 +270,14 @@ TRACE_EVENT(tcp_probe,
__entry->rcv_wnd = tp->rcv_wnd;
__entry->ssthresh = tcp_current_ssthresh(sk);
__entry->srtt = tp->srtt_us >> 3;
+ __entry->sock_cookie = sock_gen_cookie(sk);
),
- TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
- "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
- "rcv_wnd=%u",
+ TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
__entry->saddr, __entry->daddr, __entry->mark,
- __entry->length, __entry->snd_nxt, __entry->snd_una,
+ __entry->data_len, __entry->snd_nxt, __entry->snd_una,
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
- __entry->srtt, __entry->rcv_wnd)
+ __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
);
#endif /* _TRACE_TCP_H */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 8989a92c571a2..1ecf4c67fcf75 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index)
);
+#ifndef __DEVMAP_OBJ_TYPE
+#define __DEVMAP_OBJ_TYPE
+struct _bpf_dtab_netdev {
+ struct net_device *dev;
+};
+#endif /* __DEVMAP_OBJ_TYPE */
+
#define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \
(!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
- ((struct net_device *)fwd)->ifindex : 0)))
+ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu)
);
+TRACE_EVENT(xdp_devmap_xmit,
+
+ TP_PROTO(const struct bpf_map *map, u32 map_index,
+ int sent, int drops,
+ const struct net_device *from_dev,
+ const struct net_device *to_dev, int err),
+
+ TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(u32, map_index)
+ __field(int, drops)
+ __field(int, sent)
+ __field(int, from_ifindex)
+ __field(int, to_ifindex)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map->id;
+ __entry->act = XDP_REDIRECT;
+ __entry->map_index = map_index;
+ __entry->drops = drops;
+ __entry->sent = sent;
+ __entry->from_ifindex = from_dev->ifindex;
+ __entry->to_ifindex = to_dev->ifindex;
+ __entry->err = err;
+ ),
+
+ TP_printk("ndo_xdp_xmit"
+ " map_id=%d map_index=%d action=%s"
+ " sent=%d drops=%d"
+ " from_ifindex=%d to_ifindex=%d err=%d",
+ __entry->map_id, __entry->map_index,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->sent, __entry->drops,
+ __entry->from_ifindex, __entry->to_ifindex, __entry->err)
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8c317737ba3f0..59b19b6a40d73 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,9 @@ enum bpf_cmd {
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
+ BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@@ -115,6 +118,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
+ BPF_MAP_TYPE_SOCKHASH,
};
enum bpf_prog_type {
@@ -137,6 +142,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
};
enum bpf_attach_type {
@@ -154,6 +161,9 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
__MAX_BPF_ATTACH_TYPE
};
@@ -279,6 +289,9 @@ union bpf_attr {
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -339,6 +352,7 @@ union bpf_attr {
__u32 start_id;
__u32 prog_id;
__u32 map_id;
+ __u32 btf_id;
};
__u32 next_id;
__u32 open_flags;
@@ -363,398 +377,1704 @@ union bpf_attr {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prod_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
} __attribute__((aligned(8)));
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- * Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- * Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- * Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- * Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * address *src* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* u64 bpf_ktime_get_ns(void)
- * Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- * Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- * Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- * Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- * store bytes into packet
- * @skb: pointer to skb
- * @offset: offset within packet from skb->mac_header
- * @from: pointer where to copy bytes from
- * @len: number of bytes to store into packet
- * @flags: bit 0 - if true, recompute skb->csum
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- * recompute IP checksum
- * @skb: pointer to skb
- * @offset: offset within packet where IP checksum is located
- * @from: old value of header field
- * @to: new value of header field
- * @flags: bits 0-3 - size of header field
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- * recompute TCP/UDP checksum
- * @skb: pointer to skb
- * @offset: offset within packet where TCP/UDP checksum is located
- * @from: old value of header field
- * @to: new value of header field
- * @flags: bits 0-3 - size of header field
- * bit 4 - is pseudo header
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- * jump into another BPF program
- * @ctx: context pointer passed to next program
- * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- * @index: 32-bit index inside array that selects specific program to run
- * Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- * redirect to another netdev
- * @skb: pointer to skb
- * @ifindex: ifindex of the net device
- * @flags: bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * Return: 0 on success or negative error
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
- * Return: current->tgid << 32 | current->pid
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
- * Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- * stores current->comm into buf
- * Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- * retrieve a proc's classid
- * @skb: pointer to skb
- * Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- * Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- * Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- * retrieve or populate tunnel metadata
- * @skb: pointer to skb
- * @key: pointer to 'struct bpf_tunnel_key'
- * @size: size of 'struct bpf_tunnel_key'
- * @flags: room for future extensions
- * Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- * read perf event counter value
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- * redirect to another netdev
- * @ifindex: ifindex of the net device
- * @flags:
- * cls_bpf:
- * bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * xdp_bpf:
- * all bits - reserved
- * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- * redirect to endpoint in map
- * @map: pointer to dev map
- * @key: index in map to lookup
- * @flags: --
- * Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- * retrieve a dst's tclassid
- * @skb: pointer to skb
- * Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- * output perf raw sample
- * @ctx: struct pt_regs*
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * @data: data on stack to be output as raw data
- * @size: size of data
- * Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- * walk user or kernel stack and return id
- * @ctx: struct pt_regs*
- * @map: pointer to stack_trace map
- * @flags: bits 0-7 - numer of stack frames to skip
- * bit 8 - collect user stack instead of kernel
- * bit 9 - compare stacks by hash only
- * bit 10 - if two different stacks hash into the same stackid
- * discard old
- * other bits - reserved
- * Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- * calculate csum diff
- * @from: raw from buffer
- * @from_size: length of from buffer
- * @to: raw to buffer
- * @to_size: length of to buffer
- * @seed: optional seed
- * Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- * retrieve tunnel options metadata
- * @skb: pointer to skb
- * @opt: pointer to raw tunnel option data
- * @size: size of @opt
- * Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- * populate tunnel options metadata
- * @skb: pointer to skb
- * @opt: pointer to raw tunnel option data
- * @size: size of @opt
- * Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- * Change protocol of the skb. Currently supported is v4 -> v6,
- * v6 -> v4 transitions. The helper will also resize the skb. eBPF
- * program is expected to fill the new headers via skb_store_bytes
- * and lX_csum_replace.
- * @skb: pointer to skb
- * @proto: new skb->protocol type
- * @flags: reserved
- * Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- * Change packet type of skb.
- * @skb: pointer to skb
- * @type: new skb->pkt_type type
- * Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- * Check cgroup2 membership of skb
- * @skb: pointer to skb
- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- * @index: index of the cgroup in the bpf_map
- * Return:
- * == 0 skb failed the cgroup2 descendant test
- * == 1 skb succeeded the cgroup2 descendant test
- * < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- * Retrieve and possibly recalculate skb->hash.
- * @skb: pointer to skb
- * Return: hash
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * indentifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
*
* u64 bpf_get_current_task(void)
- * Returns current task_struct
- * Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- * safely attempt to write to a location
- * @dst: destination address in userspace
- * @src: source address on stack
- * @len: number of bytes to copy
- * Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- * Check cgroup2 membership of current task
- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- * @index: index of the cgroup in the bpf_map
- * Return:
- * == 0 current failed the cgroup2 descendant test
- * == 1 current succeeded the cgroup2 descendant test
- * < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- * The helper will resize the skb to the given new size, to be used f.e.
- * with control messages.
- * @skb: pointer to skb
- * @len: new skb length
- * @flags: reserved
- * Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- * The helper will pull in non-linear data in case the skb is non-linear
- * and not all of len are part of the linear section. Only needed for
- * read/write with direct packet access.
- * @skb: pointer to skb
- * @len: len to make read/writeable
- * Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- * @skb: pointer to skb
- * @csum: csum to add
- * Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- * Invalidate current skb->hash.
- * @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- * Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- * Grows headroom of skb and adjusts MAC header offset accordingly.
- * Will extends/reallocae as required automatically.
- * May change skb data pointer and will thus invalidate any check
- * performed for direct packet access.
- * @skb: pointer to skb
- * @len: length of header to be pushed in front
- * @flags: Flags (unused for now)
- * Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- * Adjust the xdp_md.data by delta
- * @xdp_md: pointer to xdp_md
- * @delta: An positive/negative integer to be added to xdp_md.data
- * Return: 0 on success or negative on error
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- * Copy a NUL terminated string from unsafe address. In case the string
- * length is smaller than size, the target is not padded with further NUL
- * bytes. In case the string length is larger than size, just count-1
- * bytes are copied and the last byte is set to NUL.
- * @dst: destination address
- * @size: maximum number of bytes to copy, including the trailing NUL
- * @unsafe_ptr: unsafe address
- * Return:
- * > 0 length of the string including the trailing NUL on success
- * < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- * Get the cookie for the socket stored inside sk_buff.
- * @skb: pointer to skb
- * Return: 8 Bytes non-decreasing number on success or 0 if the socket
- * field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- * Get the owner uid of the socket stored inside sk_buff.
- * @skb: pointer to skb
- * Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- * Set full skb->hash.
- * @skb: pointer to skb
- * @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- * Calls setsockopt. Not all opts are available, only those with
- * integer optvals plus TCP_CONGESTION.
- * Supported levels: SOL_SOCKET and IPPROTO_TCP
- * @bpf_socket: pointer to bpf_socket
- * @level: SOL_SOCKET or IPPROTO_TCP
- * @optname: option name
- * @optval: pointer to option value
- * @optlen: length of optval in bytes
- * Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- * Calls getsockopt. Not all opts are available.
- * Supported levels: IPPROTO_TCP
- * @bpf_socket: pointer to bpf_socket
- * @level: IPPROTO_TCP
- * @optname: option name
- * @optval: pointer to option value
- * @optlen: length of optval in bytes
- * Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- * Set callback flags for sock_ops
- * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- * @flags: flags value
- * Return: 0 for no error
- * -EINVAL if there is no full tcp socket
- * bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- * Grow or shrink room in sk_buff.
- * @skb: pointer to skb
- * @len_diff: (signed) amount of room to grow/shrink
- * @mode: operation mode (enum bpf_adj_room_mode)
- * @flags: reserved for future use
- * Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- * Redirect skb to a sock in map using key as a lookup key for the
- * sock in map.
- * @map: pointer to sockmap
- * @key: key to lookup sock in map
- * @flags: reserved for future use
- * Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- * @skops: pointer to bpf_sock_ops
- * @map: pointer to sockmap to update
- * @key: key to insert/update sock in map
- * @flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- * Adjust the xdp_md.data_meta by delta
- * @xdp_md: pointer to xdp_md
- * @delta: An positive/negative integer to be added to xdp_md.data_meta
- * Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- * read perf event counter value and perf event enabled/running time
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * @buf: buf to fill
- * @buf_size: size of the buf
- * Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- * read perf prog attached perf event counter and enabled/running time
- * @ctx: pointer to ctx
- * @buf: buf to fill
- * @buf_size: size of the buf
- * Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- * @pt_regs: pointer to struct pt_regs
- * @rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- * Redirect msg to a sock in map using key as a lookup key for the
- * sock in map.
- * @map: pointer to sockmap
- * @key: key to lookup sock in map
- * @flags: reserved for future use
- * Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- * Bind socket to address. Only binding to IP is supported, no port can be
- * set in addr.
- * @ctx: pointer to context of type bpf_sock_addr
- * @addr: pointer to struct sockaddr to bind socket to
- * @addr_len: length of sockaddr structure
- * Return: 0 on success or negative error code
+ * Description
+ * Copy a NUL terminated string from an unsafe address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read()** helper here instead
+ * to read the string would require to estimate the length at
+ * compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a unique socket
+ * identifier per namespace.
+ * Return
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There is a single supported mode at this time:
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to sufficient large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
+ * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only).
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ * Return
+ * Egress device index on success, 0 if packet needs to continue
+ * up the stack for further processing or a negative error in case
+ * of failure.
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be as well
+ * positive (growing) as negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown** () again with the same values, or calling
+ * **bpf_rc_repeat** ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Return
+ * 0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Return
+ * 0
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -821,7 +2141,23 @@ union bpf_attr {
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data), \
- FN(bind),
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state), \
+ FN(get_stack), \
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup), \
+ FN(sock_hash_update), \
+ FN(msg_redirect_hash), \
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -855,11 +2191,14 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
#define BPF_F_SKIP_FIELD_MASK 0xffULL
#define BPF_F_USER_STACK (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID (1ULL << 11)
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
@@ -879,6 +2218,18 @@ enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
};
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+ BPF_HDR_START_MAC,
+ BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -923,10 +2274,24 @@ struct bpf_tunnel_key {
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
- __u16 tunnel_ext;
+ __u16 tunnel_ext; /* Padding, future use. */
__u32 tunnel_label;
};
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ __u16 ext; /* Padding, future use. */
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
/* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -999,6 +2364,14 @@ enum sk_action {
struct sk_msg_md {
void *data;
void *data_end;
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
};
#define BPF_TAG_SIZE 8
@@ -1017,9 +2390,13 @@ struct bpf_prog_info {
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
- __u32 :32;
+ __u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -1034,6 +2411,15 @@ struct bpf_map_info {
__u32 :32;
__u64 netns_dev;
__u64 netns_ino;
+ __u32 btf_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+ __aligned_u64 btf;
+ __u32 btf_size;
+ __u32 id;
} __attribute__((aligned(8)));
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -1054,6 +2440,12 @@ struct bpf_sock_addr {
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -1214,4 +2606,64 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+struct bpf_fib_lookup {
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+ __u32 ifindex; /* L3 device index for lookup */
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
+
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
+ */
+ union {
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h
new file mode 100644
index 0000000000000..2ec3cc99ea4c3
--- /dev/null
+++ b/include/uapi/linux/bpfilter.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_BPFILTER_H
+#define _UAPI_LINUX_BPFILTER_H
+
+#include <linux/if.h>
+
+enum {
+ BPFILTER_IPT_SO_SET_REPLACE = 64,
+ BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65,
+ BPFILTER_IPT_SET_MAX,
+};
+
+enum {
+ BPFILTER_IPT_SO_GET_INFO = 64,
+ BPFILTER_IPT_SO_GET_ENTRIES = 65,
+ BPFILTER_IPT_SO_GET_REVISION_MATCH = 66,
+ BPFILTER_IPT_SO_GET_REVISION_TARGET = 67,
+ BPFILTER_IPT_GET_MAX,
+};
+
+#endif /* _UAPI_LINUX_BPFILTER_H */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
new file mode 100644
index 0000000000000..0b5ddbe135a47
--- /dev/null
+++ b/include/uapi/linux/btf.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifier */
+#define BTF_MAX_TYPE 0x0000ffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x0000ffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-27: kind (e.g. int, ptr, array...etc)
+ * bits 28-31: unused
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT and UNION.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_MAX 11
+#define NR_BTF_KINDS 12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
+#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED (1 << 0)
+#define BTF_INT_CHAR (1 << 1)
+#define BTF_INT_BOOL (1 << 2)
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ __u32 offset; /* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index 68ff254147005..db210625cee8c 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -116,12 +116,16 @@ struct proc_event {
struct coredump_proc_event {
__kernel_pid_t process_pid;
__kernel_pid_t process_tgid;
+ __kernel_pid_t parent_pid;
+ __kernel_pid_t parent_tgid;
} coredump;
struct exit_proc_event {
__kernel_pid_t process_pid;
__kernel_pid_t process_tgid;
__u32 exit_code, exit_signal;
+ __kernel_pid_t parent_pid;
+ __kernel_pid_t parent_tgid;
} exit;
} event_data;
diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index 2c0c6453c3f40..60aa2e4466981 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -163,6 +163,16 @@ struct ieee_pfc {
__u64 indications[IEEE_8021QAZ_MAX_TCS];
};
+#define IEEE_8021Q_MAX_PRIORITIES 8
+#define DCBX_MAX_BUFFERS 8
+struct dcbnl_buffer {
+ /* priority to buffer mapping */
+ __u8 prio2buffer[IEEE_8021Q_MAX_PRIORITIES];
+ /* buffer size in Bytes */
+ __u32 buffer_size[DCBX_MAX_BUFFERS];
+ __u32 total_size;
+};
+
/* CEE DCBX std supported values */
#define CEE_DCBX_MAX_PGS 8
#define CEE_DCBX_MAX_PRIO 8
@@ -406,6 +416,7 @@ enum ieee_attrs {
DCB_ATTR_IEEE_MAXRATE,
DCB_ATTR_IEEE_QCN,
DCB_ATTR_IEEE_QCN_STATS,
+ DCB_ATTR_DCB_BUFFER,
__DCB_ATTR_IEEE_MAX
};
#define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 1df65a4c20441..75cb5450c8512 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -132,6 +132,16 @@ enum devlink_eswitch_encap_mode {
DEVLINK_ESWITCH_ENCAP_MODE_BASIC,
};
+enum devlink_port_flavour {
+ DEVLINK_PORT_FLAVOUR_PHYSICAL, /* Any kind of a port physically
+ * facing the user.
+ */
+ DEVLINK_PORT_FLAVOUR_CPU, /* CPU port */
+ DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture
+ * interconnect port.
+ */
+};
+
enum devlink_attr {
/* don't change the order or add anything between, this is ABI! */
DEVLINK_ATTR_UNSPEC,
@@ -224,6 +234,10 @@ enum devlink_attr {
DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, /* u64 */
DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,/* u64 */
+ DEVLINK_ATTR_PORT_FLAVOUR, /* u16 */
+ DEVLINK_ATTR_PORT_NUMBER, /* u32 */
+ DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, /* u32 */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e2535d6dcec79..4e12c423b9fe9 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -421,6 +421,7 @@ typedef struct elf64_shdr {
#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
/* Note header in a PT_NOTE section */
typedef struct elf32_note {
diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index 2ef053d265de8..ebaf5701c9db9 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -33,6 +33,7 @@ enum {
IFA_CACHEINFO,
IFA_MULTICAST,
IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
__IFA_MAX,
};
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 68699f6541185..cf01b68242448 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -333,6 +333,7 @@ enum {
IFLA_BRPORT_BCAST_FLOOD,
IFLA_BRPORT_GROUP_FWD_MASK,
IFLA_BRPORT_NEIGH_SUPPRESS,
+ IFLA_BRPORT_ISOLATED,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -516,6 +517,7 @@ enum {
IFLA_VXLAN_COLLECT_METADATA,
IFLA_VXLAN_LABEL,
IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
new file mode 100644
index 0000000000000..1fa0e977ea8d0
--- /dev/null
+++ b/include/uapi/linux/if_xdp.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ * Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM (1 << 0)
+#define XDP_COPY (1 << 1) /* Force copy-mode */
+#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+
+struct sockaddr_xdp {
+ __u16 sxdp_family;
+ __u16 sxdp_flags;
+ __u32 sxdp_ifindex;
+ __u32 sxdp_queue_id;
+ __u32 sxdp_shared_umem_fd;
+};
+
+struct xdp_ring_offset {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+struct xdp_mmap_offsets {
+ struct xdp_ring_offset rx;
+ struct xdp_ring_offset tx;
+ struct xdp_ring_offset fr; /* Fill */
+ struct xdp_ring_offset cr; /* Completion */
+};
+
+/* XDP socket options */
+#define XDP_MMAP_OFFSETS 1
+#define XDP_RX_RING 2
+#define XDP_TX_RING 3
+#define XDP_UMEM_REG 4
+#define XDP_UMEM_FILL_RING 5
+#define XDP_UMEM_COMPLETION_RING 6
+#define XDP_STATISTICS 7
+
+struct xdp_umem_reg {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+};
+
+struct xdp_statistics {
+ __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING 0
+#define XDP_PGOFF_TX_RING 0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING 0x100000000
+#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
+
+/* Rx/Tx descriptor */
+struct xdp_desc {
+ __u64 addr;
+ __u32 len;
+ __u32 options;
+};
+
+/* UMEM descriptor is __u64 */
+
+#endif /* _LINUX_IF_XDP_H */
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a33000da7229c..4a95c0db14d4f 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -10,6 +10,7 @@
#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2)
#define NF_NAT_RANGE_PERSISTENT (1 << 3)
#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
+#define NF_NAT_RANGE_PROTO_OFFSET (1 << 5)
#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -17,7 +18,7 @@
#define NF_NAT_RANGE_MASK \
(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \
NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \
- NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+ NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET)
struct nf_nat_ipv4_range {
unsigned int flags;
@@ -40,4 +41,13 @@ struct nf_nat_range {
union nf_conntrack_man_proto max_proto;
};
+struct nf_nat_range2 {
+ unsigned int flags;
+ union nf_inet_addr min_addr;
+ union nf_inet_addr max_addr;
+ union nf_conntrack_man_proto min_proto;
+ union nf_conntrack_man_proto max_proto;
+ union nf_conntrack_man_proto base_proto;
+};
+
#endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
new file mode 100644
index 0000000000000..8f2f2f4031836
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_osf.h
@@ -0,0 +1,86 @@
+#ifndef _NF_OSF_H
+#define _NF_OSF_H
+
+#include <linux/types.h>
+
+#define MAXGENRELEN 32
+
+#define NF_OSF_GENRE (1 << 0)
+#define NF_OSF_TTL (1 << 1)
+#define NF_OSF_LOG (1 << 2)
+#define NF_OSF_INVERT (1 << 3)
+
+#define NF_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */
+#define NF_OSF_LOGLEVEL_FIRST 1 /* log only the first matced fingerprint */
+#define NF_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */
+
+#define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
+
+/* Do not compare ip and fingerprint TTL at all */
+#define NF_OSF_TTL_NOCHECK 2
+
+/* Wildcard MSS (kind of).
+ * It is used to implement a state machine for the different wildcard values
+ * of the MSS and window sizes.
+ */
+struct nf_osf_wc {
+ __u32 wc;
+ __u32 val;
+};
+
+/* This struct represents IANA options
+ * http://www.iana.org/assignments/tcp-parameters
+ */
+struct nf_osf_opt {
+ __u16 kind, length;
+ struct nf_osf_wc wc;
+};
+
+struct nf_osf_info {
+ char genre[MAXGENRELEN];
+ __u32 len;
+ __u32 flags;
+ __u32 loglevel;
+ __u32 ttl;
+};
+
+struct nf_osf_user_finger {
+ struct nf_osf_wc wss;
+
+ __u8 ttl, df;
+ __u16 ss, mss;
+ __u16 opt_num;
+
+ char genre[MAXGENRELEN];
+ char version[MAXGENRELEN];
+ char subtype[MAXGENRELEN];
+
+ /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
+ struct nf_osf_opt opt[MAX_IPOPTLEN];
+};
+
+struct nf_osf_nlmsg {
+ struct nf_osf_user_finger f;
+ struct iphdr ip;
+ struct tcphdr tcp;
+};
+
+/* Defines for IANA option kinds */
+enum iana_options {
+ OSFOPT_EOL = 0, /* End of options */
+ OSFOPT_NOP, /* NOP */
+ OSFOPT_MSS, /* Maximum segment size */
+ OSFOPT_WSO, /* Window scale option */
+ OSFOPT_SACKP, /* SACK permitted */
+ OSFOPT_SACK, /* SACK */
+ OSFOPT_ECHO,
+ OSFOPT_ECHOREPLY,
+ OSFOPT_TS, /* Timestamp option */
+ OSFOPT_POCP, /* Partial Order Connection Permitted */
+ OSFOPT_POSP, /* Partial Order Service Profile */
+
+ /* Others are not used in the current OSF */
+ OSFOPT_EMPTY = 255,
+};
+
+#endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6a3d653d5b274..ae00a3c49b8a4 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -831,7 +831,9 @@ enum nft_rt_keys {
NFT_RT_NEXTHOP4,
NFT_RT_NEXTHOP6,
NFT_RT_TCPMSS,
+ __NFT_RT_MAX
};
+#define NFT_RT_MAX (__NFT_RT_MAX - 1)
/**
* enum nft_hash_types - nf_tables hash expression types
@@ -854,6 +856,8 @@ enum nft_hash_types {
* @NFTA_HASH_SEED: seed value (NLA_U32)
* @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
* @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types)
+ * @NFTA_HASH_SET_NAME: name of the map to lookup (NLA_STRING)
+ * @NFTA_HASH_SET_ID: id of the map (NLA_U32)
*/
enum nft_hash_attributes {
NFTA_HASH_UNSPEC,
@@ -864,6 +868,8 @@ enum nft_hash_attributes {
NFTA_HASH_SEED,
NFTA_HASH_OFFSET,
NFTA_HASH_TYPE,
+ NFTA_HASH_SET_NAME,
+ NFTA_HASH_SET_ID,
__NFTA_HASH_MAX,
};
#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1)
@@ -899,6 +905,31 @@ enum nft_rt_attributes {
#define NFTA_RT_MAX (__NFTA_RT_MAX - 1)
/**
+ * enum nft_socket_attributes - nf_tables socket expression netlink attributes
+ *
+ * @NFTA_SOCKET_KEY: socket key to match
+ * @NFTA_SOCKET_DREG: destination register
+ */
+enum nft_socket_attributes {
+ NFTA_SOCKET_UNSPEC,
+ NFTA_SOCKET_KEY,
+ NFTA_SOCKET_DREG,
+ __NFTA_SOCKET_MAX
+};
+#define NFTA_SOCKET_MAX (__NFTA_SOCKET_MAX - 1)
+
+/*
+ * enum nft_socket_keys - nf_tables socket expression keys
+ *
+ * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_
+ */
+enum nft_socket_keys {
+ NFT_SOCKET_TRANSPARENT,
+ __NFT_SOCKET_MAX
+};
+#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
+
+/**
* enum nft_ct_keys - nf_tables ct expression keys
*
* @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
@@ -949,7 +980,9 @@ enum nft_ct_keys {
NFT_CT_DST_IP,
NFT_CT_SRC_IP6,
NFT_CT_DST_IP6,
+ __NFT_CT_MAX
};
+#define NFT_CT_MAX (__NFT_CT_MAX - 1)
/**
* enum nft_ct_attributes - nf_tables ct expression netlink attributes
@@ -1010,6 +1043,24 @@ enum nft_limit_attributes {
};
#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1)
+enum nft_connlimit_flags {
+ NFT_CONNLIMIT_F_INV = (1 << 0),
+};
+
+/**
+ * enum nft_connlimit_attributes - nf_tables connlimit expression netlink attributes
+ *
+ * @NFTA_CONNLIMIT_COUNT: number of connections (NLA_U32)
+ * @NFTA_CONNLIMIT_FLAGS: flags (NLA_U32: enum nft_connlimit_flags)
+ */
+enum nft_connlimit_attributes {
+ NFTA_CONNLIMIT_UNSPEC,
+ NFTA_CONNLIMIT_COUNT,
+ NFTA_CONNLIMIT_FLAGS,
+ __NFTA_CONNLIMIT_MAX
+};
+#define NFTA_CONNLIMIT_MAX (__NFTA_CONNLIMIT_MAX - 1)
+
/**
* enum nft_counter_attributes - nf_tables counter expression netlink attributes
*
@@ -1048,6 +1099,11 @@ enum nft_log_attributes {
#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
/**
+ * LOGLEVEL_AUDIT - a pseudo log level enabling audit logging
+ */
+#define LOGLEVEL_AUDIT 8
+
+/**
* enum nft_queue_attributes - nf_tables queue expression netlink attributes
*
* @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
@@ -1222,10 +1278,14 @@ enum nft_dup_attributes {
* enum nft_fwd_attributes - nf_tables fwd expression netlink attributes
*
* @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register)
+ * @NFTA_FWD_SREG_ADDR: source register of destination address (NLA_U32: nft_register)
+ * @NFTA_FWD_NFPROTO: layer 3 family of source register address (NLA_U32: enum nfproto)
*/
enum nft_fwd_attributes {
NFTA_FWD_UNSPEC,
NFTA_FWD_SREG_DEV,
+ NFTA_FWD_SREG_ADDR,
+ NFTA_FWD_NFPROTO,
__NFTA_FWD_MAX
};
#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1)
@@ -1315,7 +1375,8 @@ enum nft_ct_helper_attributes {
#define NFT_OBJECT_QUOTA 2
#define NFT_OBJECT_CT_HELPER 3
#define NFT_OBJECT_LIMIT 4
-#define __NFT_OBJECT_MAX 5
+#define NFT_OBJECT_CONNLIMIT 5
+#define __NFT_OBJECT_MAX 6
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/**
@@ -1450,6 +1511,8 @@ enum nft_trace_types {
* @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
* @NFTA_NG_TYPE: operation type (NLA_U32)
* @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
+ * @NFTA_NG_SET_NAME: name of the map to lookup (NLA_STRING)
+ * @NFTA_NG_SET_ID: id of the map (NLA_U32)
*/
enum nft_ng_attributes {
NFTA_NG_UNSPEC,
@@ -1457,6 +1520,8 @@ enum nft_ng_attributes {
NFTA_NG_MODULUS,
NFTA_NG_TYPE,
NFTA_NG_OFFSET,
+ NFTA_NG_SET_NAME,
+ NFTA_NG_SET_ID,
__NFTA_NG_MAX
};
#define NFTA_NG_MAX (__NFTA_NG_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 77987111cab0c..1d41810d17e2c 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -262,6 +262,7 @@ enum ctattr_stats_cpu {
enum ctattr_stats_global {
CTA_STATS_GLOBAL_UNSPEC,
CTA_STATS_GLOBAL_ENTRIES,
+ CTA_STATS_GLOBAL_MAX_ENTRIES,
__CTA_STATS_GLOBAL_MAX,
};
#define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index dad197e2ab99b..72956eceeb096 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -23,101 +23,29 @@
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/tcp.h>
+#include <linux/netfilter/nf_osf.h>
-#define MAXGENRELEN 32
+#define XT_OSF_GENRE NF_OSF_GENRE
+#define XT_OSF_INVERT NF_OSF_INVERT
-#define XT_OSF_GENRE (1<<0)
-#define XT_OSF_TTL (1<<1)
-#define XT_OSF_LOG (1<<2)
-#define XT_OSF_INVERT (1<<3)
+#define XT_OSF_TTL NF_OSF_TTL
+#define XT_OSF_LOG NF_OSF_LOG
-#define XT_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */
-#define XT_OSF_LOGLEVEL_FIRST 1 /* log only the first matced fingerprint */
-#define XT_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */
+#define XT_OSF_LOGLEVEL_ALL NF_OSF_LOGLEVEL_ALL
+#define XT_OSF_LOGLEVEL_FIRST NF_OSF_LOGLEVEL_FIRST
+#define XT_OSF_LOGLEVEL_ALL_KNOWN NF_OSF_LOGLEVEL_ALL_KNOWN
-#define XT_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
-#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
-#define XT_OSF_TTL_NOCHECK 2 /* Do not compare ip and fingerprint TTL at all */
+#define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE
+#define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK
-struct xt_osf_info {
- char genre[MAXGENRELEN];
- __u32 len;
- __u32 flags;
- __u32 loglevel;
- __u32 ttl;
-};
-
-/*
- * Wildcard MSS (kind of).
- * It is used to implement a state machine for the different wildcard values
- * of the MSS and window sizes.
- */
-struct xt_osf_wc {
- __u32 wc;
- __u32 val;
-};
-
-/*
- * This struct represents IANA options
- * http://www.iana.org/assignments/tcp-parameters
- */
-struct xt_osf_opt {
- __u16 kind, length;
- struct xt_osf_wc wc;
-};
-
-struct xt_osf_user_finger {
- struct xt_osf_wc wss;
-
- __u8 ttl, df;
- __u16 ss, mss;
- __u16 opt_num;
-
- char genre[MAXGENRELEN];
- char version[MAXGENRELEN];
- char subtype[MAXGENRELEN];
+#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
- /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
- struct xt_osf_opt opt[MAX_IPOPTLEN];
-};
-
-struct xt_osf_nlmsg {
- struct xt_osf_user_finger f;
- struct iphdr ip;
- struct tcphdr tcp;
-};
-
-/* Defines for IANA option kinds */
-
-enum iana_options {
- OSFOPT_EOL = 0, /* End of options */
- OSFOPT_NOP, /* NOP */
- OSFOPT_MSS, /* Maximum segment size */
- OSFOPT_WSO, /* Window scale option */
- OSFOPT_SACKP, /* SACK permitted */
- OSFOPT_SACK, /* SACK */
- OSFOPT_ECHO,
- OSFOPT_ECHOREPLY,
- OSFOPT_TS, /* Timestamp option */
- OSFOPT_POCP, /* Partial Order Connection Permitted */
- OSFOPT_POSP, /* Partial Order Service Profile */
-
- /* Others are not used in the current OSF */
- OSFOPT_EMPTY = 255,
-};
-
-/*
- * Initial window size option state machine: multiple of mss, mtu or
- * plain numeric value. Can also be made as plain numeric value which
- * is not a multiple of specified value.
- */
-enum xt_osf_window_size_options {
- OSF_WSS_PLAIN = 0,
- OSF_WSS_MSS,
- OSF_WSS_MTU,
- OSF_WSS_MODULO,
- OSF_WSS_MAX,
-};
+#define xt_osf_wc nf_osf_wc
+#define xt_osf_opt nf_osf_opt
+#define xt_osf_info nf_osf_info
+#define xt_osf_user_finger nf_osf_user_finger
+#define xt_osf_finger nf_osf_finger
+#define xt_osf_nlmsg nf_osf_nlmsg
/*
* Add/remove fingerprint from the kernel.
diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 0c7dc83150139..3b86c14ea49d0 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -191,6 +191,12 @@ struct ebt_entry {
unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
};
+static __inline__ struct ebt_entry_target *
+ebt_get_target(struct ebt_entry *e)
+{
+ return (void *)e + e->target_offset;
+}
+
/* {g,s}etsockopt numbers */
#define EBT_BASE_CTL 128
diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
index f3cc0ef514a70..54ed83360dac8 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
@@ -17,7 +17,10 @@
#define IP6T_SRH_LAST_GT 0x0100
#define IP6T_SRH_LAST_LT 0x0200
#define IP6T_SRH_TAG 0x0400
-#define IP6T_SRH_MASK 0x07FF
+#define IP6T_SRH_PSID 0x0800
+#define IP6T_SRH_NSID 0x1000
+#define IP6T_SRH_LSID 0x2000
+#define IP6T_SRH_MASK 0x3FFF
/* Values for "mt_invflags" field in struct ip6t_srh */
#define IP6T_SRH_INV_NEXTHDR 0x0001
@@ -31,7 +34,10 @@
#define IP6T_SRH_INV_LAST_GT 0x0100
#define IP6T_SRH_INV_LAST_LT 0x0200
#define IP6T_SRH_INV_TAG 0x0400
-#define IP6T_SRH_INV_MASK 0x07FF
+#define IP6T_SRH_INV_PSID 0x0800
+#define IP6T_SRH_INV_NSID 0x1000
+#define IP6T_SRH_INV_LSID 0x2000
+#define IP6T_SRH_INV_MASK 0x3FFF
/**
* struct ip6t_srh - SRH match options
@@ -54,4 +60,37 @@ struct ip6t_srh {
__u16 mt_invflags;
};
+/**
+ * struct ip6t_srh1 - SRH match options (revision 1)
+ * @ next_hdr: Next header field of SRH
+ * @ hdr_len: Extension header length field of SRH
+ * @ segs_left: Segments left field of SRH
+ * @ last_entry: Last entry field of SRH
+ * @ tag: Tag field of SRH
+ * @ psid_addr: Address of previous SID in SRH SID list
+ * @ nsid_addr: Address of NEXT SID in SRH SID list
+ * @ lsid_addr: Address of LAST SID in SRH SID list
+ * @ psid_msk: Mask of previous SID in SRH SID list
+ * @ nsid_msk: Mask of next SID in SRH SID list
+ * @ lsid_msk: MAsk of last SID in SRH SID list
+ * @ mt_flags: match options
+ * @ mt_invflags: Invert the sense of match options
+ */
+
+struct ip6t_srh1 {
+ __u8 next_hdr;
+ __u8 hdr_len;
+ __u8 segs_left;
+ __u8 last_entry;
+ __u16 tag;
+ struct in6_addr psid_addr;
+ struct in6_addr nsid_addr;
+ struct in6_addr lsid_addr;
+ struct in6_addr psid_msk;
+ struct in6_addr nsid_msk;
+ struct in6_addr lsid_msk;
+ __u16 mt_flags;
+ __u16 mt_invflags;
+};
+
#endif /*_IP6T_SRH_H*/
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 271b93783d282..28b36545de244 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -11,6 +11,7 @@
* Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
* Copyright 2008 Colin McCabe <colin@cozybit.com>
* Copyright 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -203,7 +204,8 @@
* FILS shared key authentication offload should be able to construct the
* authentication and association frames for FILS shared key authentication and
* eventually do a key derivation as per IEEE 802.11ai. The below additional
- * parameters should be given to driver in %NL80211_CMD_CONNECT.
+ * parameters should be given to driver in %NL80211_CMD_CONNECT and/or in
+ * %NL80211_CMD_UPDATE_CONNECT_PARAMS.
* %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai
* %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai
* %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message
@@ -214,7 +216,8 @@
* as specified in IETF RFC 6696.
*
* When FILS shared key authentication is completed, driver needs to provide the
- * below additional parameters to userspace.
+ * below additional parameters to userspace, which can be either after setting
+ * up a connection or after roaming.
* %NL80211_ATTR_FILS_KEK - used for key renewal
* %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges
* %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated
@@ -2225,6 +2228,16 @@ enum nl80211_commands {
* @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this
* u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
*
+ * @NL80211_ATTR_TXQ_STATS: TXQ statistics (nested attribute, see &enum
+ * nl80211_txq_stats)
+ * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy.
+ * The smaller of this and the memory limit is enforced.
+ * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory memory limit (in bytes) for the
+ * TXQ queues for this phy. The smaller of this and the packet limit is
+ * enforced.
+ * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes
+ * a flow is assigned on each round of the DRR scheduler.
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2659,6 +2672,11 @@ enum nl80211_attrs {
NL80211_ATTR_CONTROL_PORT_OVER_NL80211,
+ NL80211_ATTR_TXQ_STATS,
+ NL80211_ATTR_TXQ_LIMIT,
+ NL80211_ATTR_TXQ_MEMORY_LIMIT,
+ NL80211_ATTR_TXQ_QUANTUM,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -2982,6 +3000,8 @@ enum nl80211_sta_bss_param {
* received from the station (u64, usec)
* @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
* @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm)
+ * @NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG: avg signal strength of (data)
+ * ACK frame (s8, dBm)
* @__NL80211_STA_INFO_AFTER_LAST: internal
* @NL80211_STA_INFO_MAX: highest possible station info attribute
*/
@@ -3021,6 +3041,7 @@ enum nl80211_sta_info {
NL80211_STA_INFO_RX_DURATION,
NL80211_STA_INFO_PAD,
NL80211_STA_INFO_ACK_SIGNAL,
+ NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG,
/* keep last */
__NL80211_STA_INFO_AFTER_LAST,
@@ -3038,6 +3059,7 @@ enum nl80211_sta_info {
* @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted
* MSDUs (u64)
* @NL80211_TID_STATS_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_TID_STATS_TXQ_STATS: TXQ stats (nested attribute)
* @NUM_NL80211_TID_STATS: number of attributes here
* @NL80211_TID_STATS_MAX: highest numbered attribute here
*/
@@ -3048,6 +3070,7 @@ enum nl80211_tid_stats {
NL80211_TID_STATS_TX_MSDU_RETRIES,
NL80211_TID_STATS_TX_MSDU_FAILED,
NL80211_TID_STATS_PAD,
+ NL80211_TID_STATS_TXQ_STATS,
/* keep last */
NUM_NL80211_TID_STATS,
@@ -3055,6 +3078,44 @@ enum nl80211_tid_stats {
};
/**
+ * enum nl80211_txq_stats - per TXQ statistics attributes
+ * @__NL80211_TXQ_STATS_INVALID: attribute number 0 is reserved
+ * @NUM_NL80211_TXQ_STATS: number of attributes here
+ * @NL80211_TXQ_STATS_BACKLOG_BYTES: number of bytes currently backlogged
+ * @NL80211_TXQ_STATS_BACKLOG_PACKETS: number of packets currently
+ * backlogged
+ * @NL80211_TXQ_STATS_FLOWS: total number of new flows seen
+ * @NL80211_TXQ_STATS_DROPS: total number of packet drops
+ * @NL80211_TXQ_STATS_ECN_MARKS: total number of packet ECN marks
+ * @NL80211_TXQ_STATS_OVERLIMIT: number of drops due to queue space overflow
+ * @NL80211_TXQ_STATS_OVERMEMORY: number of drops due to memory limit overflow
+ * (only for per-phy stats)
+ * @NL80211_TXQ_STATS_COLLISIONS: number of hash collisions
+ * @NL80211_TXQ_STATS_TX_BYTES: total number of bytes dequeued from TXQ
+ * @NL80211_TXQ_STATS_TX_PACKETS: total number of packets dequeued from TXQ
+ * @NL80211_TXQ_STATS_MAX_FLOWS: number of flow buckets for PHY
+ * @NL80211_TXQ_STATS_MAX: highest numbered attribute here
+ */
+enum nl80211_txq_stats {
+ __NL80211_TXQ_STATS_INVALID,
+ NL80211_TXQ_STATS_BACKLOG_BYTES,
+ NL80211_TXQ_STATS_BACKLOG_PACKETS,
+ NL80211_TXQ_STATS_FLOWS,
+ NL80211_TXQ_STATS_DROPS,
+ NL80211_TXQ_STATS_ECN_MARKS,
+ NL80211_TXQ_STATS_OVERLIMIT,
+ NL80211_TXQ_STATS_OVERMEMORY,
+ NL80211_TXQ_STATS_COLLISIONS,
+ NL80211_TXQ_STATS_TX_BYTES,
+ NL80211_TXQ_STATS_TX_PACKETS,
+ NL80211_TXQ_STATS_MAX_FLOWS,
+
+ /* keep last */
+ NUM_NL80211_TXQ_STATS,
+ NL80211_TXQ_STATS_MAX = NUM_NL80211_TXQ_STATS - 1
+};
+
+/**
* enum nl80211_mpath_flags - nl80211 mesh path flags
*
* @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active
@@ -3144,6 +3205,29 @@ enum nl80211_band_attr {
#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA
/**
+ * enum nl80211_wmm_rule - regulatory wmm rule
+ *
+ * @__NL80211_WMMR_INVALID: attribute number 0 is reserved
+ * @NL80211_WMMR_CW_MIN: Minimum contention window slot.
+ * @NL80211_WMMR_CW_MAX: Maximum contention window slot.
+ * @NL80211_WMMR_AIFSN: Arbitration Inter Frame Space.
+ * @NL80211_WMMR_TXOP: Maximum allowed tx operation time.
+ * @nl80211_WMMR_MAX: highest possible wmm rule.
+ * @__NL80211_WMMR_LAST: Internal use.
+ */
+enum nl80211_wmm_rule {
+ __NL80211_WMMR_INVALID,
+ NL80211_WMMR_CW_MIN,
+ NL80211_WMMR_CW_MAX,
+ NL80211_WMMR_AIFSN,
+ NL80211_WMMR_TXOP,
+
+ /* keep last */
+ __NL80211_WMMR_LAST,
+ NL80211_WMMR_MAX = __NL80211_WMMR_LAST - 1
+};
+
+/**
* enum nl80211_frequency_attr - frequency attributes
* @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved
* @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz
@@ -3192,6 +3276,9 @@ enum nl80211_band_attr {
* on this channel in current regulatory domain.
* @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed
* on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations.
+ * This is a nested attribute that contains the wmm limitation per AC.
+ * (see &enum nl80211_wmm_rule)
* @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
* currently defined
* @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -3220,6 +3307,7 @@ enum nl80211_frequency_attr {
NL80211_FREQUENCY_ATTR_IR_CONCURRENT,
NL80211_FREQUENCY_ATTR_NO_20MHZ,
NL80211_FREQUENCY_ATTR_NO_10MHZ,
+ NL80211_FREQUENCY_ATTR_WMM,
/* keep last */
__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -5040,6 +5128,11 @@ enum nl80211_feature_flags {
* "radar detected" event.
* @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and
* receiving control port frames over nl80211 instead of the netdevice.
+ * @NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT: This Driver support data ack
+ * rssi if firmware support, this flag is to intimate about ack rssi
+ * support to nl80211.
+ * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate
+ * TXQs.
*
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5072,6 +5165,8 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
NL80211_EXT_FEATURE_DFS_OFFLOAD,
NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
+ NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT,
+ NL80211_EXT_FEATURE_TXQS,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 713e56ce681f5..863aabaa5cc92 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -937,4 +937,32 @@ enum ovs_meter_band_type {
#define OVS_METER_BAND_TYPE_MAX (__OVS_METER_BAND_TYPE_MAX - 1)
+/* Conntrack limit */
+#define OVS_CT_LIMIT_FAMILY "ovs_ct_limit"
+#define OVS_CT_LIMIT_MCGROUP "ovs_ct_limit"
+#define OVS_CT_LIMIT_VERSION 0x1
+
+enum ovs_ct_limit_cmd {
+ OVS_CT_LIMIT_CMD_UNSPEC,
+ OVS_CT_LIMIT_CMD_SET, /* Add or modify ct limit. */
+ OVS_CT_LIMIT_CMD_DEL, /* Delete ct limit. */
+ OVS_CT_LIMIT_CMD_GET /* Get ct limit. */
+};
+
+enum ovs_ct_limit_attr {
+ OVS_CT_LIMIT_ATTR_UNSPEC,
+ OVS_CT_LIMIT_ATTR_ZONE_LIMIT, /* Nested struct ovs_zone_limit. */
+ __OVS_CT_LIMIT_ATTR_MAX
+};
+
+#define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
+
+#define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
+
+struct ovs_zone_limit {
+ int zone_id;
+ __u32 limit;
+ __u32 count;
+};
+
#endif /* _LINUX_OPENVSWITCH_H */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 103ba797a8f31..83ade9b5cf950 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -506,6 +506,8 @@
#define PCI_EXP_DEVCTL_READRQ_256B 0x1000 /* 256 Bytes */
#define PCI_EXP_DEVCTL_READRQ_512B 0x2000 /* 512 Bytes */
#define PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
+#define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
+#define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */
#define PCI_EXP_DEVSTA 10 /* Device Status */
#define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index be05e66c167b1..84e4c1d0f874a 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -129,6 +129,7 @@ enum {
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */
#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */
#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
+#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */
/* U32 filters */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 9b15005955faa..7d8502313c993 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -254,6 +254,11 @@ enum {
#define RTPROT_DHCP 16 /* DHCP client */
#define RTPROT_MROUTED 17 /* Multicast daemon */
#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
/* rtm_scope
@@ -327,6 +332,9 @@ enum rtattr_type_t {
RTA_PAD,
RTA_UID,
RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
__RTA_MAX
};
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index ef2d8c3e76c14..edc138bdc56da 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -25,6 +25,7 @@ enum {
SEG6_LOCAL_NH6,
SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF,
+ SEG6_LOCAL_BPF,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
@@ -59,10 +60,21 @@ enum {
SEG6_LOCAL_ACTION_END_AS = 13,
/* forward to SR-unaware VNF with masquerading */
SEG6_LOCAL_ACTION_END_AM = 14,
+ /* custom BPF action */
+ SEG6_LOCAL_ACTION_END_BPF = 15,
__SEG6_LOCAL_ACTION_MAX,
};
#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+enum {
+ SEG6_LOCAL_BPF_PROG_UNSPEC,
+ SEG6_LOCAL_BPF_PROG,
+ SEG6_LOCAL_BPF_PROG_NAME,
+ __SEG6_LOCAL_BPF_PROG_MAX,
+};
+
+#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+
#endif
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 33a70ece462f0..750d89120335e 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -276,6 +276,9 @@ enum
LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
+ LINUX_MIB_TCPDELIVERED, /* TCPDelivered */
+ LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */
+ LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
__LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 560374c978f90..29eb659aa77a1 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -122,6 +122,10 @@ enum {
#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */
#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */
#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */
+#define TCP_ZEROCOPY_RECEIVE 35
+#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */
+
+#define TCP_CM_INQ TCP_INQ
struct tcp_repair_opt {
__u32 opt_code;
@@ -224,6 +228,9 @@ struct tcp_info {
__u64 tcpi_busy_time; /* Time (usec) busy sending data */
__u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */
__u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
+
+ __u32 tcpi_delivered;
+ __u32 tcpi_delivered_ce;
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -244,6 +251,8 @@ enum {
TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */
TCP_NLA_CA_STATE, /* ca_state of socket */
TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
+ TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */
+ TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */
};
@@ -271,4 +280,11 @@ struct tcp_diag_md5sig {
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN];
};
+/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
+
+struct tcp_zerocopy_receive {
+ __u64 address; /* in: address of mapping */
+ __u32 length; /* in/out: number of bytes to map/mapped */
+ __u32 recv_skip_hint; /* out: amount of bytes to skip */
+};
#endif /* _UAPI_LINUX_TCP_H */
diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index bf6d28677cfe0..6b2fd4d9655f6 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -209,16 +209,16 @@ struct tipc_group_req {
* The string formatting for each name element is:
* media: media
* interface: media:interface name
- * link: Z.C.N:interface-Z.C.N:interface
- *
+ * link: node:interface-node:interface
*/
-
+#define TIPC_NODEID_LEN 16
#define TIPC_MAX_MEDIA_NAME 16
#define TIPC_MAX_IF_NAME 16
#define TIPC_MAX_BEARER_NAME 32
#define TIPC_MAX_LINK_NAME 68
-#define SIOCGETLINKNAME SIOCPROTOPRIVATE
+#define SIOCGETLINKNAME SIOCPROTOPRIVATE
+#define SIOCGETNODEID (SIOCPROTOPRIVATE + 1)
struct tipc_sioc_ln_req {
__u32 peer;
@@ -226,6 +226,10 @@ struct tipc_sioc_ln_req {
char linkname[TIPC_MAX_LINK_NAME];
};
+struct tipc_sioc_nodeid_req {
+ __u32 peer;
+ char node_id[TIPC_NODEID_LEN];
+};
/* The macros and functions below are deprecated:
*/
diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 3f29e3c8ed066..4b2c93b1934cf 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -185,6 +185,11 @@
#define TIPC_DEF_LINK_WIN 50
#define TIPC_MAX_LINK_WIN 8191
+/*
+ * Default MTU for UDP media
+ */
+
+#define TIPC_DEF_LINK_UDP_MTU 14000
struct tipc_node_info {
__be32 addr; /* network address of node */
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 0affb682e5e39..85c11982c89b3 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -266,6 +266,7 @@ enum {
TIPC_NLA_PROP_PRIO, /* u32 */
TIPC_NLA_PROP_TOL, /* u32 */
TIPC_NLA_PROP_WIN, /* u32 */
+ TIPC_NLA_PROP_MTU, /* u32 */
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index efb7b5991c2fd..09d00f8c442b7 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -32,6 +32,7 @@ struct udphdr {
#define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */
#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */
#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */
+#define UDP_SEGMENT 103 /* Set GSO segmentation size */
/* UDP encapsulation types */
#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 5de6ed37695b9..a3715a3224c10 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -57,6 +57,9 @@
* Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
+#define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device
+ * with the same MAC.
+ */
#define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */
#ifndef VIRTIO_NET_NO_LEGACY
diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h
new file mode 100644
index 0000000000000..022619668e0e9
--- /dev/null
+++ b/include/uapi/linux/vmcore.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_VMCORE_H
+#define _UAPI_VMCORE_H
+
+#include <linux/types.h>
+
+#define VMCOREDD_NOTE_NAME "LINUX"
+#define VMCOREDD_MAX_NAME_BYTES 44
+
+struct vmcoredd_header {
+ __u32 n_namesz; /* Name size */
+ __u32 n_descsz; /* Content size */
+ __u32 n_type; /* NT_VMCOREDD */
+ __u8 name[8]; /* LINUX\0\0\0 */
+ __u8 dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Device dump's name */
+};
+
+#endif /* _UAPI_VMCORE_H */