summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/netfilter/Kconfig6
-rw-r--r--net/can/gw.c48
-rw-r--r--net/ceph/Makefile2
-rw-r--r--net/ceph/cls_lock_client.c54
-rw-r--r--net/ceph/decode.c84
-rw-r--r--net/ceph/messenger.c14
-rw-r--r--net/ceph/mon_client.c21
-rw-r--r--net/ceph/osd_client.c42
-rw-r--r--net/ceph/osdmap.c31
-rw-r--r--net/ceph/pagevec.c33
-rw-r--r--net/ceph/striper.c17
-rw-r--r--net/core/flow_offload.c22
-rw-r--r--net/core/neighbour.c20
-rw-r--r--net/core/sysctl_net_core.c34
-rw-r--r--net/dccp/sysctl.c16
-rw-r--r--net/dsa/slave.c6
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c4
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c2
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c12
-rw-r--r--net/ipv4/sysctl_net_ipv4.c60
-rw-r--r--net/ipv4/tcp_output.c13
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c8
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/sysctl_net_ipv6.c10
-rw-r--r--net/mac80211/cfg.c8
-rw-r--r--net/mac80211/driver-ops.c13
-rw-r--r--net/mpls/af_mpls.c10
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c2
-rw-r--r--net/netfilter/nf_conntrack_amanda.c2
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_expect.c26
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c5
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c18
-rw-r--r--net/netfilter/nf_conntrack_irc.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_conntrack_pptp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c8
-rw-r--r--net/netfilter/nf_conntrack_sane.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c10
-rw-r--r--net/netfilter/nf_conntrack_tftp.c2
-rw-r--r--net/netfilter/nf_nat_amanda.c2
-rw-r--r--net/netfilter/nf_nat_core.c2
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/nf_nat_sip.c8
-rw-r--r--net/netfilter/nf_nat_tftp.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c8
-rw-r--r--net/netfilter/nf_tables_api.c4
-rw-r--r--net/netfilter/nf_tables_offload.c5
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nft_chain_filter.c2
-rw-r--r--net/netfilter/nft_chain_nat.c3
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_meta.c2
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/nft_synproxy.c2
-rw-r--r--net/netrom/af_netrom.c1
-rw-r--r--net/openvswitch/datapath.c15
-rw-r--r--net/openvswitch/flow.c8
-rw-r--r--net/openvswitch/flow.h4
-rw-r--r--net/openvswitch/flow_table.c8
-rw-r--r--net/rds/ib_cm.c8
-rw-r--r--net/rxrpc/sysctl.c9
-rw-r--r--net/sched/act_ife.c5
-rw-r--r--net/sched/cls_api.c16
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/cls_matchall.c2
-rw-r--r--net/sched/cls_u32.c6
-rw-r--r--net/sctp/sysctl.c35
-rw-r--r--net/socket.c16
-rw-r--r--net/sunrpc/Kconfig2
-rw-r--r--net/sunrpc/backchannel_rqst.c40
-rw-r--r--net/sunrpc/clnt.c95
-rw-r--r--net/sunrpc/debugfs.c52
-rw-r--r--net/sunrpc/rpc_pipe.c34
-rw-r--r--net/sunrpc/sched.c81
-rw-r--r--net/sunrpc/stats.c23
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/xprt.c101
-rw-r--r--net/sunrpc/xprtmultipath.c89
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c7
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c327
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c152
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c8
-rw-r--r--net/sunrpc/xprtrdma/transport.c87
-rw-r--r--net/sunrpc/xprtrdma/verbs.c115
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h45
-rw-r--r--net/sunrpc/xprtsock.c126
-rw-r--r--net/tipc/sysctl.c6
-rw-r--r--net/tipc/topsrv.c2
-rw-r--r--net/wireless/Kconfig2
106 files changed, 1425 insertions, 884 deletions
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 154fa558bb908..5040fe43f4b42 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -6,7 +6,7 @@
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
select NETFILTER_FAMILY_BRIDGE
- bool "Ethernet Bridge nf_tables support"
+ tristate "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
@@ -25,6 +25,8 @@ config NF_LOG_BRIDGE
tristate "Bridge packet logging"
select NF_LOG_COMMON
+endif # NF_TABLES_BRIDGE
+
config NF_CONNTRACK_BRIDGE
tristate "IPv4/IPV6 bridge connection tracking support"
depends on NF_CONNTRACK
@@ -39,8 +41,6 @@ config NF_CONNTRACK_BRIDGE
To compile it as a module, choose M here. If unsure, say N.
-endif # NF_TABLES_BRIDGE
-
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
diff --git a/net/can/gw.c b/net/can/gw.c
index 5275ddf580bc7..72711053ebe66 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1046,32 +1046,50 @@ static __init int cgw_module_init(void)
pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
max_hops);
- register_pernet_subsys(&cangw_pernet_ops);
+ ret = register_pernet_subsys(&cangw_pernet_ops);
+ if (ret)
+ return ret;
+
+ ret = -ENOMEM;
cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
0, 0, NULL);
-
if (!cgw_cache)
- return -ENOMEM;
+ goto out_cache_create;
/* set notifier */
notifier.notifier_call = cgw_notifier;
- register_netdevice_notifier(&notifier);
+ ret = register_netdevice_notifier(&notifier);
+ if (ret)
+ goto out_register_notifier;
ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
NULL, cgw_dump_jobs, 0);
- if (ret) {
- unregister_netdevice_notifier(&notifier);
- kmem_cache_destroy(cgw_cache);
- return -ENOBUFS;
- }
-
- /* Only the first call to rtnl_register_module can fail */
- rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
- cgw_create_job, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
- cgw_remove_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register1;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+ cgw_create_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register2;
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+ cgw_remove_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register3;
return 0;
+
+out_rtnl_register3:
+ rtnl_unregister(PF_CAN, RTM_NEWROUTE);
+out_rtnl_register2:
+ rtnl_unregister(PF_CAN, RTM_GETROUTE);
+out_rtnl_register1:
+ unregister_netdevice_notifier(&notifier);
+out_register_notifier:
+ kmem_cache_destroy(cgw_cache);
+out_cache_create:
+ unregister_pernet_subsys(&cangw_pernet_ops);
+
+ return ret;
}
static __exit void cgw_module_exit(void)
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index db09defe27d0a..59d0ba2072deb 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
- mon_client.o \
+ mon_client.o decode.o \
cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
striper.o \
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 4cc28541281be..17447c19d9376 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -6,6 +6,7 @@
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
/**
* ceph_cls_lock - grab rados lock for object
@@ -264,8 +265,11 @@ static int decode_locker(void **p, void *end, struct ceph_locker *locker)
return ret;
*p += sizeof(struct ceph_timespec); /* skip expiration */
- ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
- ceph_decode_addr(&locker->info.addr);
+
+ ret = ceph_decode_entity_addr(p, end, &locker->info.addr);
+ if (ret)
+ return ret;
+
len = ceph_decode_32(p);
*p += len; /* skip description */
@@ -360,7 +364,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
dout("%s lock_name %s\n", __func__, lock_name);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
CEPH_OSD_FLAG_READ, get_info_op_page,
- get_info_op_buf_size, reply_page, &reply_len);
+ get_info_op_buf_size, &reply_page, &reply_len);
dout("%s: status %d\n", __func__, ret);
if (ret >= 0) {
@@ -375,3 +379,47 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock_info);
+
+int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
+ char *lock_name, u8 type, char *cookie, char *tag)
+{
+ int assert_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ int tag_len = strlen(tag);
+ struct page **pages;
+ void *p, *end;
+ int ret;
+
+ assert_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ tag_len + sizeof(__le32) +
+ sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
+ if (assert_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ ret = osd_req_op_cls_init(req, which, "lock", "assert_locked");
+ if (ret)
+ return ret;
+
+ pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ p = page_address(pages[0]);
+ end = p + assert_op_buf_size;
+
+ /* encode cls_lock_assert_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ assert_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_8(&p, type);
+ ceph_encode_string(&p, end, cookie, cookie_len);
+ ceph_encode_string(&p, end, tag, tag_len);
+ WARN_ON(p != end);
+
+ osd_req_op_cls_request_data_pages(req, which, pages, assert_op_buf_size,
+ 0, false, true);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_cls_assert_locked);
diff --git a/net/ceph/decode.c b/net/ceph/decode.c
new file mode 100644
index 0000000000000..eea529595a7a1
--- /dev/null
+++ b/net/ceph/decode.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ceph/decode.h>
+
+static int
+ceph_decode_entity_addr_versioned(void **p, void *end,
+ struct ceph_entity_addr *addr)
+{
+ int ret;
+ u8 struct_v;
+ u32 struct_len, addr_len;
+ void *struct_end;
+
+ ret = ceph_start_decoding(p, end, 1, "entity_addr_t", &struct_v,
+ &struct_len);
+ if (ret)
+ goto bad;
+
+ ret = -EINVAL;
+ struct_end = *p + struct_len;
+
+ ceph_decode_copy_safe(p, end, &addr->type, sizeof(addr->type), bad);
+
+ ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
+
+ ceph_decode_32_safe(p, end, addr_len, bad);
+ if (addr_len > sizeof(addr->in_addr))
+ goto bad;
+
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
+ if (addr_len) {
+ ceph_decode_copy_safe(p, end, &addr->in_addr, addr_len, bad);
+
+ addr->in_addr.ss_family =
+ le16_to_cpu((__force __le16)addr->in_addr.ss_family);
+ }
+
+ /* Advance past anything the client doesn't yet understand */
+ *p = struct_end;
+ ret = 0;
+bad:
+ return ret;
+}
+
+static int
+ceph_decode_entity_addr_legacy(void **p, void *end,
+ struct ceph_entity_addr *addr)
+{
+ int ret = -EINVAL;
+
+ /* Skip rest of type field */
+ ceph_decode_skip_n(p, end, 3, bad);
+
+ /*
+ * Clients that don't support ADDR2 always send TYPE_NONE, change it
+ * to TYPE_LEGACY for forward compatibility.
+ */
+ addr->type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
+ ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
+ ceph_decode_copy_safe(p, end, &addr->in_addr,
+ sizeof(addr->in_addr), bad);
+ addr->in_addr.ss_family =
+ be16_to_cpu((__force __be16)addr->in_addr.ss_family);
+ ret = 0;
+bad:
+ return ret;
+}
+
+int
+ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
+{
+ u8 marker;
+
+ ceph_decode_8_safe(p, end, marker, bad);
+ if (marker == 1)
+ return ceph_decode_entity_addr_versioned(p, end, addr);
+ else if (marker == 0)
+ return ceph_decode_entity_addr_legacy(p, end, addr);
+bad:
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ceph_decode_entity_addr);
+
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a33402c99321c..962f521c863ec 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -199,12 +199,14 @@ const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
switch (ss.ss_family) {
case AF_INET:
- snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
+ snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu",
+ le32_to_cpu(addr->type), &in4->sin_addr,
ntohs(in4->sin_port));
break;
case AF_INET6:
- snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
+ snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu",
+ le32_to_cpu(addr->type), &in6->sin6_addr,
ntohs(in6->sin6_port));
break;
@@ -220,7 +222,7 @@ EXPORT_SYMBOL(ceph_pr_addr);
static void encode_my_addr(struct ceph_messenger *msgr)
{
memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
- ceph_encode_addr(&msgr->my_enc_addr);
+ ceph_encode_banner_addr(&msgr->my_enc_addr);
}
/*
@@ -1732,12 +1734,14 @@ static int read_partial_banner(struct ceph_connection *con)
ret = read_partial(con, end, size, &con->actual_peer_addr);
if (ret <= 0)
goto out;
+ ceph_decode_banner_addr(&con->actual_peer_addr);
size = sizeof (con->peer_addr_for_me);
end += size;
ret = read_partial(con, end, size, &con->peer_addr_for_me);
if (ret <= 0)
goto out;
+ ceph_decode_banner_addr(&con->peer_addr_for_me);
out:
return ret;
@@ -1981,6 +1985,7 @@ int ceph_parse_ips(const char *c, const char *end,
}
addr_set_port(&addr[i], port);
+ addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
@@ -2011,9 +2016,6 @@ static int process_banner(struct ceph_connection *con)
if (verify_hello(con) < 0)
return -1;
- ceph_decode_addr(&con->actual_peer_addr);
- ceph_decode_addr(&con->peer_addr_for_me);
-
/*
* Make sure the other end is who we wanted. note that the other
* end may not yet know their ip address, so if it's 0.0.0.0, give
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 895679d3529b8..0520bf9825aa3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -39,7 +39,7 @@ static int __validate_auth(struct ceph_mon_client *monc);
/*
* Decode a monmap blob (e.g., during mount).
*/
-struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
+static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
{
struct ceph_monmap *m = NULL;
int i, err = -EINVAL;
@@ -50,7 +50,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
ceph_decode_32_safe(&p, end, len, bad);
ceph_decode_need(&p, end, len, bad);
- dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
+ dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
p += sizeof(u16); /* skip version */
ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
@@ -58,7 +58,6 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
epoch = ceph_decode_32(&p);
num_mon = ceph_decode_32(&p);
- ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
if (num_mon > CEPH_MAX_MON)
goto bad;
@@ -68,17 +67,22 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
m->fsid = fsid;
m->epoch = epoch;
m->num_mon = num_mon;
- ceph_decode_copy(&p, m->mon_inst, num_mon*sizeof(m->mon_inst[0]));
- for (i = 0; i < num_mon; i++)
- ceph_decode_addr(&m->mon_inst[i].addr);
-
+ for (i = 0; i < num_mon; ++i) {
+ struct ceph_entity_inst *inst = &m->mon_inst[i];
+
+ /* copy name portion */
+ ceph_decode_copy_safe(&p, end, &inst->name,
+ sizeof(inst->name), bad);
+ err = ceph_decode_entity_addr(&p, end, &inst->addr);
+ if (err)
+ goto bad;
+ }
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
ceph_pr_addr(&m->mon_inst[i].addr));
return m;
-
bad:
dout("monmap_decode failed with %d\n", err);
kfree(m);
@@ -469,6 +473,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
if (IS_ERR(monmap)) {
pr_err("problem decoding monmap, %d\n",
(int)PTR_ERR(monmap));
+ ceph_msg_dump(msg);
goto out;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 9a8eca5eda654..0b2df09b25549 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,14 +171,6 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
osd_data->num_bvecs = num_bvecs;
}
-#define osd_req_op_data(oreq, whch, typ, fld) \
-({ \
- struct ceph_osd_request *__oreq = (oreq); \
- unsigned int __whch = (whch); \
- BUG_ON(__whch >= __oreq->r_num_ops); \
- &__oreq->r_ops[__whch].typ.fld; \
-})
-
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
{
@@ -478,7 +470,7 @@ static void request_release_checks(struct ceph_osd_request *req)
{
WARN_ON(!RB_EMPTY_NODE(&req->r_node));
WARN_ON(!RB_EMPTY_NODE(&req->r_mc_node));
- WARN_ON(!list_empty(&req->r_unsafe_item));
+ WARN_ON(!list_empty(&req->r_private_item));
WARN_ON(req->r_osd);
}
@@ -538,7 +530,7 @@ static void request_init(struct ceph_osd_request *req)
init_completion(&req->r_completion);
RB_CLEAR_NODE(&req->r_node);
RB_CLEAR_NODE(&req->r_mc_node);
- INIT_LIST_HEAD(&req->r_unsafe_item);
+ INIT_LIST_HEAD(&req->r_private_item);
target_init(&req->r_t);
}
@@ -4914,20 +4906,26 @@ static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
ret = ceph_start_decoding(p, end, 2, "watch_item_t",
&struct_v, &struct_len);
if (ret)
- return ret;
+ goto bad;
+
+ ret = -EINVAL;
+ ceph_decode_copy_safe(p, end, &item->name, sizeof(item->name), bad);
+ ceph_decode_64_safe(p, end, item->cookie, bad);
+ ceph_decode_skip_32(p, end, bad); /* skip timeout seconds */
- ceph_decode_copy(p, &item->name, sizeof(item->name));
- item->cookie = ceph_decode_64(p);
- *p += 4; /* skip timeout_seconds */
if (struct_v >= 2) {
- ceph_decode_copy(p, &item->addr, sizeof(item->addr));
- ceph_decode_addr(&item->addr);
+ ret = ceph_decode_entity_addr(p, end, &item->addr);
+ if (ret)
+ goto bad;
+ } else {
+ ret = 0;
}
dout("%s %s%llu cookie %llu addr %s\n", __func__,
ENTITY_NAME(item->name), item->cookie,
ceph_pr_addr(&item->addr));
- return 0;
+bad:
+ return ret;
}
static int decode_watchers(void **p, void *end,
@@ -5044,12 +5042,12 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
const char *class, const char *method,
unsigned int flags,
struct page *req_page, size_t req_len,
- struct page *resp_page, size_t *resp_len)
+ struct page **resp_pages, size_t *resp_len)
{
struct ceph_osd_request *req;
int ret;
- if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE))
+ if (req_len > PAGE_SIZE)
return -E2BIG;
req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
@@ -5067,8 +5065,8 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
if (req_page)
osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
0, false, false);
- if (resp_page)
- osd_req_op_cls_response_data_pages(req, 0, &resp_page,
+ if (resp_pages)
+ osd_req_op_cls_response_data_pages(req, 0, resp_pages,
*resp_len, 0, false, false);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
@@ -5079,7 +5077,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
ret = req->r_ops[0].rval;
- if (resp_page)
+ if (resp_pages)
*resp_len = req->r_ops[0].outdata_len;
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 48a31dc9161cc..90437906b7bcf 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1489,11 +1489,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
/* osd_state, osd_weight, osd_addrs->client_addr */
ceph_decode_need(p, end, 3*sizeof(u32) +
- map->max_osd*((struct_v >= 5 ? sizeof(u32) :
- sizeof(u8)) +
- sizeof(*map->osd_weight) +
- sizeof(*map->osd_addr)), e_inval);
-
+ map->max_osd*(struct_v >= 5 ? sizeof(u32) :
+ sizeof(u8)) +
+ sizeof(*map->osd_weight), e_inval);
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
@@ -1514,9 +1512,11 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
- ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
- for (i = 0; i < map->max_osd; i++)
- ceph_decode_addr(&map->osd_addr[i]);
+ for (i = 0; i < map->max_osd; i++) {
+ err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
+ if (err)
+ goto bad;
+ }
/* pg_temp */
err = decode_pg_temp(p, end, map);
@@ -1618,12 +1618,17 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
void *new_state;
void *new_weight_end;
u32 len;
+ int i;
new_up_client = *p;
ceph_decode_32_safe(p, end, len, e_inval);
- len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
- ceph_decode_need(p, end, len, e_inval);
- *p += len;
+ for (i = 0; i < len; ++i) {
+ struct ceph_entity_addr addr;
+
+ ceph_decode_skip_32(p, end, e_inval);
+ if (ceph_decode_entity_addr(p, end, &addr))
+ goto e_inval;
+ }
new_state = *p;
ceph_decode_32_safe(p, end, len, e_inval);
@@ -1699,9 +1704,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr;
osd = ceph_decode_32(p);
- ceph_decode_copy(p, &addr, sizeof(addr));
- ceph_decode_addr(&addr);
BUG_ON(osd >= map->max_osd);
+ if (ceph_decode_entity_addr(p, end, &addr))
+ goto e_inval;
pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 74cafc0142ea7..64305e7056a1c 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -10,39 +10,6 @@
#include <linux/ceph/libceph.h>
-/*
- * build a vector of user pages
- */
-struct page **ceph_get_direct_page_vector(const void __user *data,
- int num_pages, bool write_page)
-{
- struct page **pages;
- int got = 0;
- int rc = 0;
-
- pages = kmalloc_array(num_pages, sizeof(*pages), GFP_NOFS);
- if (!pages)
- return ERR_PTR(-ENOMEM);
-
- while (got < num_pages) {
- rc = get_user_pages_fast(
- (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, write_page ? FOLL_WRITE : 0, pages + got);
- if (rc < 0)
- break;
- BUG_ON(rc == 0);
- got += rc;
- }
- if (rc < 0)
- goto fail;
- return pages;
-
-fail:
- ceph_put_page_vector(pages, got, false);
- return ERR_PTR(rc);
-}
-EXPORT_SYMBOL(ceph_get_direct_page_vector);
-
void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
{
int i;
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
index c36462dc86b72..3b3fa75d11894 100644
--- a/net/ceph/striper.c
+++ b/net/ceph/striper.c
@@ -259,3 +259,20 @@ int ceph_extent_to_file(struct ceph_file_layout *l,
return 0;
}
EXPORT_SYMBOL(ceph_extent_to_file);
+
+u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
+{
+ u64 period = (u64)l->stripe_count * l->object_size;
+ u64 num_periods = DIV64_U64_ROUND_UP(size, period);
+ u64 remainder_bytes;
+ u64 remainder_objs = 0;
+
+ div64_u64_rem(size, period, &remainder_bytes);
+ if (remainder_bytes > 0 &&
+ remainder_bytes < (u64)l->stripe_count * l->stripe_unit)
+ remainder_objs = l->stripe_count -
+ DIV_ROUND_UP_ULL(remainder_bytes, l->stripe_unit);
+
+ return num_periods * l->stripe_count - remainder_objs;
+}
+EXPORT_SYMBOL(ceph_get_num_objects);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 76f8db3841d7a..d63b970784dc7 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_enc_opts);
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv))
{
@@ -175,7 +175,6 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
if (!block_cb)
return ERR_PTR(-ENOMEM);
- block_cb->net = net;
block_cb->cb = cb;
block_cb->cb_ident = cb_ident;
block_cb->cb_priv = cb_priv;
@@ -194,14 +193,13 @@ void flow_block_cb_free(struct flow_block_cb *block_cb)
}
EXPORT_SYMBOL(flow_block_cb_free);
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
- tc_setup_cb_t *cb, void *cb_ident)
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+ flow_setup_cb_t *cb, void *cb_ident)
{
struct flow_block_cb *block_cb;
- list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
- if (block_cb->net == f->net &&
- block_cb->cb == cb &&
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ if (block_cb->cb == cb &&
block_cb->cb_ident == cb_ident)
return block_cb;
}
@@ -228,7 +226,7 @@ unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb)
}
EXPORT_SYMBOL(flow_block_cb_decref);
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
struct list_head *driver_block_list)
{
struct flow_block_cb *block_cb;
@@ -245,7 +243,8 @@ EXPORT_SYMBOL(flow_block_cb_is_busy);
int flow_block_cb_setup_simple(struct flow_block_offload *f,
struct list_head *driver_block_list,
- tc_setup_cb_t *cb, void *cb_ident, void *cb_priv,
+ flow_setup_cb_t *cb,
+ void *cb_ident, void *cb_priv,
bool ingress_only)
{
struct flow_block_cb *block_cb;
@@ -261,8 +260,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, cb_ident,
- cb_priv, NULL);
+ block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
@@ -270,7 +268,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
list_add_tail(&block_cb->driver_list, driver_block_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, cb_ident);
+ block_cb = flow_block_cb_lookup(f->block, cb, cb_ident);
if (!block_cb)
return -ENOENT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0dfc97bc8760e..f79e61c570ea3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3376,8 +3376,6 @@ void neigh_app_ns(struct neighbour *n)
EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
-static int zero;
-static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
static int proc_unres_qlen(struct ctl_table *ctl, int write,
@@ -3386,7 +3384,7 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
int size, ret;
struct ctl_table tmp = *ctl;
- tmp.extra1 = &zero;
+ tmp.extra1 = SYSCTL_ZERO;
tmp.extra2 = &unres_qlen_max;
tmp.data = &size;
@@ -3451,8 +3449,8 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
struct ctl_table tmp = *ctl;
int ret;
- tmp.extra1 = &zero;
- tmp.extra2 = &int_max;
+ tmp.extra1 = SYSCTL_ZERO;
+ tmp.extra2 = SYSCTL_INT_MAX;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
neigh_proc_update(ctl, write);
@@ -3597,24 +3595,24 @@ static struct neigh_sysctl_table {
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH2] = {
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH3] = {
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
{},
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f9204719aeeeb..8da5b3a54dac4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -22,8 +22,6 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
-static int zero = 0;
-static int one = 1;
static int two __maybe_unused = 2;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -390,10 +388,10 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
- .extra1 = &one,
- .extra2 = &one,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
# else
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
# endif
},
@@ -404,7 +402,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax_bpf_restricted,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -413,8 +411,8 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax_bpf_restricted,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
# endif
{
@@ -461,8 +459,8 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
#ifdef CONFIG_RPS
{
@@ -493,7 +491,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "busy_read",
@@ -501,7 +499,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_NET_SCHED
@@ -533,7 +531,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &max_skb_frags,
},
{
@@ -542,7 +540,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "fb_tunnels_only_for_init_net",
@@ -550,8 +548,8 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "devconf_inherit_init_net",
@@ -559,7 +557,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -578,7 +576,7 @@ static struct ctl_table netns_core_table[] = {
.data = &init_net.core.sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax
},
{ }
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index b59040f268a99..ee8d4f5afa722 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -16,9 +16,7 @@
#endif
/* Boundary values */
-static int zero = 0,
- one = 1,
- u8_max = 0xFF;
+static int u8_max = 0xFF;
static unsigned long seqw_min = DCCPF_SEQ_WMIN,
seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */
@@ -38,7 +36,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_rx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max, /* RFC 4340, 10. */
},
{
@@ -47,7 +45,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_tx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max, /* RFC 4340, 10. */
},
{
@@ -56,7 +54,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_request_retries),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &u8_max,
},
{
@@ -65,7 +63,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_retries1),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max,
},
{
@@ -74,7 +72,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_retries2),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max,
},
{
@@ -83,7 +81,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_tx_qlen),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "sync_ratelimit",
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 614c38ece1043..33f41178afccf 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -951,7 +951,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
struct flow_block_offload *f)
{
struct flow_block_cb *block_cb;
- tc_setup_cb_t *cb;
+ flow_setup_cb_t *cb;
if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
cb = dsa_slave_setup_tc_block_cb_ig;
@@ -967,7 +967,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, dev, dev, NULL);
+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
@@ -975,7 +975,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, dev);
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
if (!block_cb)
return -ENOENT;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index d666756be5f18..a999451345f98 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -331,7 +331,7 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
if (!prev)
fq = inet_frag_create(fqdir, key, &prev);
- if (prev && !IS_ERR(prev)) {
+ if (!IS_ERR_OR_NULL(prev)) {
fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
fq = NULL;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 43adfc1641bac..2f01cf6fa0def 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
const struct iphdr *tiph = &tunnel->parms.iph;
u8 ipproto;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_error;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ipproto = IPPROTO_IPIP;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4d6bf7ac07925..6bdb1ab8af617 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -416,8 +416,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
ctinfo == IP_CT_RELATED_REPLY))
return XT_CONTINUE;
- /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
- * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+ /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
+ * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
* on, which all have an ID field [relevant for hashing]. */
hash = clusterip_hashfn(skb, cipinfo->config);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 8e7f84ec783da..0e70f3f65f6fe 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
+ opts.mss_encode = opts.mss;
+ opts.mss = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 59031670b16a0..cc23f1ce239c2 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -78,6 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+ flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 87b711fd5a442..3e2685c120c77 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -221,11 +221,11 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
int ret;
rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- ret = nf_ct_expect_related(rtp_exp);
+ ret = nf_ct_expect_related(rtp_exp, 0);
if (ret == 0) {
rtcp_exp->tuple.dst.u.udp.port =
htons(nated_port + 1);
- ret = nf_ct_expect_related(rtcp_exp);
+ ret = nf_ct_expect_related(rtcp_exp, 0);
if (ret == 0)
break;
else if (ret == -EBUSY) {
@@ -296,7 +296,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -352,7 +352,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -444,7 +444,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -537,7 +537,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7d66306b5f397..0b980e8419273 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,8 +28,6 @@
#include <net/protocol.h>
#include <net/netevent.h>
-static int zero;
-static int one = 1;
static int two = 2;
static int four = 4;
static int thousand = 1000;
@@ -576,7 +574,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "icmp_msgs_burst",
@@ -584,7 +582,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "udp_mem",
@@ -674,8 +672,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -763,8 +761,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = ipv4_fwd_update_priority,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "ip_nonlocal_bind",
@@ -794,8 +792,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -864,7 +862,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
#endif
{
@@ -969,7 +967,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -1011,7 +1009,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
@@ -1020,8 +1018,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "fib_multipath_hash_policy",
@@ -1029,8 +1027,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
- .extra1 = &zero,
- .extra2 = &two,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -1047,8 +1045,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -1078,7 +1076,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &four,
},
{
@@ -1222,7 +1220,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &gso_max_segs,
},
{
@@ -1231,7 +1229,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_day_secs
},
{
@@ -1240,8 +1238,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "tcp_invalid_ratelimit",
@@ -1256,7 +1254,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &thousand,
},
{
@@ -1265,7 +1263,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &thousand,
},
{
@@ -1274,7 +1272,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "tcp_rmem",
@@ -1282,7 +1280,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "tcp_comp_sack_delay_ns",
@@ -1297,7 +1295,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &comp_sack_nr_max,
},
{
@@ -1306,7 +1304,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
{
.procname = "udp_wmem_min",
@@ -1314,7 +1312,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
{ }
};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4af1f5dae9d3e..6e4afc48d7bba 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1288,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
int nsize, old_factor;
+ long limit;
int nlen;
u8 flags;
@@ -1298,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0)
nsize = 0;
- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
- tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+ /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+ * We need some allowance to not penalize applications setting small
+ * SO_SNDBUF values.
+ * Also allow first and last skb in retransmit queue to be split.
+ */
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+ skb != tcp_rtx_queue_head(sk) &&
+ skb != tcp_rtx_queue_tail(sk))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 521e3203e83a4..dc73888c7859a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6432,8 +6432,6 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
}
static int minus_one = -1;
-static const int zero = 0;
-static const int one = 1;
static const int two_five_five = 255;
static const struct ctl_table addrconf_sysctl[] = {
@@ -6450,7 +6448,7 @@ static const struct ctl_table addrconf_sysctl[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&two_five_five,
},
{
@@ -6809,7 +6807,7 @@ static const struct ctl_table addrconf_sysctl[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&zero,
+ .extra1 = (void *)SYSCTL_ZERO,
.extra2 = (void *)&two_five_five,
},
{
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c2049c72f3e53..dd2d0b9632607 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -660,12 +660,13 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
struct flowi6 *fl6, __u8 *dsfield,
int *encap_limit)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6hdr *ipv6h;
struct ip6_tnl *t = netdev_priv(dev);
__u16 offset;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index e77ea1ed5eddd..5cdb4a69d277c 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
+ opts.mss_encode = opts.mss;
+ opts.mss = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 6bcaf73571834..d800801a5dd27 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -55,7 +55,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
if (rpfilter_addr_linklocal(&iph->saddr)) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6.flowi6_oif = dev->ifindex;
- } else if ((flags & XT_RPFILTER_LOOSE) == 0)
+ /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */
+ } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
+ (flags & XT_RPFILTER_LOOSE) == 0)
fl6.flowi6_oif = dev->ifindex;
rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
@@ -70,7 +72,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
goto out;
}
- if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ if (rt->rt6i_idev->dev == dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+ (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
ip6_rt_put(rt);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6fe3097b9ab71..e49fec767a10a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6031,9 +6031,6 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
return 0;
}
-static int zero;
-static int one = 1;
-
static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
@@ -6111,8 +6108,8 @@ static struct ctl_table ipv6_route_table_template[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{ }
};
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index dc4c91e0bfb8e..ec8fcfc60a27b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,8 +21,6 @@
#include <net/calipso.h>
#endif
-static int zero;
-static int one = 1;
static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
@@ -115,7 +113,7 @@ static struct ctl_table ipv6_table_template[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &flowlabel_reflect_max,
},
{
@@ -152,8 +150,8 @@ static struct ctl_table ipv6_table_template[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "seg6_flowlabel",
@@ -179,7 +177,7 @@ static struct ctl_table ipv6_rotable[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
#ifdef CONFIG_NETLABEL
{
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 76cc9e967fa62..4d458067d80d1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -936,8 +936,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
params->probe_resp_len, csa);
- if (err < 0)
+ if (err < 0) {
+ kfree(new);
return err;
+ }
if (err == 0)
changed |= BSS_CHANGED_AP_PROBE_RESP;
@@ -949,8 +951,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
params->civicloc,
params->civicloc_len);
- if (err < 0)
+ if (err < 0) {
+ kfree(new);
return err;
+ }
changed |= BSS_CHANGED_FTM_RESPONDER;
}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index acd4afb4944b8..c9a8a2433e8ac 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -187,11 +187,16 @@ int drv_conf_tx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (WARN_ONCE(params->cw_min == 0 ||
- params->cw_min > params->cw_max,
- "%s: invalid CW_min/CW_max: %d/%d\n",
- sdata->name, params->cw_min, params->cw_max))
+ if (params->cw_min == 0 || params->cw_min > params->cw_max) {
+ /*
+ * If we can't configure hardware anyway, don't warn. We may
+ * never have initialized the CW parameters.
+ */
+ WARN_ONCE(local->ops->conf_tx,
+ "%s: invalid CW_min/CW_max: %d/%d\n",
+ sdata->name, params->cw_min, params->cw_max);
return -EINVAL;
+ }
trace_drv_conf_tx(local, sdata, ac, params);
if (local->ops->conf_tx)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 198ec4fe4148b..c312741df2ce9 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -37,8 +37,6 @@
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
-static int zero = 0;
-static int one = 1;
static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;
@@ -2607,7 +2605,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
.data = &platform_labels,
.maxlen = sizeof(int),
.mode = table->mode,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &label_limit,
};
@@ -2636,8 +2634,8 @@ static const struct ctl_table mpls_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "default_ttl",
@@ -2645,7 +2643,7 @@ static const struct ctl_table mpls_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &ttl_max,
},
{ }
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 32a45c03786e9..0d65f4d394943 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -223,8 +223,6 @@ config NF_CONNTRACK_FTP
of Network Address Translation on them.
This is FTP support on Layer 3 independent connection tracking.
- Layer 3 independent connection tracking is experimental scheme
- which generalize ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
@@ -338,7 +336,7 @@ config NF_CONNTRACK_SIP
help
SIP is an application-layer control protocol that can establish,
modify, and terminate multimedia sessions (conferences) such as
- Internet telephony calls. With the ip_conntrack_sip and
+ Internet telephony calls. With the nf_conntrack_sip and
the nf_nat_sip modules you can support the protocol on a connection
tracking/NATing firewall.
@@ -1313,7 +1311,7 @@ config NETFILTER_XT_MATCH_HELPER
depends on NETFILTER_ADVANCED
help
Helper matching allows you to match packets in dynamic connections
- tracked by a conntrack-helper, ie. ip_conntrack_ftp
+ tracked by a conntrack-helper, ie. nf_conntrack_ftp
To compile it as a module, choose M here. If unsure, say Y.
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 07e0967bf1296..060565e7d227a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1726,7 +1726,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
-static int zero;
static int three = 3;
static int
@@ -1935,7 +1934,7 @@ static struct ctl_table vs_vars[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &three,
},
{
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 403541996952f..08adcb2229862 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -231,7 +231,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&exp->tuple));
- nf_ct_expect_related(exp);
+ nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
}
EXPORT_SYMBOL(ip_vs_nfct_expect_related);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 42ee659d0d1eb..d011d2eb08486 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -159,7 +159,7 @@ static int amanda_help(struct sk_buff *skb,
if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
ret = nf_nat_amanda(skb, ctinfo, protoff,
off - dataoff, len, exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 921a7b95be682..1ba6becc30795 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -68,7 +68,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
exp->helper = NULL;
- nf_ct_expect_related(exp);
+ nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
nf_ct_refresh(ct, skb, timeout * HZ);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bdfeacee08177..a542761e90d1f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1817,9 +1817,7 @@ EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
+/* Generic function for tcp/udp/sctp/dccp and alike. */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ffd1f4906c4f7..65364de915d16 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -249,13 +249,22 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
- return a->master == b->master &&
- nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+ return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
+static bool master_matches(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b,
+ unsigned int flags)
+{
+ if (flags & NF_CT_EXP_F_SKIP_MASTER)
+ return true;
+
+ return a->master == b->master;
+}
+
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
@@ -399,7 +408,8 @@ static void evict_oldest_expect(struct nf_conn *master,
nf_ct_remove_expect(last);
}
-static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int flags)
{
const struct nf_conntrack_expect_policy *p;
struct nf_conntrack_expect *i;
@@ -417,8 +427,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
- if (expect_matches(i, expect)) {
- if (i->class != expect->class)
+ if (master_matches(i, expect, flags) &&
+ expect_matches(i, expect)) {
+ if (i->class != expect->class ||
+ i->master != expect->master)
return -EALREADY;
if (nf_ct_remove_expect(i))
@@ -453,12 +465,12 @@ out:
}
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
- u32 portid, int report)
+ u32 portid, int report, unsigned int flags)
{
int ret;
spin_lock_bh(&nf_conntrack_expect_lock);
- ret = __nf_ct_expect_check(expect);
+ ret = __nf_ct_expect_check(expect, flags);
if (ret < 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 8c6c11bab5b67..0ecb3e289ef25 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -525,7 +525,7 @@ skip_nl_seq:
protoff, matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 8f6ba8162f0be..573cb44814813 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,11 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
- * conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
*
* Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
*
- * See ip_conntrack_helper_h323_asn1.h for details.
+ * See nf_conntrack_helper_h323_asn1.h for details.
*/
#ifdef __KERNEL__
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6497e5fc08710..8ba037b76ad3a 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -305,8 +305,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(rtp_exp) == 0) {
- if (nf_ct_expect_related(rtcp_exp) == 0) {
+ if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+ if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
pr_debug("nf_ct_h323: expect RTP ");
nf_ct_dump_tuple(&rtp_exp->tuple);
pr_debug("nf_ct_h323: expect RTCP ");
@@ -364,7 +364,7 @@ static int expect_t120(struct sk_buff *skb,
ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_h323: expect T.120 ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -701,7 +701,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect H.245 ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -825,7 +825,7 @@ static int expect_callforwarding(struct sk_buff *skb,
protoff, data, dataoff,
taddr, port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect Call Forwarding ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -1284,7 +1284,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
ret = nat_q931(skb, ct, ctinfo, protoff, data,
taddr, i, port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
@@ -1349,7 +1349,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
IPPROTO_UDP, NULL, &port);
exp->helper = nf_conntrack_helper_ras;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -1561,7 +1561,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -1615,7 +1615,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
} else
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 7ac156f1f3bc7..e40988a2f22fb 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -213,7 +213,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct,
"cannot add expectation");
ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1b77444d5b529..6aa01eb6fe99c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2616,7 +2616,7 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (IS_ERR(exp))
return PTR_ERR(exp);
- err = nf_ct_expect_related_report(exp, portid, report);
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
nf_ct_expect_put(exp);
return err;
}
@@ -3367,7 +3367,7 @@ ctnetlink_create_expect(struct net *net,
goto err_rcu;
}
- err = nf_ct_expect_related_report(exp, portid, report);
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
nf_ct_expect_put(exp);
err_rcu:
rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b22042ad0fca6..a971183f11af7 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -234,9 +234,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
nf_nat_pptp_exp_gre(exp_orig, exp_reply);
- if (nf_ct_expect_related(exp_orig) != 0)
+ if (nf_ct_expect_related(exp_orig, 0) != 0)
goto out_put_both;
- if (nf_ct_expect_related(exp_reply) != 0)
+ if (nf_ct_expect_related(exp_reply, 0) != 0)
goto out_unexpect_orig;
/* Add GRE keymap entries */
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c2eb365f17237..5b05487a60d21 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
* Connection tracking protocol helper module for GRE.
*
* GRE is a generic encapsulation protocol, which is generally not very
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index dd53e2b20f6b6..097deba7441ae 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -215,7 +215,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
return -NF_ACCEPT;
}
- /* See ip_conntrack_proto_tcp.c */
+ /* See nf_conntrack_proto_tcp.c */
if (state->net->ct.sysctl_checksum &&
state->hook == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index d5fdfa00d6830..85c1f8c213b0f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -472,6 +472,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
+ u16 win_raw;
s32 receiver_offset;
bool res, in_recv_win;
@@ -480,7 +481,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
*/
seq = ntohl(tcph->seq);
ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
+ win_raw = ntohs(tcph->window);
+ win = win_raw;
end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
@@ -655,14 +657,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
&& state->last_seq == seq
&& state->last_ack == ack
&& state->last_end == end
- && state->last_win == win)
+ && state->last_win == win_raw)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
- state->last_win = win;
+ state->last_win = win_raw;
state->retrans = 0;
}
}
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 81448c3db6614..1aebd6569d4ef 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -153,7 +153,7 @@ static int help(struct sk_buff *skb,
nf_ct_dump_tuple(&exp->tuple);
/* Can't expect this? Best to drop packet now. */
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1072517318091..b83dc9bf0a5dd 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -977,11 +977,15 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
/* -EALREADY handling works around end-points that send
* SDP messages with identical port but different media type,
* we pretend expectation was set up.
+ * It also works in the case that SDP messages are sent with
+ * identical expect tuples but for different master conntracks.
*/
- int errp = nf_ct_expect_related(rtp_exp);
+ int errp = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (errp == 0 || errp == -EALREADY) {
- int errcp = nf_ct_expect_related(rtcp_exp);
+ int errcp = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (errcp == 0 || errcp == -EALREADY)
ret = NF_ACCEPT;
@@ -1296,7 +1300,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
exp, matchoff, matchlen);
else {
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index df6d6d61bd58e..80ee53f29f68f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -78,7 +78,7 @@ static int tftp_help(struct sk_buff *skb,
nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_tftp(skb, ctinfo, exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index a352604d6186a..3bc7e0854efe4 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -48,7 +48,7 @@ static unsigned int help(struct sk_buff *skb,
int res;
exp->tuple.dst.u.tcp.port = htons(port);
- res = nf_ct_expect_related(exp);
+ res = nf_ct_expect_related(exp, 0);
if (res == 0)
break;
else if (res != -EBUSY) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 9ab410455992f..3f6023ed49666 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -519,7 +519,7 @@ another_round:
* and NF_INET_LOCAL_OUT, we change the destination to map into the
* range. It might not be possible to get a unique tuple, but we try.
* At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
+ * __nf_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index d48484a9d52dc..aace6768a64e7 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -91,7 +91,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
int ret;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index dfb7ef8845bdf..c691ab8d234cf 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff *skb,
int ret;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index e338d91980d89..f0a735e868518 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -414,7 +414,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
int ret;
exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -607,7 +607,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
int ret;
rtp_exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(rtp_exp);
+ ret = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (ret == -EBUSY)
continue;
else if (ret < 0) {
@@ -615,7 +616,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
break;
}
rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
- ret = nf_ct_expect_related(rtcp_exp);
+ ret = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (ret == 0)
break;
else if (ret == -EBUSY) {
diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
index 833a11f680313..1a591132d6eb9 100644
--- a/net/netfilter/nf_nat_tftp.c
+++ b/net/netfilter/nf_nat_tftp.c
@@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff *skb,
= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = nf_nat_follow_master;
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, exp->master, "cannot add expectation");
return NF_DROP;
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b101f187eda8a..c769462a839e0 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -470,7 +470,7 @@ synproxy_send_client_synack(struct net *net,
struct iphdr *iph, *niph;
struct tcphdr *nth;
unsigned int tcp_hdr_size;
- u16 mss = opts->mss;
+ u16 mss = opts->mss_encode;
iph = ip_hdr(skb);
@@ -687,7 +687,7 @@ ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
state = &ct->proto.tcp;
switch (state->state) {
case TCP_CONNTRACK_CLOSE:
- if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
ntohl(th->seq) + 1);
break;
@@ -884,7 +884,7 @@ synproxy_send_client_synack_ipv6(struct net *net,
struct ipv6hdr *iph, *niph;
struct tcphdr *nth;
unsigned int tcp_hdr_size;
- u16 mss = opts->mss;
+ u16 mss = opts->mss_encode;
iph = ipv6_hdr(skb);
@@ -1111,7 +1111,7 @@ ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
state = &ct->proto.tcp;
switch (state->state) {
case TCP_CONNTRACK_CLOSE:
- if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
ntohl(th->seq) + 1);
break;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ed17a7c29b86f..605a7cfe7ca79 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1662,7 +1662,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
chain->flags |= NFT_BASE_CHAIN | flags;
basechain->policy = NF_ACCEPT;
- INIT_LIST_HEAD(&basechain->cb_list);
+ flow_block_init(&basechain->flow_block);
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1900,6 +1900,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nla[NFTA_CHAIN_FLAGS])
flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+ else if (chain)
+ flags = chain->flags;
nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 2c3302845f675..64f5fd5f240e2 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -116,7 +116,7 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
struct flow_block_cb *block_cb;
int err;
- list_for_each_entry(block_cb, &basechain->cb_list, list) {
+ list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
@@ -154,7 +154,7 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
static int nft_flow_offload_bind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
- list_splice(&bo->cb_list, &basechain->cb_list);
+ list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
return 0;
}
@@ -198,6 +198,7 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
return -EOPNOTSUPP;
bo.command = cmd;
+ bo.block = &basechain->flow_block;
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 92077d4591090..4abbb452cf6c6 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -578,7 +578,7 @@ static int nfnetlink_bind(struct net *net, int group)
ss = nfnetlink_get_subsys(type << 8);
rcu_read_unlock();
if (!ss)
- request_module("nfnetlink-subsys-%d", type);
+ request_module_nowait("nfnetlink-subsys-%d", type);
return 0;
}
#endif
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 3fd540b2c6baf..b5d5d071d7655 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -193,7 +193,7 @@ static inline void nft_chain_filter_inet_init(void) {}
static inline void nft_chain_filter_inet_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV6 */
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
static unsigned int
nft_do_chain_bridge(void *priv,
struct sk_buff *skb,
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index 2f89bde3c61cb..ff9ac8ae0031f 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -142,3 +142,6 @@ MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
#ifdef CONFIG_NF_TABLES_IPV6
MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
#endif
+#ifdef CONFIG_NF_TABLES_INET
+MODULE_ALIAS_NFT_CHAIN(1, "nat"); /* NFPROTO_INET */
+#endif
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 827ab6196df99..46ca8bcca1bd5 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1252,7 +1252,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
priv->l4proto, NULL, &priv->dport);
exp->timeout.expires = jiffies + priv->timeout * HZ;
- if (nf_ct_expect_related(exp) != 0)
+ if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index fe93e731dc7fb..b836d550b9199 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -129,7 +129,7 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
- if (priv->modulus <= 1)
+ if (priv->modulus < 1)
return -ERANGE;
if (priv->offset + priv->modulus - 1 < priv->offset)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 76866f77e3435..f1b1d948c07b4 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -546,7 +546,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
return ERR_PTR(-EINVAL);
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
if (ctx->family == NFPROTO_BRIDGE)
return ERR_PTR(-EAGAIN);
#endif
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 8487eeff5c0ec..43eeb1f609f13 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -291,4 +291,4 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_ALIAS_NFT_EXPR("redir");
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 80060ade8a5b3..928e661d1517d 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -31,6 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
opts->options |= NF_SYNPROXY_OPT_ECN;
opts->options &= priv->info.options;
+ opts->mss_encode = opts->mss;
+ opts->mss = info->mss;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, opts);
else
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 96740d3893771..c4f54ad2b98af 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -967,6 +967,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
window = skb->data[20];
+ sock_hold(make);
skb->sk = make;
skb->destructor = sock_efree;
make->sk_state = TCP_ESTABLISHED;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 892287d06c176..d01410e520979 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1047,7 +1047,7 @@ error:
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(struct net *net,
+static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask,
@@ -1081,12 +1081,13 @@ static struct sw_flow_actions *get_flow_actions(struct net *net,
* we should not to return match object with dangling reference
* to mask.
* */
-static int ovs_nla_init_match_and_action(struct net *net,
- struct sw_flow_match *match,
- struct sw_flow_key *key,
- struct nlattr **a,
- struct sw_flow_actions **acts,
- bool log)
+static noinline_for_stack int
+ovs_nla_init_match_and_action(struct net *net,
+ struct sw_flow_match *match,
+ struct sw_flow_key *key,
+ struct nlattr **a,
+ struct sw_flow_actions **acts,
+ bool log)
{
struct sw_flow_mask mask;
int error = 0;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dca3b1e2acf06..bc89e16e05050 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -59,7 +59,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
const struct sk_buff *skb)
{
- struct flow_stats *stats;
+ struct sw_flow_stats *stats;
unsigned int cpu = smp_processor_id();
int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@ -87,7 +87,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
if (likely(flow->stats_last_writer != -1) &&
likely(!rcu_access_pointer(flow->stats[cpu]))) {
/* Try to allocate CPU-specific stats. */
- struct flow_stats *new_stats;
+ struct sw_flow_stats *new_stats;
new_stats =
kmem_cache_alloc_node(flow_stats_cache,
@@ -134,7 +134,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
+ struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
/* Local CPU may write on non-local stats, so we must
@@ -158,7 +158,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+ struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
spin_lock_bh(&stats->lock);
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 3e2cc2202d668..a5506e2d4b7ae 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -194,7 +194,7 @@ struct sw_flow_actions {
struct nlattr actions[];
};
-struct flow_stats {
+struct sw_flow_stats {
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
unsigned long used; /* Last used time (in jiffies). */
@@ -216,7 +216,7 @@ struct sw_flow {
struct cpumask cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct flow_stats __rcu *stats[]; /* One for each CPU. First one
+ struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
* is allocated at flow creation time,
* the rest are allocated on demand
* while holding the 'stats[0].lock'.
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 988fd8a94e43e..cf3582c5ed70c 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -66,7 +66,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- struct flow_stats *stats;
+ struct sw_flow_stats *stats;
flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
if (!flow)
@@ -110,7 +110,7 @@ static void flow_free(struct sw_flow *flow)
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
- (struct flow_stats __force *)flow->stats[cpu]);
+ (struct sw_flow_stats __force *)flow->stats[cpu]);
kmem_cache_free(flow_cache, flow);
}
@@ -712,13 +712,13 @@ int ovs_flow_init(void)
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct flow_stats *)),
+ * sizeof(struct sw_flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
flow_stats_cache
- = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
0, SLAB_HWCACHE_ALIGN, NULL);
if (flow_stats_cache == NULL) {
kmem_cache_destroy(flow_cache);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 4de0214da63c4..fddaa09f7b0db 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -607,11 +607,11 @@ send_hdrs_dma_out:
qp_out:
rdma_destroy_qp(ic->i_cm_id);
recv_cq_out:
- if (!ib_destroy_cq(ic->i_recv_cq))
- ic->i_recv_cq = NULL;
+ ib_destroy_cq(ic->i_recv_cq);
+ ic->i_recv_cq = NULL;
send_cq_out:
- if (!ib_destroy_cq(ic->i_send_cq))
- ic->i_send_cq = NULL;
+ ib_destroy_cq(ic->i_send_cq);
+ ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
out:
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 1e3fa67d91aa8..2bbb38161851a 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -11,7 +11,6 @@
#include "ar-internal.h"
static struct ctl_table_header *rxrpc_sysctl_reg_table;
-static const unsigned int one = 1;
static const unsigned int four = 4;
static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
@@ -97,7 +96,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&rxrpc_max_client_connections,
},
{
@@ -115,7 +114,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&n_max_acks,
},
{
@@ -124,7 +123,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&n_65535,
},
{
@@ -133,7 +132,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 41d5398dd2f2e..3578196d1600e 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -481,6 +481,11 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
int ret = 0;
int err;
+ if (!nla) {
+ NL_SET_ERR_MSG_MOD(extack, "IFE requires attributes to be passed");
+ return -EINVAL;
+ }
+
err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy,
NULL);
if (err < 0)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d144233423c5c..efd3cfb80a2ad 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -691,6 +691,8 @@ static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
if (!indr_dev->block)
return;
+ bo.block = &indr_dev->block->flow_block;
+
indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
&bo);
tcf_block_setup(indr_dev->block, &bo);
@@ -775,6 +777,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
.command = command,
.binder_type = ei->binder_type,
.net = dev_net(dev),
+ .block = &block->flow_block,
.block_shared = tcf_block_shared(block),
.extack = extack,
};
@@ -810,6 +813,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
bo.net = dev_net(dev);
bo.command = command;
bo.binder_type = ei->binder_type;
+ bo.block = &block->flow_block;
bo.block_shared = tcf_block_shared(block);
bo.extack = extack;
INIT_LIST_HEAD(&bo.cb_list);
@@ -987,8 +991,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
+ flow_block_init(&block->flow_block);
INIT_LIST_HEAD(&block->chain_list);
- INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
INIT_LIST_HEAD(&block->chain0.filter_chain_list);
@@ -1514,7 +1518,7 @@ void tcf_block_put(struct tcf_block *block)
EXPORT_SYMBOL(tcf_block_put);
static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
void *cb_priv, bool add, bool offload_in_use,
struct netlink_ext_ack *extack)
{
@@ -1570,7 +1574,7 @@ static int tcf_block_bind(struct tcf_block *block,
i++;
}
- list_splice(&bo->cb_list, &block->cb_list);
+ list_splice(&bo->cb_list, &block->flow_block.cb_list);
return 0;
@@ -2152,7 +2156,9 @@ replay:
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
tfilter_put(tp, fh);
- q->flags &= ~TCQ_F_CAN_BYPASS;
+ /* q pointer is NULL for shared blocks */
+ if (q)
+ q->flags &= ~TCQ_F_CAN_BYPASS;
}
errout:
@@ -3156,7 +3162,7 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
if (block->nooffloaddevcnt && err_stop)
return -EOPNOTSUPP;
- list_for_each_entry(block_cb, &block->cb_list, list) {
+ list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
if (err_stop)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 691f71830134b..3f7a9c02b70c5 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -651,7 +651,7 @@ skip:
}
}
-static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 38d6e85693fc8..054123742e323 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1800,7 +1800,7 @@ fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
return NULL;
}
-static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a30d2f8feb323..455ea2793f9b3 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -282,7 +282,7 @@ skip:
arg->count++;
}
-static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index be9e46c77e8be..8614088edd1be 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1152,7 +1152,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
}
static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
- bool add, tc_setup_cb_t *cb, void *cb_priv,
+ bool add, flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack)
{
struct tc_cls_u32_offload cls_u32 = {};
@@ -1172,7 +1172,7 @@ static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
}
static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
- bool add, tc_setup_cb_t *cb, void *cb_priv,
+ bool add, flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -1213,7 +1213,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
return 0;
}
-static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 9a19147902f17..1250751bca1bc 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -25,10 +25,7 @@
#include <net/sctp/sctp.h>
#include <linux/sysctl.h>
-static int zero = 0;
-static int one = 1;
static int timer_max = 86400000; /* ms in one day */
-static int int_max = INT_MAX;
static int sack_timer_min = 1;
static int sack_timer_max = 500;
static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
@@ -92,7 +89,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &timer_max
},
{
@@ -101,7 +98,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_sctp_do_rto_min,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &init_net.sctp.rto_max
},
{
@@ -137,8 +134,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "cookie_preserve_enable",
@@ -160,7 +157,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &timer_max
},
{
@@ -178,7 +175,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &timer_max
},
{
@@ -187,8 +184,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "path_max_retrans",
@@ -196,8 +193,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "max_init_retransmits",
@@ -205,8 +202,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "pf_retrans",
@@ -214,8 +211,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "sndbuf_policy",
@@ -286,7 +283,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &addr_scope_max,
},
{
@@ -295,7 +292,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &rwnd_scale_max,
},
{
diff --git a/net/socket.c b/net/socket.c
index 293d56836f01c..6a9ab7a8b1d2c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -73,6 +73,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
@@ -338,19 +339,22 @@ static const struct xattr_handler *sockfs_xattr_handlers[] = {
NULL
};
-static struct dentry *sockfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int sockfs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
- sockfs_xattr_handlers,
- &sockfs_dentry_operations, SOCKFS_MAGIC);
+ struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->ops = &sockfs_ops;
+ ctx->dops = &sockfs_dentry_operations;
+ ctx->xattr = sockfs_xattr_handlers;
+ return 0;
}
static struct vfsmount *sock_mnt __read_mostly;
static struct file_system_type sock_fs_type = {
.name = "sockfs",
- .mount = sockfs_mount,
+ .init_fs_context = sockfs_init_fs_context,
.kill_sb = kill_anon_super,
};
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index aa307505ca546..3bcf985507be9 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -35,7 +35,7 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+config SUNRPC_DISABLE_INSECURE_ENCTYPES
bool "Secure RPC: Disable insecure Kerberos encryption types"
depends on RPCSEC_GSS_KRB5
default n
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index c47d82622fd12..339e8c077c2db 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -31,25 +31,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RPCDBG_FACILITY RPCDBG_TRANS
#endif
+#define BC_MAX_SLOTS 64U
+
+unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt)
+{
+ return BC_MAX_SLOTS;
+}
+
/*
* Helper routines that track the number of preallocation elements
* on the transport.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
- return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
-}
-
-static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
-{
- atomic_add(n, &xprt->bc_free_slots);
- xprt->bc_alloc_count += n;
-}
-
-static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
-{
- atomic_sub(n, &xprt->bc_free_slots);
- return xprt->bc_alloc_count -= n;
+ return xprt->bc_alloc_count < xprt->bc_alloc_max;
}
/*
@@ -145,6 +140,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
dprintk("RPC: setup backchannel transport\n");
+ if (min_reqs > BC_MAX_SLOTS)
+ min_reqs = BC_MAX_SLOTS;
+
/*
* We use a temporary list to keep track of the preallocated
* buffers. Once we're done building the list we splice it
@@ -172,7 +170,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
*/
spin_lock(&xprt->bc_pa_lock);
list_splice(&tmp_list, &xprt->bc_pa_list);
- xprt_inc_alloc_count(xprt, min_reqs);
+ xprt->bc_alloc_count += min_reqs;
+ xprt->bc_alloc_max += min_reqs;
+ atomic_add(min_reqs, &xprt->bc_slot_count);
spin_unlock(&xprt->bc_pa_lock);
dprintk("RPC: setup backchannel transport done\n");
@@ -220,11 +220,13 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
goto out;
spin_lock_bh(&xprt->bc_pa_lock);
- xprt_dec_alloc_count(xprt, max_reqs);
+ xprt->bc_alloc_max -= max_reqs;
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
+ xprt->bc_alloc_count--;
+ atomic_dec(&xprt->bc_slot_count);
if (--max_reqs == 0)
break;
}
@@ -241,13 +243,14 @@ static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid,
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
- if (atomic_read(&xprt->bc_free_slots) <= 0)
- goto not_found;
if (list_empty(&xprt->bc_pa_list)) {
if (!new)
goto not_found;
+ if (atomic_read(&xprt->bc_slot_count) >= BC_MAX_SLOTS)
+ goto not_found;
list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
+ atomic_inc(&xprt->bc_slot_count);
}
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
@@ -291,6 +294,7 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
if (xprt_need_to_requeue(xprt)) {
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
+ atomic_inc(&xprt->bc_slot_count);
req = NULL;
}
spin_unlock_bh(&xprt->bc_pa_lock);
@@ -357,7 +361,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
- xprt_dec_alloc_count(xprt, 1);
+ xprt->bc_alloc_count--;
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b03bfa055c082..d8679b6027e93 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -528,6 +528,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.bc_xprt = args->bc_xprt,
};
char servername[48];
+ struct rpc_clnt *clnt;
+ int i;
if (args->bc_xprt) {
WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
@@ -590,7 +592,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- return rpc_create_xprt(args, xprt);
+ clnt = rpc_create_xprt(args, xprt);
+ if (IS_ERR(clnt) || args->nconnect <= 1)
+ return clnt;
+
+ for (i = 0; i < args->nconnect - 1; i++) {
+ if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0)
+ break;
+ }
+ return clnt;
}
EXPORT_SYMBOL_GPL(rpc_create);
@@ -968,13 +978,46 @@ out:
}
EXPORT_SYMBOL_GPL(rpc_bind_new_program);
+struct rpc_xprt *
+rpc_task_get_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ if (!xprt)
+ return NULL;
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ atomic_long_inc(&xps->xps_queuelen);
+ rcu_read_unlock();
+ atomic_long_inc(&xprt->queuelen);
+
+ return xprt;
+}
+
+static void
+rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ atomic_long_dec(&xprt->queuelen);
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ atomic_long_dec(&xps->xps_queuelen);
+ rcu_read_unlock();
+
+ xprt_put(xprt);
+}
+
void rpc_task_release_transport(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
if (xprt) {
task->tk_xprt = NULL;
- xprt_put(xprt);
+ if (task->tk_client)
+ rpc_task_release_xprt(task->tk_client, xprt);
+ else
+ xprt_put(xprt);
}
}
EXPORT_SYMBOL_GPL(rpc_task_release_transport);
@@ -983,6 +1026,7 @@ void rpc_task_release_client(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
+ rpc_task_release_transport(task);
if (clnt != NULL) {
/* Remove from client task list */
spin_lock(&clnt->cl_lock);
@@ -992,14 +1036,34 @@ void rpc_task_release_client(struct rpc_task *task)
rpc_release_client(clnt);
}
- rpc_task_release_transport(task);
+}
+
+static struct rpc_xprt *
+rpc_task_get_first_xprt(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt *xprt;
+
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
+ return rpc_task_get_xprt(clnt, xprt);
+}
+
+static struct rpc_xprt *
+rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+{
+ return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi));
}
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (!task->tk_xprt)
- task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+ if (task->tk_xprt)
+ return;
+ if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
+ task->tk_xprt = rpc_task_get_first_xprt(clnt);
+ else
+ task->tk_xprt = rpc_task_get_next_xprt(clnt);
}
static
@@ -1462,6 +1526,19 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
+unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt *xprt;
+ unsigned int ret;
+
+ rcu_read_lock();
+ xprt = rcu_dereference(clnt->cl_xprt);
+ ret = xprt->ops->bc_num_slots(xprt);
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_num_bc_slots);
+
/**
* rpc_force_rebind - force transport to check that remote port is unchanged
* @clnt: client to rebind
@@ -1788,6 +1865,7 @@ rpc_xdr_encode(struct rpc_task *task)
req->rq_snd_buf.head[0].iov_len = 0;
xdr_init_encode(&xdr, &req->rq_snd_buf,
req->rq_snd_buf.head[0].iov_base, req);
+ xdr_free_bvec(&req->rq_snd_buf);
if (rpc_encode_header(task, &xdr))
return;
@@ -1827,8 +1905,6 @@ call_encode(struct rpc_task *task)
rpc_call_rpcerror(task, task->tk_status);
}
return;
- } else {
- xprt_request_prepare(task->tk_rqstp);
}
/* Add task to reply queue before transmission to avoid races */
@@ -2696,6 +2772,10 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
return -ENOMEM;
data->xps = xprt_switch_get(xps);
data->xprt = xprt_get(xprt);
+ if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) {
+ rpc_cb_add_xprt_release(data);
+ goto success;
+ }
task = rpc_call_null_helper(clnt, xprt, NULL,
RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
@@ -2703,6 +2783,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
+success:
return 1;
}
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 707d7aab15466..fd9bca2427242 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* debugfs interface for sunrpc
*
* (c) 2014 Jeff Layton <jlayton@primarydata.com>
@@ -117,12 +117,37 @@ static const struct file_operations tasks_fops = {
.release = tasks_release,
};
+static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *numv)
+{
+ int len;
+ char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
+ char link[9]; /* enough for 8 hex digits + NULL */
+ int *nump = numv;
+
+ if (IS_ERR_OR_NULL(xprt->debugfs))
+ return 0;
+ len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
+ xprt->debugfs->d_name.name);
+ if (len > sizeof(name))
+ return -1;
+ if (*nump == 0)
+ strcpy(link, "xprt");
+ else {
+ len = snprintf(link, sizeof(link), "xprt%d", *nump);
+ if (len > sizeof(link))
+ return -1;
+ }
+ debugfs_create_symlink(link, clnt->cl_debugfs, name);
+ (*nump)++;
+ return 0;
+}
+
void
rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
{
int len;
- char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
- struct rpc_xprt *xprt;
+ char name[9]; /* enough for 8 hex digits + NULL */
+ int xprtnum = 0;
len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
if (len >= sizeof(name))
@@ -135,26 +160,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt,
&tasks_fops);
- rcu_read_lock();
- xprt = rcu_dereference(clnt->cl_xprt);
- /* no "debugfs" dentry? Don't bother with the symlink. */
- if (IS_ERR_OR_NULL(xprt->debugfs)) {
- rcu_read_unlock();
- return;
- }
- len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
- xprt->debugfs->d_name.name);
- rcu_read_unlock();
-
- if (len >= sizeof(name))
- goto out_err;
-
- debugfs_create_symlink("xprt", clnt->cl_debugfs, name);
-
- return;
-out_err:
- debugfs_remove_recursive(clnt->cl_debugfs);
- clnt->cl_debugfs = NULL;
+ rpc_clnt_iterate_for_each_xprt(clnt, do_xprt_debugfs, &xprtnum);
}
void
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 73bd62979fe76..748bac601e47d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/kernel.h>
@@ -1352,11 +1353,11 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
}
static int
-rpc_fill_super(struct super_block *sb, void *data, int silent)
+rpc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
struct dentry *root, *gssd_dentry;
- struct net *net = get_net(sb->s_fs_info);
+ struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1413,12 +1414,29 @@ gssd_running(struct net *net)
}
EXPORT_SYMBOL_GPL(gssd_running);
-static struct dentry *
-rpc_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int rpc_fs_get_tree(struct fs_context *fc)
+{
+ fc->s_fs_info = get_net(fc->net_ns);
+ return vfs_get_super(fc, vfs_get_keyed_super, rpc_fill_super);
+}
+
+static void rpc_fs_free_fc(struct fs_context *fc)
{
- struct net *net = current->nsproxy->net_ns;
- return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
+ if (fc->s_fs_info)
+ put_net(fc->s_fs_info);
+}
+
+static const struct fs_context_operations rpc_fs_context_ops = {
+ .free = rpc_fs_free_fc,
+ .get_tree = rpc_fs_get_tree,
+};
+
+static int rpc_init_fs_context(struct fs_context *fc)
+{
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+ fc->ops = &rpc_fs_context_ops;
+ return 0;
}
static void rpc_kill_sb(struct super_block *sb)
@@ -1446,7 +1464,7 @@ out:
static struct file_system_type rpc_pipe_fs_type = {
.owner = THIS_MODULE,
.name = "rpc_pipefs",
- .mount = rpc_mount,
+ .init_fs_context = rpc_init_fs_context,
.kill_sb = rpc_kill_sb,
};
MODULE_ALIAS_FS("rpc_pipefs");
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a2c1148127172..1f275aba786fc 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -23,6 +23,7 @@
#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
#include "sunrpc.h"
@@ -46,7 +47,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(struct timer_list *t);
+static void __rpc_queue_timer_fn(struct work_struct *);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -58,6 +59,7 @@ static struct rpc_wait_queue delay_queue;
*/
struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
+EXPORT_SYMBOL_GPL(xprtiod_workqueue);
unsigned long
rpc_task_timeout(const struct rpc_task *task)
@@ -87,13 +89,19 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
task->tk_timeout = 0;
list_del(&task->u.tk_wait.timer_list);
if (list_empty(&queue->timer_list.list))
- del_timer(&queue->timer_list.timer);
+ cancel_delayed_work(&queue->timer_list.dwork);
}
static void
rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
{
- timer_reduce(&queue->timer_list.timer, expires);
+ unsigned long now = jiffies;
+ queue->timer_list.expires = expires;
+ if (time_before_eq(expires, now))
+ expires = 0;
+ else
+ expires -= now;
+ mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires);
}
/*
@@ -107,7 +115,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
task->tk_pid, jiffies_to_msecs(timeout - jiffies));
task->tk_timeout = timeout;
- rpc_set_queue_timer(queue, timeout);
+ if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
+ rpc_set_queue_timer(queue, timeout);
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
@@ -250,7 +259,8 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
- timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
+ queue->timer_list.expires = 0;
+ INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@@ -269,7 +279,7 @@ EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
{
- del_timer_sync(&queue->timer_list.timer);
+ cancel_delayed_work_sync(&queue->timer_list.dwork);
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
@@ -424,9 +434,9 @@ void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
@@ -442,9 +452,9 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority(q, task, task->tk_priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
@@ -458,9 +468,9 @@ void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority_timeout(q, task, timeout, priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
@@ -475,9 +485,9 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority(q, task, priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
@@ -555,9 +565,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
{
if (!RPC_IS_QUEUED(task))
return;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
/*
@@ -567,9 +577,9 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task
{
if (!RPC_IS_QUEUED(task))
return;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
rpc_wake_up_task_queue_locked(queue, task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
@@ -602,9 +612,9 @@ rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
{
if (!RPC_IS_QUEUED(task))
return;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
rpc_wake_up_task_queue_set_status_locked(queue, task, status);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
/*
@@ -667,12 +677,12 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
dprintk("RPC: wake_up_first(%p \"%s\")\n",
queue, rpc_qname(queue));
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
task = __rpc_find_next_queued(queue);
if (task != NULL)
task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
task, func, data);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
return task;
}
@@ -711,7 +721,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
{
struct list_head *head;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
@@ -725,7 +735,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
break;
head--;
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up);
@@ -740,7 +750,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
struct list_head *head;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
@@ -755,13 +765,15 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
break;
head--;
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
-static void __rpc_queue_timer_fn(struct timer_list *t)
+static void __rpc_queue_timer_fn(struct work_struct *work)
{
- struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
+ struct rpc_wait_queue *queue = container_of(work,
+ struct rpc_wait_queue,
+ timer_list.dwork.work);
struct rpc_task *task, *n;
unsigned long expires, now, timeo;
@@ -832,6 +844,10 @@ rpc_reset_task_statistics(struct rpc_task *task)
void rpc_exit_task(struct rpc_task *task)
{
task->tk_action = NULL;
+ if (task->tk_ops->rpc_count_stats)
+ task->tk_ops->rpc_count_stats(task, task->tk_calldata);
+ else if (task->tk_client)
+ rpc_count_iostats(task, task->tk_client->cl_metrics);
if (task->tk_ops->rpc_call_done != NULL) {
task->tk_ops->rpc_call_done(task, task->tk_calldata);
if (task->tk_action != NULL) {
@@ -927,13 +943,13 @@ static void __rpc_execute(struct rpc_task *task)
* rpc_task pointer may still be dereferenced.
*/
queue = task->tk_waitqueue;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
if (!RPC_IS_QUEUED(task)) {
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
continue;
}
rpc_clear_running(task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
if (task_is_async)
return;
@@ -1076,7 +1092,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
/* Initialize workqueue for async tasks */
task->tk_workqueue = task_setup_data->workqueue;
- task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
+ task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client,
+ xprt_get(task_setup_data->rpc_xprt));
task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 2b6dc7e5f74f6..7c74197c2ecfc 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -177,6 +177,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
execute = ktime_sub(now, task->tk_start);
op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
+ if (task->tk_status < 0)
+ op_metrics->om_error_status++;
spin_unlock(&op_metrics->om_lock);
@@ -219,13 +221,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b)
a->om_queue = ktime_add(a->om_queue, b->om_queue);
a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
a->om_execute = ktime_add(a->om_execute, b->om_execute);
+ a->om_error_status += b->om_error_status;
}
static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
int op, const struct rpc_procinfo *procs)
{
_print_name(seq, op, procs);
- seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+ seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n",
stats->om_ops,
stats->om_ntrans,
stats->om_timeouts,
@@ -233,12 +236,20 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
stats->om_bytes_recv,
ktime_to_ms(stats->om_queue),
ktime_to_ms(stats->om_rtt),
- ktime_to_ms(stats->om_execute));
+ ktime_to_ms(stats->om_execute),
+ stats->om_error_status);
+}
+
+static int do_print_stats(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *seqv)
+{
+ struct seq_file *seq = seqv;
+
+ xprt->ops->print_stats(xprt, seq);
+ return 0;
}
void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
{
- struct rpc_xprt *xprt;
unsigned int op, maxproc = clnt->cl_maxproc;
if (!clnt->cl_metrics)
@@ -248,11 +259,7 @@ void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
seq_printf(seq, "p/v: %u/%u (%s)\n",
clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
- rcu_read_lock();
- xprt = rcu_dereference(clnt->cl_xprt);
- if (xprt)
- xprt->ops->print_stats(xprt, seq);
- rcu_read_unlock();
+ rpc_clnt_iterate_for_each_xprt(clnt, do_print_stats, seq);
seq_printf(seq, "\tper-op statistics\n");
for (op = 0; op < maxproc; op++) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e15cb704453ea..220b79988000e 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1595,7 +1595,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
/* Parse and execute the bc call */
proc_error = svc_process_common(rqstp, argv, resv);
- atomic_inc(&req->rq_xprt->bc_free_slots);
+ atomic_dec(&req->rq_xprt->bc_slot_count);
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f6c82b1651e73..783748dc5e6fb 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -302,9 +302,9 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task)
return 1;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
retval = xprt->ops->reserve_xprt(xprt, task);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return retval;
}
@@ -381,9 +381,9 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta
{
if (xprt->snd_task != task)
return;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
/*
@@ -435,9 +435,9 @@ xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
if (req->rq_cong)
return true;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
ret = __xprt_get_cong(xprt, req) != 0;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return ret;
}
EXPORT_SYMBOL_GPL(xprt_request_get_cong);
@@ -464,9 +464,9 @@ static void
xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
{
if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
__xprt_lock_write_next_cong(xprt);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
}
@@ -563,9 +563,9 @@ bool xprt_write_space(struct rpc_xprt *xprt)
if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
return false;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
ret = xprt_clear_write_space_locked(xprt);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return ret;
}
EXPORT_SYMBOL_GPL(xprt_write_space);
@@ -634,9 +634,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
req->rq_retries = 0;
xprt_reset_majortimeo(req);
/* Reset the RTT counters == "slow start" */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
status = -ETIMEDOUT;
}
@@ -668,11 +668,11 @@ static void xprt_autoclose(struct work_struct *work)
void xprt_disconnect_done(struct rpc_xprt *xprt)
{
dprintk("RPC: disconnected transport %p\n", xprt);
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt_clear_connected(xprt);
xprt_clear_write_space_locked(xprt);
xprt_wake_pending_tasks(xprt, -ENOTCONN);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -684,7 +684,7 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done);
void xprt_force_disconnect(struct rpc_xprt *xprt)
{
/* Don't race with the test_bit() in xprt_clear_locked() */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
set_bit(XPRT_CLOSE_WAIT, &xprt->state);
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
@@ -692,7 +692,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
else if (xprt->snd_task)
rpc_wake_up_queued_task_set_status(&xprt->pending,
xprt->snd_task, -ENOTCONN);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -726,7 +726,7 @@ xprt_request_retransmit_after_disconnect(struct rpc_task *task)
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
{
/* Don't race with the test_bit() in xprt_clear_locked() */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (cookie != xprt->connect_cookie)
goto out;
if (test_bit(XPRT_CLOSING, &xprt->state))
@@ -737,7 +737,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
static bool
@@ -750,6 +750,7 @@ static void
xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
__must_hold(&xprt->transport_lock)
{
+ xprt->last_used = jiffies;
if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
}
@@ -759,18 +760,13 @@ xprt_init_autodisconnect(struct timer_list *t)
{
struct rpc_xprt *xprt = from_timer(xprt, t, timer);
- spin_lock(&xprt->transport_lock);
if (!RB_EMPTY_ROOT(&xprt->recv_queue))
- goto out_abort;
+ return;
/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
xprt->last_used = jiffies;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
- goto out_abort;
- spin_unlock(&xprt->transport_lock);
+ return;
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- return;
-out_abort:
- spin_unlock(&xprt->transport_lock);
}
bool xprt_lock_connect(struct rpc_xprt *xprt,
@@ -779,7 +775,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
{
bool ret = false;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (!test_bit(XPRT_LOCKED, &xprt->state))
goto out;
if (xprt->snd_task != task)
@@ -787,13 +783,13 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
xprt->snd_task = cookie;
ret = true;
out:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return ret;
}
void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
{
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (xprt->snd_task != cookie)
goto out;
if (!test_bit(XPRT_LOCKED, &xprt->state))
@@ -802,7 +798,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
xprt->ops->release_xprt(xprt, NULL);
xprt_schedule_autodisconnect(xprt);
out:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
}
@@ -850,6 +846,38 @@ void xprt_connect(struct rpc_task *task)
xprt_release_write(xprt, task);
}
+/**
+ * xprt_reconnect_delay - compute the wait before scheduling a connect
+ * @xprt: transport instance
+ *
+ */
+unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt)
+{
+ unsigned long start, now = jiffies;
+
+ start = xprt->stat.connect_start + xprt->reestablish_timeout;
+ if (time_after(start, now))
+ return start - now;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xprt_reconnect_delay);
+
+/**
+ * xprt_reconnect_backoff - compute the new re-establish timeout
+ * @xprt: transport instance
+ * @init_to: initial reestablish timeout
+ *
+ */
+void xprt_reconnect_backoff(struct rpc_xprt *xprt, unsigned long init_to)
+{
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+ xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+ if (xprt->reestablish_timeout < init_to)
+ xprt->reestablish_timeout = init_to;
+}
+EXPORT_SYMBOL_GPL(xprt_reconnect_backoff);
+
enum xprt_xid_rb_cmp {
XID_RB_EQUAL,
XID_RB_LEFT,
@@ -1013,6 +1041,8 @@ xprt_request_enqueue_receive(struct rpc_task *task)
if (!xprt_request_need_enqueue_receive(task, req))
return;
+
+ xprt_request_prepare(task->tk_rqstp);
spin_lock(&xprt->queue_lock);
/* Update the softirq receive buffer */
@@ -1412,14 +1442,14 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
xprt_inject_disconnect(xprt);
task->tk_flags |= RPC_TASK_SENT;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
xprt->stat.sending_u += xprt->sending.qlen;
xprt->stat.pending_u += xprt->pending.qlen;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
req->rq_connect_cookie = connect_cookie;
out_dequeue:
@@ -1765,18 +1795,13 @@ void xprt_release(struct rpc_task *task)
}
xprt = req->rq_xprt;
- if (task->tk_ops->rpc_count_stats != NULL)
- task->tk_ops->rpc_count_stats(task, task->tk_calldata);
- else if (task->tk_client)
- rpc_count_iostats(task, task->tk_client->cl_metrics);
xprt_request_dequeue_all(task, req);
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
xprt->ops->release_request(task);
- xprt->last_used = jiffies;
xprt_schedule_autodisconnect(xprt);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 8394124126f8f..78c075a68c047 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -19,7 +19,7 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtmultipath.h>
-typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
+typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
const struct rpc_xprt *cur);
static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
@@ -36,6 +36,7 @@ static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
if (xps->xps_nxprts == 0)
xps->xps_net = xprt->xprt_net;
xps->xps_nxprts++;
+ xps->xps_nactive++;
}
/**
@@ -51,8 +52,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
if (xprt == NULL)
return;
spin_lock(&xps->xps_lock);
- if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) &&
- !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
+ if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
xprt_switch_add_xprt_locked(xps, xprt);
spin_unlock(&xps->xps_lock);
}
@@ -62,6 +62,7 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
{
if (unlikely(xprt == NULL))
return;
+ xps->xps_nactive--;
xps->xps_nxprts--;
if (xps->xps_nxprts == 0)
xps->xps_net = NULL;
@@ -102,7 +103,9 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
if (xps != NULL) {
spin_lock_init(&xps->xps_lock);
kref_init(&xps->xps_kref);
- xps->xps_nxprts = 0;
+ xps->xps_nxprts = xps->xps_nactive = 0;
+ atomic_long_set(&xps->xps_queuelen, 0);
+ xps->xps_net = NULL;
INIT_LIST_HEAD(&xps->xps_xprt_list);
xps->xps_iter_ops = &rpc_xprt_iter_singular;
xprt_switch_add_xprt_locked(xps, xprt);
@@ -193,9 +196,21 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
}
static
+bool xprt_is_active(const struct rpc_xprt *xprt)
+{
+ return kref_read(&xprt->kref) != 0;
+}
+
+static
struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
{
- return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch);
+ struct rpc_xprt *pos;
+
+ list_for_each_entry_rcu(pos, head, xprt_switch) {
+ if (xprt_is_active(pos))
+ return pos;
+ }
+ return NULL;
}
static
@@ -213,9 +228,12 @@ struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
const struct rpc_xprt *cur)
{
struct rpc_xprt *pos;
+ bool found = false;
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == pos)
+ found = true;
+ if (found && xprt_is_active(pos))
return pos;
}
return NULL;
@@ -260,9 +278,12 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur)
{
struct rpc_xprt *pos, *prev = NULL;
+ bool found = false;
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == prev)
+ found = true;
+ if (found && xprt_is_active(pos))
return pos;
prev = pos;
}
@@ -270,22 +291,15 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
}
static
-struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head,
+struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps,
struct rpc_xprt **cursor,
xprt_switch_find_xprt_t find_next)
{
- struct rpc_xprt *cur, *pos, *old;
+ struct rpc_xprt *pos, *old;
- cur = READ_ONCE(*cursor);
- for (;;) {
- old = cur;
- pos = find_next(head, old);
- if (pos == NULL)
- break;
- cur = cmpxchg_relaxed(cursor, old, pos);
- if (cur == old)
- break;
- }
+ old = smp_load_acquire(cursor);
+ pos = find_next(xps, old);
+ smp_store_release(cursor, pos);
return pos;
}
@@ -297,13 +311,11 @@ struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
if (xps == NULL)
return NULL;
- return xprt_switch_set_next_cursor(&xps->xps_xprt_list,
- &xpi->xpi_cursor,
- find_next);
+ return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next);
}
static
-struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
+struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head,
const struct rpc_xprt *cur)
{
struct rpc_xprt *ret;
@@ -315,6 +327,31 @@ struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
}
static
+struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps,
+ const struct rpc_xprt *cur)
+{
+ struct list_head *head = &xps->xps_xprt_list;
+ struct rpc_xprt *xprt;
+ unsigned int nactive;
+
+ for (;;) {
+ unsigned long xprt_queuelen, xps_queuelen;
+
+ xprt = __xprt_switch_find_next_entry_roundrobin(head, cur);
+ if (!xprt)
+ break;
+ xprt_queuelen = atomic_long_read(&xprt->queuelen);
+ xps_queuelen = atomic_long_read(&xps->xps_queuelen);
+ nactive = READ_ONCE(xps->xps_nactive);
+ /* Exit loop if xprt_queuelen <= average queue length */
+ if (xprt_queuelen * nactive <= xps_queuelen)
+ break;
+ cur = xprt;
+ }
+ return xprt;
+}
+
+static
struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
{
return xprt_iter_next_entry_multiple(xpi,
@@ -322,9 +359,17 @@ struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
}
static
+struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps,
+ const struct rpc_xprt *cur)
+{
+ return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur);
+}
+
+static
struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
{
- return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry);
+ return xprt_iter_next_entry_multiple(xpi,
+ xprt_switch_find_next_entry_all);
}
/*
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ce986591f2138..59e624b1d7a0d 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -52,6 +52,13 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
+unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ return r_xprt->rx_buf.rb_bc_srv_max_requests;
+}
+
static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 794ba4ca0994d..0b6dad7580a1f 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -144,6 +144,26 @@ frwr_mr_recycle_worker(struct work_struct *work)
frwr_release_mr(mr);
}
+/* frwr_reset - Place MRs back on the free list
+ * @req: request to reset
+ *
+ * Used after a failed marshal. For FRWR, this means the MRs
+ * don't have to be fully released and recreated.
+ *
+ * NB: This is safe only as long as none of @req's MRs are
+ * involved with an ongoing asynchronous FAST_REG or LOCAL_INV
+ * Work Request.
+ */
+void frwr_reset(struct rpcrdma_req *req)
+{
+ while (!list_empty(&req->rl_registered)) {
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_unmap_and_put(mr);
+ }
+}
+
/**
* frwr_init_mr - Initialize one MR
* @ia: interface adapter
@@ -168,7 +188,6 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
goto out_list_err;
mr->frwr.fr_mr = frmr;
- mr->frwr.fr_state = FRWR_IS_INVALID;
mr->mr_dir = DMA_NONE;
INIT_LIST_HEAD(&mr->mr_list);
INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
@@ -298,65 +317,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
}
/**
- * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- */
-static void
-frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-
- /* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS)
- frwr->fr_state = FRWR_FLUSHED_FR;
- trace_xprtrdma_wc_fastreg(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- */
-static void
-frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
- fr_cqe);
-
- /* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS)
- frwr->fr_state = FRWR_FLUSHED_LI;
- trace_xprtrdma_wc_li(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- * Awaken anyone waiting for an MR to finish being fenced.
- */
-static void
-frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
- fr_cqe);
-
- /* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS)
- frwr->fr_state = FRWR_FLUSHED_LI;
- trace_xprtrdma_wc_li_wake(wc, frwr);
- complete(&frwr->fr_linv_done);
-}
-
-/**
* frwr_map - Register a memory region
* @r_xprt: controlling transport
* @seg: memory region co-ordinates
@@ -378,23 +338,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
int i, n;
u8 key;
- mr = NULL;
- do {
- if (mr)
- rpcrdma_mr_recycle(mr);
- mr = rpcrdma_mr_get(r_xprt);
- if (!mr)
- return ERR_PTR(-EAGAIN);
- } while (mr->frwr.fr_state != FRWR_IS_INVALID);
- frwr = &mr->frwr;
- frwr->fr_state = FRWR_IS_VALID;
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
+ goto out_getmr_err;
if (nsegs > ia->ri_max_frwr_depth)
nsegs = ia->ri_max_frwr_depth;
@@ -423,7 +375,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
if (!mr->mr_nents)
goto out_dmamap_err;
- ibmr = frwr->fr_mr;
+ ibmr = mr->frwr.fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
@@ -433,7 +385,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
- reg_wr = &frwr->fr_regwr;
+ reg_wr = &mr->frwr.fr_regwr;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
@@ -448,6 +400,10 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
*out = mr;
return seg;
+out_getmr_err:
+ xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
+ return ERR_PTR(-EAGAIN);
+
out_dmamap_err:
mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
@@ -461,6 +417,23 @@ out_mapmr_err:
}
/**
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_fastreg(wc, frwr);
+ /* The MR will get recycled when the associated req is retransmitted */
+}
+
+/**
* frwr_send - post Send WR containing the RPC Call message
* @ia: interface adapter
* @req: Prepared RPC Call
@@ -512,31 +485,75 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
trace_xprtrdma_mr_remoteinv(mr);
- mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
}
}
+static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
+{
+ if (wc->status != IB_WC_SUCCESS)
+ rpcrdma_mr_recycle(mr);
+ else
+ rpcrdma_mr_unmap_and_put(mr);
+}
+
/**
- * frwr_unmap_sync - invalidate memory regions that were registered for @req
- * @r_xprt: controlling transport
- * @mrs: list of MRs to process
+ * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_li(wc, frwr);
+ __frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
*
- * Sleeps until it is safe for the host CPU to access the
- * previously mapped memory regions.
+ * Awaken anyone waiting for an MR to finish being fenced.
+ */
+static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_li_wake(wc, frwr);
+ complete(&frwr->fr_linv_done);
+ __frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_unmap_sync - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
*
- * Caller ensures that @mrs is not empty before the call. This
- * function empties the list.
+ * Sleeps until it is safe for the host CPU to access the previously mapped
+ * memory regions. This guarantees that registered MRs are properly fenced
+ * from the server before the RPC consumer accesses the data in them. It
+ * also ensures proper Send flow control: waking the next RPC waits until
+ * this RPC has relinquished all its Send Queue entries.
*/
-void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, **prev, *last;
const struct ib_send_wr *bad_wr;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
- int count, rc;
+ int rc;
/* ORDER: Invalidate all of the MRs first
*
@@ -544,33 +561,32 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
* a single ib_post_send() call.
*/
frwr = NULL;
- count = 0;
prev = &first;
- list_for_each_entry(mr, mrs, mr_list) {
- mr->frwr.fr_state = FRWR_IS_INVALID;
+ while (!list_empty(&req->rl_registered)) {
+ mr = rpcrdma_mr_pop(&req->rl_registered);
- frwr = &mr->frwr;
trace_xprtrdma_mr_localinv(mr);
+ r_xprt->rx_stats.local_inv_needed++;
+ frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
last = &frwr->fr_invwr;
- memset(last, 0, sizeof(*last));
+ last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
+ last->sg_list = NULL;
+ last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
+ last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
- count++;
*prev = last;
prev = &last->next;
}
- if (!frwr)
- goto unmap;
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- last->send_flags = IB_SEND_SIGNALED;
frwr->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&frwr->fr_linv_done);
@@ -578,37 +594,126 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
*/
- r_xprt->rx_stats.local_inv_needed++;
bad_wr = NULL;
- rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
+ rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+ trace_xprtrdma_post_send(req, rc);
+
+ /* The final LOCAL_INV WR in the chain is supposed to
+ * do the wake. If it was never posted, the wake will
+ * not happen, so don't wait in that case.
+ */
if (bad_wr != first)
wait_for_completion(&frwr->fr_linv_done);
- if (rc)
- goto out_release;
+ if (!rc)
+ return;
- /* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MR list.
+ /* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
-unmap:
- while (!list_empty(mrs)) {
- mr = rpcrdma_mr_pop(mrs);
- rpcrdma_mr_unmap_and_put(mr);
+ while (bad_wr) {
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ bad_wr = bad_wr->next;
+
+ list_del_init(&mr->mr_list);
+ rpcrdma_mr_recycle(mr);
}
- return;
+}
-out_release:
- pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
+/**
+ * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
- /* Unmap and release the MRs in the LOCAL_INV WRs that did not
- * get posted.
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_li_done(wc, frwr);
+ rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
+ __frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_unmap_async - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
+ *
+ * This guarantees that registered MRs are properly fenced from the
+ * server before the RPC consumer accesses the data in them. It also
+ * ensures proper Send flow control: waking the next RPC waits until
+ * this RPC has relinquished all its Send Queue entries.
+ */
+void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *first, *last, **prev;
+ const struct ib_send_wr *bad_wr;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
+ int rc;
+
+ /* Chain the LOCAL_INV Work Requests and post them with
+ * a single ib_post_send() call.
+ */
+ frwr = NULL;
+ prev = &first;
+ while (!list_empty(&req->rl_registered)) {
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+
+ trace_xprtrdma_mr_localinv(mr);
+ r_xprt->rx_stats.local_inv_needed++;
+
+ frwr = &mr->frwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ frwr->fr_req = req;
+ last = &frwr->fr_invwr;
+ last->next = NULL;
+ last->wr_cqe = &frwr->fr_cqe;
+ last->sg_list = NULL;
+ last->num_sge = 0;
+ last->opcode = IB_WR_LOCAL_INV;
+ last->send_flags = IB_SEND_SIGNALED;
+ last->ex.invalidate_rkey = mr->mr_handle;
+
+ *prev = last;
+ prev = &last->next;
+ }
+
+ /* Strong send queue ordering guarantees that when the
+ * last WR in the chain completes, all WRs in the chain
+ * are complete. The last completion will wake up the
+ * RPC waiter.
+ */
+ frwr->fr_cqe.done = frwr_wc_localinv_done;
+
+ /* Transport disconnect drains the receive CQ before it
+ * replaces the QP. The RPC reply handler won't call us
+ * unless ri_id->qp is a valid pointer.
+ */
+ bad_wr = NULL;
+ rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+ trace_xprtrdma_post_send(req, rc);
+ if (!rc)
+ return;
+
+ /* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr,
- fr_invwr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
bad_wr = bad_wr->next;
- list_del_init(&mr->mr_list);
rpcrdma_mr_recycle(mr);
}
+
+ /* The final LOCAL_INV WR in the chain is supposed to
+ * do the wake. If it was never posted, the wake will
+ * not happen, so wake here in that case.
+ */
+ rpcrdma_complete_rqst(req->rl_reply);
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 85115a2e26392..4345e6912392c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -366,6 +366,9 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
unsigned int pos;
int nsegs;
+ if (rtype == rpcrdma_noch)
+ goto done;
+
pos = rqst->rq_snd_buf.head[0].iov_len;
if (rtype == rpcrdma_areadch)
pos = 0;
@@ -389,7 +392,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nsegs -= mr->mr_nents;
} while (nsegs);
- return 0;
+done:
+ return encode_item_not_present(xdr);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -417,6 +421,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
int nsegs, nchunks;
__be32 *segcount;
+ if (wtype != rpcrdma_writech)
+ goto done;
+
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
@@ -451,7 +458,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
/* Update count of segments in this Write chunk */
*segcount = cpu_to_be32(nchunks);
- return 0;
+done:
+ return encode_item_not_present(xdr);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -476,6 +484,9 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
int nsegs, nchunks;
__be32 *segcount;
+ if (wtype != rpcrdma_replych)
+ return encode_item_not_present(xdr);
+
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
if (nsegs < 0)
@@ -511,6 +522,16 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
return 0;
}
+static void rpcrdma_sendctx_done(struct kref *kref)
+{
+ struct rpcrdma_req *req =
+ container_of(kref, struct rpcrdma_req, rl_kref);
+ struct rpcrdma_rep *rep = req->rl_reply;
+
+ rpcrdma_complete_rqst(rep);
+ rep->rr_rxprt->rx_stats.reply_waits_for_send++;
+}
+
/**
* rpcrdma_sendctx_unmap - DMA-unmap Send buffer
* @sc: sendctx containing SGEs to unmap
@@ -520,6 +541,9 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
struct ib_sge *sge;
+ if (!sc->sc_unmap_count)
+ return;
+
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -529,9 +553,7 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
DMA_TO_DEVICE);
- if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
- &sc->sc_req->rl_flags))
- wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+ kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}
/* Prepare an SGE for the RPC-over-RDMA transport header.
@@ -666,7 +688,7 @@ map_tail:
out:
sc->sc_wr.num_sge += sge_no;
if (sc->sc_unmap_count)
- __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ kref_get(&req->rl_kref);
return true;
out_regbuf:
@@ -699,22 +721,28 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
+ int ret;
+
+ ret = -EAGAIN;
req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
if (!req->rl_sendctx)
- return -EAGAIN;
+ goto err;
req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
- __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ kref_init(&req->rl_kref);
+ ret = -EIO;
if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
- return -EIO;
-
+ goto err;
if (rtype != rpcrdma_areadch)
if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
- return -EIO;
-
+ goto err;
return 0;
+
+err:
+ trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
+ return ret;
}
/**
@@ -842,50 +870,28 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
* send a Call message with a Position Zero Read chunk and a
* regular Read chunk at the same time.
*/
- if (rtype != rpcrdma_noch) {
- ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
- if (ret)
- goto out_err;
- }
- ret = encode_item_not_present(xdr);
+ ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
if (ret)
goto out_err;
-
- if (wtype == rpcrdma_writech) {
- ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
- if (ret)
- goto out_err;
- }
- ret = encode_item_not_present(xdr);
+ ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
if (ret)
goto out_err;
-
- if (wtype != rpcrdma_replych)
- ret = encode_item_not_present(xdr);
- else
- ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
+ ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
if (ret)
goto out_err;
- trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
-
- ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+ ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
&rqst->rq_snd_buf, rtype);
if (ret)
goto out_err;
+
+ trace_xprtrdma_marshal(req, rtype, wtype);
return 0;
out_err:
trace_xprtrdma_marshal_failed(rqst, ret);
- switch (ret) {
- case -EAGAIN:
- xprt_wait_for_buffer_space(rqst->rq_xprt);
- break;
- case -ENOBUFS:
- break;
- default:
- r_xprt->rx_stats.failed_marshal_count++;
- }
+ r_xprt->rx_stats.failed_marshal_count++;
+ frwr_reset(req);
return ret;
}
@@ -1269,51 +1275,17 @@ out_badheader:
goto out;
}
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
- /* Invalidate and unmap the data payloads before waking
- * the waiting application. This guarantees the memory
- * regions are properly fenced from the server before the
- * application accesses the data. It also ensures proper
- * send flow control: waking the next RPC waits until this
- * RPC has relinquished all its Send Queue entries.
- */
- if (!list_empty(&req->rl_registered))
- frwr_unmap_sync(r_xprt, &req->rl_registered);
-
- /* Ensure that any DMA mapped pages associated with
- * the Send of the RPC Call have been unmapped before
- * allowing the RPC to complete. This protects argument
- * memory not controlled by the RPC client from being
- * re-used before we're done with it.
- */
- if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
- r_xprt->rx_stats.reply_waits_for_send++;
- out_of_line_wait_on_bit(&req->rl_flags,
- RPCRDMA_REQ_F_TX_RESOURCES,
- bit_wait,
- TASK_UNINTERRUPTIBLE);
- }
-}
-
-/* Reply handling runs in the poll worker thread. Anything that
- * might wait is deferred to a separate workqueue.
- */
-void rpcrdma_deferred_completion(struct work_struct *work)
+static void rpcrdma_reply_done(struct kref *kref)
{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
- struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
- struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpcrdma_req *req =
+ container_of(kref, struct rpcrdma_req, rl_kref);
- trace_xprtrdma_defer_cmp(rep);
- if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
- frwr_reminv(rep, &req->rl_registered);
- rpcrdma_release_rqst(r_xprt, req);
- rpcrdma_complete_rqst(rep);
+ rpcrdma_complete_rqst(req->rl_reply);
}
-/* Process received RPC/RDMA messages.
+/**
+ * rpcrdma_reply_handler - Process received RPC/RDMA messages
+ * @rep: Incoming rpcrdma_rep object to process
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
@@ -1360,10 +1332,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
else if (credits > buf->rb_max_requests)
credits = buf->rb_max_requests;
if (buf->rb_credits != credits) {
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
buf->rb_credits = credits;
xprt->cwnd = credits << RPC_CWNDSHIFT;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
req = rpcr_to_rdmar(rqst);
@@ -1373,10 +1345,16 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
}
req->rl_reply = rep;
rep->rr_rqst = rqst;
- clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- queue_work(buf->rb_completion_wq, &rep->rr_work);
+
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ frwr_reminv(rep, &req->rl_registered);
+ if (!list_empty(&req->rl_registered))
+ frwr_unmap_async(r_xprt, req);
+ /* LocalInv completion will complete the RPC */
+ else
+ kref_put(&req->rl_kref, rpcrdma_reply_done);
return;
out_badversion:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index bed57d8b5c196..d1fcc41d5eb57 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -72,9 +72,9 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
credits = r_xprt->rx_buf.rb_bc_max_requests;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->cwnd = credits << RPC_CWNDSHIFT;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
ret = 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 0004535c01882..3fe665152d954 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -226,9 +226,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
* Enqueue the new transport on the accept queue of the listening
* transport
*/
- spin_lock_bh(&listen_xprt->sc_lock);
+ spin_lock(&listen_xprt->sc_lock);
list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
- spin_unlock_bh(&listen_xprt->sc_lock);
+ spin_unlock(&listen_xprt->sc_lock);
set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
svc_xprt_enqueue(&listen_xprt->sc_xprt);
@@ -401,7 +401,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
/* Get the next entry off the accept list */
- spin_lock_bh(&listen_rdma->sc_lock);
+ spin_lock(&listen_rdma->sc_lock);
if (!list_empty(&listen_rdma->sc_accept_q)) {
newxprt = list_entry(listen_rdma->sc_accept_q.next,
struct svcxprt_rdma, sc_accept_q);
@@ -409,7 +409,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
if (!list_empty(&listen_rdma->sc_accept_q))
set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
- spin_unlock_bh(&listen_rdma->sc_lock);
+ spin_unlock(&listen_rdma->sc_lock);
if (!newxprt)
return NULL;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1f73a6a7e43c2..2ec349ed47702 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -80,7 +80,6 @@ static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
-static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
@@ -122,7 +121,7 @@ static struct ctl_table xr_tunables_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &max_padding,
},
{
@@ -298,6 +297,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
+/* 60 second timeout, no retries */
static const struct rpc_timeout xprt_rdma_default_timeout = {
.to_initval = 60 * HZ,
.to_maxval = 60 * HZ,
@@ -323,8 +323,9 @@ xprt_setup_rdma(struct xprt_create *args)
if (!xprt)
return ERR_PTR(-ENOMEM);
- /* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
+ xprt->connect_timeout = xprt->timeout->to_initval;
+ xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
xprt->bind_timeout = RPCRDMA_BIND_TO;
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
@@ -487,31 +488,64 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
}
/**
- * xprt_rdma_connect - try to establish a transport connection
+ * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection
+ * @xprt: controlling transport instance
+ * @connect_timeout: reconnect timeout after client disconnects
+ * @reconnect_timeout: reconnect timeout after server disconnects
+ *
+ */
+static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout,
+ unsigned long reconnect_timeout)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout);
+
+ spin_lock(&xprt->transport_lock);
+
+ if (connect_timeout < xprt->connect_timeout) {
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ to = *xprt->timeout;
+ initval = connect_timeout;
+ if (initval < RPCRDMA_INIT_REEST_TO << 1)
+ initval = RPCRDMA_INIT_REEST_TO << 1;
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ r_xprt->rx_timeout = to;
+ xprt->timeout = &r_xprt->rx_timeout;
+ xprt->connect_timeout = connect_timeout;
+ }
+
+ if (reconnect_timeout < xprt->max_reconnect_timeout)
+ xprt->max_reconnect_timeout = reconnect_timeout;
+
+ spin_unlock(&xprt->transport_lock);
+}
+
+/**
+ * xprt_rdma_connect - schedule an attempt to reconnect
* @xprt: transport state
- * @task: RPC scheduler context
+ * @task: RPC scheduler context (unused)
*
*/
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ unsigned long delay;
trace_xprtrdma_op_connect(r_xprt);
+
+ delay = 0;
if (r_xprt->rx_ep.rep_connected != 0) {
- /* Reconnect */
- schedule_delayed_work(&r_xprt->rx_connect_worker,
- xprt->reestablish_timeout);
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
- xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
- else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
- xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
- } else {
- schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
- if (!RPC_IS_ASYNC(task))
- flush_delayed_work(&r_xprt->rx_connect_worker);
+ delay = xprt_reconnect_delay(xprt);
+ xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
}
+ queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
+ delay);
}
/**
@@ -550,8 +584,11 @@ out_sleep:
static void
xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(xprt, struct rpcrdma_xprt, rx_xprt);
+
memset(rqst, 0, sizeof(*rqst));
- rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
+ rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
rpc_wake_up_next(&xprt->backlog);
}
@@ -618,9 +655,16 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
- rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_op_free(task, req);
+
+ if (!list_empty(&req->rl_registered))
+ frwr_unmap_sync(r_xprt, req);
+
+ /* XXX: If the RPC is completing because of a signal and
+ * not because a reply was received, we ought to ensure
+ * that the Send completion has fired, so that memory
+ * involved with the Send is not still visible to the NIC.
+ */
}
/**
@@ -667,7 +711,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xtime = ktime_get();
- __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -760,6 +803,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
.send_request = xprt_rdma_send_request,
.close = xprt_rdma_close,
.destroy = xprt_rdma_destroy,
+ .set_connect_timeout = xprt_rdma_tcp_set_connect_timeout,
.print_stats = xprt_rdma_print_stats,
.enable_swap = xprt_rdma_enable_swap,
.disable_swap = xprt_rdma_disable_swap,
@@ -767,6 +811,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup,
.bc_maxpayload = xprt_rdma_bc_maxpayload,
+ .bc_num_slots = xprt_rdma_bc_max_slots,
.bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy,
#endif
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 84bb379245406..805b1f35e1caa 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -89,14 +89,12 @@ static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
ib_drain_rq(ia->ri_id->qp);
- drain_workqueue(buf->rb_completion_wq);
/* Deferred Reply processing might have scheduled
* local invalidations.
@@ -901,7 +899,7 @@ out_emptyq:
* completions recently. This is a sign the Send Queue is
* backing up. Cause the caller to pause and try again.
*/
- set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
+ xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
r_xprt->rx_stats.empty_sendctx_q++;
return NULL;
}
@@ -936,10 +934,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
/* Paired with READ_ONCE */
smp_store_release(&buf->rb_sc_tail, next_tail);
- if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
- smp_mb__after_atomic();
- xprt_write_space(&sc->sc_xprt->rx_xprt);
- }
+ xprt_write_space(&sc->sc_xprt->rx_xprt);
}
static void
@@ -977,8 +972,6 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
r_xprt->rx_stats.mrs_allocated += count;
spin_unlock(&buf->rb_mrlock);
trace_xprtrdma_createmrs(r_xprt, count);
-
- xprt_write_space(&r_xprt->rx_xprt);
}
static void
@@ -990,6 +983,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
rx_buf);
rpcrdma_mrs_create(r_xprt);
+ xprt_write_space(&r_xprt->rx_xprt);
}
/**
@@ -1025,7 +1019,6 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
if (!req->rl_recvbuf)
goto out4;
- req->rl_buffer = buffer;
INIT_LIST_HEAD(&req->rl_registered);
spin_lock(&buffer->rb_lock);
list_add(&req->rl_all, &buffer->rb_allreqs);
@@ -1042,9 +1035,9 @@ out1:
return NULL;
}
-static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
+static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
+ bool temp)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -1055,27 +1048,22 @@ static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
DMA_FROM_DEVICE, GFP_KERNEL);
if (!rep->rr_rdmabuf)
goto out_free;
+
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
-
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
rep->rr_recv_wr.next = NULL;
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
-
- spin_lock(&buf->rb_lock);
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
- spin_unlock(&buf->rb_lock);
- return true;
+ return rep;
out_free:
kfree(rep);
out:
- return false;
+ return NULL;
}
/**
@@ -1089,7 +1077,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
- buf->rb_flags = 0;
buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mrlock);
@@ -1122,15 +1109,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
if (rc)
goto out;
- buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
- WQ_MEM_RECLAIM | WQ_HIGHPRI,
- 0,
- r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
- if (!buf->rb_completion_wq) {
- rc = -ENOMEM;
- goto out;
- }
-
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1204,11 +1182,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
cancel_delayed_work_sync(&buf->rb_refresh_worker);
- if (buf->rb_completion_wq) {
- destroy_workqueue(buf->rb_completion_wq);
- buf->rb_completion_wq = NULL;
- }
-
rpcrdma_sendctxs_destroy(buf);
while (!list_empty(&buf->rb_recv_bufs)) {
@@ -1325,13 +1298,12 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
/**
* rpcrdma_buffer_put - Put request/reply buffers back into pool
+ * @buffers: buffer pool
* @req: object to return
*
*/
-void
-rpcrdma_buffer_put(struct rpcrdma_req *req)
+void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
- struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
req->rl_reply = NULL;
@@ -1484,8 +1456,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
int rc;
- if (!ep->rep_send_count ||
- test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
send_wr->send_flags |= IB_SEND_SIGNALED;
ep->rep_send_count = ep->rep_send_batch;
} else {
@@ -1505,11 +1476,13 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
- struct ib_recv_wr *wr, *bad_wr;
+ struct ib_recv_wr *i, *wr, *bad_wr;
+ struct rpcrdma_rep *rep;
int needed, count, rc;
rc = 0;
count = 0;
+
needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (ep->rep_receive_count > needed)
goto out;
@@ -1517,51 +1490,65 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
- count = 0;
+ /* fast path: all needed reps can be found on the free list */
wr = NULL;
+ spin_lock(&buf->rb_lock);
while (needed) {
- struct rpcrdma_regbuf *rb;
- struct rpcrdma_rep *rep;
-
- spin_lock(&buf->rb_lock);
rep = list_first_entry_or_null(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
- if (likely(rep))
- list_del(&rep->rr_list);
- spin_unlock(&buf->rb_lock);
- if (!rep) {
- if (!rpcrdma_rep_create(r_xprt, temp))
- break;
- continue;
- }
+ if (!rep)
+ break;
- rb = rep->rr_rdmabuf;
- if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
- rpcrdma_recv_buffer_put(rep);
+ list_del(&rep->rr_list);
+ rep->rr_recv_wr.next = wr;
+ wr = &rep->rr_recv_wr;
+ --needed;
+ }
+ spin_unlock(&buf->rb_lock);
+
+ while (needed) {
+ rep = rpcrdma_rep_create(r_xprt, temp);
+ if (!rep)
break;
- }
- trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
- ++count;
--needed;
}
- if (!count)
+ if (!wr)
goto out;
+ for (i = wr; i; i = i->next) {
+ rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
+
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
+ goto release_wrs;
+
+ trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+ ++count;
+ }
+
rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
+out:
+ trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
- for (wr = bad_wr; wr; wr = wr->next) {
+ for (wr = bad_wr; wr;) {
struct rpcrdma_rep *rep;
rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
+ wr = wr->next;
rpcrdma_recv_buffer_put(rep);
--count;
}
}
ep->rep_receive_count += count;
-out:
- trace_xprtrdma_post_recvs(r_xprt, count, rc);
+ return;
+
+release_wrs:
+ for (i = wr; i;) {
+ rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
+ i = i->next;
+ rpcrdma_recv_buffer_put(rep);
+ }
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d1e0749bcbc49..92ce09fcea742 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -44,7 +44,8 @@
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
-#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/kref.h> /* struct kref */
#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
@@ -202,10 +203,9 @@ struct rpcrdma_rep {
bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
- struct work_struct rr_work;
+ struct rpc_rqst *rr_rqst;
struct xdr_buf rr_hdrbuf;
struct xdr_stream rr_stream;
- struct rpc_rqst *rr_rqst;
struct list_head rr_list;
struct ib_recv_wr rr_recv_wr;
};
@@ -240,18 +240,12 @@ struct rpcrdma_sendctx {
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*/
-enum rpcrdma_frwr_state {
- FRWR_IS_INVALID, /* ready to be used */
- FRWR_IS_VALID, /* in use */
- FRWR_FLUSHED_FR, /* flushed FASTREG WR */
- FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
-};
-
+struct rpcrdma_req;
struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
- enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
+ struct rpcrdma_req *fr_req;
union {
struct ib_reg_wr fr_regwr;
struct ib_send_wr fr_invwr;
@@ -326,7 +320,6 @@ struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
struct rpc_rqst rl_slot;
- struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf;
@@ -336,18 +329,12 @@ struct rpcrdma_req {
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
struct list_head rl_all;
- unsigned long rl_flags;
+ struct kref rl_kref;
struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
-/* rl_flags */
-enum {
- RPCRDMA_REQ_F_PENDING = 0,
- RPCRDMA_REQ_F_TX_RESOURCES,
-};
-
static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
@@ -391,22 +378,15 @@ struct rpcrdma_buffer {
struct list_head rb_recv_bufs;
struct list_head rb_allreqs;
- unsigned long rb_flags;
u32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests;
u32 rb_bc_max_requests;
- struct workqueue_struct *rb_completion_wq;
struct delayed_work rb_refresh_worker;
};
-/* rb_flags */
-enum {
- RPCRDMA_BUF_F_EMPTY_SCQ = 0,
-};
-
/*
* Statistics for RPCRDMA
*/
@@ -452,6 +432,7 @@ struct rpcrdma_xprt {
struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf;
struct delayed_work rx_connect_worker;
+ struct rpc_timeout rx_timeout;
struct rpcrdma_stats rx_stats;
};
@@ -518,7 +499,8 @@ rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
}
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
-void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
+ struct rpcrdma_req *req);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
@@ -564,6 +546,7 @@ rpcrdma_data_dir(bool writing)
/* Memory registration calls xprtrdma/frwr_ops.c
*/
bool frwr_is_supported(struct ib_device *device);
+void frwr_reset(struct rpcrdma_req *req);
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
void frwr_release_mr(struct rpcrdma_mr *mr);
@@ -574,8 +557,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr **mr);
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
-void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
- struct list_head *mrs);
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
+void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
@@ -598,9 +581,6 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
- struct rpcrdma_req *req);
-void rpcrdma_deferred_completion(struct work_struct *work);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
@@ -625,6 +605,7 @@ void xprt_rdma_cleanup(void);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
+unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 36652352a38c3..e2176c167a579 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -880,7 +880,7 @@ static int xs_nospace(struct rpc_rqst *req)
req->rq_slen);
/* Protect against races with write_space */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
@@ -890,7 +890,7 @@ static int xs_nospace(struct rpc_rqst *req)
} else
ret = -ENOTCONN;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
/* Race breaker in case memory is freed before above code is called */
if (ret == -EAGAIN) {
@@ -909,6 +909,7 @@ static int xs_nospace(struct rpc_rqst *req)
static void
xs_stream_prepare_request(struct rpc_rqst *req)
{
+ xdr_free_bvec(&req->rq_rcv_buf);
req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
}
@@ -1211,6 +1212,15 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
+ clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
+ clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
+}
+
+static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
+{
+ set_bit(nr, &transport->sock_state);
+ queue_work(xprtiod_workqueue, &transport->error_worker);
}
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
@@ -1231,6 +1241,7 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
*/
static void xs_error_report(struct sock *sk)
{
+ struct sock_xprt *transport;
struct rpc_xprt *xprt;
int err;
@@ -1238,13 +1249,14 @@ static void xs_error_report(struct sock *sk)
if (!(xprt = xprt_from_sock(sk)))
goto out;
+ transport = container_of(xprt, struct sock_xprt, xprt);
err = -sk->sk_err;
if (err == 0)
goto out;
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
- xprt_wake_pending_tasks(xprt, err);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
out:
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1333,6 +1345,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&transport->connect_worker);
xs_close(xprt);
cancel_work_sync(&transport->recv_worker);
+ cancel_work_sync(&transport->error_worker);
xs_xprt_free(xprt);
module_put(THIS_MODULE);
}
@@ -1386,9 +1399,9 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
}
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
xprt_complete_rqst(task, copied);
__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
@@ -1498,7 +1511,6 @@ static void xs_tcp_state_change(struct sock *sk)
trace_rpc_socket_state_change(xprt, sk->sk_socket);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
- spin_lock(&xprt->transport_lock);
if (!xprt_test_and_set_connected(xprt)) {
xprt->connect_cookie++;
clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
@@ -1507,9 +1519,8 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING);
}
- spin_unlock(&xprt->transport_lock);
break;
case TCP_FIN_WAIT1:
/* The client initiated a shutdown of the socket */
@@ -1525,7 +1536,7 @@ static void xs_tcp_state_change(struct sock *sk)
/* The server initiated a shutdown of the socket */
xprt->connect_cookie++;
clear_bit(XPRT_CONNECTED, &xprt->state);
- xs_tcp_force_close(xprt);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
/* fall through */
case TCP_CLOSING:
/*
@@ -1547,7 +1558,7 @@ static void xs_tcp_state_change(struct sock *sk)
xprt_clear_connecting(xprt);
clear_bit(XPRT_CLOSING, &xprt->state);
/* Trigger the socket release */
- xs_tcp_force_close(xprt);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1556,6 +1567,7 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
struct socket_wq *wq;
+ struct sock_xprt *transport;
struct rpc_xprt *xprt;
if (!sk->sk_socket)
@@ -1564,13 +1576,14 @@ static void xs_write_space(struct sock *sk)
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
+ transport = container_of(xprt, struct sock_xprt, xprt);
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
goto out;
- if (xprt_write_space(xprt))
- sk->sk_write_pending--;
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
+ sk->sk_write_pending--;
out:
rcu_read_unlock();
}
@@ -1664,9 +1677,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
*/
static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
static int xs_get_random_port(void)
@@ -2201,13 +2214,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
unsigned int opt_on = 1;
unsigned int timeo;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
keepcnt = xprt->timeout->to_retries + 1;
timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
(xprt->timeout->to_retries + 1);
clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
/* TCP Keepalive options */
kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2232,7 +2245,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
struct rpc_timeout to;
unsigned long initval;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
if (connect_timeout < xprt->connect_timeout) {
@@ -2249,7 +2262,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
xprt->connect_timeout = connect_timeout;
}
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -2402,25 +2415,6 @@ out:
xprt_wake_pending_tasks(xprt, status);
}
-static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
-{
- unsigned long start, now = jiffies;
-
- start = xprt->stat.connect_start + xprt->reestablish_timeout;
- if (time_after(start, now))
- return start - now;
- return 0;
-}
-
-static void xs_reconnect_backoff(struct rpc_xprt *xprt)
-{
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
- xprt->reestablish_timeout = xprt->max_reconnect_timeout;
- if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-}
-
/**
* xs_connect - connect a socket to a remote endpoint
* @xprt: pointer to transport structure
@@ -2450,8 +2444,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
/* Start by resetting any existing state */
xs_reset_transport(transport);
- delay = xs_reconnect_delay(xprt);
- xs_reconnect_backoff(xprt);
+ delay = xprt_reconnect_delay(xprt);
+ xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
} else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
@@ -2461,6 +2455,56 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
delay);
}
+static void xs_wake_disconnect(struct sock_xprt *transport)
+{
+ if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state))
+ xs_tcp_force_close(&transport->xprt);
+}
+
+static void xs_wake_write(struct sock_xprt *transport)
+{
+ if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state))
+ xprt_write_space(&transport->xprt);
+}
+
+static void xs_wake_error(struct sock_xprt *transport)
+{
+ int sockerr;
+ int sockerr_len = sizeof(sockerr);
+
+ if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
+ return;
+ mutex_lock(&transport->recv_mutex);
+ if (transport->sock == NULL)
+ goto out;
+ if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
+ goto out;
+ if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR,
+ (char *)&sockerr, &sockerr_len) != 0)
+ goto out;
+ if (sockerr < 0)
+ xprt_wake_pending_tasks(&transport->xprt, sockerr);
+out:
+ mutex_unlock(&transport->recv_mutex);
+}
+
+static void xs_wake_pending(struct sock_xprt *transport)
+{
+ if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state))
+ xprt_wake_pending_tasks(&transport->xprt, -EAGAIN);
+}
+
+static void xs_error_handle(struct work_struct *work)
+{
+ struct sock_xprt *transport = container_of(work,
+ struct sock_xprt, error_worker);
+
+ xs_wake_disconnect(transport);
+ xs_wake_write(transport);
+ xs_wake_error(transport);
+ xs_wake_pending(transport);
+}
+
/**
* xs_local_print_stats - display AF_LOCAL socket-specifc stats
* @xprt: rpc_xprt struct containing statistics
@@ -2745,6 +2789,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
#ifdef CONFIG_SUNRPC_BACKCHANNEL
.bc_setup = xprt_setup_bc,
.bc_maxpayload = xs_tcp_bc_maxpayload,
+ .bc_num_slots = xprt_bc_max_slots,
.bc_free_rqst = xprt_free_bc_rqst,
.bc_destroy = xprt_destroy_bc,
#endif
@@ -2873,6 +2918,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->timeout = &xs_local_default_timeout;
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
switch (sun->sun_family) {
@@ -2943,6 +2989,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
xprt->timeout = &xs_udp_default_timeout;
INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
switch (addr->sa_family) {
@@ -3024,6 +3071,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
(xprt->timeout->to_retries + 1);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
switch (addr->sa_family) {
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 9df82a573aa77..6159d327db76a 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -38,8 +38,6 @@
#include <linux/sysctl.h>
-static int zero;
-static int one = 1;
static struct ctl_table_header *tipc_ctl_hdr;
static struct ctl_table tipc_table[] = {
@@ -49,7 +47,7 @@ static struct ctl_table tipc_table[] = {
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "named_timeout",
@@ -57,7 +55,7 @@ static struct ctl_table tipc_table[] = {
.maxlen = sizeof(sysctl_tipc_named_timeout),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "sk_filter",
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index f345662890a6e..ca8ac96d22a96 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -476,7 +476,7 @@ static void tipc_topsrv_accept(struct work_struct *work)
}
}
-/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
* The queued job is launched into tipc_topsrv_accept()
*/
static void tipc_topsrv_listener_data_ready(struct sock *sk)
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 578cce4fbe6c4..67f8360dfcee7 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -166,7 +166,7 @@ config CFG80211_DEFAULT_PS
If this causes your applications to misbehave you should fix your
applications instead -- they need to register their network
- latency requirement, see Documentation/power/pm_qos_interface.txt.
+ latency requirement, see Documentation/power/pm_qos_interface.rst.
config CFG80211_DEBUGFS
bool "cfg80211 DebugFS entries"