summaryrefslogtreecommitdiffstats
path: root/net/smc/smc_tx.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc/smc_tx.c')
-rw-r--r--net/smc/smc_tx.c111
1 files changed, 74 insertions, 37 deletions
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 72f004c9c9b13..cee6664007524 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -19,6 +19,7 @@
#include <linux/sched/signal.h>
#include <net/sock.h>
+#include <net/tcp.h>
#include "smc.h"
#include "smc_wr.h"
@@ -26,11 +27,12 @@
#include "smc_tx.h"
#define SMC_TX_WORK_DELAY HZ
+#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
/***************************** sndbuf producer *******************************/
/* callback implementation for sk.sk_write_space()
- * to wakeup sndbuf producers that blocked with smc_tx_wait_memory().
+ * to wakeup sndbuf producers that blocked with smc_tx_wait().
* called under sk_socket lock.
*/
static void smc_tx_write_space(struct sock *sk)
@@ -54,7 +56,7 @@ static void smc_tx_write_space(struct sock *sk)
}
}
-/* Wakeup sndbuf producers that blocked with smc_tx_wait_memory().
+/* Wakeup sndbuf producers that blocked with smc_tx_wait().
* Cf. tcp_data_snd_check()=>tcp_check_space()=>tcp_new_space().
*/
void smc_tx_sndbuf_nonfull(struct smc_sock *smc)
@@ -64,8 +66,10 @@ void smc_tx_sndbuf_nonfull(struct smc_sock *smc)
smc->sk.sk_write_space(&smc->sk);
}
-/* blocks sndbuf producer until at least one byte of free space available */
-static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
+/* blocks sndbuf producer until at least one byte of free space available
+ * or urgent Byte was consumed
+ */
+static int smc_tx_wait(struct smc_sock *smc, int flags)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
@@ -101,20 +105,28 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
break;
}
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- if (atomic_read(&conn->sndbuf_space))
- break; /* at least 1 byte of free space available */
+ if (atomic_read(&conn->sndbuf_space) && !conn->urg_tx_pend)
+ break; /* at least 1 byte of free & no urgent data */
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
smc_cdc_rxed_any_close(conn) ||
- atomic_read(&conn->sndbuf_space),
+ (atomic_read(&conn->sndbuf_space) &&
+ !conn->urg_tx_pend),
&wait);
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
+static bool smc_tx_is_corked(struct smc_sock *smc)
+{
+ struct tcp_sock *tp = tcp_sk(smc->clcsock->sk);
+
+ return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
+}
+
/* sndbuf producer: main API called by socket layer.
* called under sock lock.
*/
@@ -148,8 +160,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
if (smc_cdc_rxed_any_close(conn))
return send_done ?: -ECONNRESET;
- if (!atomic_read(&conn->sndbuf_space)) {
- rc = smc_tx_wait_memory(smc, msg->msg_flags);
+ if (msg->msg_flags & MSG_OOB)
+ conn->local_tx_ctrl.prod_flags.urg_data_pending = 1;
+
+ if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) {
+ rc = smc_tx_wait(smc, msg->msg_flags);
if (rc) {
if (send_done)
return send_done;
@@ -159,7 +174,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
}
/* initialize variables for 1st iteration of subsequent loop */
- /* could be just 1 byte, even after smc_tx_wait_memory above */
+ /* could be just 1 byte, even after smc_tx_wait above */
writespace = atomic_read(&conn->sndbuf_space);
/* not more than what user space asked for */
copylen = min_t(size_t, send_remaining, writespace);
@@ -171,8 +186,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
tx_cnt_prep = prep.count;
/* determine chunks where to write into sndbuf */
/* either unwrapped case, or 1st chunk of wrapped case */
- chunk_len = min_t(size_t,
- copylen, conn->sndbuf_size - tx_cnt_prep);
+ chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len -
+ tx_cnt_prep);
chunk_len_sum = chunk_len;
chunk_off = tx_cnt_prep;
smc_sndbuf_sync_sg_for_cpu(conn);
@@ -197,19 +212,30 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
}
smc_sndbuf_sync_sg_for_device(conn);
/* update cursors */
- smc_curs_add(conn->sndbuf_size, &prep, copylen);
+ smc_curs_add(conn->sndbuf_desc->len, &prep, copylen);
smc_curs_write(&conn->tx_curs_prep,
smc_curs_read(&prep, conn),
conn);
/* increased in send tasklet smc_cdc_tx_handler() */
smp_mb__before_atomic();
atomic_sub(copylen, &conn->sndbuf_space);
- /* guarantee 0 <= sndbuf_space <= sndbuf_size */
+ /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic();
/* since we just produced more new data into sndbuf,
* trigger sndbuf consumer: RDMA write into peer RMBE and CDC
*/
- smc_tx_sndbuf_nonempty(conn);
+ if ((msg->msg_flags & MSG_OOB) && !send_remaining)
+ conn->urg_tx_pend = true;
+ if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
+ (atomic_read(&conn->sndbuf_space) >
+ (conn->sndbuf_desc->len >> 1)))
+ /* for a corked socket defer the RDMA writes if there
+ * is still sufficient sndbuf_space available
+ */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_CORK_DELAY);
+ else
+ smc_tx_sndbuf_nonempty(conn);
} /* while (msg_data_left(msg)) */
return send_done;
@@ -243,7 +269,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
rdma_wr.remote_addr =
lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
/* RMBE within RMB */
- ((conn->peer_conn_idx - 1) * conn->peer_rmbe_size) +
+ conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
@@ -268,7 +294,7 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
atomic_sub(len, &conn->peer_rmbe_space);
/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
smp_mb__after_atomic();
- smc_curs_add(conn->sndbuf_size, sent, len);
+ smc_curs_add(conn->sndbuf_desc->len, sent, len);
}
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
@@ -281,6 +307,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
union smc_host_cursor sent, prep, prod, cons;
struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
struct smc_link_group *lgr = conn->lgr;
+ struct smc_cdc_producer_flags *pflags;
int to_send, rmbespace;
struct smc_link *link;
dma_addr_t dma_addr;
@@ -291,7 +318,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
/* cf. wmem_alloc - (snd_max - snd_una) */
- to_send = smc_curs_diff(conn->sndbuf_size, &sent, &prep);
+ to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
if (to_send <= 0)
return 0;
@@ -308,7 +335,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
conn);
/* if usable snd_wnd closes ask peer to advertise once it opens again */
- conn->local_tx_ctrl.prod_flags.write_blocked = (to_send >= rmbespace);
+ pflags = &conn->local_tx_ctrl.prod_flags;
+ pflags->write_blocked = (to_send >= rmbespace);
/* cf. usable snd_wnd */
len = min(to_send, rmbespace);
@@ -333,12 +361,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
dst_len_sum = dst_len;
src_off = sent.count;
/* dst_len determines the maximum src_len */
- if (sent.count + dst_len <= conn->sndbuf_size) {
+ if (sent.count + dst_len <= conn->sndbuf_desc->len) {
/* unwrapped src case: single chunk of entire dst_len */
src_len = dst_len;
} else {
/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
- src_len = conn->sndbuf_size - sent.count;
+ src_len = conn->sndbuf_desc->len - sent.count;
}
src_len_sum = src_len;
dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
@@ -350,8 +378,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
src_off += src_len;
- if (src_off >= conn->sndbuf_size)
- src_off -= conn->sndbuf_size;
+ if (src_off >= conn->sndbuf_desc->len)
+ src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
@@ -369,10 +397,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
src_len = min_t(int,
- dst_len, conn->sndbuf_size - sent.count);
+ dst_len, conn->sndbuf_desc->len - sent.count);
src_len_sum = src_len;
}
+ if (conn->urg_tx_pend && len == to_send)
+ pflags->urg_data_present = 1;
smc_tx_advance_cursors(conn, &prod, &sent, len);
/* update connection's cursors with advanced local cursors */
smc_curs_write(&conn->local_tx_ctrl.prod,
@@ -392,6 +422,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
*/
int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
+ struct smc_cdc_producer_flags *pflags;
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
int rc;
@@ -409,20 +440,27 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
}
rc = 0;
if (conn->alert_token_local) /* connection healthy */
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ mod_delayed_work(system_wq, &conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
- rc = smc_tx_rdma_writes(conn);
- if (rc) {
- smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
- (struct smc_wr_tx_pend_priv *)pend);
- goto out_unlock;
+ if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
+ rc = smc_tx_rdma_writes(conn);
+ if (rc) {
+ smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
+ (struct smc_wr_tx_pend_priv *)pend);
+ goto out_unlock;
+ }
}
rc = smc_cdc_msg_send(conn, wr_buf, pend);
+ pflags = &conn->local_tx_ctrl.prod_flags;
+ if (!rc && pflags->urg_data_present) {
+ pflags->urg_data_pending = 0;
+ pflags->urg_data_present = 0;
+ }
out_unlock:
spin_unlock_bh(&conn->send_lock);
@@ -432,7 +470,7 @@ out_unlock:
/* Wakeup sndbuf consumers from process context
* since there is more data to transmit
*/
-static void smc_tx_work(struct work_struct *work)
+void smc_tx_work(struct work_struct *work)
{
struct smc_connection *conn = container_of(to_delayed_work(work),
struct smc_connection,
@@ -455,7 +493,7 @@ out:
release_sock(&smc->sk);
}
-void smc_tx_consumer_update(struct smc_connection *conn)
+void smc_tx_consumer_update(struct smc_connection *conn, bool force)
{
union smc_host_cursor cfed, cons;
int to_confirm;
@@ -466,11 +504,12 @@ void smc_tx_consumer_update(struct smc_connection *conn)
smc_curs_write(&cfed,
smc_curs_read(&conn->rx_curs_confirmed, conn),
conn);
- to_confirm = smc_curs_diff(conn->rmbe_size, &cfed, &cons);
+ to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
+ force ||
((to_confirm > conn->rmbe_update_limit) &&
- ((to_confirm > (conn->rmbe_size / 2)) ||
+ ((to_confirm > (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
@@ -494,6 +533,4 @@ void smc_tx_consumer_update(struct smc_connection *conn)
void smc_tx_init(struct smc_sock *smc)
{
smc->sk.sk_write_space = smc_tx_write_space;
- INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
- spin_lock_init(&smc->conn.send_lock);
}