summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2018-05-16 16:40:13 -0700
committerDavid S. Miller <davem@davemloft.net>2018-05-17 15:41:29 -0400
commitd716bfdb10b4250617783c94253e48b0e85adcb1 (patch)
treee2622423fe783b7a7dfb81ea3a28b45dbb15ff3e
parent6ac06ecd3a5d1dd1aaea5c2a8f6d6e4c81d5de6a (diff)
downloadlinux-0-day-d716bfdb10b4250617783c94253e48b0e85adcb1.tar.gz
linux-0-day-d716bfdb10b4250617783c94253e48b0e85adcb1.tar.xz
tcp: account lost retransmit after timeout
The previous approach for the lost and retransmit bits was to wipe the slate clean: zero all the lost and retransmit bits, correspondingly zero the lost_out and retrans_out counters, and then add back the lost bits (and correspondingly increment lost_out). The new approach is to treat this very much like marking packets lost in fast recovery. We don’t wipe the slate clean. We just say that for all packets that were not yet marked sacked or lost, we now mark them as lost in exactly the same way we do for fast recovery. This fixes the lost retransmit accounting at RTO time and greatly simplifies the RTO code by sharing much of the logic with Fast Recovery. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com> Reviewed-by: Priyaranjan Jha <priyarjha@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/tcp.h1
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_recovery.c4
3 files changed, 6 insertions, 17 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9a3ce379b9944..2b5372ef2e0eb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1877,6 +1877,7 @@ void tcp_v4_init(void);
void tcp_init(void);
/* tcp_recovery.c */
+void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb);
void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 076206873e3e1..6fb0a28977a07 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1929,7 +1929,6 @@ void tcp_enter_loss(struct sock *sk)
struct sk_buff *skb;
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
- bool mark_lost;
/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
@@ -1945,9 +1944,6 @@ void tcp_enter_loss(struct sock *sk)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
- tp->retrans_out = 0;
- tp->lost_out = 0;
-
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
@@ -1959,21 +1955,13 @@ void tcp_enter_loss(struct sock *sk)
/* Mark SACK reneging until we recover from this loss event. */
tp->is_sack_reneg = 1;
}
- tcp_clear_all_retrans_hints(tp);
-
skb_rbtree_walk_from(skb) {
- mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
- is_reneg);
- if (mark_lost)
- tcp_sum_lost(tp, skb);
- TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
- if (mark_lost) {
+ if (is_reneg)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
- TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out += tcp_skb_pcount(skb);
- }
+ tcp_mark_skb_lost(sk, skb);
}
tcp_verify_left_out(tp);
+ tcp_clear_all_retrans_hints(tp);
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 299b0e38aa9ad..b2f9be388bf33 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -2,7 +2,7 @@
#include <linux/tcp.h>
#include <net/tcp.h>
-static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
+void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -95,7 +95,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
remaining = tp->rack.rtt_us + reo_wnd -
tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
if (remaining <= 0) {
- tcp_rack_mark_skb_lost(sk, skb);
+ tcp_mark_skb_lost(sk, skb);
list_del_init(&skb->tcp_tsorted_anchor);
} else {
/* Record maximum wait time */