Tail loss probe https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 Index: sys/netinet/tcp_input.c =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v retrieving revision 1.347 diff -d -u -r1.347 tcp_input.c --- sys/netinet/tcp_input.c 10 Jun 2016 13:31:44 -0000 1.347 +++ sys/netinet/tcp_input.c 25 Jul 2016 13:13:34 -0000 @@ -1998,6 +1998,11 @@ tp->ts_recent_age = tcp_now; tp->ts_recent = opti.ts_val; } + /* + * Cancel any pending Tail Loss Probe timer + */ + if (TCP_TIMER_ISARMED(tp, TCPT_TLP)) + TCP_TIMER_DISARM(tp, TCPT_TLP); if (tlen == 0) { /* Ack prediction. */ Index: sys/netinet/tcp_output.c =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_output.c,v retrieving revision 1.186 diff -d -u -r1.186 tcp_output.c --- sys/netinet/tcp_output.c 10 Jun 2016 13:27:16 -0000 1.186 +++ sys/netinet/tcp_output.c 25 Jul 2016 13:13:34 -0000 @@ -578,6 +578,11 @@ int sigoff = 0; #endif uint64_t *tcps; + uint64_t flightsize; + + /* disarm any pending TLP timer first; it is re-armed at the timer: label if a probe is still warranted */ + if (TCP_TIMER_ISARMED(tp, TCPT_TLP)) + TCP_TIMER_DISARM(tp, TCPT_TLP); #ifdef DIAGNOSTIC if (tp->t_inpcb && tp->t_in6pcb) @@ -676,6 +681,7 @@ } else { if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) { /* + * RFC5681 Section 4.1. * We have been idle for "a while" and no acks are * expected to clock out any data we send -- * slow start to get ack "clock" running again. 
@@ -1526,6 +1532,46 @@ } } +timer: + flightsize = (tp->snd_nxt - tp->sack_newdata) + + sack_bytes_rxmt; + if (tcp_tlp_enabled != 0 && + TCPS_HAVEESTABLISHED(tp->t_state) && + TCP_SACK_ENABLED(tp) && + len != 0 && + /* only if everything is sent or less than one segment of cwnd is left; no subtraction, so flightsize > cwnd cannot wrap */ + (off + len == so->so_snd.sb_cc || + tp->snd_cwnd < flightsize + tp->t_segsz) && + tp->snd_tlp_nxt != tp->snd_nxt) { + int pto; + /* + * Arm the timer for tail loss probe + * Draft dukkipati Section 2.1 + * FlightSize > 1: schedule PTO in max(2*SRTT, 10ms) + * tp->t_srtt >> 5 = hz expressed value, so >> 4 is 2*SRTT + */ + if (flightsize > tp->t_segsz) + pto = max(tp->t_srtt >> 4, mstohz(10)); + /* + * FlightSize == 1: + * schedule PTO in max(2*SRTT, 1.5*SRTT+WCDelAckT), WCDelAckT = 200ms. + */ + else + pto = max(tp->t_srtt >> 4, + 3 * (tp->t_srtt >> 6) + mstohz(200)); + /* + * If a pending RTO is earlier, schedule PTO in its place: + * PTO = min(RTO, PTO) + */ + if (pto >= TCP_REXMTVAL(tp) * hz / PR_SLOWHZ && + TCP_TIMER_ISARMED(tp, TCPT_REXMT)) { + TCP_TIMER_DISARM(tp, TCPT_REXMT); + pto = TCP_REXMTVAL(tp) * hz / PR_SLOWHZ; + } + tp->snd_tlp_nxt = tp->snd_nxt; + TCP_TIMER_ARM_HZ(tp, TCPT_TLP, pto > 0 ? pto : 1); + } + /* * Set retransmit timer if not currently set, * and not doing an ack or a keep-alive probe. @@ -1534,10 +1580,10 @@ * Initialize shift counter which is used for backoff * of retransmit time. 
*/ -timer: if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) { - if ((sack_rxmit && tp->snd_nxt != tp->snd_max) - || tp->snd_nxt != tp->snd_una) { + if (((sack_rxmit && tp->snd_nxt != tp->snd_max) + || tp->snd_nxt != tp->snd_una) && + TCP_TIMER_ISARMED(tp, TCPT_TLP) == 0) { if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) { TCP_TIMER_DISARM(tp, TCPT_PERSIST); tp->t_rxtshift = 0; Index: sys/netinet/tcp_seq.h =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_seq.h,v retrieving revision 1.17 diff -d -u -r1.17 tcp_seq.h --- sys/netinet/tcp_seq.h 25 Jul 2014 17:53:59 -0000 1.17 +++ sys/netinet/tcp_seq.h 25 Jul 2016 13:13:34 -0000 @@ -58,7 +58,8 @@ #define tcp_sendseqinit(tp) \ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \ - (tp)->snd_recover = (tp)->snd_high = (tp)->snd_fack = (tp)->iss + (tp)->snd_recover = (tp)->snd_high = (tp)->snd_fack = \ + (tp)->snd_tlp_nxt = (tp)->iss #define TCP_ISS_RANDOM_MASK 0x00ffffff /* bits of randomness in a TCP ISS */ #define TCP_ISSINCR 0x01000000 /* increment per time and per conn */ Index: sys/netinet/tcp_timer.c =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_timer.c,v retrieving revision 1.90 diff -d -u -r1.90 tcp_timer.c --- sys/netinet/tcp_timer.c 26 Apr 2016 08:44:44 -0000 1.90 +++ sys/netinet/tcp_timer.c 25 Jul 2016 13:13:34 -0000 @@ -149,6 +149,8 @@ int tcp_maxpersistidle = 0; /* max idle time in persist */ +u_int tcp_tlp_enabled = 0; /* tail loss probe enabled (nonzero = on) */ + /* * Time to delay the ACK. This is initialized in tcp_init(), unless * its patched. 
@@ -159,12 +161,14 @@ void tcp_timer_persist(void *); void tcp_timer_keep(void *); void tcp_timer_2msl(void *); +void tcp_timer_tlp(void *); const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = { tcp_timer_rexmt, tcp_timer_persist, tcp_timer_keep, tcp_timer_2msl, + tcp_timer_tlp, }; /* @@ -658,3 +662,61 @@ mutex_exit(softnet_lock); KERNEL_UNLOCK_ONE(NULL); } + +void +tcp_timer_tlp(void *arg) +{ + struct tcpcb *tp = arg; +#ifdef INET + struct inpcb *inp; +#endif +#ifdef INET6 + struct in6pcb *in6p; +#endif + struct socket *so = NULL; + + mutex_enter(softnet_lock); + if ((tp->t_flags & TF_DEAD) != 0 || + TCPS_HAVEESTABLISHED(tp->t_state) == 0) { + mutex_exit(softnet_lock); + return; + } + if (!callout_expired(&tp->t_timer[TCPT_TLP])) { + mutex_exit(softnet_lock); + return; + } + + /* Timer expired on a live, established connection: do a tail loss probe rxmit */ + KERNEL_LOCK(1, NULL); + /* probe only if snd_nxt is unchanged since the TLP timer was armed, or no rexmt timer is pending */ + if (tp->snd_tlp_nxt == tp->snd_nxt || + TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) { + + TCP_STATINC(TCP_STAT_TLP); + + tcp_free_sackholes(tp); + tp->snd_fack = tp->snd_una; + +#ifdef INET + inp = tp->t_inpcb; + if (inp) + so = inp->inp_socket; +#endif +#ifdef INET6 + in6p = tp->t_in6pcb; + if (in6p) + so = in6p->in6p_socket; +#endif + KASSERT(so != NULL); + /* everything in so_snd was already sent when the timer was armed: rewind snd_nxt to resend from snd_una */ + if (tp->snd_tlp_nxt - tp->snd_una == so->so_snd.sb_cc) + tp->snd_nxt = tp->snd_una; + + tp->t_force = 1; + (void) tcp_output(tp); + tp->t_force = 0; + } + + KERNEL_UNLOCK_ONE(NULL); + mutex_exit(softnet_lock); +} Index: sys/netinet/tcp_timer.h =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_timer.h,v retrieving revision 1.28 diff -d -u -r1.28 tcp_timer.h --- sys/netinet/tcp_timer.h 24 May 2011 18:37:52 -0000 1.28 +++ sys/netinet/tcp_timer.h 25 Jul 2016 13:13:34 -0000 @@ -69,12 +69,13 @@ * Definitions of the TCP timers. These timers are counted * down PR_SLOWHZ times a second. 
*/ -#define TCPT_NTIMERS 4 +#define TCPT_NTIMERS 5 #define TCPT_REXMT 0 /* retransmit */ #define TCPT_PERSIST 1 /* retransmit persistance */ #define TCPT_KEEP 2 /* keep alive */ #define TCPT_2MSL 3 /* 2*msl quiet time timer */ +#define TCPT_TLP 4 /* tail loss probe */ /* * The TCPT_REXMT timer is used to force retransmissions. @@ -110,6 +111,9 @@ * an ack segment in response from the peer. If, despite the TCPT_KEEP * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE * amount of time probing, then we drop the connection. + * + * The TCPT_TLP timer is used to trigger retransmissions on short streams + * that would otherwise have no chance to rexmit a segment before the RTO. */ /* @@ -141,7 +145,7 @@ #ifdef TCPTIMERS const char *tcptimers[] = - { "REXMT", "PERSIST", "KEEP", "2MSL" }; + { "REXMT", "PERSIST", "KEEP", "2MSL", "TLP" }; #endif /* @@ -159,6 +163,13 @@ callout_schedule(&(tp)->t_timer[(timer)], \ (nticks) * (hz / PR_SLOWHZ)) +/* + * Arm the timer for nticks hz units, without the (hz / PR_SLOWHZ) scaling applied by the macro above + */ +#define TCP_TIMER_ARM_HZ(tp, timer, nticks) \ + callout_schedule(&(tp)->t_timer[(timer)], \ + (nticks)) + #define TCP_TIMER_DISARM(tp, timer) \ callout_stop(&(tp)->t_timer[(timer)]) @@ -188,6 +199,7 @@ extern int tcp_maxpersistidle; /* max idle time in persist */ extern int tcp_ttl; /* time to live for TCP segs */ extern const int tcp_backoff[]; +extern u_int tcp_tlp_enabled; /* tail loss probe enabled */ void tcp_timer_init(void); #endif Index: sys/netinet/tcp_usrreq.c =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.212 diff -d -u -r1.212 tcp_usrreq.c --- sys/netinet/tcp_usrreq.c 26 Apr 2016 08:44:45 -0000 1.212 +++ sys/netinet/tcp_usrreq.c 25 Jul 2016 13:13:34 -0000 @@ -2184,6 +2184,13 @@ sysctl_tcp_keep, 0, &tcp_keepcnt, 0, CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "tlp", + 
SYSCTL_DESCR("Enable tail loss probe"), + NULL, 0, &tcp_tlp_enabled, 0, + CTL_NET, pf, IPPROTO_TCP, TCPCTL_TLP, CTL_EOL); + + sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, CTLTYPE_INT, "slowhz", SYSCTL_DESCR("Keepalive ticks per second"), Index: sys/netinet/tcp_var.h =================================================================== RCS file: /cvsroot/src/sys/netinet/tcp_var.h,v retrieving revision 1.177 diff -d -u -r1.177 tcp_var.h --- sys/netinet/tcp_var.h 14 Feb 2015 22:09:53 -0000 1.177 +++ sys/netinet/tcp_var.h 25 Jul 2016 13:13:34 -0000 @@ -235,6 +235,7 @@ tcp_seq snd_una; /* send unacknowledged */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ + tcp_seq snd_tlp_nxt; /* snd_nxt saved when TLP timer was last armed */ tcp_seq snd_wl1; /* window update seg seq number */ tcp_seq snd_wl2; /* window update seg ack number */ tcp_seq iss; /* initial send sequence number */ @@ -710,8 +711,9 @@ #define TCP_STAT_ECN_SHS 73 /* # of successful ECN handshakes */ #define TCP_STAT_ECN_CE 74 /* # of packets with CE bit */ #define TCP_STAT_ECN_ECT 75 /* # of packets with ECT(0) bit */ +#define TCP_STAT_TLP 76 /* # of tail loss probes sent */ -#define TCP_NSTATS 76 +#define TCP_NSTATS 77 /* * Names for TCP sysctl objects. 
@@ -754,7 +756,8 @@ #define TCPCTL_DEBX 32 /* # of tcp debug sockets */ #define TCPCTL_DROP 33 /* drop tcp connection */ #define TCPCTL_MSL 34 /* Max Segment Life */ -#define TCPCTL_MAXID 35 +#define TCPCTL_TLP 35 /* Tail Loss Probe enable */ +#define TCPCTL_MAXID 36 #define TCPCTL_NAMES { \ { 0, 0 }, \ Index: usr.bin/netstat/inet.c =================================================================== RCS file: /cvsroot/src/usr.bin/netstat/inet.c,v retrieving revision 1.106 diff -d -u -r1.106 inet.c --- usr.bin/netstat/inet.c 8 Feb 2015 15:09:45 -0000 1.106 +++ usr.bin/netstat/inet.c 25 Jul 2016 13:13:36 -0000 @@ -503,6 +503,7 @@ p(TCP_STAT_ECN_SHS, "\t%" PRIu64 " successful ECN handshake%s\n"); p(TCP_STAT_ECN_CE, "\t%" PRIu64 " packet%s with ECN CE bit\n"); p(TCP_STAT_ECN_ECT, "\t%" PRIu64 " packet%s ECN ECT(0) bit\n"); + p(TCP_STAT_TLP, "\t%" PRIu64 " tail loss probe%s sent\n"); #undef p #undef ps #undef p2