* a normal way
*/
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
- u32 skip_to_seq)
+ u32 skip_to_seq, int *fack_count)
{
tcp_for_write_queue_from(skb, sk) {
if (skb == tcp_send_head(sk))
if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
break;
+
+ *fack_count += tcp_skb_pcount(skb);
}
return skb;
}
return skb;
if (before(next_dup->start_seq, skip_to_seq)) {
- skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+ skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
tcp_sacktag_walk(skb, sk, NULL,
next_dup->start_seq, next_dup->end_seq,
1, fack_count, reord, flag);
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, start_seq,
+ &fack_count);
skb = tcp_sacktag_walk(skb, sk, next_dup,
start_seq,
cache->start_seq,
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+ skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
+ &fack_count);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
break;
fack_count = tp->fackets_out;
}
- skb = tcp_sacktag_skip(skb, sk, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
walk:
skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
return flag;
}
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that sum with lost_out isn't ever larger than
+ * packets_out. Returns zero if sacked_out adjustment wasn't necessary.
*/
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
{
- struct tcp_sock *tp = tcp_sk(sk);
u32 holes;
holes = max(tp->lost_out, 1U);
if ((tp->sacked_out + holes) > tp->packets_out) {
tp->sacked_out = tp->packets_out - holes;
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+ return 1;
}
+ return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only other reason could be a bug in the receiver's TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ if (tcp_limit_reno_sacked(tp))
+ tcp_update_reordering(sk, tp->packets_out + addend, 0);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
* is against sacked "cnt", otherwise it's against facked "cnt"
*/
-static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int cnt;
+ int cnt, oldcnt;
+ int err;
+ unsigned int mss;
BUG_TRAP(packets <= tp->packets_out);
if (tp->lost_skb_hint) {
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
- if (tcp_is_fack(tp) ||
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+ break;
+
+ oldcnt = cnt;
+ if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
- if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
- after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
- break;
+ if (cnt > packets) {
+ if (tcp_is_sack(tp) || (oldcnt >= packets))
+ break;
+
+ mss = skb_shinfo(skb)->gso_size;
+ err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+ if (err < 0)
+ break;
+ cnt = packets;
+ }
+
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_is_reno(tp)) {
- tcp_mark_head_lost(sk, 1, fast_rexmit);
+ tcp_mark_head_lost(sk, 1);
} else if (tcp_is_fack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, lost, fast_rexmit);
+ tcp_mark_head_lost(sk, lost);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
- if (sacked_upto < 0)
- sacked_upto = 0;
- tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
+ if (sacked_upto < fast_rexmit)
+ sacked_upto = fast_rexmit;
+ tcp_mark_head_lost(sk, sacked_upto);
}
/* New heuristics: it is possible only after we switched
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
+ tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
case TCP_CA_Loss:
if (flag & FLAG_DATA_ACKED)
icsk->icsk_retransmits = 0;
+ if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
if (!tcp_try_undo_loss(sk)) {
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
int opsize;
switch (opcode) {
- case TCPOPT_EOL:
+ case TCPOPT_EOL:
+ return;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ opsize = *ptr++;
+ if (opsize < 2) /* "silly options" */
return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- return; /* don't parse partial options */
- switch (opcode) {
- case TCPOPT_MSS:
- if (opsize==TCPOLEN_MSS && th->syn && !estab) {
- u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
- if (in_mss) {
- if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
- in_mss = opt_rx->user_mss;
- opt_rx->mss_clamp = in_mss;
- }
- }
- break;
- case TCPOPT_WINDOW:
- if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
- if (sysctl_tcp_window_scaling) {
- __u8 snd_wscale = *(__u8 *) ptr;
- opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
- if (net_ratelimit())
- printk(KERN_INFO "tcp_parse_options: Illegal window "
- "scaling value %d >14 received.\n",
- snd_wscale);
- snd_wscale = 14;
- }
- opt_rx->snd_wscale = snd_wscale;
- }
- break;
- case TCPOPT_TIMESTAMP:
- if (opsize==TCPOLEN_TIMESTAMP) {
- if ((estab && opt_rx->tstamp_ok) ||
- (!estab && sysctl_tcp_timestamps)) {
- opt_rx->saw_tstamp = 1;
- opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
- opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
- }
+ if (opsize > length)
+ return; /* don't parse partial options */
+ switch (opcode) {
+ case TCPOPT_MSS:
+ if (opsize == TCPOLEN_MSS && th->syn && !estab) {
+ u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+ if (in_mss) {
+ if (opt_rx->user_mss &&
+ opt_rx->user_mss < in_mss)
+ in_mss = opt_rx->user_mss;
+ opt_rx->mss_clamp = in_mss;
}
- break;
- case TCPOPT_SACK_PERM:
- if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
- if (sysctl_tcp_sack) {
- opt_rx->sack_ok = 1;
- tcp_sack_reset(opt_rx);
- }
+ }
+ break;
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW && th->syn &&
+ !estab && sysctl_tcp_window_scaling) {
+ __u8 snd_wscale = *(__u8 *)ptr;
+ opt_rx->wscale_ok = 1;
+ if (snd_wscale > 14) {
+ if (net_ratelimit())
+ printk(KERN_INFO "tcp_parse_options: Illegal window "
+ "scaling value %d >14 received.\n",
+ snd_wscale);
+ snd_wscale = 14;
}
- break;
+ opt_rx->snd_wscale = snd_wscale;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ if ((opsize == TCPOLEN_TIMESTAMP) &&
+ ((estab && opt_rx->tstamp_ok) ||
+ (!estab && sysctl_tcp_timestamps))) {
+ opt_rx->saw_tstamp = 1;
+ opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+ opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+ !estab && sysctl_tcp_sack) {
+ opt_rx->sack_ok = 1;
+ tcp_sack_reset(opt_rx);
+ }
+ break;
- case TCPOPT_SACK:
- if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
- !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
- opt_rx->sack_ok) {
- TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
- }
- break;
+ case TCPOPT_SACK:
+ if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+ !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+ opt_rx->sack_ok) {
+ TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+ }
+ break;
#ifdef CONFIG_TCP_MD5SIG
- case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
- */
- break;
+ case TCPOPT_MD5SIG:
+ /*
+ * The MD5 Hash has already been
+ * checked (see tcp_v{4,6}_do_rcv()).
+ */
+ break;
#endif
- }
+ }
- ptr+=opsize-2;
- length-=opsize;
+ ptr += opsize-2;
+ length -= opsize;
}
}
}