www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - net/ipv4/tcp_input.c
[TCP]: tcp_simple_retransmit can cause S+L
[linux-2.6-omap-h63xx.git] / net / ipv4 / tcp_input.c
index 18e099c6fa62b627c0ab0aef93c5da22a2eb22b2..b4812c3cbbcff574e8c001345b83f69b83ecf7d7 100644 (file)
@@ -1367,7 +1367,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
  * a normal way
  */
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
-                                       u32 skip_to_seq)
+                                       u32 skip_to_seq, int *fack_count)
 {
        tcp_for_write_queue_from(skb, sk) {
                if (skb == tcp_send_head(sk))
@@ -1375,6 +1375,8 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 
                if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
                        break;
+
+               *fack_count += tcp_skb_pcount(skb);
        }
        return skb;
 }
@@ -1390,7 +1392,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
                return skb;
 
        if (before(next_dup->start_seq, skip_to_seq)) {
-               skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+               skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
                tcp_sacktag_walk(skb, sk, NULL,
                                 next_dup->start_seq, next_dup->end_seq,
                                 1, fack_count, reord, flag);
@@ -1537,7 +1539,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 
                        /* Head todo? */
                        if (before(start_seq, cache->start_seq)) {
-                               skb = tcp_sacktag_skip(skb, sk, start_seq);
+                               skb = tcp_sacktag_skip(skb, sk, start_seq,
+                                                      &fack_count);
                                skb = tcp_sacktag_walk(skb, sk, next_dup,
                                                       start_seq,
                                                       cache->start_seq,
@@ -1565,7 +1568,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
                                goto walk;
                        }
 
-                       skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+                       skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
+                                              &fack_count);
                        /* Check overlap against next cached too (past this one already) */
                        cache++;
                        continue;
@@ -1577,7 +1581,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
                                break;
                        fack_count = tp->fackets_out;
                }
-               skb = tcp_sacktag_skip(skb, sk, start_seq);
+               skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
 
 walk:
                skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
@@ -1621,13 +1625,11 @@ out:
        return flag;
 }
 
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that its sum with lost_out is never larger than
+ * packets_out. Returns zero if no sacked_out adjustment was necessary.
  */
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
 {
-       struct tcp_sock *tp = tcp_sk(sk);
        u32 holes;
 
        holes = max(tp->lost_out, 1U);
@@ -1635,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
        if ((tp->sacked_out + holes) > tp->packets_out) {
                tp->sacked_out = tp->packets_out - holes;
-               tcp_update_reordering(sk, tp->packets_out + addend, 0);
+               return 1;
        }
+       return 0;
+}
+
+/* If we receive more dupacks than we expected when counting segments
+ * under the assumption of no reordering, interpret this as reordering.
+ * The only other reason could be a bug in the receiver's TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       if (tcp_limit_reno_sacked(tp))
+               tcp_update_reordering(sk, tp->packets_out + addend, 0);
 }
 
 /* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -2130,11 +2144,13 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
-       int cnt;
+       int cnt, oldcnt;
+       int err;
+       unsigned int mss;
 
        BUG_TRAP(packets <= tp->packets_out);
        if (tp->lost_skb_hint) {
@@ -2153,13 +2169,25 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
                tp->lost_skb_hint = skb;
                tp->lost_cnt_hint = cnt;
 
-               if (tcp_is_fack(tp) ||
+               if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+                       break;
+
+               oldcnt = cnt;
+               if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
                    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                        cnt += tcp_skb_pcount(skb);
 
-               if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
-                   after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
-                       break;
+               if (cnt > packets) {
+                       if (tcp_is_sack(tp) || (oldcnt >= packets))
+                               break;
+
+                       mss = skb_shinfo(skb)->gso_size;
+                       err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+                       if (err < 0)
+                               break;
+                       cnt = packets;
+               }
+
                if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out += tcp_skb_pcount(skb);
@@ -2176,17 +2204,17 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
        struct tcp_sock *tp = tcp_sk(sk);
 
        if (tcp_is_reno(tp)) {
-               tcp_mark_head_lost(sk, 1, fast_rexmit);
+               tcp_mark_head_lost(sk, 1);
        } else if (tcp_is_fack(tp)) {
                int lost = tp->fackets_out - tp->reordering;
                if (lost <= 0)
                        lost = 1;
-               tcp_mark_head_lost(sk, lost, fast_rexmit);
+               tcp_mark_head_lost(sk, lost);
        } else {
                int sacked_upto = tp->sacked_out - tp->reordering;
-               if (sacked_upto < 0)
-                       sacked_upto = 0;
-               tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
+               if (sacked_upto < fast_rexmit)
+                       sacked_upto = fast_rexmit;
+               tcp_mark_head_lost(sk, sacked_upto);
        }
 
        /* New heuristics: it is possible only after we switched
@@ -2520,7 +2548,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
            before(tp->snd_una, tp->high_seq) &&
            icsk->icsk_ca_state != TCP_CA_Open &&
            tp->fackets_out > tp->reordering) {
-               tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
+               tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
                NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
        }
 
@@ -2582,6 +2610,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
        case TCP_CA_Loss:
                if (flag & FLAG_DATA_ACKED)
                        icsk->icsk_retransmits = 0;
+               if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+                       tcp_reset_reno_sack(tp);
                if (!tcp_try_undo_loss(sk)) {
                        tcp_moderate_cwnd(tp);
                        tcp_xmit_retransmit_queue(sk);
@@ -3278,81 +3308,80 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
                int opsize;
 
                switch (opcode) {
-                       case TCPOPT_EOL:
+               case TCPOPT_EOL:
+                       return;
+               case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
+                       length--;
+                       continue;
+               default:
+                       opsize = *ptr++;
+                       if (opsize < 2) /* "silly options" */
                                return;
-                       case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
-                               length--;
-                               continue;
-                       default:
-                               opsize=*ptr++;
-                               if (opsize < 2) /* "silly options" */
-                                       return;
-                               if (opsize > length)
-                                       return; /* don't parse partial options */
-                               switch (opcode) {
-                               case TCPOPT_MSS:
-                                       if (opsize==TCPOLEN_MSS && th->syn && !estab) {
-                                               u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
-                                               if (in_mss) {
-                                                       if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
-                                                               in_mss = opt_rx->user_mss;
-                                                       opt_rx->mss_clamp = in_mss;
-                                               }
-                                       }
-                                       break;
-                               case TCPOPT_WINDOW:
-                                       if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
-                                               if (sysctl_tcp_window_scaling) {
-                                                       __u8 snd_wscale = *(__u8 *) ptr;
-                                                       opt_rx->wscale_ok = 1;
-                                                       if (snd_wscale > 14) {
-                                                               if (net_ratelimit())
-                                                                       printk(KERN_INFO "tcp_parse_options: Illegal window "
-                                                                              "scaling value %d >14 received.\n",
-                                                                              snd_wscale);
-                                                               snd_wscale = 14;
-                                                       }
-                                                       opt_rx->snd_wscale = snd_wscale;
-                                               }
-                                       break;
-                               case TCPOPT_TIMESTAMP:
-                                       if (opsize==TCPOLEN_TIMESTAMP) {
-                                               if ((estab && opt_rx->tstamp_ok) ||
-                                                   (!estab && sysctl_tcp_timestamps)) {
-                                                       opt_rx->saw_tstamp = 1;
-                                                       opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
-                                                       opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
-                                               }
+                       if (opsize > length)
+                               return; /* don't parse partial options */
+                       switch (opcode) {
+                       case TCPOPT_MSS:
+                               if (opsize == TCPOLEN_MSS && th->syn && !estab) {
+                                       u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+                                       if (in_mss) {
+                                               if (opt_rx->user_mss &&
+                                                   opt_rx->user_mss < in_mss)
+                                                       in_mss = opt_rx->user_mss;
+                                               opt_rx->mss_clamp = in_mss;
                                        }
-                                       break;
-                               case TCPOPT_SACK_PERM:
-                                       if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
-                                               if (sysctl_tcp_sack) {
-                                                       opt_rx->sack_ok = 1;
-                                                       tcp_sack_reset(opt_rx);
-                                               }
+                               }
+                               break;
+                       case TCPOPT_WINDOW:
+                               if (opsize == TCPOLEN_WINDOW && th->syn &&
+                                   !estab && sysctl_tcp_window_scaling) {
+                                       __u8 snd_wscale = *(__u8 *)ptr;
+                                       opt_rx->wscale_ok = 1;
+                                       if (snd_wscale > 14) {
+                                               if (net_ratelimit())
+                                                       printk(KERN_INFO "tcp_parse_options: Illegal window "
+                                                              "scaling value %d >14 received.\n",
+                                                              snd_wscale);
+                                               snd_wscale = 14;
                                        }
-                                       break;
+                                       opt_rx->snd_wscale = snd_wscale;
+                               }
+                               break;
+                       case TCPOPT_TIMESTAMP:
+                               if ((opsize == TCPOLEN_TIMESTAMP) &&
+                                   ((estab && opt_rx->tstamp_ok) ||
+                                    (!estab && sysctl_tcp_timestamps))) {
+                                       opt_rx->saw_tstamp = 1;
+                                       opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+                                       opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+                               }
+                               break;
+                       case TCPOPT_SACK_PERM:
+                               if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+                                   !estab && sysctl_tcp_sack) {
+                                       opt_rx->sack_ok = 1;
+                                       tcp_sack_reset(opt_rx);
+                               }
+                               break;
 
-                               case TCPOPT_SACK:
-                                       if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
-                                          !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
-                                          opt_rx->sack_ok) {
-                                               TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
-                                       }
-                                       break;
+                       case TCPOPT_SACK:
+                               if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+                                  !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+                                  opt_rx->sack_ok) {
+                                       TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+                               }
+                               break;
 #ifdef CONFIG_TCP_MD5SIG
-                               case TCPOPT_MD5SIG:
-                                       /*
-                                        * The MD5 Hash has already been
-                                        * checked (see tcp_v{4,6}_do_rcv()).
-                                        */
-                                       break;
+                       case TCPOPT_MD5SIG:
+                               /*
+                                * The MD5 Hash has already been
+                                * checked (see tcp_v{4,6}_do_rcv()).
+                                */
+                               break;
 #endif
-                               }
+                       }
 
-                               ptr+=opsize-2;
-                               length-=opsize;
+                       ptr += opsize-2;
+                       length -= opsize;
                }
        }
 }