]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - net/ipv4/tcp_input.c
tcp: don't clear lost_skb_hint when not necessary
[linux-2.6-omap-h63xx.git] / net / ipv4 / tcp_input.c
index 67ccce2a96bd0d7d506bc7bca08a7ac29c38fea6..85627f83665fa23b40aca36c600c827b3f462dc5 100644 (file)
@@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
        }
 }
 
+/* This must be called before lost_out is incremented */
+static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
+{
+       if ((tp->retransmit_skb_hint == NULL) ||
+           before(TCP_SKB_CB(skb)->seq,
+                  TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+               tp->retransmit_skb_hint = skb;
+
+       if (!tp->lost_out ||
+           after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
+               tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
+}
+
+static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
+{
+       if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+               tcp_verify_retransmit_hint(tp, skb);
+
+               tp->lost_out += tcp_skb_pcount(skb);
+               TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+       }
+}
+
+void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
+{
+       tcp_verify_retransmit_hint(tp, skb);
+
+       if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+               tp->lost_out += tcp_skb_pcount(skb);
+               TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+       }
+}
+
 /* This procedure tags the retransmission queue when SACKs arrive.
  *
  * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                        tp->retrans_out -= tcp_skb_pcount(skb);
 
-                       /* clear lost hint */
-                       tp->retransmit_skb_hint = NULL;
-
-                       if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
-                               tp->lost_out += tcp_skb_pcount(skb);
-                               TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                       }
+                       tcp_skb_mark_lost_uncond_verify(tp, skb);
                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
                } else {
                        if (before(ack_seq, new_low_seq))
@@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
                                        ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
                                tp->lost_out -= tcp_skb_pcount(skb);
                                tp->retrans_out -= tcp_skb_pcount(skb);
-
-                               /* clear lost hint */
-                               tp->retransmit_skb_hint = NULL;
                        }
                } else {
                        if (!(sacked & TCPCB_RETRANS)) {
@@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
                        if (sacked & TCPCB_LOST) {
                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                                tp->lost_out -= tcp_skb_pcount(skb);
-
-                               /* clear lost hint */
-                               tp->retransmit_skb_hint = NULL;
                        }
                }
 
@@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
        if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                tp->retrans_out -= tcp_skb_pcount(skb);
-               tp->retransmit_skb_hint = NULL;
        }
 
        return flag;
@@ -1867,6 +1887,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
                if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out += tcp_skb_pcount(skb);
+                       tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
                }
        }
        tcp_verify_left_out(tp);
@@ -1883,7 +1904,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
        tp->high_seq = tp->snd_nxt;
        TCP_ECN_queue_cwr(tp);
 
-       tcp_clear_retrans_hints_partial(tp);
+       tcp_clear_all_retrans_hints(tp);
 }
 
 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
@@ -1934,12 +1955,11 @@ void tcp_enter_loss(struct sock *sk, int how)
                /* Push undo marker, if it was plain RTO and nothing
                 * was retransmitted. */
                tp->undo_marker = tp->snd_una;
-               tcp_clear_retrans_hints_partial(tp);
        } else {
                tp->sacked_out = 0;
                tp->fackets_out = 0;
-               tcp_clear_all_retrans_hints(tp);
        }
+       tcp_clear_all_retrans_hints(tp);
 
        tcp_for_write_queue(skb, sk) {
                if (skb == tcp_send_head(sk))
@@ -1952,6 +1972,7 @@ void tcp_enter_loss(struct sock *sk, int how)
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out += tcp_skb_pcount(skb);
+                       tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
                }
        }
        tcp_verify_left_out(tp);
@@ -2157,19 +2178,6 @@ static int tcp_time_to_recover(struct sock *sk)
        return 0;
 }
 
-/* RFC: This is from the original, I doubt that this is necessary at all:
- * clear xmit_retrans hint if seq of this skb is beyond hint. How could we
- * retransmitted past LOST markings in the first place? I'm not fully sure
- * about undo and end of connection cases, which can cause R without L?
- */
-static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
-{
-       if ((tp->retransmit_skb_hint != NULL) &&
-           before(TCP_SKB_CB(skb)->seq,
-                  TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
-               tp->retransmit_skb_hint = NULL;
-}
-
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
@@ -2217,11 +2225,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
                        cnt = packets;
                }
 
-               if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
-                       TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                       tp->lost_out += tcp_skb_pcount(skb);
-                       tcp_verify_retransmit_hint(tp, skb);
-               }
+               tcp_skb_mark_lost(tp, skb);
        }
        tcp_verify_left_out(tp);
 }
@@ -2263,11 +2267,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
                        if (!tcp_skb_timedout(sk, skb))
                                break;
 
-                       if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
-                               TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                               tp->lost_out += tcp_skb_pcount(skb);
-                               tcp_verify_retransmit_hint(tp, skb);
-                       }
+                       tcp_skb_mark_lost(tp, skb);
                }
 
                tp->scoreboard_skb_hint = skb;
@@ -2378,10 +2378,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
        }
        tcp_moderate_cwnd(tp);
        tp->snd_cwnd_stamp = tcp_time_stamp;
-
-       /* There is something screwy going on with the retrans hints after
-          an undo */
-       tcp_clear_all_retrans_hints(tp);
 }
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -2848,6 +2844,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
        int flag = 0;
        u32 pkts_acked = 0;
        u32 reord = tp->packets_out;
+       u32 prior_sacked = tp->sacked_out;
        s32 seq_rtt = -1;
        s32 ca_seq_rtt = -1;
        ktime_t last_ackt = net_invalid_timestamp();
@@ -2929,7 +2926,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 
                tcp_unlink_write_queue(skb, sk);
                sk_wmem_free_skb(sk, skb);
-               tcp_clear_all_retrans_hints(tp);
+               tp->scoreboard_skb_hint = NULL;
+               if (skb == tp->retransmit_skb_hint)
+                       tp->retransmit_skb_hint = NULL;
+               if (skb == tp->lost_skb_hint)
+                       tp->lost_skb_hint = NULL;
        }
 
        if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
@@ -2948,6 +2949,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
                        /* Non-retransmitted hole got filled? That's reordering */
                        if (reord < prior_fackets)
                                tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+                       /* No need to care for underflows here because
+                        * the lost_skb_hint gets NULLed if we're past it
+                        * (or something non-trivial happened)
+                        */
+                       if (tcp_is_fack(tp))
+                               tp->lost_cnt_hint -= pkts_acked;
+                       else
+                               tp->lost_cnt_hint -= prior_sacked - tp->sacked_out;
                }
 
                tp->fackets_out -= min(pkts_acked, tp->fackets_out);
@@ -3442,6 +3452,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
        }
 }
 
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+{
+       __be32 *ptr = (__be32 *)(th + 1);
+
+       if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+                         | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+               tp->rx_opt.saw_tstamp = 1;
+               ++ptr;
+               tp->rx_opt.rcv_tsval = ntohl(*ptr);
+               ++ptr;
+               tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+               return 1;
+       }
+       return 0;
+}
+
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
@@ -3453,16 +3479,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
                return 0;
        } else if (tp->rx_opt.tstamp_ok &&
                   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
-               __be32 *ptr = (__be32 *)(th + 1);
-               if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-                                 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
-                       tp->rx_opt.saw_tstamp = 1;
-                       ++ptr;
-                       tp->rx_opt.rcv_tsval = ntohl(*ptr);
-                       ++ptr;
-                       tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+               if (tcp_parse_aligned_timestamp(tp, th))
                        return 1;
-               }
        }
        tcp_parse_options(skb, &tp->rx_opt, 1);
        return 1;
@@ -4161,6 +4179,18 @@ add_sack:
        }
 }
 
+static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
+                                       struct sk_buff_head *list)
+{
+       struct sk_buff *next = skb->next;
+
+       __skb_unlink(skb, list);
+       __kfree_skb(skb);
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
+
+       return next;
+}
+
 /* Collapse contiguous sequence of skbs head..tail with
  * sequence numbers start..end.
  * Segments with FIN/SYN are not collapsed (only because this
@@ -4178,11 +4208,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
        for (skb = head; skb != tail;) {
                /* No new bits? It is possible on ofo queue. */
                if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-                       struct sk_buff *next = skb->next;
-                       __skb_unlink(skb, list);
-                       __kfree_skb(skb);
-                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
-                       skb = next;
+                       skb = tcp_collapse_one(sk, skb, list);
                        continue;
                }
 
@@ -4246,11 +4272,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
                                start += size;
                        }
                        if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-                               struct sk_buff *next = skb->next;
-                               __skb_unlink(skb, list);
-                               __kfree_skb(skb);
-                               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
-                               skb = next;
+                               skb = tcp_collapse_one(sk, skb, list);
                                if (skb == tail ||
                                    tcp_hdr(skb)->syn ||
                                    tcp_hdr(skb)->fin)
@@ -4691,6 +4713,67 @@ out:
 }
 #endif /* CONFIG_NET_DMA */
 
+/* Does PAWS and seqno based validation of an incoming segment, flags will
+ * play significant role here.
+ */
+static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
+                             struct tcphdr *th, int syn_inerr)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       /* RFC1323: H1. Apply PAWS check first. */
+       if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+           tcp_paws_discard(sk, skb)) {
+               if (!th->rst) {
+                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+                       tcp_send_dupack(sk, skb);
+                       goto discard;
+               }
+               /* Reset is accepted even if it did not pass PAWS. */
+       }
+
+       /* Step 1: check sequence number */
+       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+               /* RFC793, page 37: "In all states except SYN-SENT, all reset
+                * (RST) segments are validated by checking their SEQ-fields."
+                * And page 69: "If an incoming segment is not acceptable,
+                * an acknowledgment should be sent in reply (unless the RST
+                * bit is set, if so drop the segment and return)".
+                */
+               if (!th->rst)
+                       tcp_send_dupack(sk, skb);
+               goto discard;
+       }
+
+       /* Step 2: check RST bit */
+       if (th->rst) {
+               tcp_reset(sk);
+               goto discard;
+       }
+
+       /* ts_recent update must be made after we are sure that the packet
+        * is in window.
+        */
+       tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
+       /* step 3: check security and precedence [ignored] */
+
+       /* step 4: Check for a SYN in window. */
+       if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+               if (syn_inerr)
+                       TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
+               tcp_reset(sk);
+               return -1;
+       }
+
+       return 1;
+
+discard:
+       __kfree_skb(skb);
+       return 0;
+}
+
 /*
  *     TCP receive function for the ESTABLISHED state.
  *
@@ -4718,6 +4801,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                        struct tcphdr *th, unsigned len)
 {
        struct tcp_sock *tp = tcp_sk(sk);
+       int res;
 
        /*
         *      Header prediction.
@@ -4756,19 +4840,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
                /* Check timestamp */
                if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-                       __be32 *ptr = (__be32 *)(th + 1);
-
                        /* No? Slow path! */
-                       if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-                                         | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+                       if (!tcp_parse_aligned_timestamp(tp, th))
                                goto slow_path;
 
-                       tp->rx_opt.saw_tstamp = 1;
-                       ++ptr;
-                       tp->rx_opt.rcv_tsval = ntohl(*ptr);
-                       ++ptr;
-                       tp->rx_opt.rcv_tsecr = ntohl(*ptr);
-
                        /* If PAWS failed, check it more carefully in slow path */
                        if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
                                goto slow_path;
@@ -4898,52 +4973,13 @@ slow_path:
        if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
                goto csum_error;
 
-       /*
-        * RFC1323: H1. Apply PAWS check first.
-        */
-       if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-           tcp_paws_discard(sk, skb)) {
-               if (!th->rst) {
-                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-                       tcp_send_dupack(sk, skb);
-                       goto discard;
-               }
-               /* Resets are accepted even if PAWS failed.
-
-                  ts_recent update must be made after we are sure
-                  that the packet is in window.
-                */
-       }
-
        /*
         *      Standard slow path.
         */
 
-       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
-               /* RFC793, page 37: "In all states except SYN-SENT, all reset
-                * (RST) segments are validated by checking their SEQ-fields."
-                * And page 69: "If an incoming segment is not acceptable,
-                * an acknowledgment should be sent in reply (unless the RST bit
-                * is set, if so drop the segment and return)".
-                */
-               if (!th->rst)
-                       tcp_send_dupack(sk, skb);
-               goto discard;
-       }
-
-       if (th->rst) {
-               tcp_reset(sk);
-               goto discard;
-       }
-
-       tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
-       if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-               TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-               tcp_reset(sk);
-               return 1;
-       }
+       res = tcp_validate_incoming(sk, skb, th, 1);
+       if (res <= 0)
+               return -res;
 
 step5:
        if (th->ack)
@@ -5225,6 +5261,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        int queued = 0;
+       int res;
 
        tp->rx_opt.saw_tstamp = 0;
 
@@ -5277,42 +5314,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                return 0;
        }
 
-       if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-           tcp_paws_discard(sk, skb)) {
-               if (!th->rst) {
-                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-                       tcp_send_dupack(sk, skb);
-                       goto discard;
-               }
-               /* Reset is accepted even if it did not pass PAWS. */
-       }
-
-       /* step 1: check sequence number */
-       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
-               if (!th->rst)
-                       tcp_send_dupack(sk, skb);
-               goto discard;
-       }
-
-       /* step 2: check RST bit */
-       if (th->rst) {
-               tcp_reset(sk);
-               goto discard;
-       }
-
-       tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
-       /* step 3: check security and precedence [ignored] */
-
-       /*      step 4:
-        *
-        *      Check for a SYN in window.
-        */
-       if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-               tcp_reset(sk);
-               return 1;
-       }
+       res = tcp_validate_incoming(sk, skb, th, 0);
+       if (res <= 0)
+               return -res;
 
        /* step 5: check the ACK field */
        if (th->ack) {