]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/ipv4/ipvs/ip_vs_proto_tcp.c
537f616776da1bbbb6ae230e0948fd1daa0dd3bb
[linux-2.6-omap-h63xx.git] / net / ipv4 / ipvs / ip_vs_proto_tcp.c
1 /*
2  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/ip.h>
18 #include <linux/tcp.h>                  /* for tcphdr */
19 #include <net/ip.h>
20 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
21 #include <linux/netfilter.h>
22 #include <linux/netfilter_ipv4.h>
23
24 #include <net/ip_vs.h>
25
26
27 static struct ip_vs_conn *
28 tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
29                 const struct ip_vs_iphdr *iph, unsigned int proto_off,
30                 int inverse)
31 {
32         __be16 _ports[2], *pptr;
33
34         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
35         if (pptr == NULL)
36                 return NULL;
37
38         if (likely(!inverse)) {
39                 return ip_vs_conn_in_get(af, iph->protocol,
40                                          &iph->saddr, pptr[0],
41                                          &iph->daddr, pptr[1]);
42         } else {
43                 return ip_vs_conn_in_get(af, iph->protocol,
44                                          &iph->daddr, pptr[1],
45                                          &iph->saddr, pptr[0]);
46         }
47 }
48
49 static struct ip_vs_conn *
50 tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
51                  const struct ip_vs_iphdr *iph, unsigned int proto_off,
52                  int inverse)
53 {
54         __be16 _ports[2], *pptr;
55
56         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
57         if (pptr == NULL)
58                 return NULL;
59
60         if (likely(!inverse)) {
61                 return ip_vs_conn_out_get(af, iph->protocol,
62                                           &iph->saddr, pptr[0],
63                                           &iph->daddr, pptr[1]);
64         } else {
65                 return ip_vs_conn_out_get(af, iph->protocol,
66                                           &iph->daddr, pptr[1],
67                                           &iph->saddr, pptr[0]);
68         }
69 }
70
71
72 static int
73 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
74                   int *verdict, struct ip_vs_conn **cpp)
75 {
76         struct ip_vs_service *svc;
77         struct tcphdr _tcph, *th;
78         struct ip_vs_iphdr iph;
79
80         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
81
82         th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
83         if (th == NULL) {
84                 *verdict = NF_DROP;
85                 return 0;
86         }
87
88         if (th->syn &&
89             (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
90                                      th->dest))) {
91                 if (ip_vs_todrop()) {
92                         /*
93                          * It seems that we are very loaded.
94                          * We have to drop this packet :(
95                          */
96                         ip_vs_service_put(svc);
97                         *verdict = NF_DROP;
98                         return 0;
99                 }
100
101                 /*
102                  * Let the virtual server select a real server for the
103                  * incoming connection, and create a connection entry.
104                  */
105                 *cpp = ip_vs_schedule(svc, skb);
106                 if (!*cpp) {
107                         *verdict = ip_vs_leave(svc, skb, pp);
108                         return 0;
109                 }
110                 ip_vs_service_put(svc);
111         }
112         return 1;
113 }
114
115
116 static inline void
117 tcp_fast_csum_update(int af, struct tcphdr *tcph,
118                      const union nf_inet_addr *oldip,
119                      const union nf_inet_addr *newip,
120                      __be16 oldport, __be16 newport)
121 {
122 #ifdef CONFIG_IP_VS_IPV6
123         if (af == AF_INET6)
124                 tcph->check =
125                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
126                                          ip_vs_check_diff2(oldport, newport,
127                                                 ~csum_unfold(tcph->check))));
128         else
129 #endif
130         tcph->check =
131                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
132                                  ip_vs_check_diff2(oldport, newport,
133                                                 ~csum_unfold(tcph->check))));
134 }
135
136
137 static inline void
138 tcp_partial_csum_update(int af, struct tcphdr *tcph,
139                      const union nf_inet_addr *oldip,
140                      const union nf_inet_addr *newip,
141                      __be16 oldlen, __be16 newlen)
142 {
143 #ifdef CONFIG_IP_VS_IPV6
144         if (af == AF_INET6)
145                 tcph->check =
146                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
147                                          ip_vs_check_diff2(oldlen, newlen,
148                                                 ~csum_unfold(tcph->check))));
149         else
150 #endif
151         tcph->check =
152                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
153                                 ip_vs_check_diff2(oldlen, newlen,
154                                                 ~csum_unfold(tcph->check))));
155 }
156
157
158 static int
159 tcp_snat_handler(struct sk_buff *skb,
160                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
161 {
162         struct tcphdr *tcph;
163         unsigned int tcphoff;
164         int oldlen;
165
166 #ifdef CONFIG_IP_VS_IPV6
167         if (cp->af == AF_INET6)
168                 tcphoff = sizeof(struct ipv6hdr);
169         else
170 #endif
171                 tcphoff = ip_hdrlen(skb);
172         oldlen = skb->len - tcphoff;
173
174         /* csum_check requires unshared skb */
175         if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
176                 return 0;
177
178         if (unlikely(cp->app != NULL)) {
179                 /* Some checks before mangling */
180                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
181                         return 0;
182
183                 /* Call application helper if needed */
184                 if (!ip_vs_app_pkt_out(cp, skb))
185                         return 0;
186         }
187
188         tcph = (void *)skb_network_header(skb) + tcphoff;
189         tcph->source = cp->vport;
190
191         /* Adjust TCP checksums */
192         if (skb->ip_summed == CHECKSUM_PARTIAL) {
193                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
194                                         htonl(oldlen),
195                                         htonl(skb->len - tcphoff));
196         } else if (!cp->app) {
197                 /* Only port and addr are changed, do fast csum update */
198                 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
199                                      cp->dport, cp->vport);
200                 if (skb->ip_summed == CHECKSUM_COMPLETE)
201                         skb->ip_summed = CHECKSUM_NONE;
202         } else {
203                 /* full checksum calculation */
204                 tcph->check = 0;
205                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
206 #ifdef CONFIG_IP_VS_IPV6
207                 if (cp->af == AF_INET6)
208                         tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
209                                                       &cp->caddr.in6,
210                                                       skb->len - tcphoff,
211                                                       cp->protocol, skb->csum);
212                 else
213 #endif
214                         tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
215                                                         cp->caddr.ip,
216                                                         skb->len - tcphoff,
217                                                         cp->protocol,
218                                                         skb->csum);
219
220                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
221                           pp->name, tcph->check,
222                           (char*)&(tcph->check) - (char*)tcph);
223         }
224         return 1;
225 }
226
227
228 static int
229 tcp_dnat_handler(struct sk_buff *skb,
230                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
231 {
232         struct tcphdr *tcph;
233         unsigned int tcphoff;
234         int oldlen;
235
236 #ifdef CONFIG_IP_VS_IPV6
237         if (cp->af == AF_INET6)
238                 tcphoff = sizeof(struct ipv6hdr);
239         else
240 #endif
241                 tcphoff = ip_hdrlen(skb);
242         oldlen = skb->len - tcphoff;
243
244         /* csum_check requires unshared skb */
245         if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
246                 return 0;
247
248         if (unlikely(cp->app != NULL)) {
249                 /* Some checks before mangling */
250                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
251                         return 0;
252
253                 /*
254                  *      Attempt ip_vs_app call.
255                  *      It will fix ip_vs_conn and iph ack_seq stuff
256                  */
257                 if (!ip_vs_app_pkt_in(cp, skb))
258                         return 0;
259         }
260
261         tcph = (void *)skb_network_header(skb) + tcphoff;
262         tcph->dest = cp->dport;
263
264         /*
265          *      Adjust TCP checksums
266          */
267         if (skb->ip_summed == CHECKSUM_PARTIAL) {
268                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
269                                         htonl(oldlen),
270                                         htonl(skb->len - tcphoff));
271         } else if (!cp->app) {
272                 /* Only port and addr are changed, do fast csum update */
273                 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
274                                      cp->vport, cp->dport);
275                 if (skb->ip_summed == CHECKSUM_COMPLETE)
276                         skb->ip_summed = CHECKSUM_NONE;
277         } else {
278                 /* full checksum calculation */
279                 tcph->check = 0;
280                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
281 #ifdef CONFIG_IP_VS_IPV6
282                 if (cp->af == AF_INET6)
283                         tcph->check = csum_ipv6_magic(&cp->caddr.in6,
284                                                       &cp->daddr.in6,
285                                                       skb->len - tcphoff,
286                                                       cp->protocol, skb->csum);
287                 else
288 #endif
289                         tcph->check = csum_tcpudp_magic(cp->caddr.ip,
290                                                         cp->daddr.ip,
291                                                         skb->len - tcphoff,
292                                                         cp->protocol,
293                                                         skb->csum);
294                 skb->ip_summed = CHECKSUM_UNNECESSARY;
295         }
296         return 1;
297 }
298
299
300 static int
301 tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
302 {
303         unsigned int tcphoff;
304
305 #ifdef CONFIG_IP_VS_IPV6
306         if (af == AF_INET6)
307                 tcphoff = sizeof(struct ipv6hdr);
308         else
309 #endif
310                 tcphoff = ip_hdrlen(skb);
311
312         switch (skb->ip_summed) {
313         case CHECKSUM_NONE:
314                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
315         case CHECKSUM_COMPLETE:
316 #ifdef CONFIG_IP_VS_IPV6
317                 if (af == AF_INET6) {
318                         if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
319                                             &ipv6_hdr(skb)->daddr,
320                                             skb->len - tcphoff,
321                                             ipv6_hdr(skb)->nexthdr,
322                                             skb->csum)) {
323                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
324                                                  "Failed checksum for");
325                                 return 0;
326                         }
327                 } else
328 #endif
329                         if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
330                                               ip_hdr(skb)->daddr,
331                                               skb->len - tcphoff,
332                                               ip_hdr(skb)->protocol,
333                                               skb->csum)) {
334                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
335                                                  "Failed checksum for");
336                                 return 0;
337                         }
338                 break;
339         default:
340                 /* No need to checksum. */
341                 break;
342         }
343
344         return 1;
345 }
346
347
348 #define TCP_DIR_INPUT           0
349 #define TCP_DIR_OUTPUT          4
350 #define TCP_DIR_INPUT_ONLY      8
351
352 static const int tcp_state_off[IP_VS_DIR_LAST] = {
353         [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
354         [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
355         [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
356 };
357
358 /*
359  *      Timeout table[state]
360  */
361 static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
362         [IP_VS_TCP_S_NONE]              =       2*HZ,
363         [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
364         [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
365         [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
366         [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
367         [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
368         [IP_VS_TCP_S_CLOSE]             =       10*HZ,
369         [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
370         [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
371         [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
372         [IP_VS_TCP_S_SYNACK]            =       120*HZ,
373         [IP_VS_TCP_S_LAST]              =       2*HZ,
374 };
375
376 static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
377         [IP_VS_TCP_S_NONE]              =       "NONE",
378         [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
379         [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
380         [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
381         [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
382         [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
383         [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
384         [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
385         [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
386         [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
387         [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
388         [IP_VS_TCP_S_LAST]              =       "BUG!",
389 };
390
391 #define sNO IP_VS_TCP_S_NONE
392 #define sES IP_VS_TCP_S_ESTABLISHED
393 #define sSS IP_VS_TCP_S_SYN_SENT
394 #define sSR IP_VS_TCP_S_SYN_RECV
395 #define sFW IP_VS_TCP_S_FIN_WAIT
396 #define sTW IP_VS_TCP_S_TIME_WAIT
397 #define sCL IP_VS_TCP_S_CLOSE
398 #define sCW IP_VS_TCP_S_CLOSE_WAIT
399 #define sLA IP_VS_TCP_S_LAST_ACK
400 #define sLI IP_VS_TCP_S_LISTEN
401 #define sSA IP_VS_TCP_S_SYNACK
402
403 struct tcp_states_t {
404         int next_state[IP_VS_TCP_S_LAST];
405 };
406
407 static const char * tcp_state_name(int state)
408 {
409         if (state >= IP_VS_TCP_S_LAST)
410                 return "ERR!";
411         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
412 }
413
414 static struct tcp_states_t tcp_states [] = {
415 /*      INPUT */
416 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
417 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
418 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
419 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
420 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
421
422 /*      OUTPUT */
423 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
424 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
425 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
426 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
427 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
428
429 /*      INPUT-ONLY */
430 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
431 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
432 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
433 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
434 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
435 };
436
437 static struct tcp_states_t tcp_states_dos [] = {
438 /*      INPUT */
439 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
440 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
441 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
442 /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
443 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
444
445 /*      OUTPUT */
446 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
447 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
448 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
449 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
450 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
451
452 /*      INPUT-ONLY */
453 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
454 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
455 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
456 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
457 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
458 };
459
460 static struct tcp_states_t *tcp_state_table = tcp_states;
461
462
463 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
464 {
465         int on = (flags & 1);           /* secure_tcp */
466
467         /*
468         ** FIXME: change secure_tcp to independent sysctl var
469         ** or make it per-service or per-app because it is valid
470         ** for most if not for all of the applications. Something
471         ** like "capabilities" (flags) for each object.
472         */
473         tcp_state_table = (on? tcp_states_dos : tcp_states);
474 }
475
476 static int
477 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
478 {
479         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
480                                        tcp_state_name_table, sname, to);
481 }
482
483 static inline int tcp_state_idx(struct tcphdr *th)
484 {
485         if (th->rst)
486                 return 3;
487         if (th->syn)
488                 return 0;
489         if (th->fin)
490                 return 1;
491         if (th->ack)
492                 return 2;
493         return -1;
494 }
495
496 static inline void
497 set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
498               int direction, struct tcphdr *th)
499 {
500         int state_idx;
501         int new_state = IP_VS_TCP_S_CLOSE;
502         int state_off = tcp_state_off[direction];
503
504         /*
505          *    Update state offset to INPUT_ONLY if necessary
506          *    or delete NO_OUTPUT flag if output packet detected
507          */
508         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
509                 if (state_off == TCP_DIR_OUTPUT)
510                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
511                 else
512                         state_off = TCP_DIR_INPUT_ONLY;
513         }
514
515         if ((state_idx = tcp_state_idx(th)) < 0) {
516                 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
517                 goto tcp_state_out;
518         }
519
520         new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
521
522   tcp_state_out:
523         if (new_state != cp->state) {
524                 struct ip_vs_dest *dest = cp->dest;
525
526                 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
527                               "%s:%d state: %s->%s conn->refcnt:%d\n",
528                               pp->name,
529                               ((state_off == TCP_DIR_OUTPUT) ?
530                                "output " : "input "),
531                               th->syn ? 'S' : '.',
532                               th->fin ? 'F' : '.',
533                               th->ack ? 'A' : '.',
534                               th->rst ? 'R' : '.',
535                               IP_VS_DBG_ADDR(cp->af, &cp->daddr),
536                               ntohs(cp->dport),
537                               IP_VS_DBG_ADDR(cp->af, &cp->caddr),
538                               ntohs(cp->cport),
539                               tcp_state_name(cp->state),
540                               tcp_state_name(new_state),
541                               atomic_read(&cp->refcnt));
542
543                 if (dest) {
544                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
545                             (new_state != IP_VS_TCP_S_ESTABLISHED)) {
546                                 atomic_dec(&dest->activeconns);
547                                 atomic_inc(&dest->inactconns);
548                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
549                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
550                                    (new_state == IP_VS_TCP_S_ESTABLISHED)) {
551                                 atomic_inc(&dest->activeconns);
552                                 atomic_dec(&dest->inactconns);
553                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
554                         }
555                 }
556         }
557
558         cp->timeout = pp->timeout_table[cp->state = new_state];
559 }
560
561
562 /*
563  *      Handle state transitions
564  */
565 static int
566 tcp_state_transition(struct ip_vs_conn *cp, int direction,
567                      const struct sk_buff *skb,
568                      struct ip_vs_protocol *pp)
569 {
570         struct tcphdr _tcph, *th;
571
572 #ifdef CONFIG_IP_VS_IPV6
573         int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
574 #else
575         int ihl = ip_hdrlen(skb);
576 #endif
577
578         th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
579         if (th == NULL)
580                 return 0;
581
582         spin_lock(&cp->lock);
583         set_tcp_state(pp, cp, direction, th);
584         spin_unlock(&cp->lock);
585
586         return 1;
587 }
588
589
590 /*
591  *      Hash table for TCP application incarnations
592  */
593 #define TCP_APP_TAB_BITS        4
594 #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
595 #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
596
597 static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
598 static DEFINE_SPINLOCK(tcp_app_lock);
599
600 static inline __u16 tcp_app_hashkey(__be16 port)
601 {
602         return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
603                 & TCP_APP_TAB_MASK;
604 }
605
606
607 static int tcp_register_app(struct ip_vs_app *inc)
608 {
609         struct ip_vs_app *i;
610         __u16 hash;
611         __be16 port = inc->port;
612         int ret = 0;
613
614         hash = tcp_app_hashkey(port);
615
616         spin_lock_bh(&tcp_app_lock);
617         list_for_each_entry(i, &tcp_apps[hash], p_list) {
618                 if (i->port == port) {
619                         ret = -EEXIST;
620                         goto out;
621                 }
622         }
623         list_add(&inc->p_list, &tcp_apps[hash]);
624         atomic_inc(&ip_vs_protocol_tcp.appcnt);
625
626   out:
627         spin_unlock_bh(&tcp_app_lock);
628         return ret;
629 }
630
631
632 static void
633 tcp_unregister_app(struct ip_vs_app *inc)
634 {
635         spin_lock_bh(&tcp_app_lock);
636         atomic_dec(&ip_vs_protocol_tcp.appcnt);
637         list_del(&inc->p_list);
638         spin_unlock_bh(&tcp_app_lock);
639 }
640
641
642 static int
643 tcp_app_conn_bind(struct ip_vs_conn *cp)
644 {
645         int hash;
646         struct ip_vs_app *inc;
647         int result = 0;
648
649         /* Default binding: bind app only for NAT */
650         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
651                 return 0;
652
653         /* Lookup application incarnations and bind the right one */
654         hash = tcp_app_hashkey(cp->vport);
655
656         spin_lock(&tcp_app_lock);
657         list_for_each_entry(inc, &tcp_apps[hash], p_list) {
658                 if (inc->port == cp->vport) {
659                         if (unlikely(!ip_vs_app_inc_get(inc)))
660                                 break;
661                         spin_unlock(&tcp_app_lock);
662
663                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
664                                       "%s:%u to app %s on port %u\n",
665                                       __func__,
666                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
667                                       ntohs(cp->cport),
668                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
669                                       ntohs(cp->vport),
670                                       inc->name, ntohs(inc->port));
671
672                         cp->app = inc;
673                         if (inc->init_conn)
674                                 result = inc->init_conn(inc, cp);
675                         goto out;
676                 }
677         }
678         spin_unlock(&tcp_app_lock);
679
680   out:
681         return result;
682 }
683
684
685 /*
686  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
687  */
688 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
689 {
690         spin_lock(&cp->lock);
691         cp->state = IP_VS_TCP_S_LISTEN;
692         cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
693         spin_unlock(&cp->lock);
694 }
695
696
697 static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
698 {
699         IP_VS_INIT_HASH_TABLE(tcp_apps);
700         pp->timeout_table = tcp_timeouts;
701 }
702
703
/*
 *	Protocol exit hook: intentionally empty.
 */
static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
	/* nothing to do */
}
707
708
709 struct ip_vs_protocol ip_vs_protocol_tcp = {
710         .name =                 "TCP",
711         .protocol =             IPPROTO_TCP,
712         .num_states =           IP_VS_TCP_S_LAST,
713         .dont_defrag =          0,
714         .appcnt =               ATOMIC_INIT(0),
715         .init =                 ip_vs_tcp_init,
716         .exit =                 ip_vs_tcp_exit,
717         .register_app =         tcp_register_app,
718         .unregister_app =       tcp_unregister_app,
719         .conn_schedule =        tcp_conn_schedule,
720         .conn_in_get =          tcp_conn_in_get,
721         .conn_out_get =         tcp_conn_out_get,
722         .snat_handler =         tcp_snat_handler,
723         .dnat_handler =         tcp_dnat_handler,
724         .csum_check =           tcp_csum_check,
725         .state_name =           tcp_state_name,
726         .state_transition =     tcp_state_transition,
727         .app_conn_bind =        tcp_app_conn_bind,
728         .debug_packet =         ip_vs_tcpudp_debug_packet,
729         .timeout_change =       tcp_timeout_change,
730         .set_state_timeout =    tcp_set_state_timeout,
731 };