www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/ipv4/ipvs/ip_vs_proto_tcp.c
net: ip_vs_proto_{tcp,udp} build fix
[linux-2.6-omap-h63xx.git] / net / ipv4 / ipvs / ip_vs_proto_tcp.c
1 /*
2  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/ip.h>
18 #include <linux/tcp.h>                  /* for tcphdr */
19 #include <net/ip.h>
20 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
21 #include <net/ip6_checksum.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24
25 #include <net/ip_vs.h>
26
27
28 static struct ip_vs_conn *
29 tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
30                 const struct ip_vs_iphdr *iph, unsigned int proto_off,
31                 int inverse)
32 {
33         __be16 _ports[2], *pptr;
34
35         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
36         if (pptr == NULL)
37                 return NULL;
38
39         if (likely(!inverse)) {
40                 return ip_vs_conn_in_get(af, iph->protocol,
41                                          &iph->saddr, pptr[0],
42                                          &iph->daddr, pptr[1]);
43         } else {
44                 return ip_vs_conn_in_get(af, iph->protocol,
45                                          &iph->daddr, pptr[1],
46                                          &iph->saddr, pptr[0]);
47         }
48 }
49
50 static struct ip_vs_conn *
51 tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
52                  const struct ip_vs_iphdr *iph, unsigned int proto_off,
53                  int inverse)
54 {
55         __be16 _ports[2], *pptr;
56
57         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
58         if (pptr == NULL)
59                 return NULL;
60
61         if (likely(!inverse)) {
62                 return ip_vs_conn_out_get(af, iph->protocol,
63                                           &iph->saddr, pptr[0],
64                                           &iph->daddr, pptr[1]);
65         } else {
66                 return ip_vs_conn_out_get(af, iph->protocol,
67                                           &iph->daddr, pptr[1],
68                                           &iph->saddr, pptr[0]);
69         }
70 }
71
72
73 static int
74 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
75                   int *verdict, struct ip_vs_conn **cpp)
76 {
77         struct ip_vs_service *svc;
78         struct tcphdr _tcph, *th;
79         struct ip_vs_iphdr iph;
80
81         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
82
83         th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
84         if (th == NULL) {
85                 *verdict = NF_DROP;
86                 return 0;
87         }
88
89         if (th->syn &&
90             (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
91                                      th->dest))) {
92                 if (ip_vs_todrop()) {
93                         /*
94                          * It seems that we are very loaded.
95                          * We have to drop this packet :(
96                          */
97                         ip_vs_service_put(svc);
98                         *verdict = NF_DROP;
99                         return 0;
100                 }
101
102                 /*
103                  * Let the virtual server select a real server for the
104                  * incoming connection, and create a connection entry.
105                  */
106                 *cpp = ip_vs_schedule(svc, skb);
107                 if (!*cpp) {
108                         *verdict = ip_vs_leave(svc, skb, pp);
109                         return 0;
110                 }
111                 ip_vs_service_put(svc);
112         }
113         return 1;
114 }
115
116
117 static inline void
118 tcp_fast_csum_update(int af, struct tcphdr *tcph,
119                      const union nf_inet_addr *oldip,
120                      const union nf_inet_addr *newip,
121                      __be16 oldport, __be16 newport)
122 {
123 #ifdef CONFIG_IP_VS_IPV6
124         if (af == AF_INET6)
125                 tcph->check =
126                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
127                                          ip_vs_check_diff2(oldport, newport,
128                                                 ~csum_unfold(tcph->check))));
129         else
130 #endif
131         tcph->check =
132                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
133                                  ip_vs_check_diff2(oldport, newport,
134                                                 ~csum_unfold(tcph->check))));
135 }
136
137
138 static inline void
139 tcp_partial_csum_update(int af, struct tcphdr *tcph,
140                      const union nf_inet_addr *oldip,
141                      const union nf_inet_addr *newip,
142                      __be16 oldlen, __be16 newlen)
143 {
144 #ifdef CONFIG_IP_VS_IPV6
145         if (af == AF_INET6)
146                 tcph->check =
147                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
148                                          ip_vs_check_diff2(oldlen, newlen,
149                                                 ~csum_unfold(tcph->check))));
150         else
151 #endif
152         tcph->check =
153                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
154                                 ip_vs_check_diff2(oldlen, newlen,
155                                                 ~csum_unfold(tcph->check))));
156 }
157
158
159 static int
160 tcp_snat_handler(struct sk_buff *skb,
161                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
162 {
163         struct tcphdr *tcph;
164         unsigned int tcphoff;
165         int oldlen;
166
167 #ifdef CONFIG_IP_VS_IPV6
168         if (cp->af == AF_INET6)
169                 tcphoff = sizeof(struct ipv6hdr);
170         else
171 #endif
172                 tcphoff = ip_hdrlen(skb);
173         oldlen = skb->len - tcphoff;
174
175         /* csum_check requires unshared skb */
176         if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
177                 return 0;
178
179         if (unlikely(cp->app != NULL)) {
180                 /* Some checks before mangling */
181                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
182                         return 0;
183
184                 /* Call application helper if needed */
185                 if (!ip_vs_app_pkt_out(cp, skb))
186                         return 0;
187         }
188
189         tcph = (void *)skb_network_header(skb) + tcphoff;
190         tcph->source = cp->vport;
191
192         /* Adjust TCP checksums */
193         if (skb->ip_summed == CHECKSUM_PARTIAL) {
194                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
195                                         htonl(oldlen),
196                                         htonl(skb->len - tcphoff));
197         } else if (!cp->app) {
198                 /* Only port and addr are changed, do fast csum update */
199                 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
200                                      cp->dport, cp->vport);
201                 if (skb->ip_summed == CHECKSUM_COMPLETE)
202                         skb->ip_summed = CHECKSUM_NONE;
203         } else {
204                 /* full checksum calculation */
205                 tcph->check = 0;
206                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
207 #ifdef CONFIG_IP_VS_IPV6
208                 if (cp->af == AF_INET6)
209                         tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
210                                                       &cp->caddr.in6,
211                                                       skb->len - tcphoff,
212                                                       cp->protocol, skb->csum);
213                 else
214 #endif
215                         tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
216                                                         cp->caddr.ip,
217                                                         skb->len - tcphoff,
218                                                         cp->protocol,
219                                                         skb->csum);
220
221                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
222                           pp->name, tcph->check,
223                           (char*)&(tcph->check) - (char*)tcph);
224         }
225         return 1;
226 }
227
228
229 static int
230 tcp_dnat_handler(struct sk_buff *skb,
231                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
232 {
233         struct tcphdr *tcph;
234         unsigned int tcphoff;
235         int oldlen;
236
237 #ifdef CONFIG_IP_VS_IPV6
238         if (cp->af == AF_INET6)
239                 tcphoff = sizeof(struct ipv6hdr);
240         else
241 #endif
242                 tcphoff = ip_hdrlen(skb);
243         oldlen = skb->len - tcphoff;
244
245         /* csum_check requires unshared skb */
246         if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
247                 return 0;
248
249         if (unlikely(cp->app != NULL)) {
250                 /* Some checks before mangling */
251                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
252                         return 0;
253
254                 /*
255                  *      Attempt ip_vs_app call.
256                  *      It will fix ip_vs_conn and iph ack_seq stuff
257                  */
258                 if (!ip_vs_app_pkt_in(cp, skb))
259                         return 0;
260         }
261
262         tcph = (void *)skb_network_header(skb) + tcphoff;
263         tcph->dest = cp->dport;
264
265         /*
266          *      Adjust TCP checksums
267          */
268         if (skb->ip_summed == CHECKSUM_PARTIAL) {
269                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
270                                         htonl(oldlen),
271                                         htonl(skb->len - tcphoff));
272         } else if (!cp->app) {
273                 /* Only port and addr are changed, do fast csum update */
274                 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
275                                      cp->vport, cp->dport);
276                 if (skb->ip_summed == CHECKSUM_COMPLETE)
277                         skb->ip_summed = CHECKSUM_NONE;
278         } else {
279                 /* full checksum calculation */
280                 tcph->check = 0;
281                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
282 #ifdef CONFIG_IP_VS_IPV6
283                 if (cp->af == AF_INET6)
284                         tcph->check = csum_ipv6_magic(&cp->caddr.in6,
285                                                       &cp->daddr.in6,
286                                                       skb->len - tcphoff,
287                                                       cp->protocol, skb->csum);
288                 else
289 #endif
290                         tcph->check = csum_tcpudp_magic(cp->caddr.ip,
291                                                         cp->daddr.ip,
292                                                         skb->len - tcphoff,
293                                                         cp->protocol,
294                                                         skb->csum);
295                 skb->ip_summed = CHECKSUM_UNNECESSARY;
296         }
297         return 1;
298 }
299
300
301 static int
302 tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
303 {
304         unsigned int tcphoff;
305
306 #ifdef CONFIG_IP_VS_IPV6
307         if (af == AF_INET6)
308                 tcphoff = sizeof(struct ipv6hdr);
309         else
310 #endif
311                 tcphoff = ip_hdrlen(skb);
312
313         switch (skb->ip_summed) {
314         case CHECKSUM_NONE:
315                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
316         case CHECKSUM_COMPLETE:
317 #ifdef CONFIG_IP_VS_IPV6
318                 if (af == AF_INET6) {
319                         if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
320                                             &ipv6_hdr(skb)->daddr,
321                                             skb->len - tcphoff,
322                                             ipv6_hdr(skb)->nexthdr,
323                                             skb->csum)) {
324                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
325                                                  "Failed checksum for");
326                                 return 0;
327                         }
328                 } else
329 #endif
330                         if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
331                                               ip_hdr(skb)->daddr,
332                                               skb->len - tcphoff,
333                                               ip_hdr(skb)->protocol,
334                                               skb->csum)) {
335                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
336                                                  "Failed checksum for");
337                                 return 0;
338                         }
339                 break;
340         default:
341                 /* No need to checksum. */
342                 break;
343         }
344
345         return 1;
346 }
347
348
349 #define TCP_DIR_INPUT           0
350 #define TCP_DIR_OUTPUT          4
351 #define TCP_DIR_INPUT_ONLY      8
352
353 static const int tcp_state_off[IP_VS_DIR_LAST] = {
354         [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
355         [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
356         [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
357 };
358
359 /*
360  *      Timeout table[state]
361  */
362 static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
363         [IP_VS_TCP_S_NONE]              =       2*HZ,
364         [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
365         [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
366         [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
367         [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
368         [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
369         [IP_VS_TCP_S_CLOSE]             =       10*HZ,
370         [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
371         [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
372         [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
373         [IP_VS_TCP_S_SYNACK]            =       120*HZ,
374         [IP_VS_TCP_S_LAST]              =       2*HZ,
375 };
376
377 static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
378         [IP_VS_TCP_S_NONE]              =       "NONE",
379         [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
380         [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
381         [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
382         [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
383         [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
384         [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
385         [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
386         [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
387         [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
388         [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
389         [IP_VS_TCP_S_LAST]              =       "BUG!",
390 };
391
392 #define sNO IP_VS_TCP_S_NONE
393 #define sES IP_VS_TCP_S_ESTABLISHED
394 #define sSS IP_VS_TCP_S_SYN_SENT
395 #define sSR IP_VS_TCP_S_SYN_RECV
396 #define sFW IP_VS_TCP_S_FIN_WAIT
397 #define sTW IP_VS_TCP_S_TIME_WAIT
398 #define sCL IP_VS_TCP_S_CLOSE
399 #define sCW IP_VS_TCP_S_CLOSE_WAIT
400 #define sLA IP_VS_TCP_S_LAST_ACK
401 #define sLI IP_VS_TCP_S_LISTEN
402 #define sSA IP_VS_TCP_S_SYNACK
403
404 struct tcp_states_t {
405         int next_state[IP_VS_TCP_S_LAST];
406 };
407
408 static const char * tcp_state_name(int state)
409 {
410         if (state >= IP_VS_TCP_S_LAST)
411                 return "ERR!";
412         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
413 }
414
415 static struct tcp_states_t tcp_states [] = {
416 /*      INPUT */
417 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
418 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
419 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
420 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
421 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
422
423 /*      OUTPUT */
424 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
425 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
426 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
427 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
428 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
429
430 /*      INPUT-ONLY */
431 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
432 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
433 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
434 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
435 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
436 };
437
438 static struct tcp_states_t tcp_states_dos [] = {
439 /*      INPUT */
440 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
441 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
442 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
443 /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
444 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
445
446 /*      OUTPUT */
447 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
448 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
449 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
450 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
451 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
452
453 /*      INPUT-ONLY */
454 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
455 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
456 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
457 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
458 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
459 };
460
461 static struct tcp_states_t *tcp_state_table = tcp_states;
462
463
464 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
465 {
466         int on = (flags & 1);           /* secure_tcp */
467
468         /*
469         ** FIXME: change secure_tcp to independent sysctl var
470         ** or make it per-service or per-app because it is valid
471         ** for most if not for all of the applications. Something
472         ** like "capabilities" (flags) for each object.
473         */
474         tcp_state_table = (on? tcp_states_dos : tcp_states);
475 }
476
477 static int
478 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
479 {
480         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
481                                        tcp_state_name_table, sname, to);
482 }
483
484 static inline int tcp_state_idx(struct tcphdr *th)
485 {
486         if (th->rst)
487                 return 3;
488         if (th->syn)
489                 return 0;
490         if (th->fin)
491                 return 1;
492         if (th->ack)
493                 return 2;
494         return -1;
495 }
496
497 static inline void
498 set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
499               int direction, struct tcphdr *th)
500 {
501         int state_idx;
502         int new_state = IP_VS_TCP_S_CLOSE;
503         int state_off = tcp_state_off[direction];
504
505         /*
506          *    Update state offset to INPUT_ONLY if necessary
507          *    or delete NO_OUTPUT flag if output packet detected
508          */
509         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
510                 if (state_off == TCP_DIR_OUTPUT)
511                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
512                 else
513                         state_off = TCP_DIR_INPUT_ONLY;
514         }
515
516         if ((state_idx = tcp_state_idx(th)) < 0) {
517                 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
518                 goto tcp_state_out;
519         }
520
521         new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
522
523   tcp_state_out:
524         if (new_state != cp->state) {
525                 struct ip_vs_dest *dest = cp->dest;
526
527                 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
528                               "%s:%d state: %s->%s conn->refcnt:%d\n",
529                               pp->name,
530                               ((state_off == TCP_DIR_OUTPUT) ?
531                                "output " : "input "),
532                               th->syn ? 'S' : '.',
533                               th->fin ? 'F' : '.',
534                               th->ack ? 'A' : '.',
535                               th->rst ? 'R' : '.',
536                               IP_VS_DBG_ADDR(cp->af, &cp->daddr),
537                               ntohs(cp->dport),
538                               IP_VS_DBG_ADDR(cp->af, &cp->caddr),
539                               ntohs(cp->cport),
540                               tcp_state_name(cp->state),
541                               tcp_state_name(new_state),
542                               atomic_read(&cp->refcnt));
543
544                 if (dest) {
545                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
546                             (new_state != IP_VS_TCP_S_ESTABLISHED)) {
547                                 atomic_dec(&dest->activeconns);
548                                 atomic_inc(&dest->inactconns);
549                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
550                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
551                                    (new_state == IP_VS_TCP_S_ESTABLISHED)) {
552                                 atomic_inc(&dest->activeconns);
553                                 atomic_dec(&dest->inactconns);
554                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
555                         }
556                 }
557         }
558
559         cp->timeout = pp->timeout_table[cp->state = new_state];
560 }
561
562
563 /*
564  *      Handle state transitions
565  */
566 static int
567 tcp_state_transition(struct ip_vs_conn *cp, int direction,
568                      const struct sk_buff *skb,
569                      struct ip_vs_protocol *pp)
570 {
571         struct tcphdr _tcph, *th;
572
573 #ifdef CONFIG_IP_VS_IPV6
574         int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
575 #else
576         int ihl = ip_hdrlen(skb);
577 #endif
578
579         th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
580         if (th == NULL)
581                 return 0;
582
583         spin_lock(&cp->lock);
584         set_tcp_state(pp, cp, direction, th);
585         spin_unlock(&cp->lock);
586
587         return 1;
588 }
589
590
591 /*
592  *      Hash table for TCP application incarnations
593  */
594 #define TCP_APP_TAB_BITS        4
595 #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
596 #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
597
598 static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
599 static DEFINE_SPINLOCK(tcp_app_lock);
600
601 static inline __u16 tcp_app_hashkey(__be16 port)
602 {
603         return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
604                 & TCP_APP_TAB_MASK;
605 }
606
607
608 static int tcp_register_app(struct ip_vs_app *inc)
609 {
610         struct ip_vs_app *i;
611         __u16 hash;
612         __be16 port = inc->port;
613         int ret = 0;
614
615         hash = tcp_app_hashkey(port);
616
617         spin_lock_bh(&tcp_app_lock);
618         list_for_each_entry(i, &tcp_apps[hash], p_list) {
619                 if (i->port == port) {
620                         ret = -EEXIST;
621                         goto out;
622                 }
623         }
624         list_add(&inc->p_list, &tcp_apps[hash]);
625         atomic_inc(&ip_vs_protocol_tcp.appcnt);
626
627   out:
628         spin_unlock_bh(&tcp_app_lock);
629         return ret;
630 }
631
632
633 static void
634 tcp_unregister_app(struct ip_vs_app *inc)
635 {
636         spin_lock_bh(&tcp_app_lock);
637         atomic_dec(&ip_vs_protocol_tcp.appcnt);
638         list_del(&inc->p_list);
639         spin_unlock_bh(&tcp_app_lock);
640 }
641
642
643 static int
644 tcp_app_conn_bind(struct ip_vs_conn *cp)
645 {
646         int hash;
647         struct ip_vs_app *inc;
648         int result = 0;
649
650         /* Default binding: bind app only for NAT */
651         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
652                 return 0;
653
654         /* Lookup application incarnations and bind the right one */
655         hash = tcp_app_hashkey(cp->vport);
656
657         spin_lock(&tcp_app_lock);
658         list_for_each_entry(inc, &tcp_apps[hash], p_list) {
659                 if (inc->port == cp->vport) {
660                         if (unlikely(!ip_vs_app_inc_get(inc)))
661                                 break;
662                         spin_unlock(&tcp_app_lock);
663
664                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
665                                       "%s:%u to app %s on port %u\n",
666                                       __func__,
667                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
668                                       ntohs(cp->cport),
669                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
670                                       ntohs(cp->vport),
671                                       inc->name, ntohs(inc->port));
672
673                         cp->app = inc;
674                         if (inc->init_conn)
675                                 result = inc->init_conn(inc, cp);
676                         goto out;
677                 }
678         }
679         spin_unlock(&tcp_app_lock);
680
681   out:
682         return result;
683 }
684
685
686 /*
687  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
688  */
689 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
690 {
691         spin_lock(&cp->lock);
692         cp->state = IP_VS_TCP_S_LISTEN;
693         cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
694         spin_unlock(&cp->lock);
695 }
696
697
698 static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
699 {
700         IP_VS_INIT_HASH_TABLE(tcp_apps);
701         pp->timeout_table = tcp_timeouts;
702 }
703
704
/* Protocol exit hook: intentionally empty. */
static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
	return;
}
708
709
710 struct ip_vs_protocol ip_vs_protocol_tcp = {
711         .name =                 "TCP",
712         .protocol =             IPPROTO_TCP,
713         .num_states =           IP_VS_TCP_S_LAST,
714         .dont_defrag =          0,
715         .appcnt =               ATOMIC_INIT(0),
716         .init =                 ip_vs_tcp_init,
717         .exit =                 ip_vs_tcp_exit,
718         .register_app =         tcp_register_app,
719         .unregister_app =       tcp_unregister_app,
720         .conn_schedule =        tcp_conn_schedule,
721         .conn_in_get =          tcp_conn_in_get,
722         .conn_out_get =         tcp_conn_out_get,
723         .snat_handler =         tcp_snat_handler,
724         .dnat_handler =         tcp_dnat_handler,
725         .csum_check =           tcp_csum_check,
726         .state_name =           tcp_state_name,
727         .state_transition =     tcp_state_transition,
728         .app_conn_bind =        tcp_app_conn_bind,
729         .debug_packet =         ip_vs_tcpudp_debug_packet,
730         .timeout_change =       tcp_timeout_change,
731         .set_state_timeout =    tcp_set_state_timeout,
732 };