]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/ipv4/ipvs/ip_vs_proto_udp.c
e3ee26bd1de730189b7d792f2af7083f9ccf395a
[linux-2.6-omap-h63xx.git] / net / ipv4 / ipvs / ip_vs_proto_udp.c
1 /*
2  * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/in.h>
17 #include <linux/ip.h>
18 #include <linux/kernel.h>
19 #include <linux/netfilter.h>
20 #include <linux/netfilter_ipv4.h>
21 #include <linux/udp.h>
22
23 #include <net/ip_vs.h>
24 #include <net/ip.h>
25
26 static struct ip_vs_conn *
27 udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
28                 const struct ip_vs_iphdr *iph, unsigned int proto_off,
29                 int inverse)
30 {
31         struct ip_vs_conn *cp;
32         __be16 _ports[2], *pptr;
33
34         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
35         if (pptr == NULL)
36                 return NULL;
37
38         if (likely(!inverse)) {
39                 cp = ip_vs_conn_in_get(af, iph->protocol,
40                                        &iph->saddr, pptr[0],
41                                        &iph->daddr, pptr[1]);
42         } else {
43                 cp = ip_vs_conn_in_get(af, iph->protocol,
44                                        &iph->daddr, pptr[1],
45                                        &iph->saddr, pptr[0]);
46         }
47
48         return cp;
49 }
50
51
52 static struct ip_vs_conn *
53 udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
54                  const struct ip_vs_iphdr *iph, unsigned int proto_off,
55                  int inverse)
56 {
57         struct ip_vs_conn *cp;
58         __be16 _ports[2], *pptr;
59
60         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
61         if (pptr == NULL)
62                 return NULL;
63
64         if (likely(!inverse)) {
65                 cp = ip_vs_conn_out_get(af, iph->protocol,
66                                         &iph->saddr, pptr[0],
67                                         &iph->daddr, pptr[1]);
68         } else {
69                 cp = ip_vs_conn_out_get(af, iph->protocol,
70                                         &iph->daddr, pptr[1],
71                                         &iph->saddr, pptr[0]);
72         }
73
74         return cp;
75 }
76
77
78 static int
79 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
80                   int *verdict, struct ip_vs_conn **cpp)
81 {
82         struct ip_vs_service *svc;
83         struct udphdr _udph, *uh;
84         struct ip_vs_iphdr iph;
85
86         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
87
88         uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
89         if (uh == NULL) {
90                 *verdict = NF_DROP;
91                 return 0;
92         }
93
94         svc = ip_vs_service_get(af, skb->mark, iph.protocol,
95                                 &iph.daddr, uh->dest);
96         if (svc) {
97                 if (ip_vs_todrop()) {
98                         /*
99                          * It seems that we are very loaded.
100                          * We have to drop this packet :(
101                          */
102                         ip_vs_service_put(svc);
103                         *verdict = NF_DROP;
104                         return 0;
105                 }
106
107                 /*
108                  * Let the virtual server select a real server for the
109                  * incoming connection, and create a connection entry.
110                  */
111                 *cpp = ip_vs_schedule(svc, skb);
112                 if (!*cpp) {
113                         *verdict = ip_vs_leave(svc, skb, pp);
114                         return 0;
115                 }
116                 ip_vs_service_put(svc);
117         }
118         return 1;
119 }
120
121
122 static inline void
123 udp_fast_csum_update(int af, struct udphdr *uhdr,
124                      const union nf_inet_addr *oldip,
125                      const union nf_inet_addr *newip,
126                      __be16 oldport, __be16 newport)
127 {
128 #ifdef CONFIG_IP_VS_IPV6
129         if (af == AF_INET6)
130                 uhdr->check =
131                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
132                                          ip_vs_check_diff2(oldport, newport,
133                                                 ~csum_unfold(uhdr->check))));
134         else
135 #endif
136                 uhdr->check =
137                         csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
138                                          ip_vs_check_diff2(oldport, newport,
139                                                 ~csum_unfold(uhdr->check))));
140         if (!uhdr->check)
141                 uhdr->check = CSUM_MANGLED_0;
142 }
143
144 static inline void
145 udp_partial_csum_update(int af, struct udphdr *uhdr,
146                      const union nf_inet_addr *oldip,
147                      const union nf_inet_addr *newip,
148                      __be16 oldlen, __be16 newlen)
149 {
150 #ifdef CONFIG_IP_VS_IPV6
151         if (af == AF_INET6)
152                 uhdr->check =
153                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
154                                          ip_vs_check_diff2(oldlen, newlen,
155                                                 ~csum_unfold(uhdr->check))));
156         else
157 #endif
158         uhdr->check =
159                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
160                                 ip_vs_check_diff2(oldlen, newlen,
161                                                 ~csum_unfold(uhdr->check))));
162 }
163
164
165 static int
166 udp_snat_handler(struct sk_buff *skb,
167                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
168 {
169         struct udphdr *udph;
170         unsigned int udphoff;
171         int oldlen;
172
173 #ifdef CONFIG_IP_VS_IPV6
174         if (cp->af == AF_INET6)
175                 udphoff = sizeof(struct ipv6hdr);
176         else
177 #endif
178                 udphoff = ip_hdrlen(skb);
179         oldlen = skb->len - udphoff;
180
181         /* csum_check requires unshared skb */
182         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
183                 return 0;
184
185         if (unlikely(cp->app != NULL)) {
186                 /* Some checks before mangling */
187                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
188                         return 0;
189
190                 /*
191                  *      Call application helper if needed
192                  */
193                 if (!ip_vs_app_pkt_out(cp, skb))
194                         return 0;
195         }
196
197         udph = (void *)skb_network_header(skb) + udphoff;
198         udph->source = cp->vport;
199
200         /*
201          *      Adjust UDP checksums
202          */
203         if (skb->ip_summed == CHECKSUM_PARTIAL) {
204                 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
205                                         htonl(oldlen),
206                                         htonl(skb->len - udphoff));
207         } else if (!cp->app && (udph->check != 0)) {
208                 /* Only port and addr are changed, do fast csum update */
209                 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
210                                      cp->dport, cp->vport);
211                 if (skb->ip_summed == CHECKSUM_COMPLETE)
212                         skb->ip_summed = CHECKSUM_NONE;
213         } else {
214                 /* full checksum calculation */
215                 udph->check = 0;
216                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
217 #ifdef CONFIG_IP_VS_IPV6
218                 if (cp->af == AF_INET6)
219                         udph->check = csum_ipv6_magic(&cp->vaddr.in6,
220                                                       &cp->caddr.in6,
221                                                       skb->len - udphoff,
222                                                       cp->protocol, skb->csum);
223                 else
224 #endif
225                         udph->check = csum_tcpudp_magic(cp->vaddr.ip,
226                                                         cp->caddr.ip,
227                                                         skb->len - udphoff,
228                                                         cp->protocol,
229                                                         skb->csum);
230                 if (udph->check == 0)
231                         udph->check = CSUM_MANGLED_0;
232                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
233                           pp->name, udph->check,
234                           (char*)&(udph->check) - (char*)udph);
235         }
236         return 1;
237 }
238
239
240 static int
241 udp_dnat_handler(struct sk_buff *skb,
242                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
243 {
244         struct udphdr *udph;
245         unsigned int udphoff;
246         int oldlen;
247
248 #ifdef CONFIG_IP_VS_IPV6
249         if (cp->af == AF_INET6)
250                 udphoff = sizeof(struct ipv6hdr);
251         else
252 #endif
253                 udphoff = ip_hdrlen(skb);
254         oldlen = skb->len - udphoff;
255
256         /* csum_check requires unshared skb */
257         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
258                 return 0;
259
260         if (unlikely(cp->app != NULL)) {
261                 /* Some checks before mangling */
262                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
263                         return 0;
264
265                 /*
266                  *      Attempt ip_vs_app call.
267                  *      It will fix ip_vs_conn
268                  */
269                 if (!ip_vs_app_pkt_in(cp, skb))
270                         return 0;
271         }
272
273         udph = (void *)skb_network_header(skb) + udphoff;
274         udph->dest = cp->dport;
275
276         /*
277          *      Adjust UDP checksums
278          */
279         if (skb->ip_summed == CHECKSUM_PARTIAL) {
280                 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
281                                         htonl(oldlen),
282                                         htonl(skb->len - udphoff));
283         } else if (!cp->app && (udph->check != 0)) {
284                 /* Only port and addr are changed, do fast csum update */
285                 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
286                                      cp->vport, cp->dport);
287                 if (skb->ip_summed == CHECKSUM_COMPLETE)
288                         skb->ip_summed = CHECKSUM_NONE;
289         } else {
290                 /* full checksum calculation */
291                 udph->check = 0;
292                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
293 #ifdef CONFIG_IP_VS_IPV6
294                 if (cp->af == AF_INET6)
295                         udph->check = csum_ipv6_magic(&cp->caddr.in6,
296                                                       &cp->daddr.in6,
297                                                       skb->len - udphoff,
298                                                       cp->protocol, skb->csum);
299                 else
300 #endif
301                         udph->check = csum_tcpudp_magic(cp->caddr.ip,
302                                                         cp->daddr.ip,
303                                                         skb->len - udphoff,
304                                                         cp->protocol,
305                                                         skb->csum);
306                 if (udph->check == 0)
307                         udph->check = CSUM_MANGLED_0;
308                 skb->ip_summed = CHECKSUM_UNNECESSARY;
309         }
310         return 1;
311 }
312
313
314 static int
315 udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
316 {
317         struct udphdr _udph, *uh;
318         unsigned int udphoff;
319
320 #ifdef CONFIG_IP_VS_IPV6
321         if (af == AF_INET6)
322                 udphoff = sizeof(struct ipv6hdr);
323         else
324 #endif
325                 udphoff = ip_hdrlen(skb);
326
327         uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
328         if (uh == NULL)
329                 return 0;
330
331         if (uh->check != 0) {
332                 switch (skb->ip_summed) {
333                 case CHECKSUM_NONE:
334                         skb->csum = skb_checksum(skb, udphoff,
335                                                  skb->len - udphoff, 0);
336                 case CHECKSUM_COMPLETE:
337 #ifdef CONFIG_IP_VS_IPV6
338                         if (af == AF_INET6) {
339                                 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
340                                                     &ipv6_hdr(skb)->daddr,
341                                                     skb->len - udphoff,
342                                                     ipv6_hdr(skb)->nexthdr,
343                                                     skb->csum)) {
344                                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
345                                                          "Failed checksum for");
346                                         return 0;
347                                 }
348                         } else
349 #endif
350                                 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
351                                                       ip_hdr(skb)->daddr,
352                                                       skb->len - udphoff,
353                                                       ip_hdr(skb)->protocol,
354                                                       skb->csum)) {
355                                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
356                                                          "Failed checksum for");
357                                         return 0;
358                                 }
359                         break;
360                 default:
361                         /* No need to checksum. */
362                         break;
363                 }
364         }
365         return 1;
366 }
367
368
369 /*
370  *      Note: the caller guarantees that only one of register_app,
371  *      unregister_app or app_conn_bind is called each time.
372  */
373
374 #define UDP_APP_TAB_BITS        4
375 #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
376 #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
377
378 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
379 static DEFINE_SPINLOCK(udp_app_lock);
380
381 static inline __u16 udp_app_hashkey(__be16 port)
382 {
383         return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
384                 & UDP_APP_TAB_MASK;
385 }
386
387
388 static int udp_register_app(struct ip_vs_app *inc)
389 {
390         struct ip_vs_app *i;
391         __u16 hash;
392         __be16 port = inc->port;
393         int ret = 0;
394
395         hash = udp_app_hashkey(port);
396
397
398         spin_lock_bh(&udp_app_lock);
399         list_for_each_entry(i, &udp_apps[hash], p_list) {
400                 if (i->port == port) {
401                         ret = -EEXIST;
402                         goto out;
403                 }
404         }
405         list_add(&inc->p_list, &udp_apps[hash]);
406         atomic_inc(&ip_vs_protocol_udp.appcnt);
407
408   out:
409         spin_unlock_bh(&udp_app_lock);
410         return ret;
411 }
412
413
414 static void
415 udp_unregister_app(struct ip_vs_app *inc)
416 {
417         spin_lock_bh(&udp_app_lock);
418         atomic_dec(&ip_vs_protocol_udp.appcnt);
419         list_del(&inc->p_list);
420         spin_unlock_bh(&udp_app_lock);
421 }
422
423
424 static int udp_app_conn_bind(struct ip_vs_conn *cp)
425 {
426         int hash;
427         struct ip_vs_app *inc;
428         int result = 0;
429
430         /* Default binding: bind app only for NAT */
431         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
432                 return 0;
433
434         /* Lookup application incarnations and bind the right one */
435         hash = udp_app_hashkey(cp->vport);
436
437         spin_lock(&udp_app_lock);
438         list_for_each_entry(inc, &udp_apps[hash], p_list) {
439                 if (inc->port == cp->vport) {
440                         if (unlikely(!ip_vs_app_inc_get(inc)))
441                                 break;
442                         spin_unlock(&udp_app_lock);
443
444                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
445                                       "%s:%u to app %s on port %u\n",
446                                       __func__,
447                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
448                                       ntohs(cp->cport),
449                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
450                                       ntohs(cp->vport),
451                                       inc->name, ntohs(inc->port));
452
453                         cp->app = inc;
454                         if (inc->init_conn)
455                                 result = inc->init_conn(inc, cp);
456                         goto out;
457                 }
458         }
459         spin_unlock(&udp_app_lock);
460
461   out:
462         return result;
463 }
464
465
466 static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
467         [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
468         [IP_VS_UDP_S_LAST]              =       2*HZ,
469 };
470
471 static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
472         [IP_VS_UDP_S_NORMAL]            =       "UDP",
473         [IP_VS_UDP_S_LAST]              =       "BUG!",
474 };
475
476
477 static int
478 udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
479 {
480         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
481                                        udp_state_name_table, sname, to);
482 }
483
484 static const char * udp_state_name(int state)
485 {
486         if (state >= IP_VS_UDP_S_LAST)
487                 return "ERR!";
488         return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
489 }
490
491 static int
492 udp_state_transition(struct ip_vs_conn *cp, int direction,
493                      const struct sk_buff *skb,
494                      struct ip_vs_protocol *pp)
495 {
496         cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
497         return 1;
498 }
499
500 static void udp_init(struct ip_vs_protocol *pp)
501 {
502         IP_VS_INIT_HASH_TABLE(udp_apps);
503         pp->timeout_table = udp_timeouts;
504 }
505
/*
 *      Protocol exit hook: intentionally empty, nothing is torn down.
 */
static void udp_exit(struct ip_vs_protocol *pp)
{
        /* nothing to do */
}
509
510
511 struct ip_vs_protocol ip_vs_protocol_udp = {
512         .name =                 "UDP",
513         .protocol =             IPPROTO_UDP,
514         .num_states =           IP_VS_UDP_S_LAST,
515         .dont_defrag =          0,
516         .init =                 udp_init,
517         .exit =                 udp_exit,
518         .conn_schedule =        udp_conn_schedule,
519         .conn_in_get =          udp_conn_in_get,
520         .conn_out_get =         udp_conn_out_get,
521         .snat_handler =         udp_snat_handler,
522         .dnat_handler =         udp_dnat_handler,
523         .csum_check =           udp_csum_check,
524         .state_transition =     udp_state_transition,
525         .state_name =           udp_state_name,
526         .register_app =         udp_register_app,
527         .unregister_app =       udp_unregister_app,
528         .app_conn_bind =        udp_app_conn_bind,
529         .debug_packet =         ip_vs_tcpudp_debug_packet,
530         .timeout_change =       NULL,
531         .set_state_timeout =    udp_set_state_timeout,
532 };