]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/netfilter/ipvs/ip_vs_xmit.c
02ddc2b3ce2e883f230aeaacd194e2433c49d902
[linux-2.6-omap-h63xx.git] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/tcp.h>                  /* for tcphdr */
18 #include <net/ip.h>
19 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
20 #include <net/udp.h>
21 #include <net/icmp.h>                   /* for icmp_send */
22 #include <net/route.h>                  /* for ip_route_output */
23 #include <net/ipv6.h>
24 #include <net/ip6_route.h>
25 #include <linux/icmpv6.h>
26 #include <linux/netfilter.h>
27 #include <linux/netfilter_ipv4.h>
28
29 #include <net/ip_vs.h>
30
31
32 /*
33  *      Destination cache to speed up outgoing route lookup
34  */
35 static inline void
36 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
37 {
38         struct dst_entry *old_dst;
39
40         old_dst = dest->dst_cache;
41         dest->dst_cache = dst;
42         dest->dst_rtos = rtos;
43         dst_release(old_dst);
44 }
45
46 static inline struct dst_entry *
47 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
48 {
49         struct dst_entry *dst = dest->dst_cache;
50
51         if (!dst)
52                 return NULL;
53         if ((dst->obsolete
54              || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
55             dst->ops->check(dst, cookie) == NULL) {
56                 dest->dst_cache = NULL;
57                 dst_release(dst);
58                 return NULL;
59         }
60         dst_hold(dst);
61         return dst;
62 }
63
64 static struct rtable *
65 __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
66 {
67         struct rtable *rt;                      /* Route to the other host */
68         struct ip_vs_dest *dest = cp->dest;
69
70         if (dest) {
71                 spin_lock(&dest->dst_lock);
72                 if (!(rt = (struct rtable *)
73                       __ip_vs_dst_check(dest, rtos, 0))) {
74                         struct flowi fl = {
75                                 .oif = 0,
76                                 .nl_u = {
77                                         .ip4_u = {
78                                                 .daddr = dest->addr.ip,
79                                                 .saddr = 0,
80                                                 .tos = rtos, } },
81                         };
82
83                         if (ip_route_output_key(&init_net, &rt, &fl)) {
84                                 spin_unlock(&dest->dst_lock);
85                                 IP_VS_DBG_RL("ip_route_output error, "
86                                              "dest: %u.%u.%u.%u\n",
87                                              NIPQUAD(dest->addr.ip));
88                                 return NULL;
89                         }
90                         __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
91                         IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
92                                   NIPQUAD(dest->addr.ip),
93                                   atomic_read(&rt->u.dst.__refcnt), rtos);
94                 }
95                 spin_unlock(&dest->dst_lock);
96         } else {
97                 struct flowi fl = {
98                         .oif = 0,
99                         .nl_u = {
100                                 .ip4_u = {
101                                         .daddr = cp->daddr.ip,
102                                         .saddr = 0,
103                                         .tos = rtos, } },
104                 };
105
106                 if (ip_route_output_key(&init_net, &rt, &fl)) {
107                         IP_VS_DBG_RL("ip_route_output error, dest: "
108                                      "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
109                         return NULL;
110                 }
111         }
112
113         return rt;
114 }
115
#ifdef CONFIG_IP_VS_IPV6
/*
 * Find the IPv6 route used to reach the real server of @cp.
 *
 * With a bound destination the per-destination route cache is consulted
 * under dest->dst_lock; on a miss a fresh lookup is done and a clone of
 * the result is stored in the cache.  Without a destination a plain
 * lookup towards cp->daddr is done.
 *
 * ip6_route_output() reports a failed lookup through the error field of
 * the entry it returns rather than by returning NULL, so both conditions
 * are checked.  A failed entry is released here and is never placed in
 * the destination cache.
 *
 * Returns the route, or NULL if no usable route was found.
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
{
        struct rt6_info *rt;                    /* Route to the other host */
        struct ip_vs_dest *dest = cp->dest;

        if (dest) {
                spin_lock(&dest->dst_lock);
                rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
                if (!rt) {
                        struct flowi fl = {
                                .oif = 0,
                                .nl_u = {
                                        .ip6_u = {
                                                .daddr = dest->addr.in6,
                                                .saddr = {
                                                        .s6_addr32 =
                                                                { 0, 0, 0, 0 },
                                                },
                                        },
                                },
                        };

                        rt = (struct rt6_info *)ip6_route_output(&init_net,
                                                                 NULL, &fl);
                        if (!rt || rt->u.dst.error) {
                                spin_unlock(&dest->dst_lock);
                                /* drop the reference on the error entry */
                                if (rt)
                                        dst_release(&rt->u.dst);
                                IP_VS_DBG_RL("ip6_route_output error, "
                                             "dest: " NIP6_FMT "\n",
                                             NIP6(dest->addr.in6));
                                return NULL;
                        }
                        /* the cache gets its own clone of the new route */
                        __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
                        IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
                                  NIP6(dest->addr.in6),
                                  atomic_read(&rt->u.dst.__refcnt));
                }
                spin_unlock(&dest->dst_lock);
        } else {
                struct flowi fl = {
                        .oif = 0,
                        .nl_u = {
                                .ip6_u = {
                                        .daddr = cp->daddr.in6,
                                        .saddr = {
                                                .s6_addr32 = { 0, 0, 0, 0 },
                                        },
                                },
                        },
                };

                rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
                if (!rt || rt->u.dst.error) {
                        /* drop the reference on the error entry */
                        if (rt)
                                dst_release(&rt->u.dst);
                        IP_VS_DBG_RL("ip6_route_output error, dest: "
                                     NIP6_FMT "\n", NIP6(cp->daddr.in6));
                        return NULL;
                }
        }

        return rt;
}
#endif
179
180
181 /*
182  *      Release dest->dst_cache before a dest is removed
183  */
184 void
185 ip_vs_dst_reset(struct ip_vs_dest *dest)
186 {
187         struct dst_entry *old_dst;
188
189         old_dst = dest->dst_cache;
190         dest->dst_cache = NULL;
191         dst_release(old_dst);
192 }
193
/*
 * Common tail of the transmitters: set ipvs_property on the skb, call
 * skb_forward_csum() on it and pass it through the NF_INET_LOCAL_OUT
 * hook of family @pf towards the device of route @rt, with dst_output
 * as the continuation.
 */
#define IP_VS_XMIT(pf, skb, rt)                                 \
do {                                                            \
        (skb)->ipvs_property = 1;                               \
        skb_forward_csum((skb));                                \
        NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,             \
                (rt)->u.dst.dev, dst_output);                   \
} while (0)
201
202
203 /*
204  *      NULL transmitter (do nothing except return NF_ACCEPT)
205  */
206 int
207 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
208                 struct ip_vs_protocol *pp)
209 {
210         /* we do not touch skb and do not need pskb ptr */
211         return NF_ACCEPT;
212 }
213
214
215 /*
216  *      Bypass transmitter
217  *      Let packets bypass the destination when the destination is not
218  *      available, it may be only used in transparent cache cluster.
219  */
220 int
221 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
222                   struct ip_vs_protocol *pp)
223 {
224         struct rtable *rt;                      /* Route to the other host */
225         struct iphdr  *iph = ip_hdr(skb);
226         u8     tos = iph->tos;
227         int    mtu;
228         struct flowi fl = {
229                 .oif = 0,
230                 .nl_u = {
231                         .ip4_u = {
232                                 .daddr = iph->daddr,
233                                 .saddr = 0,
234                                 .tos = RT_TOS(tos), } },
235         };
236
237         EnterFunction(10);
238
239         if (ip_route_output_key(&init_net, &rt, &fl)) {
240                 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
241                              "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
242                 goto tx_error_icmp;
243         }
244
245         /* MTU checking */
246         mtu = dst_mtu(&rt->u.dst);
247         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
248                 ip_rt_put(rt);
249                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
250                 IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
251                 goto tx_error;
252         }
253
254         /*
255          * Call ip_send_check because we are not sure it is called
256          * after ip_defrag. Is copy-on-write needed?
257          */
258         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
259                 ip_rt_put(rt);
260                 return NF_STOLEN;
261         }
262         ip_send_check(ip_hdr(skb));
263
264         /* drop old route */
265         dst_release(skb->dst);
266         skb->dst = &rt->u.dst;
267
268         /* Another hack: avoid icmp_send in ip_fragment */
269         skb->local_df = 1;
270
271         IP_VS_XMIT(PF_INET, skb, rt);
272
273         LeaveFunction(10);
274         return NF_STOLEN;
275
276  tx_error_icmp:
277         dst_link_failure(skb);
278  tx_error:
279         kfree_skb(skb);
280         LeaveFunction(10);
281         return NF_STOLEN;
282 }
283
#ifdef CONFIG_IP_VS_IPV6
/*
 *      Bypass transmitter, IPv6 flavour.
 *
 *      The packet is routed towards its own destination address and sent
 *      out unchanged.  Always returns NF_STOLEN: the skb is either
 *      transmitted or freed here.
 *
 *      ip6_route_output() reports a failed lookup through the error
 *      field of the entry it returns rather than by returning NULL, so
 *      both conditions are treated as "no route" and the entry is
 *      released.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                     struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;                    /* Route to the other host */
        struct ipv6hdr  *iph = ipv6_hdr(skb);
        int    mtu;
        struct flowi fl = {
                .oif = 0,
                .nl_u = {
                        .ip6_u = {
                                .daddr = iph->daddr,
                                .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
        };

        EnterFunction(10);

        rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
        if (!rt || rt->u.dst.error) {
                /* drop the reference on the error entry */
                if (rt)
                        dst_release(&rt->u.dst);
                IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
                             "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
                goto tx_error_icmp;
        }

        /* MTU checking */
        mtu = dst_mtu(&rt->u.dst);
        if (skb->len > mtu) {
                dst_release(&rt->u.dst);
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
                goto tx_error;
        }

        /*
         * Get a private skb before its route is replaced.  Unlike the
         * IPv4 path, no header checksum call is made here.
         */
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (unlikely(skb == NULL)) {
                dst_release(&rt->u.dst);
                return NF_STOLEN;
        }

        /* drop old route */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(PF_INET6, skb, rt);

        LeaveFunction(10);
        return NF_STOLEN;

 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}
#endif
348
349 /*
350  *      NAT transmitter (only for outside-to-inside nat forwarding)
351  *      Not used for related ICMP
352  */
353 int
354 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
355                struct ip_vs_protocol *pp)
356 {
357         struct rtable *rt;              /* Route to the other host */
358         int mtu;
359         struct iphdr *iph = ip_hdr(skb);
360
361         EnterFunction(10);
362
363         /* check if it is a connection of no-client-port */
364         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
365                 __be16 _pt, *p;
366                 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
367                 if (p == NULL)
368                         goto tx_error;
369                 ip_vs_conn_fill_cport(cp, *p);
370                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
371         }
372
373         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
374                 goto tx_error_icmp;
375
376         /* MTU checking */
377         mtu = dst_mtu(&rt->u.dst);
378         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
379                 ip_rt_put(rt);
380                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
381                 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
382                 goto tx_error;
383         }
384
385         /* copy-on-write the packet before mangling it */
386         if (!skb_make_writable(skb, sizeof(struct iphdr)))
387                 goto tx_error_put;
388
389         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
390                 goto tx_error_put;
391
392         /* drop old route */
393         dst_release(skb->dst);
394         skb->dst = &rt->u.dst;
395
396         /* mangle the packet */
397         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
398                 goto tx_error;
399         ip_hdr(skb)->daddr = cp->daddr.ip;
400         ip_send_check(ip_hdr(skb));
401
402         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
403
404         /* FIXME: when application helper enlarges the packet and the length
405            is larger than the MTU of outgoing device, there will be still
406            MTU problem. */
407
408         /* Another hack: avoid icmp_send in ip_fragment */
409         skb->local_df = 1;
410
411         IP_VS_XMIT(PF_INET, skb, rt);
412
413         LeaveFunction(10);
414         return NF_STOLEN;
415
416   tx_error_icmp:
417         dst_link_failure(skb);
418   tx_error:
419         LeaveFunction(10);
420         kfree_skb(skb);
421         return NF_STOLEN;
422   tx_error_put:
423         ip_rt_put(rt);
424         goto tx_error;
425 }
426
#ifdef CONFIG_IP_VS_IPV6
/*
 *      NAT transmitter, IPv6 flavour.
 *
 *      Routes the packet to the real server, lets the protocol's
 *      dnat_handler (if any) mangle it, rewrites the destination address
 *      to cp->daddr and sends it.  Always returns NF_STOLEN: the skb is
 *      either transmitted or freed here.
 */
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;            /* Route to the other host */
        int mtu;

        EnterFunction(10);

        /* a no-client-port connection takes its cport from this packet */
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
                __be16 port_buf, *portp;

                portp = skb_header_pointer(skb, sizeof(struct ipv6hdr),
                                           sizeof(port_buf), &port_buf);
                if (portp == NULL)
                        goto tx_error;
                ip_vs_conn_fill_cport(cp, *portp);
                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*portp));
        }

        rt = __ip_vs_get_out_rt_v6(cp);
        if (rt == NULL)
                goto tx_error_icmp;

        /* refuse packets larger than the path allows */
        mtu = dst_mtu(&rt->u.dst);
        if (skb->len > mtu) {
                dst_release(&rt->u.dst);
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                IP_VS_DBG_RL_PKT(0, pp, skb, 0,
                                 "ip_vs_nat_xmit_v6(): frag needed for");
                goto tx_error;
        }

        /* the packet is about to be mangled: make it writable first */
        if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
                goto tx_error_put;

        if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
                goto tx_error_put;

        /* replace the old route of the skb with the new one */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /* mangle the packet; the skb owns the route from here on */
        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
                goto tx_error;
        ipv6_hdr(skb)->daddr = cp->daddr.in6;

        IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

        /* FIXME: when application helper enlarges the packet and the length
           is larger than the MTU of outgoing device, there will be still
           MTU problem. */

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(PF_INET6, skb, rt);

        LeaveFunction(10);
        return NF_STOLEN;

tx_error_icmp:
        dst_link_failure(skb);
        goto tx_error;
tx_error_put:
        dst_release(&rt->u.dst);
tx_error:
        LeaveFunction(10);
        kfree_skb(skb);
        return NF_STOLEN;
}
#endif
503
504
505 /*
506  *   IP Tunneling transmitter
507  *
508  *   This function encapsulates the packet in a new IP packet, its
509  *   destination will be set to cp->daddr. Most code of this function
510  *   is taken from ipip.c.
511  *
512  *   It is used in VS/TUN cluster. The load balancer selects a real
513  *   server from a cluster based on a scheduling algorithm,
514  *   encapsulates the request packet and forwards it to the selected
515  *   server. For example, all real servers are configured with
516  *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
517  *   the encapsulated packet, it will decapsulate the packet, processe
518  *   the request and return the response packets directly to the client
519  *   without passing the load balancer. This can greatly increase the
520  *   scalability of virtual server.
521  *
522  *   Used for ANY protocol
523  */
524 int
525 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
526                   struct ip_vs_protocol *pp)
527 {
528         struct rtable *rt;                      /* Route to the other host */
529         struct net_device *tdev;                /* Device to other host */
530         struct iphdr  *old_iph = ip_hdr(skb);
531         u8     tos = old_iph->tos;
532         __be16 df = old_iph->frag_off;
533         sk_buff_data_t old_transport_header = skb->transport_header;
534         struct iphdr  *iph;                     /* Our new IP header */
535         unsigned int max_headroom;              /* The extra header space needed */
536         int    mtu;
537
538         EnterFunction(10);
539
540         if (skb->protocol != htons(ETH_P_IP)) {
541                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
542                              "ETH_P_IP: %d, skb protocol: %d\n",
543                              htons(ETH_P_IP), skb->protocol);
544                 goto tx_error;
545         }
546
547         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
548                 goto tx_error_icmp;
549
550         tdev = rt->u.dst.dev;
551
552         mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
553         if (mtu < 68) {
554                 ip_rt_put(rt);
555                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
556                 goto tx_error;
557         }
558         if (skb->dst)
559                 skb->dst->ops->update_pmtu(skb->dst, mtu);
560
561         df |= (old_iph->frag_off & htons(IP_DF));
562
563         if ((old_iph->frag_off & htons(IP_DF))
564             && mtu < ntohs(old_iph->tot_len)) {
565                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
566                 ip_rt_put(rt);
567                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
568                 goto tx_error;
569         }
570
571         /*
572          * Okay, now see if we can stuff it in the buffer as-is.
573          */
574         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
575
576         if (skb_headroom(skb) < max_headroom
577             || skb_cloned(skb) || skb_shared(skb)) {
578                 struct sk_buff *new_skb =
579                         skb_realloc_headroom(skb, max_headroom);
580                 if (!new_skb) {
581                         ip_rt_put(rt);
582                         kfree_skb(skb);
583                         IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
584                         return NF_STOLEN;
585                 }
586                 kfree_skb(skb);
587                 skb = new_skb;
588                 old_iph = ip_hdr(skb);
589         }
590
591         skb->transport_header = old_transport_header;
592
593         /* fix old IP header checksum */
594         ip_send_check(old_iph);
595
596         skb_push(skb, sizeof(struct iphdr));
597         skb_reset_network_header(skb);
598         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
599
600         /* drop old route */
601         dst_release(skb->dst);
602         skb->dst = &rt->u.dst;
603
604         /*
605          *      Push down and install the IPIP header.
606          */
607         iph                     =       ip_hdr(skb);
608         iph->version            =       4;
609         iph->ihl                =       sizeof(struct iphdr)>>2;
610         iph->frag_off           =       df;
611         iph->protocol           =       IPPROTO_IPIP;
612         iph->tos                =       tos;
613         iph->daddr              =       rt->rt_dst;
614         iph->saddr              =       rt->rt_src;
615         iph->ttl                =       old_iph->ttl;
616         ip_select_ident(iph, &rt->u.dst, NULL);
617
618         /* Another hack: avoid icmp_send in ip_fragment */
619         skb->local_df = 1;
620
621         ip_local_out(skb);
622
623         LeaveFunction(10);
624
625         return NF_STOLEN;
626
627   tx_error_icmp:
628         dst_link_failure(skb);
629   tx_error:
630         kfree_skb(skb);
631         LeaveFunction(10);
632         return NF_STOLEN;
633 }
634
#ifdef CONFIG_IP_VS_IPV6
/*
 *   IPv6 tunneling transmitter
 *
 *   Encapsulates the packet in a new IPv6 header whose destination is
 *   taken from the route to the real server and whose source is
 *   cp->vaddr, then sends it with ip6_local_out().
 *
 *   Always returns NF_STOLEN: the skb is either transmitted or freed
 *   here.
 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                     struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;            /* Route to the other host */
        struct net_device *tdev;        /* Device to other host */
        struct ipv6hdr  *old_iph = ipv6_hdr(skb);
        sk_buff_data_t old_transport_header = skb->transport_header;
        struct ipv6hdr  *iph;           /* Our new IP header */
        unsigned int max_headroom;      /* The extra header space needed */
        int    mtu;

        EnterFunction(10);

        if (skb->protocol != htons(ETH_P_IPV6)) {
                IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
                             "ETH_P_IPV6: %d, skb protocol: %d\n",
                             htons(ETH_P_IPV6), skb->protocol);
                goto tx_error;
        }

        rt = __ip_vs_get_out_rt_v6(cp);
        if (!rt)
                goto tx_error_icmp;

        tdev = rt->u.dst.dev;

        /* room left for the inner packet once the outer header is added */
        mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
        /* TODO IPv6: do we need this check in IPv6? */
        if (mtu < 1280) {
                dst_release(&rt->u.dst);
                IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
                goto tx_error;
        }
        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                dst_release(&rt->u.dst);
                IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
                goto tx_error;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

        if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb =
                        skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        dst_release(&rt->u.dst);
                        kfree_skb(skb);
                        IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
                        return NF_STOLEN;
                }
                kfree_skb(skb);
                skb = new_skb;
                /* the header now lives in the new buffer */
                old_iph = ipv6_hdr(skb);
        }

        skb->transport_header = old_transport_header;

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

        /* drop old route */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */
        iph                     =       ipv6_hdr(skb);
        iph->version            =       6;
        iph->nexthdr            =       IPPROTO_IPV6;
        /*
         * The outer payload is the whole inner packet: inner payload
         * plus the inner header.  payload_len is big-endian, so the
         * addition is done in host order, and the size added is that of
         * the header itself, not of the pointer to it.
         */
        iph->payload_len        =       htons(ntohs(old_iph->payload_len) +
                                              sizeof(*old_iph));
        iph->priority           =       old_iph->priority;
        memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
        iph->daddr              =       rt->rt6i_dst.addr;
        iph->saddr              =       cp->vaddr.in6; /* rt->rt6i_src.addr; */
        iph->hop_limit          =       old_iph->hop_limit;

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        ip6_local_out(skb);

        LeaveFunction(10);

        return NF_STOLEN;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}
#endif
740
741
742 /*
743  *      Direct Routing transmitter
744  *      Used for ANY protocol
745  */
746 int
747 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
748               struct ip_vs_protocol *pp)
749 {
750         struct rtable *rt;                      /* Route to the other host */
751         struct iphdr  *iph = ip_hdr(skb);
752         int    mtu;
753
754         EnterFunction(10);
755
756         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
757                 goto tx_error_icmp;
758
759         /* MTU checking */
760         mtu = dst_mtu(&rt->u.dst);
761         if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
762                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
763                 ip_rt_put(rt);
764                 IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
765                 goto tx_error;
766         }
767
768         /*
769          * Call ip_send_check because we are not sure it is called
770          * after ip_defrag. Is copy-on-write needed?
771          */
772         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
773                 ip_rt_put(rt);
774                 return NF_STOLEN;
775         }
776         ip_send_check(ip_hdr(skb));
777
778         /* drop old route */
779         dst_release(skb->dst);
780         skb->dst = &rt->u.dst;
781
782         /* Another hack: avoid icmp_send in ip_fragment */
783         skb->local_df = 1;
784
785         IP_VS_XMIT(PF_INET, skb, rt);
786
787         LeaveFunction(10);
788         return NF_STOLEN;
789
790   tx_error_icmp:
791         dst_link_failure(skb);
792   tx_error:
793         kfree_skb(skb);
794         LeaveFunction(10);
795         return NF_STOLEN;
796 }
797
#ifdef CONFIG_IP_VS_IPV6
/*
 *      Direct Routing transmitter, IPv6 flavour.
 *
 *      The packet is sent unchanged over the route to the real server.
 *      Always returns NF_STOLEN: the skb is either transmitted or freed
 *      here.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                 struct ip_vs_protocol *pp)
{
        struct rt6_info *rt;                    /* Route to the other host */
        int    mtu;

        EnterFunction(10);

        rt = __ip_vs_get_out_rt_v6(cp);
        if (rt == NULL)
                goto tx_error_icmp;

        /* refuse packets larger than the path allows */
        mtu = dst_mtu(&rt->u.dst);
        if (skb->len > mtu) {
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                dst_release(&rt->u.dst);
                IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
                goto tx_error;
        }

        /*
         * Get a private skb before its route is replaced.  Unlike the
         * IPv4 path, no header checksum call is made here.
         */
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (unlikely(skb == NULL)) {
                dst_release(&rt->u.dst);
                return NF_STOLEN;
        }

        /* replace the old route of the skb with the new one */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(PF_INET6, skb, rt);

        LeaveFunction(10);
        return NF_STOLEN;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        kfree_skb(skb);
        LeaveFunction(10);
        return NF_STOLEN;
}
#endif
851
852
853 /*
854  *      ICMP packet transmitter
855  *      called by the ip_vs_in_icmp
856  */
857 int
858 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
859                 struct ip_vs_protocol *pp, int offset)
860 {
861         struct rtable   *rt;    /* Route to the other host */
862         int mtu;
863         int rc;
864
865         EnterFunction(10);
866
867         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
868            forwarded directly here, because there is no need to
869            translate address/port back */
870         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
871                 if (cp->packet_xmit)
872                         rc = cp->packet_xmit(skb, cp, pp);
873                 else
874                         rc = NF_ACCEPT;
875                 /* do not touch skb anymore */
876                 atomic_inc(&cp->in_pkts);
877                 goto out;
878         }
879
880         /*
881          * mangle and send the packet here (only for VS/NAT)
882          */
883
884         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
885                 goto tx_error_icmp;
886
887         /* MTU checking */
888         mtu = dst_mtu(&rt->u.dst);
889         if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
890                 ip_rt_put(rt);
891                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
892                 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
893                 goto tx_error;
894         }
895
896         /* copy-on-write the packet before mangling it */
897         if (!skb_make_writable(skb, offset))
898                 goto tx_error_put;
899
900         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
901                 goto tx_error_put;
902
903         /* drop the old route when skb is not shared */
904         dst_release(skb->dst);
905         skb->dst = &rt->u.dst;
906
907         ip_vs_nat_icmp(skb, pp, cp, 0);
908
909         /* Another hack: avoid icmp_send in ip_fragment */
910         skb->local_df = 1;
911
912         IP_VS_XMIT(PF_INET, skb, rt);
913
914         rc = NF_STOLEN;
915         goto out;
916
917   tx_error_icmp:
918         dst_link_failure(skb);
919   tx_error:
920         dev_kfree_skb(skb);
921         rc = NF_STOLEN;
922   out:
923         LeaveFunction(10);
924         return rc;
925   tx_error_put:
926         ip_rt_put(rt);
927         goto tx_error;
928 }
929
#ifdef CONFIG_IP_VS_IPV6
/*
 *      ICMPv6 packet transmitter
 *
 *      For every forwarding method other than VS/NAT the packet is
 *      handed to cp->packet_xmit (NF_ACCEPT is returned when none is
 *      set).  For VS/NAT the packet is made writable up to @offset,
 *      mangled by ip_vs_nat_icmp_v6() and sent over the route to the
 *      real server; NF_STOLEN is returned in that case, on success as
 *      well as on error.
 */
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp, int offset)
{
        struct rt6_info *rt;    /* Route to the other host */
        int mtu;
        int rc;

        EnterFunction(10);

        /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
           forwarded directly here, because there is no need to
           translate address/port back */
        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
                rc = cp->packet_xmit ? cp->packet_xmit(skb, cp, pp)
                                     : NF_ACCEPT;
                /* do not touch skb anymore */
                atomic_inc(&cp->in_pkts);
                goto out;
        }

        /*
         * mangle and send the packet here (only for VS/NAT)
         */

        rt = __ip_vs_get_out_rt_v6(cp);
        if (rt == NULL)
                goto tx_error_icmp;

        /* refuse packets larger than the path allows */
        mtu = dst_mtu(&rt->u.dst);
        if (skb->len > mtu) {
                dst_release(&rt->u.dst);
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
                goto tx_error;
        }

        /* the packet is about to be mangled: make it writable first */
        if (!skb_make_writable(skb, offset))
                goto tx_error_put;

        if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
                goto tx_error_put;

        /* drop the old route when skb is not shared */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        ip_vs_nat_icmp_v6(skb, pp, cp, 0);

        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;

        IP_VS_XMIT(PF_INET6, skb, rt);

        rc = NF_STOLEN;
        goto out;

tx_error_icmp:
        dst_link_failure(skb);
        goto tx_error;
tx_error_put:
        dst_release(&rt->u.dst);
tx_error:
        dev_kfree_skb(skb);
        rc = NF_STOLEN;
out:
        LeaveFunction(10);
        return rc;
}
#endif