]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - drivers/net/tun.c
tun: Fix/rewrite packet filtering logic
[linux-2.6-omap-h63xx.git] / drivers / net / tun.c
1 /*
2  *  TUN - Universal TUN/TAP device driver.
3  *  Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  *  GNU General Public License for more details.
14  *
15  *  $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
16  */
17
18 /*
19  *  Changes:
20  *
21  *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
22  *    Add TUNSETLINK ioctl to set the link encapsulation
23  *
24  *  Mark Smith <markzzzsmith@yahoo.com.au>
25  *    Use random_ether_addr() for tap MAC address.
26  *
27  *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
28  *    Fixes in packet dropping, queue length setting and queue wakeup.
29  *    Increased default tx queue length.
30  *    Added ethtool API.
31  *    Minor cleanups
32  *
33  *  Daniel Podlejski <underley@underley.eu.org>
34  *    Modifications for 2.3.99-pre5 kernel.
35  */
36
37 #define DRV_NAME        "tun"
38 #define DRV_VERSION     "1.6"
39 #define DRV_DESCRIPTION "Universal TUN/TAP device driver"
40 #define DRV_COPYRIGHT   "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
41
42 #include <linux/module.h>
43 #include <linux/errno.h>
44 #include <linux/kernel.h>
45 #include <linux/major.h>
46 #include <linux/slab.h>
47 #include <linux/poll.h>
48 #include <linux/fcntl.h>
49 #include <linux/init.h>
50 #include <linux/skbuff.h>
51 #include <linux/netdevice.h>
52 #include <linux/etherdevice.h>
53 #include <linux/miscdevice.h>
54 #include <linux/ethtool.h>
55 #include <linux/rtnetlink.h>
56 #include <linux/if.h>
57 #include <linux/if_arp.h>
58 #include <linux/if_ether.h>
59 #include <linux/if_tun.h>
60 #include <linux/crc32.h>
61 #include <linux/nsproxy.h>
62 #include <linux/virtio_net.h>
63 #include <net/net_namespace.h>
64 #include <net/netns/generic.h>
65
66 #include <asm/system.h>
67 #include <asm/uaccess.h>
68
69 /* Uncomment to enable debugging */
70 /* #define TUN_DEBUG 1 */
71
#ifdef TUN_DEBUG
/* Module-wide debug level; DBG1 output is produced when it equals 2. */
static int debug;

/*
 * DBG  - printk when the per-device tun->debug flag is set; the call site
 *        must have a local "tun" pointer in scope.
 * DBG1 - printk when the module-wide debug level is 2.
 *
 * Both are wrapped in do { } while (0) so that each use is exactly one
 * statement and cannot swallow an "else" following the call site.
 */
#define DBG(a...)  do { if (tun->debug) printk(a); } while (0)
#define DBG1(a...) do { if (debug == 2) printk(a); } while (0)
#else
/* Debugging disabled: the arguments are discarded and never evaluated. */
#define DBG(a...)  do { } while (0)
#define DBG1(a...) do { } while (0)
#endif
81
82 #define FLT_EXACT_COUNT 8
83 struct tap_filter {
84         unsigned int    count;    /* Number of addrs. Zero means disabled */
85         u32             mask[2];  /* Mask of the hashed addrs */
86         unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
87 };
88
89 struct tun_struct {
90         struct list_head        list;
91         unsigned int            flags;
92         int                     attached;
93         uid_t                   owner;
94         gid_t                   group;
95
96         wait_queue_head_t       read_wait;
97         struct sk_buff_head     readq;
98
99         struct net_device       *dev;
100         struct fasync_struct    *fasync;
101
102         struct tap_filter       txflt;
103
104 #ifdef TUN_DEBUG
105         int debug;
106 #endif
107 };
108
109 /* TAP filtering */
110 static void addr_hash_set(u32 *mask, const u8 *addr)
111 {
112         int n = ether_crc(ETH_ALEN, addr) >> 26;
113         mask[n >> 5] |= (1 << (n & 31));
114 }
115
116 static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
117 {
118         int n = ether_crc(ETH_ALEN, addr) >> 26;
119         return mask[n >> 5] & (1 << (n & 31));
120 }
121
122 static int update_filter(struct tap_filter *filter, void __user *arg)
123 {
124         struct { u8 u[ETH_ALEN]; } *addr;
125         struct tun_filter uf;
126         int err, alen, n, nexact;
127
128         if (copy_from_user(&uf, arg, sizeof(uf)))
129                 return -EFAULT;
130
131         if (!uf.count) {
132                 /* Disabled */
133                 filter->count = 0;
134                 return 0;
135         }
136
137         alen = ETH_ALEN * uf.count;
138         addr = kmalloc(alen, GFP_KERNEL);
139         if (!addr)
140                 return -ENOMEM;
141
142         if (copy_from_user(addr, arg + sizeof(uf), alen)) {
143                 err = -EFAULT;
144                 goto done;
145         }
146
147         /* The filter is updated without holding any locks. Which is
148          * perfectly safe. We disable it first and in the worst
149          * case we'll accept a few undesired packets. */
150         filter->count = 0;
151         wmb();
152
153         /* Use first set of addresses as an exact filter */
154         for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
155                 memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
156
157         nexact = n;
158
159         /* The rest is hashed */
160         memset(filter->mask, 0, sizeof(filter->mask));
161         for (; n < uf.count; n++)
162                 addr_hash_set(filter->mask, addr[n].u);
163
164         /* For ALLMULTI just set the mask to all ones.
165          * This overrides the mask populated above. */
166         if ((uf.flags & TUN_FLT_ALLMULTI))
167                 memset(filter->mask, ~0, sizeof(filter->mask));
168
169         /* Now enable the filter */
170         wmb();
171         filter->count = nexact;
172
173         /* Return the number of exact filters */
174         err = nexact;
175
176 done:
177         kfree(addr);
178         return err;
179 }
180
181 /* Returns: 0 - drop, !=0 - accept */
182 static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
183 {
184         /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
185          * at this point. */
186         struct ethhdr *eh = (struct ethhdr *) skb->data;
187         int i;
188
189         /* Exact match */
190         for (i = 0; i < filter->count; i++)
191                 if (!compare_ether_addr(eh->h_dest, filter->addr[i]))
192                         return 1;
193
194         /* Inexact match (multicast only) */
195         if (is_multicast_ether_addr(eh->h_dest))
196                 return addr_hash_test(filter->mask, eh->h_dest);
197
198         return 0;
199 }
200
201 /*
202  * Checks whether the packet is accepted or not.
203  * Returns: 0 - drop, !=0 - accept
204  */
205 static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
206 {
207         if (!filter->count)
208                 return 1;
209
210         return run_filter(filter, skb);
211 }
212
213 /* Network device part of the driver */
214
215 static unsigned int tun_net_id;
216 struct tun_net {
217         struct list_head dev_list;
218 };
219
220 static const struct ethtool_ops tun_ethtool_ops;
221
/* Net device open: allow the stack to hand us packets. Always returns 0. */
static int tun_net_open(struct net_device *dev)
{
        netif_start_queue(dev);

        return 0;
}
228
/* Net device close: stop the transmit queue. Always returns 0. */
static int tun_net_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
235
236 /* Net device start xmit */
237 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
238 {
239         struct tun_struct *tun = netdev_priv(dev);
240
241         DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
242
243         /* Drop packet if interface is not attached */
244         if (!tun->attached)
245                 goto drop;
246
247         /* Drop if the filter does not like it.
248          * This is a noop if the filter is disabled.
249          * Filter can be enabled only for the TAP devices. */
250         if (!check_filter(&tun->txflt, skb))
251                 goto drop;
252
253         if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) {
254                 if (!(tun->flags & TUN_ONE_QUEUE)) {
255                         /* Normal queueing mode. */
256                         /* Packet scheduler handles dropping of further packets. */
257                         netif_stop_queue(dev);
258
259                         /* We won't see all dropped packets individually, so overrun
260                          * error is more appropriate. */
261                         dev->stats.tx_fifo_errors++;
262                 } else {
263                         /* Single queue mode.
264                          * Driver handles dropping of all packets itself. */
265                         goto drop;
266                 }
267         }
268
269         /* Enqueue packet */
270         skb_queue_tail(&tun->readq, skb);
271         dev->trans_start = jiffies;
272
273         /* Notify and wake up reader process */
274         if (tun->flags & TUN_FASYNC)
275                 kill_fasync(&tun->fasync, SIGIO, POLL_IN);
276         wake_up_interruptible(&tun->read_wait);
277         return 0;
278
279 drop:
280         dev->stats.tx_dropped++;
281         kfree_skb(skb);
282         return 0;
283 }
284
/*
 * set_multicast_list callback. Intentionally empty: the callback is
 * about multicast filtering in the _rx_ path, which is unrelated to the
 * _tx_ filter above, and in the rx path everything userspace writes is
 * accepted.
 */
static void tun_net_mclist(struct net_device *dev)
{
}
294
295 #define MIN_MTU 68
296 #define MAX_MTU 65535
297
298 static int
299 tun_net_change_mtu(struct net_device *dev, int new_mtu)
300 {
301         if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU)
302                 return -EINVAL;
303         dev->mtu = new_mtu;
304         return 0;
305 }
306
307 /* Initialize net device. */
308 static void tun_net_init(struct net_device *dev)
309 {
310         struct tun_struct *tun = netdev_priv(dev);
311
312         switch (tun->flags & TUN_TYPE_MASK) {
313         case TUN_TUN_DEV:
314                 /* Point-to-Point TUN Device */
315                 dev->hard_header_len = 0;
316                 dev->addr_len = 0;
317                 dev->mtu = 1500;
318                 dev->change_mtu = tun_net_change_mtu;
319
320                 /* Zero header length */
321                 dev->type = ARPHRD_NONE;
322                 dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
323                 dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
324                 break;
325
326         case TUN_TAP_DEV:
327                 /* Ethernet TAP Device */
328                 ether_setup(dev);
329                 dev->change_mtu         = tun_net_change_mtu;
330                 dev->set_multicast_list = tun_net_mclist;
331
332                 random_ether_addr(dev->dev_addr);
333
334                 dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
335                 break;
336         }
337 }
338
339 /* Character device part */
340
341 /* Poll */
342 static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
343 {
344         struct tun_struct *tun = file->private_data;
345         unsigned int mask = POLLOUT | POLLWRNORM;
346
347         if (!tun)
348                 return -EBADFD;
349
350         DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
351
352         poll_wait(file, &tun->read_wait, wait);
353
354         if (!skb_queue_empty(&tun->readq))
355                 mask |= POLLIN | POLLRDNORM;
356
357         return mask;
358 }
359
360 /* Get packet from user space buffer */
361 static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
362 {
363         struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
364         struct sk_buff *skb;
365         size_t len = count, align = 0;
366         struct virtio_net_hdr gso = { 0 };
367
368         if (!(tun->flags & TUN_NO_PI)) {
369                 if ((len -= sizeof(pi)) > count)
370                         return -EINVAL;
371
372                 if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
373                         return -EFAULT;
374         }
375
376         if (tun->flags & TUN_VNET_HDR) {
377                 if ((len -= sizeof(gso)) > count)
378                         return -EINVAL;
379
380                 if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
381                         return -EFAULT;
382
383                 if (gso.hdr_len > len)
384                         return -EINVAL;
385         }
386
387         if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
388                 align = NET_IP_ALIGN;
389                 if (unlikely(len < ETH_HLEN))
390                         return -EINVAL;
391         }
392
393         if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
394                 tun->dev->stats.rx_dropped++;
395                 return -ENOMEM;
396         }
397
398         if (align)
399                 skb_reserve(skb, align);
400         if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
401                 tun->dev->stats.rx_dropped++;
402                 kfree_skb(skb);
403                 return -EFAULT;
404         }
405
406         if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
407                 if (!skb_partial_csum_set(skb, gso.csum_start,
408                                           gso.csum_offset)) {
409                         tun->dev->stats.rx_frame_errors++;
410                         kfree_skb(skb);
411                         return -EINVAL;
412                 }
413         } else if (tun->flags & TUN_NOCHECKSUM)
414                 skb->ip_summed = CHECKSUM_UNNECESSARY;
415
416         switch (tun->flags & TUN_TYPE_MASK) {
417         case TUN_TUN_DEV:
418                 if (tun->flags & TUN_NO_PI) {
419                         switch (skb->data[0] & 0xf0) {
420                         case 0x40:
421                                 pi.proto = htons(ETH_P_IP);
422                                 break;
423                         case 0x60:
424                                 pi.proto = htons(ETH_P_IPV6);
425                                 break;
426                         default:
427                                 tun->dev->stats.rx_dropped++;
428                                 kfree_skb(skb);
429                                 return -EINVAL;
430                         }
431                 }
432
433                 skb_reset_mac_header(skb);
434                 skb->protocol = pi.proto;
435                 skb->dev = tun->dev;
436                 break;
437         case TUN_TAP_DEV:
438                 skb->protocol = eth_type_trans(skb, tun->dev);
439                 break;
440         };
441
442         if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
443                 pr_debug("GSO!\n");
444                 switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
445                 case VIRTIO_NET_HDR_GSO_TCPV4:
446                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
447                         break;
448                 case VIRTIO_NET_HDR_GSO_TCPV6:
449                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
450                         break;
451                 default:
452                         tun->dev->stats.rx_frame_errors++;
453                         kfree_skb(skb);
454                         return -EINVAL;
455                 }
456
457                 if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
458                         skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
459
460                 skb_shinfo(skb)->gso_size = gso.gso_size;
461                 if (skb_shinfo(skb)->gso_size == 0) {
462                         tun->dev->stats.rx_frame_errors++;
463                         kfree_skb(skb);
464                         return -EINVAL;
465                 }
466
467                 /* Header must be checked, and gso_segs computed. */
468                 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
469                 skb_shinfo(skb)->gso_segs = 0;
470         }
471
472         netif_rx_ni(skb);
473         tun->dev->last_rx = jiffies;
474
475         tun->dev->stats.rx_packets++;
476         tun->dev->stats.rx_bytes += len;
477
478         return count;
479 }
480
481 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
482                               unsigned long count, loff_t pos)
483 {
484         struct tun_struct *tun = iocb->ki_filp->private_data;
485
486         if (!tun)
487                 return -EBADFD;
488
489         DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
490
491         return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
492 }
493
494 /* Put packet to the user space buffer */
495 static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
496                                        struct sk_buff *skb,
497                                        struct iovec *iv, int len)
498 {
499         struct tun_pi pi = { 0, skb->protocol };
500         ssize_t total = 0;
501
502         if (!(tun->flags & TUN_NO_PI)) {
503                 if ((len -= sizeof(pi)) < 0)
504                         return -EINVAL;
505
506                 if (len < skb->len) {
507                         /* Packet will be striped */
508                         pi.flags |= TUN_PKT_STRIP;
509                 }
510
511                 if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
512                         return -EFAULT;
513                 total += sizeof(pi);
514         }
515
516         if (tun->flags & TUN_VNET_HDR) {
517                 struct virtio_net_hdr gso = { 0 }; /* no info leak */
518                 if ((len -= sizeof(gso)) < 0)
519                         return -EINVAL;
520
521                 if (skb_is_gso(skb)) {
522                         struct skb_shared_info *sinfo = skb_shinfo(skb);
523
524                         /* This is a hint as to how much should be linear. */
525                         gso.hdr_len = skb_headlen(skb);
526                         gso.gso_size = sinfo->gso_size;
527                         if (sinfo->gso_type & SKB_GSO_TCPV4)
528                                 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
529                         else if (sinfo->gso_type & SKB_GSO_TCPV6)
530                                 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
531                         else
532                                 BUG();
533                         if (sinfo->gso_type & SKB_GSO_TCP_ECN)
534                                 gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
535                 } else
536                         gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
537
538                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
539                         gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
540                         gso.csum_start = skb->csum_start - skb_headroom(skb);
541                         gso.csum_offset = skb->csum_offset;
542                 } /* else everything is zero */
543
544                 if (unlikely(memcpy_toiovec(iv, (void *)&gso, sizeof(gso))))
545                         return -EFAULT;
546                 total += sizeof(gso);
547         }
548
549         len = min_t(int, skb->len, len);
550
551         skb_copy_datagram_iovec(skb, 0, iv, len);
552         total += len;
553
554         tun->dev->stats.tx_packets++;
555         tun->dev->stats.tx_bytes += len;
556
557         return total;
558 }
559
560 static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
561                             unsigned long count, loff_t pos)
562 {
563         struct file *file = iocb->ki_filp;
564         struct tun_struct *tun = file->private_data;
565         DECLARE_WAITQUEUE(wait, current);
566         struct sk_buff *skb;
567         ssize_t len, ret = 0;
568
569         if (!tun)
570                 return -EBADFD;
571
572         DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
573
574         len = iov_length(iv, count);
575         if (len < 0)
576                 return -EINVAL;
577
578         add_wait_queue(&tun->read_wait, &wait);
579         while (len) {
580                 current->state = TASK_INTERRUPTIBLE;
581
582                 /* Read frames from the queue */
583                 if (!(skb=skb_dequeue(&tun->readq))) {
584                         if (file->f_flags & O_NONBLOCK) {
585                                 ret = -EAGAIN;
586                                 break;
587                         }
588                         if (signal_pending(current)) {
589                                 ret = -ERESTARTSYS;
590                                 break;
591                         }
592
593                         /* Nothing to read, let's sleep */
594                         schedule();
595                         continue;
596                 }
597                 netif_wake_queue(tun->dev);
598
599                 ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
600                 kfree_skb(skb);
601                 break;
602         }
603
604         current->state = TASK_RUNNING;
605         remove_wait_queue(&tun->read_wait, &wait);
606
607         return ret;
608 }
609
610 static void tun_setup(struct net_device *dev)
611 {
612         struct tun_struct *tun = netdev_priv(dev);
613
614         skb_queue_head_init(&tun->readq);
615         init_waitqueue_head(&tun->read_wait);
616
617         tun->owner = -1;
618         tun->group = -1;
619
620         dev->open = tun_net_open;
621         dev->hard_start_xmit = tun_net_xmit;
622         dev->stop = tun_net_close;
623         dev->ethtool_ops = &tun_ethtool_ops;
624         dev->destructor = free_netdev;
625         dev->features |= NETIF_F_NETNS_LOCAL;
626 }
627
628 static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name)
629 {
630         struct tun_struct *tun;
631
632         ASSERT_RTNL();
633         list_for_each_entry(tun, &tn->dev_list, list) {
634                 if (!strncmp(tun->dev->name, name, IFNAMSIZ))
635                     return tun;
636         }
637
638         return NULL;
639 }
640
641 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
642 {
643         struct tun_net *tn;
644         struct tun_struct *tun;
645         struct net_device *dev;
646         int err;
647
648         tn = net_generic(net, tun_net_id);
649         tun = tun_get_by_name(tn, ifr->ifr_name);
650         if (tun) {
651                 if (tun->attached)
652                         return -EBUSY;
653
654                 /* Check permissions */
655                 if (((tun->owner != -1 &&
656                       current->euid != tun->owner) ||
657                      (tun->group != -1 &&
658                       current->egid != tun->group)) &&
659                      !capable(CAP_NET_ADMIN))
660                         return -EPERM;
661         }
662         else if (__dev_get_by_name(net, ifr->ifr_name))
663                 return -EINVAL;
664         else {
665                 char *name;
666                 unsigned long flags = 0;
667
668                 err = -EINVAL;
669
670                 if (!capable(CAP_NET_ADMIN))
671                         return -EPERM;
672
673                 /* Set dev type */
674                 if (ifr->ifr_flags & IFF_TUN) {
675                         /* TUN device */
676                         flags |= TUN_TUN_DEV;
677                         name = "tun%d";
678                 } else if (ifr->ifr_flags & IFF_TAP) {
679                         /* TAP device */
680                         flags |= TUN_TAP_DEV;
681                         name = "tap%d";
682                 } else
683                         goto failed;
684
685                 if (*ifr->ifr_name)
686                         name = ifr->ifr_name;
687
688                 dev = alloc_netdev(sizeof(struct tun_struct), name,
689                                    tun_setup);
690                 if (!dev)
691                         return -ENOMEM;
692
693                 dev_net_set(dev, net);
694                 tun = netdev_priv(dev);
695                 tun->dev = dev;
696                 tun->flags = flags;
697                 tun->txflt.count = 0;
698
699                 tun_net_init(dev);
700
701                 if (strchr(dev->name, '%')) {
702                         err = dev_alloc_name(dev, dev->name);
703                         if (err < 0)
704                                 goto err_free_dev;
705                 }
706
707                 err = register_netdevice(tun->dev);
708                 if (err < 0)
709                         goto err_free_dev;
710
711                 list_add(&tun->list, &tn->dev_list);
712         }
713
714         DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
715
716         if (ifr->ifr_flags & IFF_NO_PI)
717                 tun->flags |= TUN_NO_PI;
718         else
719                 tun->flags &= ~TUN_NO_PI;
720
721         if (ifr->ifr_flags & IFF_ONE_QUEUE)
722                 tun->flags |= TUN_ONE_QUEUE;
723         else
724                 tun->flags &= ~TUN_ONE_QUEUE;
725
726         if (ifr->ifr_flags & IFF_VNET_HDR)
727                 tun->flags |= TUN_VNET_HDR;
728         else
729                 tun->flags &= ~TUN_VNET_HDR;
730
731         file->private_data = tun;
732         tun->attached = 1;
733         get_net(dev_net(tun->dev));
734
735         /* Make sure persistent devices do not get stuck in
736          * xoff state.
737          */
738         if (netif_running(tun->dev))
739                 netif_wake_queue(tun->dev);
740
741         strcpy(ifr->ifr_name, tun->dev->name);
742         return 0;
743
744  err_free_dev:
745         free_netdev(dev);
746  failed:
747         return err;
748 }
749
750 /* This is like a cut-down ethtool ops, except done via tun fd so no
751  * privs required. */
752 static int set_offload(struct net_device *dev, unsigned long arg)
753 {
754         unsigned int old_features, features;
755
756         old_features = dev->features;
757         /* Unset features, set them as we chew on the arg. */
758         features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST
759                                     |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6));
760
761         if (arg & TUN_F_CSUM) {
762                 features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
763                 arg &= ~TUN_F_CSUM;
764
765                 if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
766                         if (arg & TUN_F_TSO_ECN) {
767                                 features |= NETIF_F_TSO_ECN;
768                                 arg &= ~TUN_F_TSO_ECN;
769                         }
770                         if (arg & TUN_F_TSO4)
771                                 features |= NETIF_F_TSO;
772                         if (arg & TUN_F_TSO6)
773                                 features |= NETIF_F_TSO6;
774                         arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
775                 }
776         }
777
778         /* This gives the user a way to test for new features in future by
779          * trying to set them. */
780         if (arg)
781                 return -EINVAL;
782
783         dev->features = features;
784         if (old_features != dev->features)
785                 netdev_features_change(dev);
786
787         return 0;
788 }
789
790 static int tun_chr_ioctl(struct inode *inode, struct file *file,
791                          unsigned int cmd, unsigned long arg)
792 {
793         struct tun_struct *tun = file->private_data;
794         void __user* argp = (void __user*)arg;
795         struct ifreq ifr;
796         int ret;
797         DECLARE_MAC_BUF(mac);
798
799         if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
800                 if (copy_from_user(&ifr, argp, sizeof ifr))
801                         return -EFAULT;
802
803         if (cmd == TUNSETIFF && !tun) {
804                 int err;
805
806                 ifr.ifr_name[IFNAMSIZ-1] = '\0';
807
808                 rtnl_lock();
809                 err = tun_set_iff(current->nsproxy->net_ns, file, &ifr);
810                 rtnl_unlock();
811
812                 if (err)
813                         return err;
814
815                 if (copy_to_user(argp, &ifr, sizeof(ifr)))
816                         return -EFAULT;
817                 return 0;
818         }
819
820         if (cmd == TUNGETFEATURES) {
821                 /* Currently this just means: "what IFF flags are valid?".
822                  * This is needed because we never checked for invalid flags on
823                  * TUNSETIFF. */
824                 return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
825                                 IFF_VNET_HDR,
826                                 (unsigned int __user*)argp);
827         }
828
829         if (!tun)
830                 return -EBADFD;
831
832         DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
833
834         switch (cmd) {
835         case TUNSETNOCSUM:
836                 /* Disable/Enable checksum */
837                 if (arg)
838                         tun->flags |= TUN_NOCHECKSUM;
839                 else
840                         tun->flags &= ~TUN_NOCHECKSUM;
841
842                 DBG(KERN_INFO "%s: checksum %s\n",
843                     tun->dev->name, arg ? "disabled" : "enabled");
844                 break;
845
846         case TUNSETPERSIST:
847                 /* Disable/Enable persist mode */
848                 if (arg)
849                         tun->flags |= TUN_PERSIST;
850                 else
851                         tun->flags &= ~TUN_PERSIST;
852
853                 DBG(KERN_INFO "%s: persist %s\n",
854                     tun->dev->name, arg ? "enabled" : "disabled");
855                 break;
856
857         case TUNSETOWNER:
858                 /* Set owner of the device */
859                 tun->owner = (uid_t) arg;
860
861                 DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
862                 break;
863
864         case TUNSETGROUP:
865                 /* Set group of the device */
866                 tun->group= (gid_t) arg;
867
868                 DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
869                 break;
870
871         case TUNSETLINK:
872                 /* Only allow setting the type when the interface is down */
873                 rtnl_lock();
874                 if (tun->dev->flags & IFF_UP) {
875                         DBG(KERN_INFO "%s: Linktype set failed because interface is up\n",
876                                 tun->dev->name);
877                         ret = -EBUSY;
878                 } else {
879                         tun->dev->type = (int) arg;
880                         DBG(KERN_INFO "%s: linktype set to %d\n", tun->dev->name, tun->dev->type);
881                         ret = 0;
882                 }
883                 rtnl_unlock();
884                 return ret;
885
886 #ifdef TUN_DEBUG
887         case TUNSETDEBUG:
888                 tun->debug = arg;
889                 break;
890 #endif
891         case TUNSETOFFLOAD:
892                 rtnl_lock();
893                 ret = set_offload(tun->dev, arg);
894                 rtnl_unlock();
895                 return ret;
896
897         case TUNSETTXFILTER:
898                 /* Can be set only for TAPs */
899                 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
900                         return -EINVAL;
901                 rtnl_lock();
902                 ret = update_filter(&tun->txflt, (void *) __user arg);
903                 rtnl_unlock();
904                 return ret;
905
906         case SIOCGIFHWADDR:
907                 /* Get hw addres */
908                 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
909                 ifr.ifr_hwaddr.sa_family = tun->dev->type;
910                 if (copy_to_user(argp, &ifr, sizeof ifr))
911                         return -EFAULT;
912                 return 0;
913
914         case SIOCSIFHWADDR:
915                 /* Set hw address */
916                 DBG(KERN_DEBUG "%s: set hw address: %s\n",
917                         tun->dev->name, print_mac(mac, ifr.ifr_hwaddr.sa_data));
918
919                 rtnl_lock();
920                 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
921                 rtnl_unlock();
922                 return ret;
923
924         default:
925                 return -EINVAL;
926         };
927
928         return 0;
929 }
930
931 static int tun_chr_fasync(int fd, struct file *file, int on)
932 {
933         struct tun_struct *tun = file->private_data;
934         int ret;
935
936         if (!tun)
937                 return -EBADFD;
938
939         DBG(KERN_INFO "%s: tun_chr_fasync %d\n", tun->dev->name, on);
940
941         if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
942                 return ret;
943
944         if (on) {
945                 ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
946                 if (ret)
947                         return ret;
948                 tun->flags |= TUN_FASYNC;
949         } else
950                 tun->flags &= ~TUN_FASYNC;
951
952         return 0;
953 }
954
955 static int tun_chr_open(struct inode *inode, struct file * file)
956 {
957         DBG1(KERN_INFO "tunX: tun_chr_open\n");
958         file->private_data = NULL;
959         return 0;
960 }
961
962 static int tun_chr_close(struct inode *inode, struct file *file)
963 {
964         struct tun_struct *tun = file->private_data;
965
966         if (!tun)
967                 return 0;
968
969         DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
970
971         tun_chr_fasync(-1, file, 0);
972
973         rtnl_lock();
974
975         /* Detach from net device */
976         file->private_data = NULL;
977         tun->attached = 0;
978         put_net(dev_net(tun->dev));
979
980         /* Drop read queue */
981         skb_queue_purge(&tun->readq);
982
983         if (!(tun->flags & TUN_PERSIST)) {
984                 list_del(&tun->list);
985                 unregister_netdevice(tun->dev);
986         }
987
988         rtnl_unlock();
989
990         return 0;
991 }
992
993 static const struct file_operations tun_fops = {
994         .owner  = THIS_MODULE,
995         .llseek = no_llseek,
996         .read  = do_sync_read,
997         .aio_read  = tun_chr_aio_read,
998         .write = do_sync_write,
999         .aio_write = tun_chr_aio_write,
1000         .poll   = tun_chr_poll,
1001         .ioctl  = tun_chr_ioctl,
1002         .open   = tun_chr_open,
1003         .release = tun_chr_close,
1004         .fasync = tun_chr_fasync
1005 };
1006
1007 static struct miscdevice tun_miscdev = {
1008         .minor = TUN_MINOR,
1009         .name = "tun",
1010         .fops = &tun_fops,
1011 };
1012
1013 /* ethtool interface */
1014
1015 static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1016 {
1017         cmd->supported          = 0;
1018         cmd->advertising        = 0;
1019         cmd->speed              = SPEED_10;
1020         cmd->duplex             = DUPLEX_FULL;
1021         cmd->port               = PORT_TP;
1022         cmd->phy_address        = 0;
1023         cmd->transceiver        = XCVR_INTERNAL;
1024         cmd->autoneg            = AUTONEG_DISABLE;
1025         cmd->maxtxpkt           = 0;
1026         cmd->maxrxpkt           = 0;
1027         return 0;
1028 }
1029
1030 static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1031 {
1032         struct tun_struct *tun = netdev_priv(dev);
1033
1034         strcpy(info->driver, DRV_NAME);
1035         strcpy(info->version, DRV_VERSION);
1036         strcpy(info->fw_version, "N/A");
1037
1038         switch (tun->flags & TUN_TYPE_MASK) {
1039         case TUN_TUN_DEV:
1040                 strcpy(info->bus_info, "tun");
1041                 break;
1042         case TUN_TAP_DEV:
1043                 strcpy(info->bus_info, "tap");
1044                 break;
1045         }
1046 }
1047
1048 static u32 tun_get_msglevel(struct net_device *dev)
1049 {
1050 #ifdef TUN_DEBUG
1051         struct tun_struct *tun = netdev_priv(dev);
1052         return tun->debug;
1053 #else
1054         return -EOPNOTSUPP;
1055 #endif
1056 }
1057
1058 static void tun_set_msglevel(struct net_device *dev, u32 value)
1059 {
1060 #ifdef TUN_DEBUG
1061         struct tun_struct *tun = netdev_priv(dev);
1062         tun->debug = value;
1063 #endif
1064 }
1065
1066 static u32 tun_get_link(struct net_device *dev)
1067 {
1068         struct tun_struct *tun = netdev_priv(dev);
1069         return tun->attached;
1070 }
1071
1072 static u32 tun_get_rx_csum(struct net_device *dev)
1073 {
1074         struct tun_struct *tun = netdev_priv(dev);
1075         return (tun->flags & TUN_NOCHECKSUM) == 0;
1076 }
1077
1078 static int tun_set_rx_csum(struct net_device *dev, u32 data)
1079 {
1080         struct tun_struct *tun = netdev_priv(dev);
1081         if (data)
1082                 tun->flags &= ~TUN_NOCHECKSUM;
1083         else
1084                 tun->flags |= TUN_NOCHECKSUM;
1085         return 0;
1086 }
1087
1088 static const struct ethtool_ops tun_ethtool_ops = {
1089         .get_settings   = tun_get_settings,
1090         .get_drvinfo    = tun_get_drvinfo,
1091         .get_msglevel   = tun_get_msglevel,
1092         .set_msglevel   = tun_set_msglevel,
1093         .get_link       = tun_get_link,
1094         .get_rx_csum    = tun_get_rx_csum,
1095         .set_rx_csum    = tun_set_rx_csum
1096 };
1097
1098 static int tun_init_net(struct net *net)
1099 {
1100         struct tun_net *tn;
1101
1102         tn = kmalloc(sizeof(*tn), GFP_KERNEL);
1103         if (tn == NULL)
1104                 return -ENOMEM;
1105
1106         INIT_LIST_HEAD(&tn->dev_list);
1107
1108         if (net_assign_generic(net, tun_net_id, tn)) {
1109                 kfree(tn);
1110                 return -ENOMEM;
1111         }
1112
1113         return 0;
1114 }
1115
1116 static void tun_exit_net(struct net *net)
1117 {
1118         struct tun_net *tn;
1119         struct tun_struct *tun, *nxt;
1120
1121         tn = net_generic(net, tun_net_id);
1122
1123         rtnl_lock();
1124         list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) {
1125                 DBG(KERN_INFO "%s cleaned up\n", tun->dev->name);
1126                 unregister_netdevice(tun->dev);
1127         }
1128         rtnl_unlock();
1129
1130         kfree(tn);
1131 }
1132
1133 static struct pernet_operations tun_net_ops = {
1134         .init = tun_init_net,
1135         .exit = tun_exit_net,
1136 };
1137
1138 static int __init tun_init(void)
1139 {
1140         int ret = 0;
1141
1142         printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
1143         printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
1144
1145         ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops);
1146         if (ret) {
1147                 printk(KERN_ERR "tun: Can't register pernet ops\n");
1148                 goto err_pernet;
1149         }
1150
1151         ret = misc_register(&tun_miscdev);
1152         if (ret) {
1153                 printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
1154                 goto err_misc;
1155         }
1156         return 0;
1157
1158 err_misc:
1159         unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
1160 err_pernet:
1161         return ret;
1162 }
1163
1164 static void tun_cleanup(void)
1165 {
1166         misc_deregister(&tun_miscdev);
1167         unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
1168 }
1169
1170 module_init(tun_init);
1171 module_exit(tun_cleanup);
1172 MODULE_DESCRIPTION(DRV_DESCRIPTION);
1173 MODULE_AUTHOR(DRV_COPYRIGHT);
1174 MODULE_LICENSE("GPL");
1175 MODULE_ALIAS_MISCDEV(TUN_MINOR);