net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/net_namespace.h>
  96 #include <net/sock.h>
  97 #include <linux/rtnetlink.h>
  98 #include <linux/proc_fs.h>
  99 #include <linux/seq_file.h>
 100 #include <linux/stat.h>
 101 #include <linux/if_bridge.h>
 102 #include <linux/if_macvlan.h>
 103 #include <net/dst.h>
 104 #include <net/pkt_sched.h>
 105 #include <net/checksum.h>
 106 #include <linux/highmem.h>
 107 #include <linux/init.h>
 108 #include <linux/kmod.h>
 109 #include <linux/module.h>
 110 #include <linux/kallsyms.h>
 111 #include <linux/netpoll.h>
 112 #include <linux/rcupdate.h>
 113 #include <linux/delay.h>
 114 #include <net/wext.h>
 115 #include <net/iw_handler.h>
 116 #include <asm/current.h>
 117 #include <linux/audit.h>
 118 #include <linux/dmaengine.h>
 119 #include <linux/err.h>
 120 #include <linux/ctype.h>
 121 #include <linux/if_arp.h>
 122
 123 /*
 124  *      The list of packet types we will receive (as opposed to discard)
 125  *      and the routines to invoke.
 126  *
 127  *      Why 16. Because with 16 the only overlap we get on a hash of the
 128  *      low nibble of the protocol value is RARP/SNAP/X.25.
 129  *
 130  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 131  *             sure which should go first, but I bet it won't make much
 132  *             difference if we are running VLANs.  The good news is that
 133  *             this protocol won't be in the list unless compiled in, so
 134  *             the average user (w/out VLANs) will not be adversely affected.
 135  *             --BLG
 136  *
 137  *              0800    IP
 138  *              8100    802.1Q VLAN
 139  *              0001    802.3
 140  *              0002    AX.25
 141  *              0004    802.2
 142  *              8035    RARP
 143  *              0005    SNAP
 144  *              0805    X.25
 145  *              0806    ARP
 146  *              8137    IPX
 147  *              0009    Localtalk
 148  *              86DD    IPv6
 149  */
 150
 151 static DEFINE_SPINLOCK(ptype_lock); /* taken with BHs off by dev_add_pack()/__dev_remove_pack() while editing the two lists below */
 152 static struct list_head ptype_base[16] __read_mostly;   /* 16 way hashed list; bucket is ntohs(type) & 15 */
 153 static struct list_head ptype_all __read_mostly;        /* Taps: handlers registered with type ETH_P_ALL */
 154
 155 #ifdef CONFIG_NET_DMA
 156 struct net_dma {        /* only built when CONFIG_NET_DMA is set */
 157         struct dma_client client;       /* holds the event callback wired up in net_dma below */
 158         spinlock_t lock;
 159         cpumask_t channel_mask;
 160         struct dma_chan *channels[NR_CPUS];     /* NR_CPUS slots - presumably one channel per CPU; confirm against the users */
 161 };
 162
 163 static enum dma_state_client
 164 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 165         enum dma_state state);  /* forward declaration, needed by the initializer below */
 166
 167 static struct net_dma net_dma = {       /* the single, file-local instance */
 168         .client = {
 169                 .event_callback = netdev_dma_event,
 170         },
 171 };
 172 #endif
 173
 174 /*
 175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 176  * semaphore.
 177  *
 178  * Pure readers hold dev_base_lock for reading.
 179  *
 180  * Writers must hold the rtnl semaphore while they loop through the
 181  * dev_base_head list, and hold dev_base_lock for writing when they do the
 182  * actual updates.  This allows pure readers to access the list even
 183  * while a writer is preparing to update it.
 184  *
 185  * To put it another way, dev_base_lock is held for writing only to
 186  * protect against pure readers; the rtnl semaphore provides the
 187  * protection against other writers.
 188  *
 189  * See, for example usages, register_netdevice() and
 190  * unregister_netdevice(), which must be called with the rtnl
 191  * semaphore held.
 192  */
 193 DEFINE_RWLOCK(dev_base_lock);   /* the reader/writer lock whose rules are spelled out in the comment above */
 194
 195 EXPORT_SYMBOL(dev_base_lock);
 196
 197 #define NETDEV_HASHBITS 8       /* log2 of the bucket count used by dev_name_hash()/dev_index_hash() */
 198 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)       /* i.e. 256 buckets */
 199
 200 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 201 {
 202         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 203         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 204 }
 205
 206 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 207 {
 208         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 209 }
 210
 211 /*
 212  *      Our notifier list
 213  */
 214
 215 static RAW_NOTIFIER_HEAD(netdev_chain);    /* file-local chain head; NOTE(review): a "raw" chain presumably leaves locking to its callers - confirm */
 216
 217 /*
 218  *      Device drivers call our routines to queue packets here. We empty the
 219  *      queue in the local softnet handler.
 220  */
 221
 222 DEFINE_PER_CPU(struct softnet_data, softnet_data); /* one struct softnet_data instance per CPU */
 223
 224 #ifdef CONFIG_SYSFS
 225 extern int netdev_sysfs_init(void);
 226 extern int netdev_register_sysfs(struct net_device *);
 227 extern void netdev_unregister_sysfs(struct net_device *);
 228 #else   /* !CONFIG_SYSFS: stubs, so callers need no #ifdef of their own */
 229 #define netdev_sysfs_init()             (0)     /* evaluates to 0 (presumably "success") */
 230 #define netdev_register_sysfs(dev)      (0)     /* evaluates to 0; the argument is not evaluated */
 231 #define netdev_unregister_sysfs(dev)    do { } while(0) /* empty statement */
 232 #endif
 233
 234 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 235 /*
 236  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 237  * according to dev->type
 238  */
 239 static const unsigned short netdev_lock_type[] =   /* device types that get a dedicated lockdep class; searched linearly by netdev_lock_pos() */
 240         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 241          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 242          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 243          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 244          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 245          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 246          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 247          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 248          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 249          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 250          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 251          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 252          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 253          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
 254          ARPHRD_NONE};  /* last slot doubles as the fallback for unlisted types */
 255
 256 static const char *netdev_lock_name[] =    /* class names; must stay index-aligned with netdev_lock_type[] */
 257         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 258          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 259          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 260          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 261          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 262          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 263          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 264          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 265          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 266          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 267          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 268          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 269          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 270          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 271          "_xmit_NONE"};
 272
 273 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];   /* one key per netdev_lock_type[] entry */
 274
 275 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 276 {
 277         int i;
 278
 279         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 280                 if (netdev_lock_type[i] == dev_type)
 281                         return i;
 282         /* the last key is used by default */
 283         return ARRAY_SIZE(netdev_lock_type) - 1;
 284 }
 285
 286 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 287                                             unsigned short dev_type)
 288 {
 289         int i;
 290
 291         i = netdev_lock_pos(dev_type);
 292         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 293                                    netdev_lock_name[i]);
 294 }
 295 #else
 296 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 297                                             unsigned short dev_type)
 298 {       /* CONFIG_DEBUG_LOCK_ALLOC is off: there is no lock class to assign */
 299 }
 300 #endif
 301
 302 /*******************************************************************************
 303
 304                 Protocol management and registration routines
 305
 306 *******************************************************************************/
 307
 308 /*
 309  *      Add a protocol ID to the list. Now that the input handler is
 310  *      smarter we can dispense with all the messy stuff that used to be
 311  *      here.
 312  *
 313  *      BEWARE!!! Protocol handlers, mangling input packets,
 314  *      MUST BE last in hash buckets and checking protocol handlers
 315  *      MUST start from promiscuous ptype_all chain in net_bh.
 316  *      It is true now, do not change it.
 317  *      Explanation follows: if protocol handler, mangling packet, will
 318  *      be the first on list, it is not able to sense, that packet
 319  *      is cloned and should be copied-on-write, so that it will
 320  *      change it and subsequent readers will get broken packet.
 321  *                                                      --ANK (980803)
 322  */
 323
 324 /**
 325  *      dev_add_pack - add packet handler
 326  *      @pt: packet type declaration
 327  *
 328  *      Add a protocol handler to the networking stack. The passed &packet_type
 329  *      is linked into kernel lists and may not be freed until it has been
 330  *      removed from the kernel lists.
 331  *
 332  *      This call does not sleep therefore it can not
 333  *      guarantee all CPU's that are in middle of receiving packets
 334  *      will see the new packet type (until the next received packet).
 335  */
 336
 337 void dev_add_pack(struct packet_type *pt)
 338 {
 339         int hash;
 340
 341         spin_lock_bh(&ptype_lock);
 342         if (pt->type == htons(ETH_P_ALL))
 343                 list_add_rcu(&pt->list, &ptype_all);
 344         else {
 345                 hash = ntohs(pt->type) & 15;
 346                 list_add_rcu(&pt->list, &ptype_base[hash]);
 347         }
 348         spin_unlock_bh(&ptype_lock);
 349 }
 350
 351 /**
 352  *      __dev_remove_pack        - remove packet handler
 353  *      @pt: packet type declaration
 354  *
 355  *      Remove a protocol handler that was previously added to the kernel
 356  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 357  *      from the kernel lists, but this function does not wait for receivers
 358  *      that may still be using it.
 359  *
 360  *      The packet type might still be in use by receivers
 361  *      and must not be freed until after all the CPU's have gone
 362  *      through a quiescent state.
 363  */
 364 void __dev_remove_pack(struct packet_type *pt)
 365 {
 366         struct list_head *head;
 367         struct packet_type *pt1;
 368
 369         spin_lock_bh(&ptype_lock);
 370
 371         if (pt->type == htons(ETH_P_ALL))
 372                 head = &ptype_all;
 373         else
 374                 head = &ptype_base[ntohs(pt->type) & 15];
 375
 376         list_for_each_entry(pt1, head, list) {
 377                 if (pt == pt1) {
 378                         list_del_rcu(&pt->list);
 379                         goto out;
 380                 }
 381         }
 382
 383         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 384 out:
 385         spin_unlock_bh(&ptype_lock);
 386 }
 387 /**
 388  *      dev_remove_pack  - remove packet handler
 389  *      @pt: packet type declaration
 390  *
 391  *      Remove a protocol handler that was previously added to the kernel
 392  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 393  *      from the kernel lists and can be freed or reused once this function
 394  *      returns.
 395  *
 396  *      This call sleeps to guarantee that no CPU is looking at the packet
 397  *      type after return.
 398  */
 399 void dev_remove_pack(struct packet_type *pt)
 400 {
 401         __dev_remove_pack(pt);  /* unlink pt from its list under ptype_lock */
 402
 403         synchronize_net();      /* per the kernel-doc above, this is the step that sleeps until no CPU is still looking at pt */
 404 }
 405
 406 /******************************************************************************
 407
 408                       Device Boot-time Settings Routines
 409
 410 *******************************************************************************/
 411
 412 /* Boot time configuration table */
 413 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];     /* a slot whose name starts with '\0' or ' ' is treated as free by the add/check routines */
 414
 415 /**
 416  *      netdev_boot_setup_add   - add new setup entry
 417  *      @name: name of the device
 418  *      @map: configured settings for the device
 419  *
 420  *      Adds new setup entry to the dev_boot_setup list.  The function
 421  *      returns 0 on error and 1 on success.  This is a generic routine to
 422  *      all netdevices.
 423  */
 424 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 425 {
 426         struct netdev_boot_setup *s;
 427         int i;
 428
 429         s = dev_boot_setup;
 430         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 431                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 432                         memset(s[i].name, 0, sizeof(s[i].name));
 433                         strcpy(s[i].name, name);
 434                         memcpy(&s[i].map, map, sizeof(s[i].map));
 435                         break;
 436                 }
 437         }
 438
 439         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 440 }
 441
 442 /**
 443  *      netdev_boot_setup_check - check boot time settings
 444  *      @dev: the netdevice
 445  *
 446  *      Check boot time settings for the device.
 447  *      The found settings are set for the device to be used
 448  *      later in the device probing.
 449  *      Returns 0 if no settings found, 1 if they are.
 450  */
 451 int netdev_boot_setup_check(struct net_device *dev)
 452 {
 453         struct netdev_boot_setup *s = dev_boot_setup;
 454         int i;
 455
 456         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 457                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 458                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 459                         dev->irq        = s[i].map.irq;
 460                         dev->base_addr  = s[i].map.base_addr;
 461                         dev->mem_start  = s[i].map.mem_start;
 462                         dev->mem_end    = s[i].map.mem_end;
 463                         return 1;
 464                 }
 465         }
 466         return 0;
 467 }
 468
 469
 470 /**
 471  *      netdev_boot_base        - get address from boot time settings
 472  *      @prefix: prefix for network device
 473  *      @unit: id for network device
 474  *
 475  *      Check boot time settings for the base address of device.
 476  *      The found settings are set for the device to be used
 477  *      later in the device probing.
 478  *      Returns 0 if no settings found.
 479  */
 480 unsigned long netdev_boot_base(const char *prefix, int unit)
 481 {
 482         const struct netdev_boot_setup *s = dev_boot_setup;
 483         char name[IFNAMSIZ];
 484         int i;
 485
 486         sprintf(name, "%s%d", prefix, unit);
 487
 488         /*
 489          * If device already registered then return base of 1
 490          * to indicate not to probe for this interface
 491          */
 492         if (__dev_get_by_name(&init_net, name))
 493                 return 1;
 494
 495         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 496                 if (!strcmp(name, s[i].name))
 497                         return s[i].map.base_addr;
 498         return 0;
 499 }
 500
 501 /*
 502  * Saves at boot time configured settings for any netdevice.
 503  */
 504 int __init netdev_boot_setup(char *str)
 505 {
 506         int ints[5];
 507         struct ifmap map;
 508
 509         str = get_options(str, ARRAY_SIZE(ints), ints);
 510         if (!str || !*str)
 511                 return 0;
 512
 513         /* Save settings */
 514         memset(&map, 0, sizeof(map));
 515         if (ints[0] > 0)
 516                 map.irq = ints[1];
 517         if (ints[0] > 1)
 518                 map.base_addr = ints[2];
 519         if (ints[0] > 2)
 520                 map.mem_start = ints[3];
 521         if (ints[0] > 3)
 522                 map.mem_end = ints[4];
 523
 524         /* Add new entry to the list */
 525         return netdev_boot_setup_add(str, &map);
 526 }
 527
 528 __setup("netdev=", netdev_boot_setup);
 529
 530 /*******************************************************************************
 531
 532                             Device Interface Subroutines
 533
 534 *******************************************************************************/
 535
 536 /**
 537  *      __dev_get_by_name       - find a device by its name
 538  *      @name: name to find
 539  *
 540  *      Find an interface by name. Must be called under RTNL semaphore
 541  *      or @dev_base_lock. If the name is found a pointer to the device
 542  *      is returned. If the name is not found then %NULL is returned. The
 543  *      reference counters are not incremented so the caller must be
 544  *      careful with locks.
 545  */
 546
 547 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 548 {
 549         struct hlist_node *p;
 550
 551         hlist_for_each(p, dev_name_hash(net, name)) {
 552                 struct net_device *dev
 553                         = hlist_entry(p, struct net_device, name_hlist);
 554                 if (!strncmp(dev->name, name, IFNAMSIZ))
 555                         return dev;
 556         }
 557         return NULL;
 558 }
 559
 560 /**
 561  *      dev_get_by_name         - find a device by its name
 562  *      @name: name to find
 563  *
 564  *      Find an interface by name. This can be called from any
 565  *      context and does its own locking. The returned handle has
 566  *      the usage count incremented and the caller must use dev_put() to
 567  *      release it when it is no longer needed. %NULL is returned if no
 568  *      matching device is found.
 569  */
 570
 571 struct net_device *dev_get_by_name(struct net *net, const char *name)
 572 {
 573         struct net_device *dev;
 574
 575         read_lock(&dev_base_lock);
 576         dev = __dev_get_by_name(net, name);
 577         if (dev)
 578                 dev_hold(dev);
 579         read_unlock(&dev_base_lock);
 580         return dev;
 581 }
 582
 583 /**
 584  *      __dev_get_by_index - find a device by its ifindex
 585  *      @ifindex: index of device
 586  *
 587  *      Search for an interface by index. Returns %NULL if the device
 588  *      is not found or a pointer to the device. The device has not
 589  *      had its reference counter increased so the caller must be careful
 590  *      about locking. The caller must hold either the RTNL semaphore
 591  *      or @dev_base_lock.
 592  */
 593
 594 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 595 {
 596         struct hlist_node *p;
 597
 598         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 599                 struct net_device *dev
 600                         = hlist_entry(p, struct net_device, index_hlist);
 601                 if (dev->ifindex == ifindex)
 602                         return dev;
 603         }
 604         return NULL;
 605 }
 606
 607
 608 /**
 609  *      dev_get_by_index - find a device by its ifindex
 610  *      @ifindex: index of device
 611  *
 612  *      Search for an interface by index. Returns NULL if the device
 613  *      is not found or a pointer to the device. The device returned has
 614  *      had a reference added and the pointer is safe until the user calls
 615  *      dev_put to indicate they have finished with it.
 616  */
 617
 618 struct net_device *dev_get_by_index(struct net *net, int ifindex)
 619 {
 620         struct net_device *dev;
 621
 622         read_lock(&dev_base_lock);
 623         dev = __dev_get_by_index(net, ifindex);
 624         if (dev)
 625                 dev_hold(dev);
 626         read_unlock(&dev_base_lock);
 627         return dev;
 628 }
 629
 630 /**
 631  *      dev_getbyhwaddr - find a device by its hardware address
 632  *      @type: media type of device
 633  *      @ha: hardware address
 634  *
 635  *      Search for an interface by MAC address. Returns NULL if the device
 636  *      is not found or a pointer to the device. The caller must hold the
 637  *      rtnl semaphore. The returned device has not had its ref count increased
 638  *      and the caller must therefore be careful about locking
 639  *
 640  *      BUGS:
 641  *      If the API was consistent this would be __dev_get_by_hwaddr
 642  */
 643
 644 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 645 {
 646         struct net_device *dev;
 647
 648         ASSERT_RTNL();
 649
 650         for_each_netdev(&init_net, dev)
 651                 if (dev->type == type &&
 652                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 653                         return dev;
 654
 655         return NULL;
 656 }
 657
 658 EXPORT_SYMBOL(dev_getbyhwaddr);
 659
 660 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 661 {
 662         struct net_device *dev;
 663
 664         ASSERT_RTNL();
 665         for_each_netdev(net, dev)
 666                 if (dev->type == type)
 667                         return dev;
 668
 669         return NULL;
 670 }
 671
 672 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 673
 674 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 675 {
 676         struct net_device *dev;
 677
 678         rtnl_lock();
 679         dev = __dev_getfirstbyhwtype(net, type);
 680         if (dev)
 681                 dev_hold(dev);
 682         rtnl_unlock();
 683         return dev;
 684 }
 685
 686 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 687
 688 /**
 689  *      dev_get_by_flags - find any device with given flags
 690  *      @if_flags: IFF_* values
 691  *      @mask: bitmask of bits in if_flags to check
 692  *
 693  *      Search for any interface with the given flags. Returns NULL if a device
 694  *      is not found or a pointer to the device. The device returned has
 695  *      had a reference added and the pointer is safe until the user calls
 696  *      dev_put to indicate they have finished with it.
 697  */
 698
 699 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 700 {
 701         struct net_device *dev, *ret;
 702
 703         ret = NULL;
 704         read_lock(&dev_base_lock);
 705         for_each_netdev(net, dev) {
 706                 if (((dev->flags ^ if_flags) & mask) == 0) {
 707                         dev_hold(dev);
 708                         ret = dev;
 709                         break;
 710                 }
 711         }
 712         read_unlock(&dev_base_lock);
 713         return ret;
 714 }
 715
 716 /**
 717  *      dev_valid_name - check if name is okay for network device
 718  *      @name: name string
 719  *
 720  *      Network device names need to be valid file names to
 721  *      allow sysfs to work.  We also disallow any kind of
 722  *      whitespace.
 723  */
 724 int dev_valid_name(const char *name)
 725 {
 726         if (*name == '\0')
 727                 return 0;
 728         if (strlen(name) >= IFNAMSIZ)
 729                 return 0;
 730         if (!strcmp(name, ".") || !strcmp(name, ".."))
 731                 return 0;
 732
 733         while (*name) {
 734                 if (*name == '/' || isspace(*name))
 735                         return 0;
 736                 name++;
 737         }
 738         return 1;
 739 }
 740
 741 /**
 742  *      __dev_alloc_name - allocate a name for a device
 743  *      @net: network namespace to allocate the device name in
 744  *      @name: name format string
 745  *      @buf:  scratch buffer and result name string
 746  *
 747  *      Passed a format string - eg "lt%d" it will try and find a suitable
 748  *      id. It scans list of devices to build up a free map, then chooses
 749  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 750  *      while allocating the name and adding the device in order to avoid
 751  *      duplicates.
 752  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 753  *      Returns the number of the unit assigned or a negative errno code.
 754  */
 755
 756 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 757 {
 758         int i = 0;
 759         const char *p;
 760         const int max_netdevices = 8*PAGE_SIZE;
 761         long *inuse;
 762         struct net_device *d;
 763
 764         p = strnchr(name, IFNAMSIZ-1, '%');
 765         if (p) {
 766                 /*
 767                  * Verify the string as this thing may have come from
 768                  * the user.  There must be either one "%d" and no other "%"
 769                  * characters.
 770                  */
 771                 if (p[1] != 'd' || strchr(p + 2, '%'))
 772                         return -EINVAL;
 773
 774                 /* Use one page as a bit array of possible slots */
 775                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 776                 if (!inuse)
 777                         return -ENOMEM;
 778
 779                 for_each_netdev(net, d) {
 780                         if (!sscanf(d->name, name, &i))
 781                                 continue;
 782                         if (i < 0 || i >= max_netdevices)
 783                                 continue;
 784
 785                         /*  avoid cases where sscanf is not exact inverse of printf */
 786                         snprintf(buf, IFNAMSIZ, name, i);
 787                         if (!strncmp(buf, d->name, IFNAMSIZ))
 788                                 set_bit(i, inuse);
 789                 }
 790
 791                 i = find_first_zero_bit(inuse, max_netdevices);
 792                 free_page((unsigned long) inuse);
 793         }
 794
 795         snprintf(buf, IFNAMSIZ, name, i);
 796         if (!__dev_get_by_name(net, buf))
 797                 return i;
 798
 799         /* It is possible to run out of possible slots
 800          * when the name is long and there isn't enough space left
 801          * for the digits, or if all bits are used.
 802          */
 803         return -ENFILE;
 804 }
 805
 806 /**
 807  *      dev_alloc_name - allocate a name for a device
 808  *      @dev: device
 809  *      @name: name format string
 810  *
 811  *      Passed a format string - eg "lt%d" it will try and find a suitable
 812  *      id. It scans list of devices to build up a free map, then chooses
 813  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 814  *      while allocating the name and adding the device in order to avoid
 815  *      duplicates.
 816  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 817  *      Returns the number of the unit assigned or a negative errno code.
 818  */
 819
 820 int dev_alloc_name(struct net_device *dev, const char *name)
 821 {
 822         char buf[IFNAMSIZ];
 823         struct net *net;
 824         int ret;
 825
 826         BUG_ON(!dev->nd_net);
 827         net = dev->nd_net;
 828         ret = __dev_alloc_name(net, name, buf);
 829         if (ret >= 0)
 830                 strlcpy(dev->name, buf, IFNAMSIZ);
 831         return ret;
 832 }
 833
 834
 835 /**
 836  *      dev_change_name - change name of a device
 837  *      @dev: device
 838  *      @newname: name (or format string) must be at least IFNAMSIZ
 839  *
 840  *      Change name of a device, can pass format strings "eth%d".
 841  *      for wildcarding.
 842  */
 843 int dev_change_name(struct net_device *dev, char *newname)
 844 {
 845         char oldname[IFNAMSIZ];
 846         int err = 0;
 847         int ret;
 848         struct net *net;
 849
 850         ASSERT_RTNL();
 851         BUG_ON(!dev->nd_net);
 852
 853         net = dev->nd_net;
 854         if (dev->flags & IFF_UP)
 855                 return -EBUSY;
 856
 857         if (!dev_valid_name(newname))
 858                 return -EINVAL;
 859
 860         memcpy(oldname, dev->name, IFNAMSIZ);
 861
 862         if (strchr(newname, '%')) {
 863                 err = dev_alloc_name(dev, newname);
 864                 if (err < 0)
 865                         return err;
 866                 strcpy(newname, dev->name);
 867         }
 868         else if (__dev_get_by_name(net, newname))
 869                 return -EEXIST;
 870         else
 871                 strlcpy(dev->name, newname, IFNAMSIZ);
 872
 873 rollback:
 874         device_rename(&dev->dev, dev->name);
 875
 876         write_lock_bh(&dev_base_lock);
 877         hlist_del(&dev->name_hlist);
 878         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 879         write_unlock_bh(&dev_base_lock);
 880
 881         ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 882         ret = notifier_to_errno(ret);
 883
 884         if (ret) {
 885                 if (err) {
 886                         printk(KERN_ERR
 887                                "%s: name change rollback failed: %d.\n",
 888                                dev->name, ret);
 889                 } else {
 890                         err = ret;
 891                         memcpy(dev->name, oldname, IFNAMSIZ);
 892                         goto rollback;
 893                 }
 894         }
 895
 896         return err;
 897 }
 898
 899 /**
 900  *      netdev_features_change - device changes features
 901  *      @dev: device to cause notification
 902  *
 903  *      Called to indicate a device has changed features.
 904  */
 905 void netdev_features_change(struct net_device *dev)
 906 {
 907         raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 908 }
 909 EXPORT_SYMBOL(netdev_features_change);
 910
 911 /**
 912  *      netdev_state_change - device changes state
 913  *      @dev: device to cause notification
 914  *
 915  *      Called to indicate a device has changed state. This function calls
 916  *      the notifier chains for netdev_chain and sends a NEWLINK message
 917  *      to the routing socket.
 918  */
 919 void netdev_state_change(struct net_device *dev)
 920 {
 921         if (dev->flags & IFF_UP) {
 922                 raw_notifier_call_chain(&netdev_chain,
 923                                 NETDEV_CHANGE, dev);
 924                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 925         }
 926 }
 927
 928 /**
 929  *      dev_load        - load a network module
 930  *      @name: name of interface
 931  *
 932  *      If a network interface is not present and the process has suitable
 933  *      privileges this function loads the module. If module loading is not
 934  *      available in this kernel then it becomes a nop.
 935  */
 936
 937 void dev_load(struct net *net, const char *name)
 938 {
 939         struct net_device *dev;
 940
 941         read_lock(&dev_base_lock);
 942         dev = __dev_get_by_name(net, name);
 943         read_unlock(&dev_base_lock);
 944
 945         if (!dev && capable(CAP_SYS_MODULE))
 946                 request_module("%s", name);
 947 }
 948
 949 static int default_rebuild_header(struct sk_buff *skb)
 950 {
 951         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 952                skb->dev ? skb->dev->name : "NULL!!!");
 953         kfree_skb(skb);
 954         return 1;
 955 }
 956
 957 /**
 958  *      dev_open        - prepare an interface for use.
 959  *      @dev:   device to open
 960  *
 961  *      Takes a device from down to up state. The device's private open
 962  *      function is invoked and then the multicast lists are loaded. Finally
 963  *      the device is moved into the up state and a %NETDEV_UP message is
 964  *      sent to the netdev notifier chain.
 965  *
 966  *      Calling this function on an active interface is a nop. On a failure
 967  *      a negative errno code is returned.
 968  */
 969 int dev_open(struct net_device *dev)
 970 {
 971         int ret = 0;
 972
 973         /*
 974          *      Is it already up?
 975          */
 976
 977         if (dev->flags & IFF_UP)
 978                 return 0;
 979
 980         /*
 981          *      Is it even present?
 982          */
 983         if (!netif_device_present(dev))
 984                 return -ENODEV;
 985
 986         /*
 987          *      Call device private open method
 988          */
 989         set_bit(__LINK_STATE_START, &dev->state);
 990         if (dev->open) {
 991                 ret = dev->open(dev);
 992                 if (ret)
 993                         clear_bit(__LINK_STATE_START, &dev->state);
 994         }
 995
 996         /*
 997          *      If it went open OK then:
 998          */
 999
1000         if (!ret) {
1001                 /*
1002                  *      Set the flags.
1003                  */
1004                 dev->flags |= IFF_UP;
1005
1006                 /*
1007                  *      Initialize multicasting status
1008                  */
1009                 dev_set_rx_mode(dev);
1010
1011                 /*
1012                  *      Wakeup transmit queue engine
1013                  */
1014                 dev_activate(dev);
1015
1016                 /*
1017                  *      ... and announce new interface.
1018                  */
1019                 raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
1020         }
1021         return ret;
1022 }
1023
1024 /**
1025  *      dev_close - shutdown an interface.
1026  *      @dev: device to shutdown
1027  *
1028  *      This function moves an active device into down state. A
1029  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1030  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1031  *      chain.
1032  */
1033 int dev_close(struct net_device *dev)
1034 {
1035         if (!(dev->flags & IFF_UP))
1036                 return 0;
1037
1038         /*
1039          *      Tell people we are going down, so that they can
1040          *      prepare to death, when device is still operating.
1041          */
1042         raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
1043
1044         dev_deactivate(dev);
1045
1046         clear_bit(__LINK_STATE_START, &dev->state);
1047
1048         /* Synchronize to scheduled poll. We cannot touch poll list,
1049          * it can be even on different cpu. So just clear netif_running().
1050          *
1051          * dev->stop() will invoke napi_disable() on all of it's
1052          * napi_struct instances on this device.
1053          */
1054         smp_mb__after_clear_bit(); /* Commit netif_running(). */
1055
1056         /*
1057          *      Call the device specific close. This cannot fail.
1058          *      Only if device is UP
1059          *
1060          *      We allow it to be called even after a DETACH hot-plug
1061          *      event.
1062          */
1063         if (dev->stop)
1064                 dev->stop(dev);
1065
1066         /*
1067          *      Device is now down.
1068          */
1069
1070         dev->flags &= ~IFF_UP;
1071
1072         /*
1073          * Tell people we are down
1074          */
1075         raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1076
1077         return 0;
1078 }
1079
1080
/*
 *	Starts out non-zero. While it is set, register_netdevice_notifier()
 *	does not replay existing devices to a newly registered notifier.
 *	NOTE(review): presumably cleared once network device init has
 *	finished; the code doing so is outside this section -- confirm.
 */
static int dev_boot_phase = 1;
1082
1083 /*
1084  *      Device change register/unregister. These are not inline or static
1085  *      as we export them to the world.
1086  */
1087
1088 /**
1089  *      register_netdevice_notifier - register a network notifier block
1090  *      @nb: notifier
1091  *
1092  *      Register a notifier to be called when network device events occur.
1093  *      The notifier passed is linked into the kernel structures and must
1094  *      not be reused until it has been unregistered. A negative errno code
1095  *      is returned on a failure.
1096  *
1097  *      When registered all registration and up events are replayed
1098  *      to the new notifier to allow device to have a race free
1099  *      view of the network device list.
1100  */
1101
1102 int register_netdevice_notifier(struct notifier_block *nb)
1103 {
1104         struct net_device *dev;
1105         struct net_device *last;
1106         struct net *net;
1107         int err;
1108
1109         rtnl_lock();
1110         err = raw_notifier_chain_register(&netdev_chain, nb);
1111         if (err)
1112                 goto unlock;
1113         if (dev_boot_phase)
1114                 goto unlock;
1115         for_each_net(net) {
1116                 for_each_netdev(net, dev) {
1117                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1118                         err = notifier_to_errno(err);
1119                         if (err)
1120                                 goto rollback;
1121
1122                         if (!(dev->flags & IFF_UP))
1123                                 continue;
1124
1125                         nb->notifier_call(nb, NETDEV_UP, dev);
1126                 }
1127         }
1128
1129 unlock:
1130         rtnl_unlock();
1131         return err;
1132
1133 rollback:
1134         last = dev;
1135         for_each_net(net) {
1136                 for_each_netdev(net, dev) {
1137                         if (dev == last)
1138                                 break;
1139
1140                         if (dev->flags & IFF_UP) {
1141                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1142                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1143                         }
1144                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1145                 }
1146         }
1147         goto unlock;
1148 }
1149
1150 /**
1151  *      unregister_netdevice_notifier - unregister a network notifier block
1152  *      @nb: notifier
1153  *
1154  *      Unregister a notifier previously registered by
1155  *      register_netdevice_notifier(). The notifier is unlinked into the
1156  *      kernel structures and may then be reused. A negative errno code
1157  *      is returned on a failure.
1158  */
1159
1160 int unregister_netdevice_notifier(struct notifier_block *nb)
1161 {
1162         int err;
1163
1164         rtnl_lock();
1165         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1166         rtnl_unlock();
1167         return err;
1168 }
1169
1170 /**
1171  *      call_netdevice_notifiers - call all network notifier blocks
1172  *      @val: value passed unmodified to notifier function
1173  *      @v:   pointer passed unmodified to notifier function
1174  *
1175  *      Call all network notifier blocks.  Parameters and return value
1176  *      are as for raw_notifier_call_chain().
1177  */
1178
1179 int call_netdevice_notifiers(unsigned long val, void *v)
1180 {
1181         return raw_notifier_call_chain(&netdev_chain, val, v);
1182 }
1183
1184 /* When > 0 there are consumers of rx skb time stamps */
1185 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1186
1187 void net_enable_timestamp(void)
1188 {
1189         atomic_inc(&netstamp_needed);
1190 }
1191
1192 void net_disable_timestamp(void)
1193 {
1194         atomic_dec(&netstamp_needed);
1195 }
1196
1197 static inline void net_timestamp(struct sk_buff *skb)
1198 {
1199         if (atomic_read(&netstamp_needed))
1200                 __net_timestamp(skb);
1201         else
1202                 skb->tstamp.tv64 = 0;
1203 }
1204
1205 /*
1206  *      Support routine. Sends outgoing frames to any network
1207  *      taps currently in use.
1208  */
1209
1210 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1211 {
1212         struct packet_type *ptype;
1213
1214         net_timestamp(skb);
1215
1216         rcu_read_lock();
1217         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1218                 /* Never send packets back to the socket
1219                  * they originated from - MvS (miquels@drinkel.ow.org)
1220                  */
1221                 if ((ptype->dev == dev || !ptype->dev) &&
1222                     (ptype->af_packet_priv == NULL ||
1223                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1224                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1225                         if (!skb2)
1226                                 break;
1227
1228                         /* skb->nh should be correctly
1229                            set by sender, so that the second statement is
1230                            just protection against buggy protocols.
1231                          */
1232                         skb_reset_mac_header(skb2);
1233
1234                         if (skb_network_header(skb2) < skb2->data ||
1235                             skb2->network_header > skb2->tail) {
1236                                 if (net_ratelimit())
1237                                         printk(KERN_CRIT "protocol %04x is "
1238                                                "buggy, dev %s\n",
1239                                                skb2->protocol, dev->name);
1240                                 skb_reset_network_header(skb2);
1241                         }
1242
1243                         skb2->transport_header = skb2->network_header;
1244                         skb2->pkt_type = PACKET_OUTGOING;
1245                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1246                 }
1247         }
1248         rcu_read_unlock();
1249 }
1250
1251
1252 void __netif_schedule(struct net_device *dev)
1253 {
1254         if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1255                 unsigned long flags;
1256                 struct softnet_data *sd;
1257
1258                 local_irq_save(flags);
1259                 sd = &__get_cpu_var(softnet_data);
1260                 dev->next_sched = sd->output_queue;
1261                 sd->output_queue = dev;
1262                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1263                 local_irq_restore(flags);
1264         }
1265 }
1266 EXPORT_SYMBOL(__netif_schedule);
1267
1268 void dev_kfree_skb_irq(struct sk_buff *skb)
1269 {
1270         if (atomic_dec_and_test(&skb->users)) {
1271                 struct softnet_data *sd;
1272                 unsigned long flags;
1273
1274                 local_irq_save(flags);
1275                 sd = &__get_cpu_var(softnet_data);
1276                 skb->next = sd->completion_queue;
1277                 sd->completion_queue = skb;
1278                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1279                 local_irq_restore(flags);
1280         }
1281 }
1282 EXPORT_SYMBOL(dev_kfree_skb_irq);
1283
/*
 *	Free @skb from any context: use dev_kfree_skb_irq() in hard
 *	interrupt context or with interrupts disabled, and
 *	dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled()) {
		dev_kfree_skb_irq(skb);
		return;
	}

	dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1292
1293
1294 /**
1295  * netif_device_detach - mark device as removed
1296  * @dev: network device
1297  *
1298  * Mark device as removed from system and therefore no longer available.
1299  */
1300 void netif_device_detach(struct net_device *dev)
1301 {
1302         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1303             netif_running(dev)) {
1304                 netif_stop_queue(dev);
1305         }
1306 }
1307 EXPORT_SYMBOL(netif_device_detach);
1308
1309 /**
1310  * netif_device_attach - mark device as attached
1311  * @dev: network device
1312  *
1313  * Mark device as attached from system and restart if needed.
1314  */
1315 void netif_device_attach(struct net_device *dev)
1316 {
1317         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1318             netif_running(dev)) {
1319                 netif_wake_queue(dev);
1320                 __netdev_watchdog_up(dev);
1321         }
1322 }
1323 EXPORT_SYMBOL(netif_device_attach);
1324
1325
1326 /*
1327  * Invalidate hardware checksum when packet is to be mangled, and
1328  * complete checksum manually on outgoing path.
1329  */
1330 int skb_checksum_help(struct sk_buff *skb)
1331 {
1332         __wsum csum;
1333         int ret = 0, offset;
1334
1335         if (skb->ip_summed == CHECKSUM_COMPLETE)
1336                 goto out_set_summed;
1337
1338         if (unlikely(skb_shinfo(skb)->gso_size)) {
1339                 /* Let GSO fix up the checksum. */
1340                 goto out_set_summed;
1341         }
1342
1343         if (skb_cloned(skb)) {
1344                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1345                 if (ret)
1346                         goto out;
1347         }
1348
1349         offset = skb->csum_start - skb_headroom(skb);
1350         BUG_ON(offset > (int)skb->len);
1351         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1352
1353         offset = skb_headlen(skb) - offset;
1354         BUG_ON(offset <= 0);
1355         BUG_ON(skb->csum_offset + 2 > offset);
1356
1357         *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1358                 csum_fold(csum);
1359 out_set_summed:
1360         skb->ip_summed = CHECKSUM_NONE;
1361 out:
1362         return ret;
1363 }
1364
1365 /**
1366  *      skb_gso_segment - Perform segmentation on skb.
1367  *      @skb: buffer to segment
1368  *      @features: features for the output path (see dev->features)
1369  *
1370  *      This function segments the given skb and returns a list of segments.
1371  *
1372  *      It may return NULL if the skb requires no segmentation.  This is
1373  *      only possible when GSO is used for verifying header integrity.
1374  */
1375 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1376 {
1377         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1378         struct packet_type *ptype;
1379         __be16 type = skb->protocol;
1380         int err;
1381
1382         BUG_ON(skb_shinfo(skb)->frag_list);
1383
1384         skb_reset_mac_header(skb);
1385         skb->mac_len = skb->network_header - skb->mac_header;
1386         __skb_pull(skb, skb->mac_len);
1387
1388         if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1389                 if (skb_header_cloned(skb) &&
1390                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1391                         return ERR_PTR(err);
1392         }
1393
1394         rcu_read_lock();
1395         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1396                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1397                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1398                                 err = ptype->gso_send_check(skb);
1399                                 segs = ERR_PTR(err);
1400                                 if (err || skb_gso_ok(skb, features))
1401                                         break;
1402                                 __skb_push(skb, (skb->data -
1403                                                  skb_network_header(skb)));
1404                         }
1405                         segs = ptype->gso_segment(skb, features);
1406                         break;
1407                 }
1408         }
1409         rcu_read_unlock();
1410
1411         __skb_push(skb, skb->data - skb_mac_header(skb));
1412
1413         return segs;
1414 }
1415
1416 EXPORT_SYMBOL(skb_gso_segment);
1417
/*
 * Take action when hardware reception checksum errors are detected:
 * log the device name (rate limited) and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
		dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1430
/*
 * Returns 1 when @skb has a page fragment in high memory and @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise. Always 0 without
 * CONFIG_HIGHMEM.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}

#endif
	return 0;
}
1451
/*
 * Private data kept in skb->cb while the skb carries a list of GSO
 * segments: the destructor the skb had before dev_gso_segment()
 * replaced it.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1457
1458 static void dev_gso_skb_destructor(struct sk_buff *skb)
1459 {
1460         struct dev_gso_cb *cb;
1461
1462         do {
1463                 struct sk_buff *nskb = skb->next;
1464
1465                 skb->next = nskb->next;
1466                 nskb->next = NULL;
1467                 kfree_skb(nskb);
1468         } while (skb->next);
1469
1470         cb = DEV_GSO_CB(skb);
1471         if (cb->destructor)
1472                 cb->destructor(skb);
1473 }
1474
1475 /**
1476  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1477  *      @skb: buffer to segment
1478  *
1479  *      This function segments the given skb and stores the list of segments
1480  *      in skb->next.
1481  */
1482 static int dev_gso_segment(struct sk_buff *skb)
1483 {
1484         struct net_device *dev = skb->dev;
1485         struct sk_buff *segs;
1486         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1487                                          NETIF_F_SG : 0);
1488
1489         segs = skb_gso_segment(skb, features);
1490
1491         /* Verifying header integrity only. */
1492         if (!segs)
1493                 return 0;
1494
1495         if (unlikely(IS_ERR(segs)))
1496                 return PTR_ERR(segs);
1497
1498         skb->next = segs;
1499         DEV_GSO_CB(skb)->destructor = skb->destructor;
1500         skb->destructor = dev_gso_skb_destructor;
1501
1502         return 0;
1503 }
1504
1505 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1506 {
1507         if (likely(!skb->next)) {
1508                 if (!list_empty(&ptype_all))
1509                         dev_queue_xmit_nit(skb, dev);
1510
1511                 if (netif_needs_gso(dev, skb)) {
1512                         if (unlikely(dev_gso_segment(skb)))
1513                                 goto out_kfree_skb;
1514                         if (skb->next)
1515                                 goto gso;
1516                 }
1517
1518                 return dev->hard_start_xmit(skb, dev);
1519         }
1520
1521 gso:
1522         do {
1523                 struct sk_buff *nskb = skb->next;
1524                 int rc;
1525
1526                 skb->next = nskb->next;
1527                 nskb->next = NULL;
1528                 rc = dev->hard_start_xmit(nskb, dev);
1529                 if (unlikely(rc)) {
1530                         nskb->next = skb->next;
1531                         skb->next = nskb;
1532                         return rc;
1533                 }
1534                 if (unlikely((netif_queue_stopped(dev) ||
1535                              netif_subqueue_stopped(dev, skb->queue_mapping)) &&
1536                              skb->next))
1537                         return NETDEV_TX_BUSY;
1538         } while (skb->next);
1539
1540         skb->destructor = DEV_GSO_CB(skb)->destructor;
1541
1542 out_kfree_skb:
1543         kfree_skb(skb);
1544         return 0;
1545 }
1546
/*
 * Take / release the device transmit lock, unless NETIF_F_LLTX is set
 * in the device features, in which case nothing is done.
 *
 * The @cpu argument of HARD_TX_LOCK is accepted but not used.
 *
 * Both macros expand to a single statement (do { } while (0)) so they
 * can be used safely as the body of an unbraced if/else, and the @dev
 * argument is parenthesized so any pointer expression may be passed.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		netif_tx_lock(dev);				\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		netif_tx_unlock(dev);				\
	}							\
} while (0)
1558
1559 /**
1560  *      dev_queue_xmit - transmit a buffer
1561  *      @skb: buffer to transmit
1562  *
1563  *      Queue a buffer for transmission to a network device. The caller must
1564  *      have set the device and priority and built the buffer before calling
1565  *      this function. The function can be called from an interrupt.
1566  *
1567  *      A negative errno code is returned on a failure. A success does not
1568  *      guarantee the frame will be transmitted as it may be dropped due
1569  *      to congestion or traffic shaping.
1570  *
1571  * -----------------------------------------------------------------------------------
1572  *      I notice this method can also return errors from the queue disciplines,
1573  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1574  *      be positive.
1575  *
1576  *      Regardless of the return value, the skb is consumed, so it is currently
1577  *      difficult to retry a send to this method.  (You can bump the ref count
1578  *      before sending to hold a reference for retry if you are careful.)
1579  *
1580  *      When calling this method, interrupts MUST be enabled.  This is because
1581  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1582  *          --BLG
1583  */
1584
1585 int dev_queue_xmit(struct sk_buff *skb)
1586 {
1587         struct net_device *dev = skb->dev;
1588         struct Qdisc *q;
1589         int rc = -ENOMEM;
1590
1591         /* GSO will handle the following emulations directly. */
1592         if (netif_needs_gso(dev, skb))
1593                 goto gso;
1594
1595         if (skb_shinfo(skb)->frag_list &&
1596             !(dev->features & NETIF_F_FRAGLIST) &&
1597             __skb_linearize(skb))
1598                 goto out_kfree_skb;
1599
1600         /* Fragmented skb is linearized if device does not support SG,
1601          * or if at least one of fragments is in highmem and device
1602          * does not support DMA from it.
1603          */
1604         if (skb_shinfo(skb)->nr_frags &&
1605             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1606             __skb_linearize(skb))
1607                 goto out_kfree_skb;
1608
1609         /* If packet is not checksummed and device does not support
1610          * checksumming for this protocol, complete checksumming here.
1611          */
1612         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1613                 skb_set_transport_header(skb, skb->csum_start -
1614                                               skb_headroom(skb));
1615
1616                 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1617                     !((dev->features & NETIF_F_IP_CSUM) &&
1618                       skb->protocol == htons(ETH_P_IP)) &&
1619                     !((dev->features & NETIF_F_IPV6_CSUM) &&
1620                       skb->protocol == htons(ETH_P_IPV6)))
1621                         if (skb_checksum_help(skb))
1622                                 goto out_kfree_skb;
1623         }
1624
1625 gso:
1626         spin_lock_prefetch(&dev->queue_lock);
1627
1628         /* Disable soft irqs for various locks below. Also
1629          * stops preemption for RCU.
1630          */
1631         rcu_read_lock_bh();
1632
1633         /* Updates of qdisc are serialized by queue_lock.
1634          * The struct Qdisc which is pointed to by qdisc is now a
1635          * rcu structure - it may be accessed without acquiring
1636          * a lock (but the structure may be stale.) The freeing of the
1637          * qdisc will be deferred until it's known that there are no
1638          * more references to it.
1639          *
1640          * If the qdisc has an enqueue function, we still need to
1641          * hold the queue_lock before calling it, since queue_lock
1642          * also serializes access to the device queue.
1643          */
1644
1645         q = rcu_dereference(dev->qdisc);
1646 #ifdef CONFIG_NET_CLS_ACT
1647         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1648 #endif
1649         if (q->enqueue) {
1650                 /* Grab device queue */
1651                 spin_lock(&dev->queue_lock);
1652                 q = dev->qdisc;
1653                 if (q->enqueue) {
1654                         /* reset queue_mapping to zero */
1655                         skb->queue_mapping = 0;
1656                         rc = q->enqueue(skb, q);
1657                         qdisc_run(dev);
1658                         spin_unlock(&dev->queue_lock);
1659
1660                         rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1661                         goto out;
1662                 }
1663                 spin_unlock(&dev->queue_lock);
1664         }
1665
1666         /* The device has no queue. Common case for software devices:
1667            loopback, all the sorts of tunnels...
1668
1669            Really, it is unlikely that netif_tx_lock protection is necessary
1670            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1671            counters.)
1672            However, it is possible, that they rely on protection
1673            made by us here.
1674
1675            Check this and shot the lock. It is not prone from deadlocks.
1676            Either shot noqueue qdisc, it is even simpler 8)
1677          */
1678         if (dev->flags & IFF_UP) {
1679                 int cpu = smp_processor_id(); /* ok because BHs are off */
1680
1681                 if (dev->xmit_lock_owner != cpu) {
1682
1683                         HARD_TX_LOCK(dev, cpu);
1684
1685                         if (!netif_queue_stopped(dev) &&
1686                             !netif_subqueue_stopped(dev, skb->queue_mapping)) {
1687                                 rc = 0;
1688                                 if (!dev_hard_start_xmit(skb, dev)) {
1689                                         HARD_TX_UNLOCK(dev);
1690                                         goto out;
1691                                 }
1692                         }
1693                         HARD_TX_UNLOCK(dev);
1694                         if (net_ratelimit())
1695                                 printk(KERN_CRIT "Virtual device %s asks to "
1696                                        "queue packet!\n", dev->name);
1697                 } else {
1698                         /* Recursion is detected! It is possible,
1699                          * unfortunately */
1700                         if (net_ratelimit())
1701                                 printk(KERN_CRIT "Dead loop on virtual device "
1702                                        "%s, fix it urgently!\n", dev->name);
1703                 }
1704         }
1705
1706         rc = -ENETDOWN;
1707         rcu_read_unlock_bh();
1708
1709 out_kfree_skb:
1710         kfree_skb(skb);
1711         return rc;
1712 out:
1713         rcu_read_unlock_bh();
1714         return rc;
1715 }
1716
1717
1718 /*=======================================================================
1719                         Receiver routines
1720   =======================================================================*/
1721
1722 int netdev_max_backlog __read_mostly = 1000;
1723 int netdev_budget __read_mostly = 300;
1724 int weight_p __read_mostly = 64;            /* old backlog weight */
1725
1726 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1727
1728
1729 /**
1730  *      netif_rx        -       post buffer to the network code
1731  *      @skb: buffer to post
1732  *
1733  *      This function receives a packet from a device driver and queues it for
1734  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1735  *      may be dropped during processing for congestion control or by the
1736  *      protocol layers.
1737  *
1738  *      return values:
1739  *      NET_RX_SUCCESS  (no congestion)
1740  *      NET_RX_CN_LOW   (low congestion)
1741  *      NET_RX_CN_MOD   (moderate congestion)
1742  *      NET_RX_CN_HIGH  (high congestion)
1743  *      NET_RX_DROP     (packet was dropped)
1744  *
1745  */
1746
1747 int netif_rx(struct sk_buff *skb)
1748 {
1749         struct softnet_data *queue;
1750         unsigned long flags;
1751
1752         /* if netpoll wants it, pretend we never saw it */
1753         if (netpoll_rx(skb))
1754                 return NET_RX_DROP;
1755
1756         if (!skb->tstamp.tv64)
1757                 net_timestamp(skb);
1758
1759         /*
1760          * The code is rearranged so that the path is the most
1761          * short when CPU is congested, but is still operating.
1762          */
1763         local_irq_save(flags);
1764         queue = &__get_cpu_var(softnet_data);
1765
1766         __get_cpu_var(netdev_rx_stat).total++;
1767         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1768                 if (queue->input_pkt_queue.qlen) {
1769 enqueue:
1770                         dev_hold(skb->dev);
1771                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1772                         local_irq_restore(flags);
1773                         return NET_RX_SUCCESS;
1774                 }
1775
1776                 napi_schedule(&queue->backlog);
1777                 goto enqueue;
1778         }
1779
1780         __get_cpu_var(netdev_rx_stat).dropped++;
1781         local_irq_restore(flags);
1782
1783         kfree_skb(skb);
1784         return NET_RX_DROP;
1785 }
1786
/*
 * Wrapper around netif_rx() that, with preemption disabled, also runs
 * any softirq left pending after the packet has been queued.
 * Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();

	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1801
1802 static inline struct net_device *skb_bond(struct sk_buff *skb)
1803 {
1804         struct net_device *dev = skb->dev;
1805
1806         if (dev->master) {
1807                 if (skb_bond_should_drop(skb)) {
1808                         kfree_skb(skb);
1809                         return NULL;
1810                 }
1811                 skb->dev = dev->master;
1812         }
1813
1814         return dev;
1815 }
1816
1817
1818 static void net_tx_action(struct softirq_action *h)
1819 {
1820         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1821
1822         if (sd->completion_queue) {
1823                 struct sk_buff *clist;
1824
1825                 local_irq_disable();
1826                 clist = sd->completion_queue;
1827                 sd->completion_queue = NULL;
1828                 local_irq_enable();
1829
1830                 while (clist) {
1831                         struct sk_buff *skb = clist;
1832                         clist = clist->next;
1833
1834                         BUG_TRAP(!atomic_read(&skb->users));
1835                         __kfree_skb(skb);
1836                 }
1837         }
1838
1839         if (sd->output_queue) {
1840                 struct net_device *head;
1841
1842                 local_irq_disable();
1843                 head = sd->output_queue;
1844                 sd->output_queue = NULL;
1845                 local_irq_enable();
1846
1847                 while (head) {
1848                         struct net_device *dev = head;
1849                         head = head->next_sched;
1850
1851                         smp_mb__before_clear_bit();
1852                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1853
1854                         if (spin_trylock(&dev->queue_lock)) {
1855                                 qdisc_run(dev);
1856                                 spin_unlock(&dev->queue_lock);
1857                         } else {
1858                                 netif_schedule(dev);
1859                         }
1860                 }
1861         }
1862 }
1863
1864 static inline int deliver_skb(struct sk_buff *skb,
1865                               struct packet_type *pt_prev,
1866                               struct net_device *orig_dev)
1867 {
1868         atomic_inc(&skb->users);
1869         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1870 }
1871
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * Bridging entry point, called through handle_bridge() below.
 * Returns NULL if the packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Pass @skb to the bridge when its device is a bridge port.
 *
 * Loopback packets and packets on devices without a bridge port are
 * returned unchanged.  Otherwise a still pending *@pt_prev handler is
 * served first (result stored in *@ret, *@pt_prev cleared) and the
 * return value of br_handle_frame_hook() is passed on.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1905
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Pass @skb to the macvlan hook when its device has a macvlan port.
 *
 * A still pending *@pt_prev handler is served first (result stored in
 * *@ret, *@pt_prev cleared).  Returns the hook's return value, or @skb
 * unchanged when the device has no macvlan port.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (skb->dev->macvlan_port != NULL) {
		if (*pt_prev) {
			*ret = deliver_skb(skb, *pt_prev, orig_dev);
			*pt_prev = NULL;
		}
		return macvlan_handle_frame_hook(skb);
	}

	return skb;
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1927
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */
/*
 * Run @skb through the ingress qdisc of its device, if one is attached.
 *
 * Returns TC_ACT_OK when no ingress qdisc is set, TC_ACT_SHOT when the
 * redirect counter kept in tc_verd already exceeds MAX_RED_LOOP, and
 * otherwise the value returned by the qdisc's enqueue operation.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (MAX_RED_LOOP < ttl) {
		printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
			skb->iif, skb->dev->ifindex);
		return TC_ACT_SHOT;
	}
	ttl++;

	/* Record the bumped redirect count and mark the packet as ingress. */
	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

	/* Re-read the qdisc under the lock; it may have gone away. */
	spin_lock(&dev->ingress_lock);
	q = dev->qdisc_ingress;
	if (q != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
1965
1966 int netif_receive_skb(struct sk_buff *skb)
1967 {
1968         struct packet_type *ptype, *pt_prev;
1969         struct net_device *orig_dev;
1970         int ret = NET_RX_DROP;
1971         __be16 type;
1972
1973         /* if we've gotten here through NAPI, check netpoll */
1974         if (netpoll_receive_skb(skb))
1975                 return NET_RX_DROP;
1976
1977         if (!skb->tstamp.tv64)
1978                 net_timestamp(skb);
1979
1980         if (!skb->iif)
1981                 skb->iif = skb->dev->ifindex;
1982
1983         orig_dev = skb_bond(skb);
1984
1985         if (!orig_dev)
1986                 return NET_RX_DROP;
1987
1988         __get_cpu_var(netdev_rx_stat).total++;
1989
1990         skb_reset_network_header(skb);
1991         skb_reset_transport_header(skb);
1992         skb->mac_len = skb->network_header - skb->mac_header;
1993
1994         pt_prev = NULL;
1995
1996         rcu_read_lock();
1997
1998 #ifdef CONFIG_NET_CLS_ACT
1999         if (skb->tc_verd & TC_NCLS) {
2000                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2001                 goto ncls;
2002         }
2003 #endif
2004
2005         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2006                 if (!ptype->dev || ptype->dev == skb->dev) {
2007                         if (pt_prev)
2008                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2009                         pt_prev = ptype;
2010                 }
2011         }
2012
2013 #ifdef CONFIG_NET_CLS_ACT
2014         if (pt_prev) {
2015                 ret = deliver_skb(skb, pt_prev, orig_dev);
2016                 pt_prev = NULL; /* noone else should process this after*/
2017         } else {
2018                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2019         }
2020
2021         ret = ing_filter(skb);
2022
2023         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
2024                 kfree_skb(skb);
2025                 goto out;
2026         }
2027
2028         skb->tc_verd = 0;
2029 ncls:
2030 #endif
2031
2032         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2033         if (!skb)
2034                 goto out;
2035         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2036         if (!skb)
2037                 goto out;
2038
2039         type = skb->protocol;
2040         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2041                 if (ptype->type == type &&
2042                     (!ptype->dev || ptype->dev == skb->dev)) {
2043                         if (pt_prev)
2044                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2045                         pt_prev = ptype;
2046                 }
2047         }
2048
2049         if (pt_prev) {
2050                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2051         } else {
2052                 kfree_skb(skb);
2053                 /* Jamal, now you will not able to escape explaining
2054                  * me how you were going to use this. :-)
2055                  */
2056                 ret = NET_RX_DROP;
2057         }
2058
2059 out:
2060         rcu_read_unlock();
2061         return ret;
2062 }
2063
2064 static int process_backlog(struct napi_struct *napi, int quota)
2065 {
2066         int work = 0;
2067         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2068         unsigned long start_time = jiffies;
2069
2070         napi->weight = weight_p;
2071         do {
2072                 struct sk_buff *skb;
2073                 struct net_device *dev;
2074
2075                 local_irq_disable();
2076                 skb = __skb_dequeue(&queue->input_pkt_queue);
2077                 if (!skb) {
2078                         __napi_complete(napi);
2079                         local_irq_enable();
2080                         break;
2081                 }
2082
2083                 local_irq_enable();
2084
2085                 dev = skb->dev;
2086
2087                 netif_receive_skb(skb);
2088
2089                 dev_put(dev);
2090         } while (++work < quota && jiffies == start_time);
2091
2092         return work;
2093 }
2094
2095 /**
2096  * __napi_schedule - schedule for receive
2097  * @napi: entry to schedule
2098  *
2099  * The entry's receive function will be scheduled to run
2100  */
2101 void fastcall __napi_schedule(struct napi_struct *n)
2102 {
2103         unsigned long flags;
2104
2105         local_irq_save(flags);
2106         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2107         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2108         local_irq_restore(flags);
2109 }
2110 EXPORT_SYMBOL(__napi_schedule);
2111
2112
2113 static void net_rx_action(struct softirq_action *h)
2114 {
2115         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2116         unsigned long start_time = jiffies;
2117         int budget = netdev_budget;
2118         void *have;
2119
2120         local_irq_disable();
2121
2122         while (!list_empty(list)) {
2123                 struct napi_struct *n;
2124                 int work, weight;
2125
2126                 /* If softirq window is exhuasted then punt.
2127                  *
2128                  * Note that this is a slight policy change from the
2129                  * previous NAPI code, which would allow up to 2
2130                  * jiffies to pass before breaking out.  The test
2131                  * used to be "jiffies - start_time > 1".
2132                  */
2133                 if (unlikely(budget <= 0 || jiffies != start_time))
2134                         goto softnet_break;
2135
2136                 local_irq_enable();
2137
2138                 /* Even though interrupts have been re-enabled, this
2139                  * access is safe because interrupts can only add new
2140                  * entries to the tail of this list, and only ->poll()
2141                  * calls can remove this head entry from the list.
2142                  */
2143                 n = list_entry(list->next, struct napi_struct, poll_list);
2144
2145                 have = netpoll_poll_lock(n);
2146
2147                 weight = n->weight;
2148
2149                 work = n->poll(n, weight);
2150
2151                 WARN_ON_ONCE(work > weight);
2152
2153                 budget -= work;
2154
2155                 local_irq_disable();
2156
2157                 /* Drivers must not modify the NAPI state if they
2158                  * consume the entire weight.  In such cases this code
2159                  * still "owns" the NAPI instance and therefore can
2160                  * move the instance around on the list at-will.
2161                  */
2162                 if (unlikely(work == weight))
2163                         list_move_tail(&n->poll_list, list);
2164
2165                 netpoll_poll_unlock(have);
2166         }
2167 out:
2168         local_irq_enable();
2169
2170 #ifdef CONFIG_NET_DMA
2171         /*
2172          * There may not be any more sk_buffs coming right now, so push
2173          * any pending DMA copies to hardware
2174          */
2175         if (!cpus_empty(net_dma.channel_mask)) {
2176                 int chan_idx;
2177                 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2178                         struct dma_chan *chan = net_dma.channels[chan_idx];
2179                         if (chan)
2180                                 dma_async_memcpy_issue_pending(chan);
2181                 }
2182         }
2183 #endif
2184
2185         return;
2186
2187 softnet_break:
2188         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2189         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2190         goto out;
2191 }
2192
2193 static gifconf_func_t * gifconf_list [NPROTO];
2194
2195 /**
2196  *      register_gifconf        -       register a SIOCGIF handler
2197  *      @family: Address family
2198  *      @gifconf: Function handler
2199  *
2200  *      Register protocol dependent address dumping routines. The handler
2201  *      that is passed must not be freed or reused until it has been replaced
2202  *      by another handler.
2203  */
2204 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2205 {
2206         if (family >= NPROTO)
2207                 return -EINVAL;
2208         gifconf_list[family] = gifconf;
2209         return 0;
2210 }
2211
2212
2213 /*
2214  *      Map an interface index to its name (SIOCGIFNAME)
2215  */
2216
2217 /*
2218  *      We need this ioctl for efficient implementation of the
2219  *      if_indextoname() function required by the IPv6 API.  Without
2220  *      it, we would have to search all the interfaces to find a
2221  *      match.  --pb
2222  */
2223
2224 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2225 {
2226         struct net_device *dev;
2227         struct ifreq ifr;
2228
2229         /*
2230          *      Fetch the caller's info block.
2231          */
2232
2233         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2234                 return -EFAULT;
2235
2236         read_lock(&dev_base_lock);
2237         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2238         if (!dev) {
2239                 read_unlock(&dev_base_lock);
2240                 return -ENODEV;
2241         }
2242
2243         strcpy(ifr.ifr_name, dev->name);
2244         read_unlock(&dev_base_lock);
2245
2246         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2247                 return -EFAULT;
2248         return 0;
2249 }
2250
2251 /*
2252  *      Perform a SIOCGIFCONF call. This structure will change
2253  *      size eventually, and there is nothing I can do about it.
2254  *      Thus we will need a 'compatibility mode'.
2255  */
2256
2257 static int dev_ifconf(struct net *net, char __user *arg)
2258 {
2259         struct ifconf ifc;
2260         struct net_device *dev;
2261         char __user *pos;
2262         int len;
2263         int total;
2264         int i;
2265
2266         /*
2267          *      Fetch the caller's info block.
2268          */
2269
2270         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2271                 return -EFAULT;
2272
2273         pos = ifc.ifc_buf;
2274         len = ifc.ifc_len;
2275
2276         /*
2277          *      Loop over the interfaces, and write an info block for each.
2278          */
2279
2280         total = 0;
2281         for_each_netdev(net, dev) {
2282                 for (i = 0; i < NPROTO; i++) {
2283                         if (gifconf_list[i]) {
2284                                 int done;
2285                                 if (!pos)
2286                                         done = gifconf_list[i](dev, NULL, 0);
2287                                 else
2288                                         done = gifconf_list[i](dev, pos + total,
2289                                                                len - total);
2290                                 if (done < 0)
2291                                         return -EFAULT;
2292                                 total += done;
2293                         }
2294                 }
2295         }
2296
2297         /*
2298          *      All done.  Write the updated control block back to the caller.
2299          */
2300         ifc.ifc_len = total;
2301
2302         /*
2303          *      Both BSD and Solaris return 0 here, so we do too.
2304          */
2305         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2306 }
2307
2308 #ifdef CONFIG_PROC_FS
2309 /*
2310  *      This is invoked by the /proc filesystem handler to display a device
2311  *      in detail.
2312  */
2313 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2314 {
2315         struct net *net = seq->private;
2316         loff_t off;
2317         struct net_device *dev;
2318
2319         read_lock(&dev_base_lock);
2320         if (!*pos)
2321                 return SEQ_START_TOKEN;
2322
2323         off = 1;
2324         for_each_netdev(net, dev)
2325                 if (off++ == *pos)
2326                         return dev;
2327
2328         return NULL;
2329 }
2330
2331 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2332 {
2333         struct net *net = seq->private;
2334         ++*pos;
2335         return v == SEQ_START_TOKEN ?
2336                 first_net_device(net) : next_net_device((struct net_device *)v);
2337 }
2338
2339 void dev_seq_stop(struct seq_file *seq, void *v)
2340 {
2341         read_unlock(&dev_base_lock);
2342 }
2343
2344 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2345 {
2346         struct net_device_stats *stats = dev->get_stats(dev);
2347
2348         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2349                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2350                    dev->name, stats->rx_bytes, stats->rx_packets,
2351                    stats->rx_errors,
2352                    stats->rx_dropped + stats->rx_missed_errors,
2353                    stats->rx_fifo_errors,
2354                    stats->rx_length_errors + stats->rx_over_errors +
2355                     stats->rx_crc_errors + stats->rx_frame_errors,
2356                    stats->rx_compressed, stats->multicast,
2357                    stats->tx_bytes, stats->tx_packets,
2358                    stats->tx_errors, stats->tx_dropped,
2359                    stats->tx_fifo_errors, stats->collisions,
2360                    stats->tx_carrier_errors +
2361                     stats->tx_aborted_errors +
2362                     stats->tx_window_errors +
2363                     stats->tx_heartbeat_errors,
2364                    stats->tx_compressed);
2365 }
2366
2367 /*
2368  *      Called from the PROCfs module. This now uses the new arbitrary sized
2369  *      /proc/net interface to create /proc/net/dev
2370  */
2371 static int dev_seq_show(struct seq_file *seq, void *v)
2372 {
2373         if (v == SEQ_START_TOKEN)
2374                 seq_puts(seq, "Inter-|   Receive                            "
2375                               "                    |  Transmit\n"
2376                               " face |bytes    packets errs drop fifo frame "
2377                               "compressed multicast|bytes    packets errs "
2378                               "drop fifo colls carrier compressed\n");
2379         else
2380                 dev_seq_printf_stats(seq, v);
2381         return 0;
2382 }
2383
2384 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2385 {
2386         struct netif_rx_stats *rc = NULL;
2387
2388         while (*pos < NR_CPUS)
2389                 if (cpu_online(*pos)) {
2390                         rc = &per_cpu(netdev_rx_stat, *pos);
2391                         break;
2392                 } else
2393                         ++*pos;
2394         return rc;
2395 }
2396
2397 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2398 {
2399         return softnet_get_online(pos);
2400 }
2401
2402 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2403 {
2404         ++*pos;
2405         return softnet_get_online(pos);
2406 }
2407
/* seq_file stop callback: nothing to release for the softnet statistics. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2411
2412 static int softnet_seq_show(struct seq_file *seq, void *v)
2413 {
2414         struct netif_rx_stats *s = v;
2415
2416         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2417                    s->total, s->dropped, s->time_squeeze, 0,
2418                    0, 0, 0, 0, /* was fastroute */
2419                    s->cpu_collision );
2420         return 0;
2421 }
2422
2423 static const struct seq_operations dev_seq_ops = {
2424         .start = dev_seq_start,
2425         .next  = dev_seq_next,
2426         .stop  = dev_seq_stop,
2427         .show  = dev_seq_show,
2428 };
2429
2430 static int dev_seq_open(struct inode *inode, struct file *file)
2431 {
2432         struct seq_file *seq;
2433         int res;
2434         res =  seq_open(file, &dev_seq_ops);
2435         if (!res) {
2436                 seq = file->private_data;
2437                 seq->private = get_net(PROC_NET(inode));
2438         }
2439         return res;
2440 }
2441
2442 static int dev_seq_release(struct inode *inode, struct file *file)
2443 {
2444         struct seq_file *seq = file->private_data;
2445         struct net *net = seq->private;
2446         put_net(net);
2447         return seq_release(inode, file);
2448 }
2449
2450 static const struct file_operations dev_seq_fops = {
2451         .owner   = THIS_MODULE,
2452         .open    = dev_seq_open,
2453         .read    = seq_read,
2454         .llseek  = seq_lseek,
2455         .release = dev_seq_release,
2456 };
2457
2458 static const struct seq_operations softnet_seq_ops = {
2459         .start = softnet_seq_start,
2460         .next  = softnet_seq_next,
2461         .stop  = softnet_seq_stop,
2462         .show  = softnet_seq_show,
2463 };
2464
2465 static int softnet_seq_open(struct inode *inode, struct file *file)
2466 {
2467         return seq_open(file, &softnet_seq_ops);
2468 }
2469
2470 static const struct file_operations softnet_seq_fops = {
2471         .owner   = THIS_MODULE,
2472         .open    = softnet_seq_open,
2473         .read    = seq_read,
2474         .llseek  = seq_lseek,
2475         .release = seq_release,
2476 };
2477
2478 static void *ptype_get_idx(loff_t pos)
2479 {
2480         struct packet_type *pt = NULL;
2481         loff_t i = 0;
2482         int t;
2483
2484         list_for_each_entry_rcu(pt, &ptype_all, list) {
2485                 if (i == pos)
2486                         return pt;
2487                 ++i;
2488         }
2489
2490         for (t = 0; t < 16; t++) {
2491                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2492                         if (i == pos)
2493                                 return pt;
2494                         ++i;
2495                 }
2496         }
2497         return NULL;
2498 }
2499
2500 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2501 {
2502         rcu_read_lock();
2503         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2504 }
2505
2506 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2507 {
2508         struct packet_type *pt;
2509         struct list_head *nxt;
2510         int hash;
2511
2512         ++*pos;
2513         if (v == SEQ_START_TOKEN)
2514                 return ptype_get_idx(0);
2515
2516         pt = v;
2517         nxt = pt->list.next;
2518         if (pt->type == htons(ETH_P_ALL)) {
2519                 if (nxt != &ptype_all)
2520                         goto found;
2521                 hash = 0;
2522                 nxt = ptype_base[0].next;
2523         } else
2524                 hash = ntohs(pt->type) & 15;
2525
2526         while (nxt == &ptype_base[hash]) {
2527                 if (++hash >= 16)
2528                         return NULL;
2529                 nxt = ptype_base[hash].next;
2530         }
2531 found:
2532         return list_entry(nxt, struct packet_type, list);
2533 }
2534
/* seq_file stop callback: leave the RCU section entered in ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}
2539
/*
 * Print the address @sym.  With CONFIG_KALLSYMS and a successful
 * lookup it is shown as "symbol+0xoffset", prefixed by ":module:" when
 * the lookup reports a module name; otherwise it is shown as "[pointer]".
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		char *delim;

		/* No module name: print neither the name nor the colons. */
		if (modname) {
			delim = ":";
		} else {
			delim = "";
			modname = delim;
		}

		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2564
2565 static int ptype_seq_show(struct seq_file *seq, void *v)
2566 {
2567         struct packet_type *pt = v;
2568
2569         if (v == SEQ_START_TOKEN)
2570                 seq_puts(seq, "Type Device      Function\n");
2571         else {
2572                 if (pt->type == htons(ETH_P_ALL))
2573                         seq_puts(seq, "ALL ");
2574                 else
2575                         seq_printf(seq, "%04x", ntohs(pt->type));
2576
2577                 seq_printf(seq, " %-8s ",
2578                            pt->dev ? pt->dev->name : "");
2579                 ptype_seq_decode(seq,  pt->func);
2580                 seq_putc(seq, '\n');
2581         }
2582
2583         return 0;
2584 }
2585
2586 static const struct seq_operations ptype_seq_ops = {
2587         .start = ptype_seq_start,
2588         .next  = ptype_seq_next,
2589         .stop  = ptype_seq_stop,
2590         .show  = ptype_seq_show,
2591 };
2592
2593 static int ptype_seq_open(struct inode *inode, struct file *file)
2594 {
2595         return seq_open(file, &ptype_seq_ops);
2596 }
2597
2598 static const struct file_operations ptype_seq_fops = {
2599         .owner   = THIS_MODULE,
2600         .open    = ptype_seq_open,
2601         .read    = seq_read,
2602         .llseek  = seq_lseek,
2603         .release = seq_release,
2604 };
2605
2606
2607 static int dev_proc_net_init(struct net *net)
2608 {
2609         int rc = -ENOMEM;
2610
2611         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2612                 goto out;
2613         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2614                 goto out_dev;
2615         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2616                 goto out_softnet;
2617
2618         if (wext_proc_init(net))
2619                 goto out_ptype;
2620         rc = 0;
2621 out:
2622         return rc;
2623 out_ptype:
2624         proc_net_remove(net, "ptype");
2625 out_softnet:
2626         proc_net_remove(net, "softnet_stat");
2627 out_dev:
2628         proc_net_remove(net, "dev");
2629         goto out;
2630 }
2631
2632 static void dev_proc_net_exit(struct net *net)
2633 {
2634         wext_proc_exit(net);
2635
2636         proc_net_remove(net, "ptype");
2637         proc_net_remove(net, "softnet_stat");
2638         proc_net_remove(net, "dev");
2639 }
2640
2641 static struct pernet_operations dev_proc_ops = {
2642         .init = dev_proc_net_init,
2643         .exit = dev_proc_net_exit,
2644 };
2645
2646 static int __init dev_proc_init(void)
2647 {
2648         return register_pernet_subsys(&dev_proc_ops);
2649 }
2650 #else
2651 #define dev_proc_init() 0
2652 #endif  /* CONFIG_PROC_FS */
2653
2654
2655 /**
2656  *      netdev_set_master       -       set up master/slave pair
2657  *      @slave: slave device
2658  *      @master: new master device
2659  *
2660  *      Changes the master device of the slave. Pass %NULL to break the
2661  *      bonding. The caller must hold the RTNL semaphore. On a failure
2662  *      a negative errno code is returned. On success the reference counts
2663  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2664  *      function returns zero.
2665  */
2666 int netdev_set_master(struct net_device *slave, struct net_device *master)
2667 {
2668         struct net_device *old = slave->master;
2669
2670         ASSERT_RTNL();
2671
2672         if (master) {
2673                 if (old)
2674                         return -EBUSY;
2675                 dev_hold(master);
2676         }
2677
2678         slave->master = master;
2679
2680         synchronize_net();
2681
2682         if (old)
2683                 dev_put(old);
2684
2685         if (master)
2686                 slave->flags |= IFF_SLAVE;
2687         else
2688                 slave->flags &= ~IFF_SLAVE;
2689
2690         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2691         return 0;
2692 }
2693
2694 static void __dev_set_promiscuity(struct net_device *dev, int inc)
2695 {
2696         unsigned short old_flags = dev->flags;
2697
2698         ASSERT_RTNL();
2699
2700         if ((dev->promiscuity += inc) == 0)
2701                 dev->flags &= ~IFF_PROMISC;
2702         else
2703                 dev->flags |= IFF_PROMISC;
2704         if (dev->flags != old_flags) {
2705                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2706                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2707                                                                "left");
2708                 audit_log(current->audit_context, GFP_ATOMIC,
2709                         AUDIT_ANOM_PROMISCUOUS,
2710                         "dev=%s prom=%d old_prom=%d auid=%u",
2711                         dev->name, (dev->flags & IFF_PROMISC),
2712                         (old_flags & IFF_PROMISC),
2713                         audit_get_loginuid(current->audit_context));
2714
2715                 if (dev->change_rx_flags)
2716                         dev->change_rx_flags(dev, IFF_PROMISC);
2717         }
2718 }
2719
2720 /**
2721  *      dev_set_promiscuity     - update promiscuity count on a device
2722  *      @dev: device
2723  *      @inc: modifier
2724  *
2725  *      Add or remove promiscuity from a device. While the count in the device
2726  *      remains above zero the interface remains promiscuous. Once it hits zero
2727  *      the device reverts back to normal filtering operation. A negative inc
2728  *      value is used to drop promiscuity on the device.
2729  */
2730 void dev_set_promiscuity(struct net_device *dev, int inc)
2731 {
2732         unsigned short old_flags = dev->flags;
2733
2734         __dev_set_promiscuity(dev, inc);
2735         if (dev->flags != old_flags)
2736                 dev_set_rx_mode(dev);
2737 }
2738
2739 /**
2740  *      dev_set_allmulti        - update allmulti count on a device
2741  *      @dev: device
2742  *      @inc: modifier
2743  *
2744  *      Add or remove reception of all multicast frames to a device. While the
2745  *      count in the device remains above zero the interface remains listening
2746  *      to all interfaces. Once it hits zero the device reverts back to normal
2747  *      filtering operation. A negative @inc value is used to drop the counter
2748  *      when releasing a resource needing all multicasts.
2749  */
2750
2751 void dev_set_allmulti(struct net_device *dev, int inc)
2752 {
2753         unsigned short old_flags = dev->flags;
2754
2755         ASSERT_RTNL();
2756
2757         dev->flags |= IFF_ALLMULTI;
2758         if ((dev->allmulti += inc) == 0)
2759                 dev->flags &= ~IFF_ALLMULTI;
2760         if (dev->flags ^ old_flags) {
2761                 if (dev->change_rx_flags)
2762                         dev->change_rx_flags(dev, IFF_ALLMULTI);
2763                 dev_set_rx_mode(dev);
2764         }
2765 }
2766
2767 /*
2768  *      Upload unicast and multicast address lists to device and
2769  *      configure RX filtering. When the device doesn't support unicast
2770  *      filtering it is put in promiscous mode while unicast addresses
2771  *      are present.
2772  */
2773 void __dev_set_rx_mode(struct net_device *dev)
2774 {
2775         /* dev_open will call this function so the list will stay sane. */
2776         if (!(dev->flags&IFF_UP))
2777                 return;
2778
2779         if (!netif_device_present(dev))
2780                 return;
2781
2782         if (dev->set_rx_mode)
2783                 dev->set_rx_mode(dev);
2784         else {
2785                 /* Unicast addresses changes may only happen under the rtnl,
2786                  * therefore calling __dev_set_promiscuity here is safe.
2787                  */
2788                 if (dev->uc_count > 0 && !dev->uc_promisc) {
2789                         __dev_set_promiscuity(dev, 1);
2790                         dev->uc_promisc = 1;
2791                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2792                         __dev_set_promiscuity(dev, -1);
2793                         dev->uc_promisc = 0;
2794                 }
2795
2796                 if (dev->set_multicast_list)
2797                         dev->set_multicast_list(dev);
2798         }
2799 }
2800
/*
 *      Locked wrapper around __dev_set_rx_mode(): reprogram the RX filter
 *      of @dev while holding the device TX lock with bottom halves off.
 */
void dev_set_rx_mode(struct net_device *dev)
{
        netif_tx_lock_bh(dev);

        __dev_set_rx_mode(dev);

        netif_tx_unlock_bh(dev);
}
2807
2808 int __dev_addr_delete(struct dev_addr_list **list, int *count,
2809                       void *addr, int alen, int glbl)
2810 {
2811         struct dev_addr_list *da;
2812
2813         for (; (da = *list) != NULL; list = &da->next) {
2814                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2815                     alen == da->da_addrlen) {
2816                         if (glbl) {
2817                                 int old_glbl = da->da_gusers;
2818                                 da->da_gusers = 0;
2819                                 if (old_glbl == 0)
2820                                         break;
2821                         }
2822                         if (--da->da_users)
2823                                 return 0;
2824
2825                         *list = da->next;
2826                         kfree(da);
2827                         (*count)--;
2828                         return 0;
2829                 }
2830         }
2831         return -ENOENT;
2832 }
2833
2834 int __dev_addr_add(struct dev_addr_list **list, int *count,
2835                    void *addr, int alen, int glbl)
2836 {
2837         struct dev_addr_list *da;
2838
2839         for (da = *list; da != NULL; da = da->next) {
2840                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2841                     da->da_addrlen == alen) {
2842                         if (glbl) {
2843                                 int old_glbl = da->da_gusers;
2844                                 da->da_gusers = 1;
2845                                 if (old_glbl)
2846                                         return 0;
2847                         }
2848                         da->da_users++;
2849                         return 0;
2850                 }
2851         }
2852
2853         da = kmalloc(sizeof(*da), GFP_ATOMIC);
2854         if (da == NULL)
2855                 return -ENOMEM;
2856         memcpy(da->da_addr, addr, alen);
2857         da->da_addrlen = alen;
2858         da->da_users = 1;
2859         da->da_gusers = glbl ? 1 : 0;
2860         da->next = *list;
2861         *list = da;
2862         (*count)++;
2863         return 0;
2864 }
2865
2866 /**
2867  *      dev_unicast_delete      - Release secondary unicast address.
2868  *      @dev: device
2869  *      @addr: address to delete
2870  *      @alen: length of @addr
2871  *
2872  *      Release reference to a secondary unicast address and remove it
2873  *      from the device if the reference count drops to zero.
2874  *
2875  *      The caller must hold the rtnl_mutex.
2876  */
2877 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2878 {
2879         int err;
2880
2881         ASSERT_RTNL();
2882
2883         netif_tx_lock_bh(dev);
2884         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2885         if (!err)
2886                 __dev_set_rx_mode(dev);
2887         netif_tx_unlock_bh(dev);
2888         return err;
2889 }
2890 EXPORT_SYMBOL(dev_unicast_delete);
2891
2892 /**
2893  *      dev_unicast_add         - add a secondary unicast address
2894  *      @dev: device
2895  *      @addr: address to delete
2896  *      @alen: length of @addr
2897  *
2898  *      Add a secondary unicast address to the device or increase
2899  *      the reference count if it already exists.
2900  *
2901  *      The caller must hold the rtnl_mutex.
2902  */
2903 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2904 {
2905         int err;
2906
2907         ASSERT_RTNL();
2908
2909         netif_tx_lock_bh(dev);
2910         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2911         if (!err)
2912                 __dev_set_rx_mode(dev);
2913         netif_tx_unlock_bh(dev);
2914         return err;
2915 }
2916 EXPORT_SYMBOL(dev_unicast_add);
2917
2918 static void __dev_addr_discard(struct dev_addr_list **list)
2919 {
2920         struct dev_addr_list *tmp;
2921
2922         while (*list != NULL) {
2923                 tmp = *list;
2924                 *list = tmp->next;
2925                 if (tmp->da_users > tmp->da_gusers)
2926                         printk("__dev_addr_discard: address leakage! "
2927                                "da_users=%d\n", tmp->da_users);
2928                 kfree(tmp);
2929         }
2930 }
2931
2932 static void dev_addr_discard(struct net_device *dev)
2933 {
2934         netif_tx_lock_bh(dev);
2935
2936         __dev_addr_discard(&dev->uc_list);
2937         dev->uc_count = 0;
2938
2939         __dev_addr_discard(&dev->mc_list);
2940         dev->mc_count = 0;
2941
2942         netif_tx_unlock_bh(dev);
2943 }
2944
2945 unsigned dev_get_flags(const struct net_device *dev)
2946 {
2947         unsigned flags;
2948
2949         flags = (dev->flags & ~(IFF_PROMISC |
2950                                 IFF_ALLMULTI |
2951                                 IFF_RUNNING |
2952                                 IFF_LOWER_UP |
2953                                 IFF_DORMANT)) |
2954                 (dev->gflags & (IFF_PROMISC |
2955                                 IFF_ALLMULTI));
2956
2957         if (netif_running(dev)) {
2958                 if (netif_oper_up(dev))
2959                         flags |= IFF_RUNNING;
2960                 if (netif_carrier_ok(dev))
2961                         flags |= IFF_LOWER_UP;
2962                 if (netif_dormant(dev))
2963                         flags |= IFF_DORMANT;
2964         }
2965
2966         return flags;
2967 }
2968
2969 int dev_change_flags(struct net_device *dev, unsigned flags)
2970 {
2971         int ret, changes;
2972         int old_flags = dev->flags;
2973
2974         ASSERT_RTNL();
2975
2976         /*
2977          *      Set the flags on our device.
2978          */
2979
2980         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2981                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2982                                IFF_AUTOMEDIA)) |
2983                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2984                                     IFF_ALLMULTI));
2985
2986         /*
2987          *      Load in the correct multicast list now the flags have changed.
2988          */
2989
2990         if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
2991                 dev->change_rx_flags(dev, IFF_MULTICAST);
2992
2993         dev_set_rx_mode(dev);
2994
2995         /*
2996          *      Have we downed the interface. We handle IFF_UP ourselves
2997          *      according to user attempts to set it, rather than blindly
2998          *      setting it.
2999          */
3000
3001         ret = 0;
3002         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3003                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3004
3005                 if (!ret)
3006                         dev_set_rx_mode(dev);
3007         }
3008
3009         if (dev->flags & IFF_UP &&
3010             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3011                                           IFF_VOLATILE)))
3012                 raw_notifier_call_chain(&netdev_chain,
3013                                 NETDEV_CHANGE, dev);
3014
3015         if ((flags ^ dev->gflags) & IFF_PROMISC) {
3016                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3017                 dev->gflags ^= IFF_PROMISC;
3018                 dev_set_promiscuity(dev, inc);
3019         }
3020
3021         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3022            is important. Some (broken) drivers set IFF_PROMISC, when
3023            IFF_ALLMULTI is requested not asking us and not reporting.
3024          */
3025         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3026                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3027                 dev->gflags ^= IFF_ALLMULTI;
3028                 dev_set_allmulti(dev, inc);
3029         }
3030
3031         /* Exclude state transition flags, already notified */
3032         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3033         if (changes)
3034                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3035
3036         return ret;
3037 }
3038
3039 int dev_set_mtu(struct net_device *dev, int new_mtu)
3040 {
3041         int err;
3042
3043         if (new_mtu == dev->mtu)
3044                 return 0;
3045
3046         /*      MTU must be positive.    */
3047         if (new_mtu < 0)
3048                 return -EINVAL;
3049
3050         if (!netif_device_present(dev))
3051                 return -ENODEV;
3052
3053         err = 0;
3054         if (dev->change_mtu)
3055                 err = dev->change_mtu(dev, new_mtu);
3056         else
3057                 dev->mtu = new_mtu;
3058         if (!err && dev->flags & IFF_UP)
3059                 raw_notifier_call_chain(&netdev_chain,
3060                                 NETDEV_CHANGEMTU, dev);
3061         return err;
3062 }
3063
3064 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3065 {
3066         int err;
3067
3068         if (!dev->set_mac_address)
3069                 return -EOPNOTSUPP;
3070         if (sa->sa_family != dev->type)
3071                 return -EINVAL;
3072         if (!netif_device_present(dev))
3073                 return -ENODEV;
3074         err = dev->set_mac_address(dev, sa);
3075         if (!err)
3076                 raw_notifier_call_chain(&netdev_chain,
3077                                 NETDEV_CHANGEADDR, dev);
3078         return err;
3079 }
3080
3081 /*
3082  *      Perform the SIOCxIFxxx calls.
3083  */
3084 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3085 {
3086         int err;
3087         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3088
3089         if (!dev)
3090                 return -ENODEV;
3091
3092         switch (cmd) {
3093                 case SIOCGIFFLAGS:      /* Get interface flags */
3094                         ifr->ifr_flags = dev_get_flags(dev);
3095                         return 0;
3096
3097                 case SIOCSIFFLAGS:      /* Set interface flags */
3098                         return dev_change_flags(dev, ifr->ifr_flags);
3099
3100                 case SIOCGIFMETRIC:     /* Get the metric on the interface
3101                                            (currently unused) */
3102                         ifr->ifr_metric = 0;
3103                         return 0;
3104
3105                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3106                                            (currently unused) */
3107                         return -EOPNOTSUPP;
3108
3109                 case SIOCGIFMTU:        /* Get the MTU of a device */
3110                         ifr->ifr_mtu = dev->mtu;
3111                         return 0;
3112
3113                 case SIOCSIFMTU:        /* Set the MTU of a device */
3114                         return dev_set_mtu(dev, ifr->ifr_mtu);
3115
3116                 case SIOCGIFHWADDR:
3117                         if (!dev->addr_len)
3118                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3119                         else
3120                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3121                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3122                         ifr->ifr_hwaddr.sa_family = dev->type;
3123                         return 0;
3124
3125                 case SIOCSIFHWADDR:
3126                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3127
3128                 case SIOCSIFHWBROADCAST:
3129                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3130                                 return -EINVAL;
3131                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3132                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3133                         raw_notifier_call_chain(&netdev_chain,
3134                                             NETDEV_CHANGEADDR, dev);
3135                         return 0;
3136
3137                 case SIOCGIFMAP:
3138                         ifr->ifr_map.mem_start = dev->mem_start;
3139                         ifr->ifr_map.mem_end   = dev->mem_end;
3140                         ifr->ifr_map.base_addr = dev->base_addr;
3141                         ifr->ifr_map.irq       = dev->irq;
3142                         ifr->ifr_map.dma       = dev->dma;
3143                         ifr->ifr_map.port      = dev->if_port;
3144                         return 0;
3145
3146                 case SIOCSIFMAP:
3147                         if (dev->set_config) {
3148                                 if (!netif_device_present(dev))
3149                                         return -ENODEV;
3150                                 return dev->set_config(dev, &ifr->ifr_map);
3151                         }
3152                         return -EOPNOTSUPP;
3153
3154                 case SIOCADDMULTI:
3155                         if (!dev->set_multicast_list ||
3156                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3157                                 return -EINVAL;
3158                         if (!netif_device_present(dev))
3159                                 return -ENODEV;
3160                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3161                                           dev->addr_len, 1);
3162
3163                 case SIOCDELMULTI:
3164                         if (!dev->set_multicast_list ||
3165                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3166                                 return -EINVAL;
3167                         if (!netif_device_present(dev))
3168                                 return -ENODEV;
3169                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3170                                              dev->addr_len, 1);
3171
3172                 case SIOCGIFINDEX:
3173                         ifr->ifr_ifindex = dev->ifindex;
3174                         return 0;
3175
3176                 case SIOCGIFTXQLEN:
3177                         ifr->ifr_qlen = dev->tx_queue_len;
3178                         return 0;
3179
3180                 case SIOCSIFTXQLEN:
3181                         if (ifr->ifr_qlen < 0)
3182                                 return -EINVAL;
3183                         dev->tx_queue_len = ifr->ifr_qlen;
3184                         return 0;
3185
3186                 case SIOCSIFNAME:
3187                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3188                         return dev_change_name(dev, ifr->ifr_newname);
3189
3190                 /*
3191                  *      Unknown or private ioctl
3192                  */
3193
3194                 default:
3195                         if ((cmd >= SIOCDEVPRIVATE &&
3196                             cmd <= SIOCDEVPRIVATE + 15) ||
3197                             cmd == SIOCBONDENSLAVE ||
3198                             cmd == SIOCBONDRELEASE ||
3199                             cmd == SIOCBONDSETHWADDR ||
3200                             cmd == SIOCBONDSLAVEINFOQUERY ||
3201                             cmd == SIOCBONDINFOQUERY ||
3202                             cmd == SIOCBONDCHANGEACTIVE ||
3203                             cmd == SIOCGMIIPHY ||
3204                             cmd == SIOCGMIIREG ||
3205                             cmd == SIOCSMIIREG ||
3206                             cmd == SIOCBRADDIF ||
3207                             cmd == SIOCBRDELIF ||
3208                             cmd == SIOCWANDEV) {
3209                                 err = -EOPNOTSUPP;
3210                                 if (dev->do_ioctl) {
3211                                         if (netif_device_present(dev))
3212                                                 err = dev->do_ioctl(dev, ifr,
3213                                                                     cmd);
3214                                         else
3215                                                 err = -ENODEV;
3216                                 }
3217                         } else
3218                                 err = -EINVAL;
3219
3220         }
3221         return err;
3222 }
3223
3224 /*
3225  *      This function handles all "interface"-type I/O control requests. The actual
3226  *      'doing' part of this is dev_ifsioc above.
3227  */
3228
3229 /**
3230  *      dev_ioctl       -       network device ioctl
3231  *      @cmd: command to issue
3232  *      @arg: pointer to a struct ifreq in user space
3233  *
3234  *      Issue ioctl functions to devices. This is normally called by the
3235  *      user space syscall interfaces but can sometimes be useful for
3236  *      other purposes. The return value is the return from the syscall if
3237  *      positive or a negative errno code on error.
3238  */
3239
3240 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3241 {
3242         struct ifreq ifr;
3243         int ret;
3244         char *colon;
3245
3246         /* One special case: SIOCGIFCONF takes ifconf argument
3247            and requires shared lock, because it sleeps writing
3248            to user space.
3249          */
3250
3251         if (cmd == SIOCGIFCONF) {
3252                 rtnl_lock();
3253                 ret = dev_ifconf(net, (char __user *) arg);
3254                 rtnl_unlock();
3255                 return ret;
3256         }
3257         if (cmd == SIOCGIFNAME)
3258                 return dev_ifname(net, (struct ifreq __user *)arg);
3259
3260         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3261                 return -EFAULT;
3262
3263         ifr.ifr_name[IFNAMSIZ-1] = 0;
3264
3265         colon = strchr(ifr.ifr_name, ':');
3266         if (colon)
3267                 *colon = 0;
3268
3269         /*
3270          *      See which interface the caller is talking about.
3271          */
3272
3273         switch (cmd) {
3274                 /*
3275                  *      These ioctl calls:
3276                  *      - can be done by all.
3277                  *      - atomic and do not require locking.
3278                  *      - return a value
3279                  */
3280                 case SIOCGIFFLAGS:
3281                 case SIOCGIFMETRIC:
3282                 case SIOCGIFMTU:
3283                 case SIOCGIFHWADDR:
3284                 case SIOCGIFSLAVE:
3285                 case SIOCGIFMAP:
3286                 case SIOCGIFINDEX:
3287                 case SIOCGIFTXQLEN:
3288                         dev_load(net, ifr.ifr_name);
3289                         read_lock(&dev_base_lock);
3290                         ret = dev_ifsioc(net, &ifr, cmd);
3291                         read_unlock(&dev_base_lock);
3292                         if (!ret) {
3293                                 if (colon)
3294                                         *colon = ':';
3295                                 if (copy_to_user(arg, &ifr,
3296                                                  sizeof(struct ifreq)))
3297                                         ret = -EFAULT;
3298                         }
3299                         return ret;
3300
3301                 case SIOCETHTOOL:
3302                         dev_load(net, ifr.ifr_name);
3303                         rtnl_lock();
3304                         ret = dev_ethtool(net, &ifr);
3305                         rtnl_unlock();
3306                         if (!ret) {
3307                                 if (colon)
3308                                         *colon = ':';
3309                                 if (copy_to_user(arg, &ifr,
3310                                                  sizeof(struct ifreq)))
3311                                         ret = -EFAULT;
3312                         }
3313                         return ret;
3314
3315                 /*
3316                  *      These ioctl calls:
3317                  *      - require superuser power.
3318                  *      - require strict serialization.
3319                  *      - return a value
3320                  */
3321                 case SIOCGMIIPHY:
3322                 case SIOCGMIIREG:
3323                 case SIOCSIFNAME:
3324                         if (!capable(CAP_NET_ADMIN))
3325                                 return -EPERM;
3326                         dev_load(net, ifr.ifr_name);
3327                         rtnl_lock();
3328                         ret = dev_ifsioc(net, &ifr, cmd);
3329                         rtnl_unlock();
3330                         if (!ret) {
3331                                 if (colon)
3332                                         *colon = ':';
3333                                 if (copy_to_user(arg, &ifr,
3334                                                  sizeof(struct ifreq)))
3335                                         ret = -EFAULT;
3336                         }
3337                         return ret;
3338
3339                 /*
3340                  *      These ioctl calls:
3341                  *      - require superuser power.
3342                  *      - require strict serialization.
3343                  *      - do not return a value
3344                  */
3345                 case SIOCSIFFLAGS:
3346                 case SIOCSIFMETRIC:
3347                 case SIOCSIFMTU:
3348                 case SIOCSIFMAP:
3349                 case SIOCSIFHWADDR:
3350                 case SIOCSIFSLAVE:
3351                 case SIOCADDMULTI:
3352                 case SIOCDELMULTI:
3353                 case SIOCSIFHWBROADCAST:
3354                 case SIOCSIFTXQLEN:
3355                 case SIOCSMIIREG:
3356                 case SIOCBONDENSLAVE:
3357                 case SIOCBONDRELEASE:
3358                 case SIOCBONDSETHWADDR:
3359                 case SIOCBONDCHANGEACTIVE:
3360                 case SIOCBRADDIF:
3361                 case SIOCBRDELIF:
3362                         if (!capable(CAP_NET_ADMIN))
3363                                 return -EPERM;
3364                         /* fall through */
3365                 case SIOCBONDSLAVEINFOQUERY:
3366                 case SIOCBONDINFOQUERY:
3367                         dev_load(net, ifr.ifr_name);
3368                         rtnl_lock();
3369                         ret = dev_ifsioc(net, &ifr, cmd);
3370                         rtnl_unlock();
3371                         return ret;
3372
3373                 case SIOCGIFMEM:
3374                         /* Get the per device memory space. We can add this but
3375                          * currently do not support it */
3376                 case SIOCSIFMEM:
3377                         /* Set the per device memory buffer space.
3378                          * Not applicable in our case */
3379                 case SIOCSIFLINK:
3380                         return -EINVAL;
3381
3382                 /*
3383                  *      Unknown or private ioctl.
3384                  */
3385                 default:
3386                         if (cmd == SIOCWANDEV ||
3387                             (cmd >= SIOCDEVPRIVATE &&
3388                              cmd <= SIOCDEVPRIVATE + 15)) {
3389                                 dev_load(net, ifr.ifr_name);
3390                                 rtnl_lock();
3391                                 ret = dev_ifsioc(net, &ifr, cmd);
3392                                 rtnl_unlock();
3393                                 if (!ret && copy_to_user(arg, &ifr,
3394                                                          sizeof(struct ifreq)))
3395                                         ret = -EFAULT;
3396                                 return ret;
3397                         }
3398                         /* Take care of Wireless Extensions */
3399                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3400                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
3401                         return -EINVAL;
3402         }
3403 }
3404
3405
/**
 *      dev_new_index   -       allocate an ifindex
 *      @net: network namespace in which the index must be unused
 *
 *      Returns a suitable unique value for a new device interface
 *      number.  The caller must hold the rtnl semaphore or the
 *      dev_base_lock to be sure it remains unique.
 *
 *      Candidates are taken from a static counter that advances on every
 *      attempt and restarts at 1 after reaching %INT_MAX; values already
 *      used by a device in @net are skipped.
 */
static int dev_new_index(struct net *net)
{
        static int ifindex;

        for (;;) {
                /*
                 * Wrap by hand: incrementing a signed int past INT_MAX
                 * is undefined behaviour, so never rely on it going
                 * negative.
                 */
                if (ifindex >= INT_MAX)
                        ifindex = 1;
                else
                        ifindex++;

                if (!__dev_get_by_index(net, ifindex))
                        return ifindex;
        }
}
3423
/* Delayed registration/unregistration: devices queued by net_set_todo() */
static LIST_HEAD(net_todo_list);
static DEFINE_SPINLOCK(net_todo_list_lock);
3427
3428 static void net_set_todo(struct net_device *dev)
3429 {
3430         spin_lock(&net_todo_list_lock);
3431         list_add_tail(&dev->todo_list, &net_todo_list);
3432         spin_unlock(&net_todo_list_lock);
3433 }
3434
3435 /**
3436  *      register_netdevice      - register a network device
3437  *      @dev: device to register
3438  *
3439  *      Take a completed network device structure and add it to the kernel
3440  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3441  *      chain. 0 is returned on success. A negative errno code is returned
3442  *      on a failure to set up the device, or if the name is a duplicate.
3443  *
3444  *      Callers must hold the rtnl semaphore. You may want
3445  *      register_netdev() instead of this.
3446  *
3447  *      BUGS:
3448  *      The locking appears insufficient to guarantee two parallel registers
3449  *      will not get the same name.
3450  */
3451
3452 int register_netdevice(struct net_device *dev)
3453 {
3454         struct hlist_head *head;
3455         struct hlist_node *p;
3456         int ret;
3457         struct net *net;
3458
3459         BUG_ON(dev_boot_phase);
3460         ASSERT_RTNL();
3461
3462         might_sleep();
3463
3464         /* When net_device's are persistent, this will be fatal. */
3465         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3466         BUG_ON(!dev->nd_net);
3467         net = dev->nd_net;
3468
3469         spin_lock_init(&dev->queue_lock);
3470         spin_lock_init(&dev->_xmit_lock);
3471         netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3472         dev->xmit_lock_owner = -1;
3473         spin_lock_init(&dev->ingress_lock);
3474
3475         dev->iflink = -1;
3476
3477         /* Init, if this function is available */
3478         if (dev->init) {
3479                 ret = dev->init(dev);
3480                 if (ret) {
3481                         if (ret > 0)
3482                                 ret = -EIO;
3483                         goto out;
3484                 }
3485         }
3486
3487         if (!dev_valid_name(dev->name)) {
3488                 ret = -EINVAL;
3489                 goto err_uninit;
3490         }
3491
3492         dev->ifindex = dev_new_index(net);
3493         if (dev->iflink == -1)
3494                 dev->iflink = dev->ifindex;
3495
3496         /* Check for existence of name */
3497         head = dev_name_hash(net, dev->name);
3498         hlist_for_each(p, head) {
3499                 struct net_device *d
3500                         = hlist_entry(p, struct net_device, name_hlist);
3501                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3502                         ret = -EEXIST;
3503                         goto err_uninit;
3504                 }
3505         }
3506
3507         /* Fix illegal checksum combinations */
3508         if ((dev->features & NETIF_F_HW_CSUM) &&
3509             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3510                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3511                        dev->name);
3512                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3513         }
3514
3515         if ((dev->features & NETIF_F_NO_CSUM) &&
3516             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3517                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3518                        dev->name);
3519                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3520         }
3521
3522
3523         /* Fix illegal SG+CSUM combinations. */
3524         if ((dev->features & NETIF_F_SG) &&
3525             !(dev->features & NETIF_F_ALL_CSUM)) {
3526                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3527                        dev->name);
3528                 dev->features &= ~NETIF_F_SG;
3529         }
3530
3531         /* TSO requires that SG is present as well. */
3532         if ((dev->features & NETIF_F_TSO) &&
3533             !(dev->features & NETIF_F_SG)) {
3534                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3535                        dev->name);
3536                 dev->features &= ~NETIF_F_TSO;
3537         }
3538         if (dev->features & NETIF_F_UFO) {
3539                 if (!(dev->features & NETIF_F_HW_CSUM)) {
3540                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3541                                         "NETIF_F_HW_CSUM feature.\n",
3542                                                         dev->name);
3543                         dev->features &= ~NETIF_F_UFO;
3544                 }
3545                 if (!(dev->features & NETIF_F_SG)) {
3546                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3547                                         "NETIF_F_SG feature.\n",
3548                                         dev->name);
3549                         dev->features &= ~NETIF_F_UFO;
3550                 }
3551         }
3552
3553         /*
3554          *      nil rebuild_header routine,
3555          *      that should be never called and used as just bug trap.
3556          */
3557
3558         if (!dev->rebuild_header)
3559                 dev->rebuild_header = default_rebuild_header;
3560
3561         ret = netdev_register_sysfs(dev);
3562         if (ret)
3563                 goto err_uninit;
3564         dev->reg_state = NETREG_REGISTERED;
3565
3566         /*
3567          *      Default initial state at registry is that the
3568          *      device is present.
3569          */
3570
3571         set_bit(__LINK_STATE_PRESENT, &dev->state);
3572
3573         dev_init_scheduler(dev);
3574         write_lock_bh(&dev_base_lock);
3575         list_add_tail(&dev->dev_list, &net->dev_base_head);
3576         hlist_add_head(&dev->name_hlist, head);
3577         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
3578         dev_hold(dev);
3579         write_unlock_bh(&dev_base_lock);
3580
3581         /* Notify protocols, that a new device appeared. */
3582         ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3583         ret = notifier_to_errno(ret);
3584         if (ret)
3585                 unregister_netdevice(dev);
3586
3587 out:
3588         return ret;
3589
3590 err_uninit:
3591         if (dev->uninit)
3592                 dev->uninit(dev);
3593         goto out;
3594 }
3595
3596 /**
3597  *      register_netdev - register a network device
3598  *      @dev: device to register
3599  *
3600  *      Take a completed network device structure and add it to the kernel
3601  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3602  *      chain. 0 is returned on success. A negative errno code is returned
3603  *      on a failure to set up the device, or if the name is a duplicate.
3604  *
3605  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
3606  *      and expands the device name if you passed a format string to
3607  *      alloc_netdev.
3608  */
3609 int register_netdev(struct net_device *dev)
3610 {
3611         int err;
3612
3613         rtnl_lock();
3614
3615         /*
3616          * If the name is a format string the caller wants us to do a
3617          * name allocation.
3618          */
3619         if (strchr(dev->name, '%')) {
3620                 err = dev_alloc_name(dev, dev->name);
3621                 if (err < 0)
3622                         goto out;
3623         }
3624
3625         err = register_netdevice(dev);
3626 out:
3627         rtnl_unlock();
3628         return err;
3629 }
3630 EXPORT_SYMBOL(register_netdev);
3631
3632 /*
3633  * netdev_wait_allrefs - wait until all references are gone.
3634  *
3635  * This is called when unregistering network devices.
3636  *
3637  * Any protocol or device that holds a reference should register
3638  * for netdevice notification, and cleanup and put back the
3639  * reference if they receive an UNREGISTER event.
3640  * We can get stuck here if buggy protocols don't correctly
3641  * call dev_put.
3642  */
3643 static void netdev_wait_allrefs(struct net_device *dev)
3644 {
3645         unsigned long rebroadcast_time, warning_time;
3646
3647         rebroadcast_time = warning_time = jiffies;
3648         while (atomic_read(&dev->refcnt) != 0) {
3649                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3650                         rtnl_lock();
3651
3652                         /* Rebroadcast unregister notification */
3653                         raw_notifier_call_chain(&netdev_chain,
3654                                             NETDEV_UNREGISTER, dev);
3655
3656                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3657                                      &dev->state)) {
3658                                 /* We must not have linkwatch events
3659                                  * pending on unregister. If this
3660                                  * happens, we simply run the queue
3661                                  * unscheduled, resulting in a noop
3662                                  * for this device.
3663                                  */
3664                                 linkwatch_run_queue();
3665                         }
3666
3667                         __rtnl_unlock();
3668
3669                         rebroadcast_time = jiffies;
3670                 }
3671
3672                 msleep(250);
3673
3674                 if (time_after(jiffies, warning_time + 10 * HZ)) {
3675                         printk(KERN_EMERG "unregister_netdevice: "
3676                                "waiting for %s to become free. Usage "
3677                                "count = %d\n",
3678                                dev->name, atomic_read(&dev->refcnt));
3679                         warning_time = jiffies;
3680                 }
3681         }
3682 }
3683
3684 /* The sequence is:
3685  *
3686  *      rtnl_lock();
3687  *      ...
3688  *      register_netdevice(x1);
3689  *      register_netdevice(x2);
3690  *      ...
3691  *      unregister_netdevice(y1);
3692  *      unregister_netdevice(y2);
3693  *      ...
3694  *      rtnl_unlock();
3695  *      free_netdev(y1);
3696  *      free_netdev(y2);
3697  *
3698  * We are invoked by rtnl_unlock() after it drops the semaphore.
3699  * This allows us to deal with problems:
3700  * 1) We can delete sysfs objects which invoke hotplug
3701  *    without deadlocking with linkwatch via keventd.
3702  * 2) Since we run with the RTNL semaphore not held, we can sleep
3703  *    safely in order to wait for the netdev refcnt to drop to zero.
3704  */
3705 static DEFINE_MUTEX(net_todo_run_mutex);
3706 void netdev_run_todo(void)
3707 {
3708         struct list_head list;
3709
3710         /* Need to guard against multiple cpu's getting out of order. */
3711         mutex_lock(&net_todo_run_mutex);
3712
3713         /* Not safe to do outside the semaphore.  We must not return
3714          * until all unregister events invoked by the local processor
3715          * have been completed (either by this todo run, or one on
3716          * another cpu).
3717          */
3718         if (list_empty(&net_todo_list))
3719                 goto out;
3720
3721         /* Snapshot list, allow later requests */
3722         spin_lock(&net_todo_list_lock);
3723         list_replace_init(&net_todo_list, &list);
3724         spin_unlock(&net_todo_list_lock);
3725
3726         while (!list_empty(&list)) {
3727                 struct net_device *dev
3728                         = list_entry(list.next, struct net_device, todo_list);
3729                 list_del(&dev->todo_list);
3730
3731                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3732                         printk(KERN_ERR "network todo '%s' but state %d\n",
3733                                dev->name, dev->reg_state);
3734                         dump_stack();
3735                         continue;
3736                 }
3737
3738                 dev->reg_state = NETREG_UNREGISTERED;
3739
3740                 netdev_wait_allrefs(dev);
3741
3742                 /* paranoia */
3743                 BUG_ON(atomic_read(&dev->refcnt));
3744                 BUG_TRAP(!dev->ip_ptr);
3745                 BUG_TRAP(!dev->ip6_ptr);
3746                 BUG_TRAP(!dev->dn_ptr);
3747
3748                 if (dev->destructor)
3749                         dev->destructor(dev);
3750
3751                 /* Free network device */
3752                 kobject_put(&dev->dev.kobj);
3753         }
3754
3755 out:
3756         mutex_unlock(&net_todo_run_mutex);
3757 }
3758
3759 static struct net_device_stats *internal_stats(struct net_device *dev)
3760 {
3761         return &dev->stats;
3762 }
3763
3764 /**
3765  *      alloc_netdev_mq - allocate network device
3766  *      @sizeof_priv:   size of private data to allocate space for
3767  *      @name:          device name format string
3768  *      @setup:         callback to initialize device
3769  *      @queue_count:   the number of subqueues to allocate
3770  *
3771  *      Allocates a struct net_device with private data area for driver use
3772  *      and performs basic initialization.  Also allocates subquue structs
3773  *      for each queue on the device at the end of the netdevice.
3774  */
3775 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3776                 void (*setup)(struct net_device *), unsigned int queue_count)
3777 {
3778         void *p;
3779         struct net_device *dev;
3780         int alloc_size;
3781
3782         BUG_ON(strlen(name) >= sizeof(dev->name));
3783
3784         /* ensure 32-byte alignment of both the device and private area */
3785         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
3786                      (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
3787                      ~NETDEV_ALIGN_CONST;
3788         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3789
3790         p = kzalloc(alloc_size, GFP_KERNEL);
3791         if (!p) {
3792                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3793                 return NULL;
3794         }
3795
3796         dev = (struct net_device *)
3797                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3798         dev->padded = (char *)dev - (char *)p;
3799         dev->nd_net = &init_net;
3800
3801         if (sizeof_priv) {
3802                 dev->priv = ((char *)dev +
3803                              ((sizeof(struct net_device) +
3804                                (sizeof(struct net_device_subqueue) *
3805                                 (queue_count - 1)) + NETDEV_ALIGN_CONST)
3806                               & ~NETDEV_ALIGN_CONST));
3807         }
3808
3809         dev->egress_subqueue_count = queue_count;
3810
3811         dev->get_stats = internal_stats;
3812         netpoll_netdev_init(dev);
3813         setup(dev);
3814         strcpy(dev->name, name);
3815         return dev;
3816 }
3817 EXPORT_SYMBOL(alloc_netdev_mq);
3818
3819 /**
3820  *      free_netdev - free network device
3821  *      @dev: device
3822  *
3823  *      This function does the last stage of destroying an allocated device
3824  *      interface. The reference to the device object is released.
3825  *      If this is the last reference then it will be freed.
3826  */
3827 void free_netdev(struct net_device *dev)
3828 {
3829 #ifdef CONFIG_SYSFS
3830         /*  Compatibility with error handling in drivers */
3831         if (dev->reg_state == NETREG_UNINITIALIZED) {
3832                 kfree((char *)dev - dev->padded);
3833                 return;
3834         }
3835
3836         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3837         dev->reg_state = NETREG_RELEASED;
3838
3839         /* will free via device release */
3840         put_device(&dev->dev);
3841 #else
3842         kfree((char *)dev - dev->padded);
3843 #endif
3844 }
3845
/*
 * Synchronize with packet receive processing by waiting in
 * synchronize_rcu().  May sleep.
 */
void synchronize_net(void)
{
        might_sleep();

        synchronize_rcu();
}
3852
3853 /**
3854  *      unregister_netdevice - remove device from the kernel
3855  *      @dev: device
3856  *
3857  *      This function shuts down a device interface and removes it
3858  *      from the kernel tables. On success 0 is returned, on a failure
3859  *      a negative errno code is returned.
3860  *
3861  *      Callers must hold the rtnl semaphore.  You may want
3862  *      unregister_netdev() instead of this.
3863  */
3864
3865 void unregister_netdevice(struct net_device *dev)
3866 {
3867         BUG_ON(dev_boot_phase);
3868         ASSERT_RTNL();
3869
3870         /* Some devices call without registering for initialization unwind. */
3871         if (dev->reg_state == NETREG_UNINITIALIZED) {
3872                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3873                                   "was registered\n", dev->name, dev);
3874
3875                 WARN_ON(1);
3876                 return;
3877         }
3878
3879         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3880
3881         /* If device is running, close it first. */
3882         if (dev->flags & IFF_UP)
3883                 dev_close(dev);
3884
3885         /* And unlink it from device chain. */
3886         write_lock_bh(&dev_base_lock);
3887         list_del(&dev->dev_list);
3888         hlist_del(&dev->name_hlist);
3889         hlist_del(&dev->index_hlist);
3890         write_unlock_bh(&dev_base_lock);
3891
3892         dev->reg_state = NETREG_UNREGISTERING;
3893
3894         synchronize_net();
3895
3896         /* Shutdown queueing discipline. */
3897         dev_shutdown(dev);
3898
3899
3900         /* Notify protocols, that we are about to destroy
3901            this device. They should clean all the things.
3902         */
3903         raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3904
3905         /*
3906          *      Flush the unicast and multicast chains
3907          */
3908         dev_addr_discard(dev);
3909
3910         if (dev->uninit)
3911                 dev->uninit(dev);
3912
3913         /* Notifier chain MUST detach us from master device. */
3914         BUG_TRAP(!dev->master);
3915
3916         /* Remove entries from sysfs */
3917         netdev_unregister_sysfs(dev);
3918
3919         /* Finish processing unregister after unlock */
3920         net_set_todo(dev);
3921
3922         synchronize_net();
3923
3924         dev_put(dev);
3925 }
3926
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.  Nothing is returned.
 *
 *      This is just a wrapper for unregister_netdevice() that takes
 *      the rtnl semaphore around the call.  In general you want to use
 *      this and not unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();

        unregister_netdevice(dev);

        rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3947
3948 static int dev_cpu_callback(struct notifier_block *nfb,
3949                             unsigned long action,
3950                             void *ocpu)
3951 {
3952         struct sk_buff **list_skb;
3953         struct net_device **list_net;
3954         struct sk_buff *skb;
3955         unsigned int cpu, oldcpu = (unsigned long)ocpu;
3956         struct softnet_data *sd, *oldsd;
3957
3958         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3959                 return NOTIFY_OK;
3960
3961         local_irq_disable();
3962         cpu = smp_processor_id();
3963         sd = &per_cpu(softnet_data, cpu);
3964         oldsd = &per_cpu(softnet_data, oldcpu);
3965
3966         /* Find end of our completion_queue. */
3967         list_skb = &sd->completion_queue;
3968         while (*list_skb)
3969                 list_skb = &(*list_skb)->next;
3970         /* Append completion queue from offline CPU. */
3971         *list_skb = oldsd->completion_queue;
3972         oldsd->completion_queue = NULL;
3973
3974         /* Find end of our output_queue. */
3975         list_net = &sd->output_queue;
3976         while (*list_net)
3977                 list_net = &(*list_net)->next_sched;
3978         /* Append output queue from offline CPU. */
3979         *list_net = oldsd->output_queue;
3980         oldsd->output_queue = NULL;
3981
3982         raise_softirq_irqoff(NET_TX_SOFTIRQ);
3983         local_irq_enable();
3984
3985         /* Process offline CPU's input_pkt_queue */
3986         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3987                 netif_rx(skb);
3988
3989         return NOTIFY_OK;
3990 }
3991
3992 #ifdef CONFIG_NET_DMA
3993 /**
3994  * net_dma_rebalance - try to maintain one DMA channel per CPU
3995  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
3996  *
3997  * This is called when the number of channels allocated to the net_dma client
3998  * changes.  The net_dma client tries to have one DMA channel per CPU.
3999  */
4000
4001 static void net_dma_rebalance(struct net_dma *net_dma)
4002 {
4003         unsigned int cpu, i, n, chan_idx;
4004         struct dma_chan *chan;
4005
4006         if (cpus_empty(net_dma->channel_mask)) {
4007                 for_each_online_cpu(cpu)
4008                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4009                 return;
4010         }
4011
4012         i = 0;
4013         cpu = first_cpu(cpu_online_map);
4014
4015         for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4016                 chan = net_dma->channels[chan_idx];
4017
4018                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4019                    + (i < (num_online_cpus() %
4020                         cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4021
4022                 while(n) {
4023                         per_cpu(softnet_data, cpu).net_dma = chan;
4024                         cpu = next_cpu(cpu, cpu_online_map);
4025                         n--;
4026                 }
4027                 i++;
4028         }
4029 }
4030
4031 /**
4032  * netdev_dma_event - event callback for the net_dma_client
4033  * @client: should always be net_dma_client
4034  * @chan: DMA channel for the event
4035  * @state: DMA state to be handled
4036  */
4037 static enum dma_state_client
4038 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4039         enum dma_state state)
4040 {
4041         int i, found = 0, pos = -1;
4042         struct net_dma *net_dma =
4043                 container_of(client, struct net_dma, client);
4044         enum dma_state_client ack = DMA_DUP; /* default: take no action */
4045
4046         spin_lock(&net_dma->lock);
4047         switch (state) {
4048         case DMA_RESOURCE_AVAILABLE:
4049                 for (i = 0; i < NR_CPUS; i++)
4050                         if (net_dma->channels[i] == chan) {
4051                                 found = 1;
4052                                 break;
4053                         } else if (net_dma->channels[i] == NULL && pos < 0)
4054                                 pos = i;
4055
4056                 if (!found && pos >= 0) {
4057                         ack = DMA_ACK;
4058                         net_dma->channels[pos] = chan;
4059                         cpu_set(pos, net_dma->channel_mask);
4060                         net_dma_rebalance(net_dma);
4061                 }
4062                 break;
4063         case DMA_RESOURCE_REMOVED:
4064                 for (i = 0; i < NR_CPUS; i++)
4065                         if (net_dma->channels[i] == chan) {
4066                                 found = 1;
4067                                 pos = i;
4068                                 break;
4069                         }
4070
4071                 if (found) {
4072                         ack = DMA_ACK;
4073                         cpu_clear(pos, net_dma->channel_mask);
4074                         net_dma->channels[i] = NULL;
4075                         net_dma_rebalance(net_dma);
4076                 }
4077                 break;
4078         default:
4079                 break;
4080         }
4081         spin_unlock(&net_dma->lock);
4082
4083         return ack;
4084 }
4085
4086 /**
4087  * netdev_dma_regiser - register the networking subsystem as a DMA client
4088  */
4089 static int __init netdev_dma_register(void)
4090 {
4091         spin_lock_init(&net_dma.lock);
4092         dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4093         dma_async_client_register(&net_dma.client);
4094         dma_async_client_chan_request(&net_dma.client);
4095         return 0;
4096 }
4097
4098 #else
4099 static int __init netdev_dma_register(void) { return -ENODEV; }
4100 #endif /* CONFIG_NET_DMA */
4101
4102 /**
4103  *      netdev_compute_feature - compute conjunction of two feature sets
4104  *      @all: first feature set
4105  *      @one: second feature set
4106  *
4107  *      Computes a new feature set after adding a device with feature set
4108  *      @one to the master device with current feature set @all.  Returns
4109  *      the new feature set.
4110  */
4111 int netdev_compute_features(unsigned long all, unsigned long one)
4112 {
4113         /* if device needs checksumming, downgrade to hw checksumming */
4114         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4115                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4116
4117         /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4118         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4119                 all ^= NETIF_F_HW_CSUM
4120                         | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4121
4122         if (one & NETIF_F_GSO)
4123                 one |= NETIF_F_GSO_SOFTWARE;
4124         one |= NETIF_F_GSO;
4125
4126         /* If even one device supports robust GSO, enable it for all. */
4127         if (one & NETIF_F_GSO_ROBUST)
4128                 all |= NETIF_F_GSO_ROBUST;
4129
4130         all &= one | NETIF_F_LLTX;
4131
4132         if (!(all & NETIF_F_ALL_CSUM))
4133                 all &= ~NETIF_F_SG;
4134         if (!(all & NETIF_F_SG))
4135                 all &= ~NETIF_F_GSO_MASK;
4136
4137         return all;
4138 }
4139 EXPORT_SYMBOL(netdev_compute_features);
4140
4141 /* Initialize per network namespace state */
4142 static int netdev_init(struct net *net)
4143 {
4144         int i;
4145         INIT_LIST_HEAD(&net->dev_base_head);
4146         rwlock_init(&dev_base_lock);
4147
4148         net->dev_name_head = kmalloc(
4149                 sizeof(*net->dev_name_head)*NETDEV_HASHENTRIES, GFP_KERNEL);
4150         if (!net->dev_name_head)
4151                 return -ENOMEM;
4152
4153         net->dev_index_head = kmalloc(
4154                 sizeof(*net->dev_index_head)*NETDEV_HASHENTRIES, GFP_KERNEL);
4155         if (!net->dev_index_head) {
4156                 kfree(net->dev_name_head);
4157                 return -ENOMEM;
4158         }
4159
4160         for (i = 0; i < NETDEV_HASHENTRIES; i++)
4161                 INIT_HLIST_HEAD(&net->dev_name_head[i]);
4162
4163         for (i = 0; i < NETDEV_HASHENTRIES; i++)
4164                 INIT_HLIST_HEAD(&net->dev_index_head[i]);
4165
4166         return 0;
4167 }
4168
4169 static void netdev_exit(struct net *net)
4170 {
4171         kfree(net->dev_name_head);
4172         kfree(net->dev_index_head);
4173 }
4174
4175 static struct pernet_operations netdev_net_ops = {
4176         .init = netdev_init,
4177         .exit = netdev_exit,
4178 };
4179
4180 /*
4181  *      Initialize the DEV module. At boot time this walks the device list and
4182  *      unhooks any devices that fail to initialise (normally hardware not
4183  *      present) and leaves us with a valid list of present and active devices.
4184  *
4185  */
4186
4187 /*
4188  *       This is called single threaded during boot, so no need
4189  *       to take the rtnl semaphore.
4190  */
4191 static int __init net_dev_init(void)
4192 {
4193         int i, rc = -ENOMEM;
4194
4195         BUG_ON(!dev_boot_phase);
4196
4197         if (dev_proc_init())
4198                 goto out;
4199
4200         if (netdev_sysfs_init())
4201                 goto out;
4202
4203         INIT_LIST_HEAD(&ptype_all);
4204         for (i = 0; i < 16; i++)
4205                 INIT_LIST_HEAD(&ptype_base[i]);
4206
4207         if (register_pernet_subsys(&netdev_net_ops))
4208                 goto out;
4209
4210         /*
4211          *      Initialise the packet receive queues.
4212          */
4213
4214         for_each_possible_cpu(i) {
4215                 struct softnet_data *queue;
4216
4217                 queue = &per_cpu(softnet_data, i);
4218                 skb_queue_head_init(&queue->input_pkt_queue);
4219                 queue->completion_queue = NULL;
4220                 INIT_LIST_HEAD(&queue->poll_list);
4221
4222                 queue->backlog.poll = process_backlog;
4223                 queue->backlog.weight = weight_p;
4224         }
4225
4226         netdev_dma_register();
4227
4228         dev_boot_phase = 0;
4229
4230         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4231         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4232
4233         hotcpu_notifier(dev_cpu_callback, 0);
4234         dst_init();
4235         dev_mcast_init();
4236         rc = 0;
4237 out:
4238         return rc;
4239 }
4240
4241 subsys_initcall(net_dev_init);
4242
/* Symbols exported to modules. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks are exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

/* dev_load is exported only with CONFIG_KMOD. */
#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);