net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/ethtool.h>
  94 #include <linux/notifier.h>
  95 #include <linux/skbuff.h>
  96 #include <net/net_namespace.h>
  97 #include <net/sock.h>
  98 #include <linux/rtnetlink.h>
  99 #include <linux/proc_fs.h>
 100 #include <linux/seq_file.h>
 101 #include <linux/stat.h>
 102 #include <linux/if_bridge.h>
 103 #include <linux/if_macvlan.h>
 104 #include <net/dst.h>
 105 #include <net/pkt_sched.h>
 106 #include <net/checksum.h>
 107 #include <linux/highmem.h>
 108 #include <linux/init.h>
 109 #include <linux/kmod.h>
 110 #include <linux/module.h>
 111 #include <linux/kallsyms.h>
 112 #include <linux/netpoll.h>
 113 #include <linux/rcupdate.h>
 114 #include <linux/delay.h>
 115 #include <net/wext.h>
 116 #include <net/iw_handler.h>
 117 #include <asm/current.h>
 118 #include <linux/audit.h>
 119 #include <linux/dmaengine.h>
 120 #include <linux/err.h>
 121 #include <linux/ctype.h>
 122 #include <linux/if_arp.h>
 123 #include <linux/if_vlan.h>
 124 #include <linux/ip.h>
 125 #include <net/ip.h>
 126 #include <linux/ipv6.h>
 127 #include <linux/in.h>
 128 #include <linux/jhash.h>
 129 #include <linux/random.h>
 130
 131 #include "net-sysfs.h"
 132
 133 /*
 134  *      The list of packet types we will receive (as opposed to discard)
 135  *      and the routines to invoke.
 136  *
 137  *      Why 16. Because with 16 the only overlap we get on a hash of the
 138  *      low nibble of the protocol value is RARP/SNAP/X.25.
 139  *
 140  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 141  *             sure which should go first, but I bet it won't make much
 142  *             difference if we are running VLANs.  The good news is that
 143  *             this protocol won't be in the list unless compiled in, so
 144  *             the average user (w/out VLANs) will not be adversely affected.
 145  *             --BLG
 146  *
 147  *              0800    IP
 148  *              8100    802.1Q VLAN
 149  *              0001    802.3
 150  *              0002    AX.25
 151  *              0004    802.2
 152  *              8035    RARP
 153  *              0005    SNAP
 154  *              0805    X.25
 155  *              0806    ARP
 156  *              8137    IPX
 157  *              0009    Localtalk
 158  *              86DD    IPv6
 159  */
 160
/*
 * Number of buckets in ptype_base. PTYPE_HASH_MASK is applied with '&' to
 * the host-order protocol value, so the size must remain a power of two.
 */
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

/* Taken (with bottom halves disabled) around every change to the lists below. */
static DEFINE_SPINLOCK(ptype_lock);
/* Handlers for a specific protocol, bucketed by (type & PTYPE_HASH_MASK). */
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
/* Taps: handlers registered with type ETH_P_ALL. */
static struct list_head ptype_all __read_mostly;        /* Taps */
 167
#ifdef CONFIG_NET_DMA
/*
 * State kept for this file's DMA engine client (only built with
 * CONFIG_NET_DMA).
 * NOTE(review): the users of these fields are outside this part of the
 * file; presumably @lock guards @channel_mask and @channels - confirm.
 */
struct net_dma {
        struct dma_client client;
        spinlock_t lock;
        cpumask_t channel_mask;
        struct dma_chan **channels;
};

/* Forward declaration: referenced by the net_dma initializer below. */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
        enum dma_state state);

/* The single net_dma instance; its client's event callback is netdev_dma_event(). */
static struct net_dma net_dma = {
        .client = {
                .event_callback = netdev_dma_event,
        },
};
#endif
 186
 187 /*
 188  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 189  * semaphore.
 190  *
 191  * Pure readers hold dev_base_lock for reading.
 192  *
 193  * Writers must hold the rtnl semaphore while they loop through the
 194  * dev_base_head list, and hold dev_base_lock for writing when they do the
 195  * actual updates.  This allows pure readers to access the list even
 196  * while a writer is preparing to update it.
 197  *
 198  * To put it another way, dev_base_lock is held for writing only to
 199  * protect against pure readers; the rtnl semaphore provides the
 200  * protection against other writers.
 201  *
 202  * See, for example usages, register_netdevice() and
 203  * unregister_netdevice(), which must be called with the rtnl
 204  * semaphore held.
 205  */
/* Reader/writer lock for the device list and its hash chains (rules above). */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

/*
 * dev_name_hash() and dev_index_hash() keep the low NETDEV_HASHBITS bits of
 * their hash value as the bucket number; NETDEV_HASHENTRIES is the matching
 * bucket count.
 */
#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 212
 213 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 214 {
 215         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 216         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 217 }
 218
 219 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 220 {
 221         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 222 }
 223
 224 /* Device list insertion */
 225 static int list_netdevice(struct net_device *dev)
 226 {
 227         struct net *net = dev_net(dev);
 228
 229         ASSERT_RTNL();
 230
 231         write_lock_bh(&dev_base_lock);
 232         list_add_tail(&dev->dev_list, &net->dev_base_head);
 233         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 234         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 235         write_unlock_bh(&dev_base_lock);
 236         return 0;
 237 }
 238
 239 /* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
        /* Writers of the device list must hold the RTNL. */
        ASSERT_RTNL();

        /*
         * Unlink dev from the device chain and from both hash chains
         * (name and ifindex) while dev_base_lock is write-held.
         */
        write_lock_bh(&dev_base_lock);
        list_del(&dev->dev_list);
        hlist_del(&dev->name_hlist);
        hlist_del(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
}
 251
 252 /*
 253  *      Our notifier list
 254  */
 255
/* NOTE(review): raw notifier chain - presumably serialized by its callers; confirm. */
static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *      Device drivers call our routines to queue packets here. We empty the
 *      queue in the local softnet handler.  There is one softnet_data
 *      instance per CPU.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
 264
 265 #ifdef CONFIG_LOCKDEP
 266 /*
 267  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 268  * according to dev->type
 269  */
/*
 * Device types that get their own lockdep class.  Entry i pairs with
 * netdev_lock_name[i] and with slot i of the lock_class_key arrays, so the
 * tables must be kept in the same order.  The final entry is also the
 * fallback that netdev_lock_pos() returns for a type not listed here.
 */
static const unsigned short netdev_lock_type[] =
        {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
         ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
         ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
         ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
         ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
         ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
         ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
         ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
         ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
         ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
         ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
         ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
         ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
         ARPHRD_NONE};
 286
 287 static const char *netdev_lock_name[] =
 288         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 289          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 290          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 291          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 292          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 293          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 294          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 295          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 296          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 297          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 298          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 299          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 300          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 301          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 302          "_xmit_NONE"};
 303
/*
 * One lockdep class key per known device type, kept separately for the
 * transmit locks and for the address list locks.
 */
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 306
 307 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 308 {
 309         int i;
 310
 311         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 312                 if (netdev_lock_type[i] == dev_type)
 313                         return i;
 314         /* the last key is used by default */
 315         return ARRAY_SIZE(netdev_lock_type) - 1;
 316 }
 317
 318 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 319                                                  unsigned short dev_type)
 320 {
 321         int i;
 322
 323         i = netdev_lock_pos(dev_type);
 324         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 325                                    netdev_lock_name[i]);
 326 }
 327
 328 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 329 {
 330         int i;
 331
 332         i = netdev_lock_pos(dev->type);
 333         lockdep_set_class_and_name(&dev->addr_list_lock,
 334                                    &netdev_addr_lock_key[i],
 335                                    netdev_lock_name[i]);
 336 }
 337 #else
/* Without CONFIG_LOCKDEP there is no lock class to assign: no-op. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
                                                 unsigned short dev_type)
{
}
/* Without CONFIG_LOCKDEP there is no lock class to assign: no-op. */
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
 345 #endif
 346
 347 /*******************************************************************************
 348
 349                 Protocol management and registration routines
 350
 351 *******************************************************************************/
 352
 353 /*
 354  *      Add a protocol ID to the list. Now that the input handler is
 355  *      smarter we can dispense with all the messy stuff that used to be
 356  *      here.
 357  *
 *      BEWARE!!! Protocol handlers that mangle input packets
 *      MUST BE last in their hash buckets, and the check of protocol
 *      handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *      This is true now; do not change it.
 *      Explanation: if a protocol handler that mangles packets were
 *      first on the list, it would not be able to sense that the packet
 *      is cloned and should be copied-on-write, so it would
 *      change it and subsequent readers would get a broken packet.
 366  *                                                      --ANK (980803)
 367  */
 368
 369 /**
 370  *      dev_add_pack - add packet handler
 371  *      @pt: packet type declaration
 372  *
 373  *      Add a protocol handler to the networking stack. The passed &packet_type
 374  *      is linked into kernel lists and may not be freed until it has been
 375  *      removed from the kernel lists.
 376  *
 377  *      This call does not sleep therefore it can not
 378  *      guarantee all CPU's that are in middle of receiving packets
 379  *      will see the new packet type (until the next received packet).
 380  */
 381
 382 void dev_add_pack(struct packet_type *pt)
 383 {
 384         int hash;
 385
 386         spin_lock_bh(&ptype_lock);
 387         if (pt->type == htons(ETH_P_ALL))
 388                 list_add_rcu(&pt->list, &ptype_all);
 389         else {
 390                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 391                 list_add_rcu(&pt->list, &ptype_base[hash]);
 392         }
 393         spin_unlock_bh(&ptype_lock);
 394 }
 395
 396 /**
 397  *      __dev_remove_pack        - remove packet handler
 398  *      @pt: packet type declaration
 399  *
 *      Remove a protocol handler that was previously added to the kernel
 *      protocol handlers by dev_add_pack(). The passed &packet_type is
 *      unlinked from the kernel lists, but this function does not wait
 *      for receivers that are still using it.
 404  *
 405  *      The packet type might still be in use by receivers
 406  *      and must not be freed until after all the CPU's have gone
 407  *      through a quiescent state.
 408  */
 409 void __dev_remove_pack(struct packet_type *pt)
 410 {
 411         struct list_head *head;
 412         struct packet_type *pt1;
 413
 414         spin_lock_bh(&ptype_lock);
 415
 416         if (pt->type == htons(ETH_P_ALL))
 417                 head = &ptype_all;
 418         else
 419                 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 420
 421         list_for_each_entry(pt1, head, list) {
 422                 if (pt == pt1) {
 423                         list_del_rcu(&pt->list);
 424                         goto out;
 425                 }
 426         }
 427
 428         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 429 out:
 430         spin_unlock_bh(&ptype_lock);
 431 }
 432 /**
 433  *      dev_remove_pack  - remove packet handler
 434  *      @pt: packet type declaration
 435  *
 436  *      Remove a protocol handler that was previously added to the kernel
 437  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 438  *      from the kernel lists and can be freed or reused once this function
 439  *      returns.
 440  *
 441  *      This call sleeps to guarantee that no CPU is looking at the packet
 442  *      type after return.
 443  */
void dev_remove_pack(struct packet_type *pt)
{
        /* Unlink the handler from its list ... */
        __dev_remove_pack(pt);

        /* ... then wait, so that no CPU is still looking at it on return. */
        synchronize_net();
}
 450
 451 /******************************************************************************
 452
 453                       Device Boot-time Settings Routines
 454
 455 *******************************************************************************/
 456
/*
 * Boot time configuration table.  Filled by netdev_boot_setup_add(); a slot
 * counts as free while its name starts with a NUL or a space.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 459
 460 /**
 461  *      netdev_boot_setup_add   - add new setup entry
 462  *      @name: name of the device
 463  *      @map: configured settings for the device
 464  *
 465  *      Adds new setup entry to the dev_boot_setup list.  The function
 466  *      returns 0 on error and 1 on success.  This is a generic routine to
 467  *      all netdevices.
 468  */
 469 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 470 {
 471         struct netdev_boot_setup *s;
 472         int i;
 473
 474         s = dev_boot_setup;
 475         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 476                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 477                         memset(s[i].name, 0, sizeof(s[i].name));
 478                         strlcpy(s[i].name, name, IFNAMSIZ);
 479                         memcpy(&s[i].map, map, sizeof(s[i].map));
 480                         break;
 481                 }
 482         }
 483
 484         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 485 }
 486
 487 /**
 488  *      netdev_boot_setup_check - check boot time settings
 489  *      @dev: the netdevice
 490  *
 491  *      Check boot time settings for the device.
 492  *      The found settings are set for the device to be used
 493  *      later in the device probing.
 494  *      Returns 0 if no settings found, 1 if they are.
 495  */
 496 int netdev_boot_setup_check(struct net_device *dev)
 497 {
 498         struct netdev_boot_setup *s = dev_boot_setup;
 499         int i;
 500
 501         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 502                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 503                     !strcmp(dev->name, s[i].name)) {
 504                         dev->irq        = s[i].map.irq;
 505                         dev->base_addr  = s[i].map.base_addr;
 506                         dev->mem_start  = s[i].map.mem_start;
 507                         dev->mem_end    = s[i].map.mem_end;
 508                         return 1;
 509                 }
 510         }
 511         return 0;
 512 }
 513
 514
 515 /**
 516  *      netdev_boot_base        - get address from boot time settings
 517  *      @prefix: prefix for network device
 518  *      @unit: id for network device
 519  *
 520  *      Check boot time settings for the base address of device.
 521  *      The found settings are set for the device to be used
 522  *      later in the device probing.
 523  *      Returns 0 if no settings found.
 524  */
 525 unsigned long netdev_boot_base(const char *prefix, int unit)
 526 {
 527         const struct netdev_boot_setup *s = dev_boot_setup;
 528         char name[IFNAMSIZ];
 529         int i;
 530
 531         sprintf(name, "%s%d", prefix, unit);
 532
 533         /*
 534          * If device already registered then return base of 1
 535          * to indicate not to probe for this interface
 536          */
 537         if (__dev_get_by_name(&init_net, name))
 538                 return 1;
 539
 540         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 541                 if (!strcmp(name, s[i].name))
 542                         return s[i].map.base_addr;
 543         return 0;
 544 }
 545
 546 /*
 547  * Saves at boot time configured settings for any netdevice.
 548  */
 549 int __init netdev_boot_setup(char *str)
 550 {
 551         int ints[5];
 552         struct ifmap map;
 553
 554         str = get_options(str, ARRAY_SIZE(ints), ints);
 555         if (!str || !*str)
 556                 return 0;
 557
 558         /* Save settings */
 559         memset(&map, 0, sizeof(map));
 560         if (ints[0] > 0)
 561                 map.irq = ints[1];
 562         if (ints[0] > 1)
 563                 map.base_addr = ints[2];
 564         if (ints[0] > 2)
 565                 map.mem_start = ints[3];
 566         if (ints[0] > 3)
 567                 map.mem_end = ints[4];
 568
 569         /* Add new entry to the list */
 570         return netdev_boot_setup_add(str, &map);
 571 }
 572
 573 __setup("netdev=", netdev_boot_setup);
 574
 575 /*******************************************************************************
 576
 577                             Device Interface Subroutines
 578
 579 *******************************************************************************/
 580
 581 /**
 582  *      __dev_get_by_name       - find a device by its name
 583  *      @net: the applicable net namespace
 584  *      @name: name to find
 585  *
 586  *      Find an interface by name. Must be called under RTNL semaphore
 587  *      or @dev_base_lock. If the name is found a pointer to the device
 588  *      is returned. If the name is not found then %NULL is returned. The
 589  *      reference counters are not incremented so the caller must be
 590  *      careful with locks.
 591  */
 592
 593 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 594 {
 595         struct hlist_node *p;
 596
 597         hlist_for_each(p, dev_name_hash(net, name)) {
 598                 struct net_device *dev
 599                         = hlist_entry(p, struct net_device, name_hlist);
 600                 if (!strncmp(dev->name, name, IFNAMSIZ))
 601                         return dev;
 602         }
 603         return NULL;
 604 }
 605
 606 /**
 607  *      dev_get_by_name         - find a device by its name
 608  *      @net: the applicable net namespace
 609  *      @name: name to find
 610  *
 611  *      Find an interface by name. This can be called from any
 612  *      context and does its own locking. The returned handle has
 613  *      the usage count incremented and the caller must use dev_put() to
 614  *      release it when it is no longer needed. %NULL is returned if no
 615  *      matching device is found.
 616  */
 617
struct net_device *dev_get_by_name(struct net *net, const char *name)
{
        struct net_device *dev;

        /* Look up and take the reference while dev_base_lock is read-held. */
        read_lock(&dev_base_lock);
        dev = __dev_get_by_name(net, name);
        if (dev)
                dev_hold(dev);
        read_unlock(&dev_base_lock);
        return dev;
}
 629
 630 /**
 631  *      __dev_get_by_index - find a device by its ifindex
 632  *      @net: the applicable net namespace
 633  *      @ifindex: index of device
 634  *
 635  *      Search for an interface by index. Returns %NULL if the device
 636  *      is not found or a pointer to the device. The device has not
 637  *      had its reference counter increased so the caller must be careful
 638  *      about locking. The caller must hold either the RTNL semaphore
 639  *      or @dev_base_lock.
 640  */
 641
 642 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 643 {
 644         struct hlist_node *p;
 645
 646         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 647                 struct net_device *dev
 648                         = hlist_entry(p, struct net_device, index_hlist);
 649                 if (dev->ifindex == ifindex)
 650                         return dev;
 651         }
 652         return NULL;
 653 }
 654
 655
 656 /**
 657  *      dev_get_by_index - find a device by its ifindex
 658  *      @net: the applicable net namespace
 659  *      @ifindex: index of device
 660  *
 661  *      Search for an interface by index. Returns NULL if the device
 662  *      is not found or a pointer to the device. The device returned has
 663  *      had a reference added and the pointer is safe until the user calls
 664  *      dev_put to indicate they have finished with it.
 665  */
 666
struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
        struct net_device *dev;

        /* Look up and take the reference while dev_base_lock is read-held. */
        read_lock(&dev_base_lock);
        dev = __dev_get_by_index(net, ifindex);
        if (dev)
                dev_hold(dev);
        read_unlock(&dev_base_lock);
        return dev;
}
 678
 679 /**
 680  *      dev_getbyhwaddr - find a device by its hardware address
 681  *      @net: the applicable net namespace
 682  *      @type: media type of device
 683  *      @ha: hardware address
 684  *
 685  *      Search for an interface by MAC address. Returns NULL if the device
 686  *      is not found or a pointer to the device. The caller must hold the
 687  *      rtnl semaphore. The returned device has not had its ref count increased
 688  *      and the caller must therefore be careful about locking
 689  *
 690  *      BUGS:
 691  *      If the API was consistent this would be __dev_get_by_hwaddr
 692  */
 693
 694 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 695 {
 696         struct net_device *dev;
 697
 698         ASSERT_RTNL();
 699
 700         for_each_netdev(net, dev)
 701                 if (dev->type == type &&
 702                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 703                         return dev;
 704
 705         return NULL;
 706 }
 707
 708 EXPORT_SYMBOL(dev_getbyhwaddr);
 709
 710 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 711 {
 712         struct net_device *dev;
 713
 714         ASSERT_RTNL();
 715         for_each_netdev(net, dev)
 716                 if (dev->type == type)
 717                         return dev;
 718
 719         return NULL;
 720 }
 721
 722 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 723
/*
 * Like __dev_getfirstbyhwtype(), but takes the RTNL itself and returns the
 * device (if any) with a reference held via dev_hold().
 */
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        /* Search and take the reference with the RTNL held. */
        rtnl_lock();
        dev = __dev_getfirstbyhwtype(net, type);
        if (dev)
                dev_hold(dev);
        rtnl_unlock();
        return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);
 737
 738 /**
 739  *      dev_get_by_flags - find any device with given flags
 740  *      @net: the applicable net namespace
 741  *      @if_flags: IFF_* values
 742  *      @mask: bitmask of bits in if_flags to check
 743  *
 744  *      Search for any interface with the given flags. Returns NULL if a device
 745  *      is not found or a pointer to the device. The device returned has
 746  *      had a reference added and the pointer is safe until the user calls
 747  *      dev_put to indicate they have finished with it.
 748  */
 749
 750 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 751 {
 752         struct net_device *dev, *ret;
 753
 754         ret = NULL;
 755         read_lock(&dev_base_lock);
 756         for_each_netdev(net, dev) {
 757                 if (((dev->flags ^ if_flags) & mask) == 0) {
 758                         dev_hold(dev);
 759                         ret = dev;
 760                         break;
 761                 }
 762         }
 763         read_unlock(&dev_base_lock);
 764         return ret;
 765 }
 766
 767 /**
 768  *      dev_valid_name - check if name is okay for network device
 769  *      @name: name string
 770  *
 *      Network device names need to be valid file names
 *      to allow sysfs to work.  We also disallow any kind of
 *      whitespace.
 774  */
 775 int dev_valid_name(const char *name)
 776 {
 777         if (*name == '\0')
 778                 return 0;
 779         if (strlen(name) >= IFNAMSIZ)
 780                 return 0;
 781         if (!strcmp(name, ".") || !strcmp(name, ".."))
 782                 return 0;
 783
 784         while (*name) {
 785                 if (*name == '/' || isspace(*name))
 786                         return 0;
 787                 name++;
 788         }
 789         return 1;
 790 }
 791
 792 /**
 793  *      __dev_alloc_name - allocate a name for a device
 794  *      @net: network namespace to allocate the device name in
 795  *      @name: name format string
 796  *      @buf:  scratch buffer and result name string
 797  *
 798  *      Passed a format string - eg "lt%d" it will try and find a suitable
 799  *      id. It scans list of devices to build up a free map, then chooses
 800  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 801  *      while allocating the name and adding the device in order to avoid
 802  *      duplicates.
 803  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 804  *      Returns the number of the unit assigned or a negative errno code.
 805  */
 806
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
        /* Stays 0 when @name has no '%': the name is then used as given. */
        int i = 0;
        const char *p;
        /* One page worth of bits bounds the unit numbers that are tracked. */
        const int max_netdevices = 8*PAGE_SIZE;
        unsigned long *inuse;
        struct net_device *d;

        /* Only the first IFNAMSIZ-1 characters are searched for a '%'. */
        p = strnchr(name, IFNAMSIZ-1, '%');
        if (p) {
                /*
                 * Verify the string as this thing may have come from
                 * the user.  There must be exactly one "%d" and no other "%"
                 * characters.
                 */
                if (p[1] != 'd' || strchr(p + 2, '%'))
                        return -EINVAL;

                /* Use one page as a bit array of possible slots */
                inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
                if (!inuse)
                        return -ENOMEM;

                /* Mark the unit number of every device that matches the pattern. */
                for_each_netdev(net, d) {
                        if (!sscanf(d->name, name, &i))
                                continue;
                        if (i < 0 || i >= max_netdevices)
                                continue;

                        /*  avoid cases where sscanf is not exact inverse of printf */
                        snprintf(buf, IFNAMSIZ, name, i);
                        if (!strncmp(buf, d->name, IFNAMSIZ))
                                set_bit(i, inuse);
                }

                /* Lowest unit number that was not marked above. */
                i = find_first_zero_bit(inuse, max_netdevices);
                free_page((unsigned long) inuse);
        }

        /* Build the candidate name and make sure no device already uses it. */
        snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;

        /* It is possible to run out of possible slots
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
        return -ENFILE;
}
 856
 857 /**
 858  *      dev_alloc_name - allocate a name for a device
 859  *      @dev: device
 860  *      @name: name format string
 861  *
 862  *      Passed a format string - eg "lt%d" it will try and find a suitable
 863  *      id. It scans list of devices to build up a free map, then chooses
 864  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 865  *      while allocating the name and adding the device in order to avoid
 866  *      duplicates.
 867  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 868  *      Returns the number of the unit assigned or a negative errno code.
 869  */
 870
 871 int dev_alloc_name(struct net_device *dev, const char *name)
 872 {
 873         char buf[IFNAMSIZ];
 874         struct net *net;
 875         int ret;
 876
 877         BUG_ON(!dev_net(dev));
 878         net = dev_net(dev);
 879         ret = __dev_alloc_name(net, name, buf);
 880         if (ret >= 0)
 881                 strlcpy(dev->name, buf, IFNAMSIZ);
 882         return ret;
 883 }
 884
 885
 886 /**
 887  *      dev_change_name - change name of a device
 888  *      @dev: device
 889  *      @newname: name (or format string) must be at least IFNAMSIZ
 890  *
 891  *      Change name of a device, can pass format strings "eth%d".
 892  *      for wildcarding.
 893  */
 894 int dev_change_name(struct net_device *dev, char *newname)
 895 {
 896         char oldname[IFNAMSIZ];
 897         int err = 0;
 898         int ret;
 899         struct net *net;
 900
 901         ASSERT_RTNL();
 902         BUG_ON(!dev_net(dev));
 903
 904         net = dev_net(dev);
 905         if (dev->flags & IFF_UP)
 906                 return -EBUSY;
 907
 908         if (!dev_valid_name(newname))
 909                 return -EINVAL;
 910
 911         if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 912                 return 0;
 913
 914         memcpy(oldname, dev->name, IFNAMSIZ);
 915
 916         if (strchr(newname, '%')) {
 917                 err = dev_alloc_name(dev, newname);
 918                 if (err < 0)
 919                         return err;
 920                 strcpy(newname, dev->name);
 921         }
 922         else if (__dev_get_by_name(net, newname))
 923                 return -EEXIST;
 924         else
 925                 strlcpy(dev->name, newname, IFNAMSIZ);
 926
 927 rollback:
 928         err = device_rename(&dev->dev, dev->name);
 929         if (err) {
 930                 memcpy(dev->name, oldname, IFNAMSIZ);
 931                 return err;
 932         }
 933
 934         write_lock_bh(&dev_base_lock);
 935         hlist_del(&dev->name_hlist);
 936         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 937         write_unlock_bh(&dev_base_lock);
 938
 939         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 940         ret = notifier_to_errno(ret);
 941
 942         if (ret) {
 943                 if (err) {
 944                         printk(KERN_ERR
 945                                "%s: name change rollback failed: %d.\n",
 946                                dev->name, ret);
 947                 } else {
 948                         err = ret;
 949                         memcpy(dev->name, oldname, IFNAMSIZ);
 950                         goto rollback;
 951                 }
 952         }
 953
 954         return err;
 955 }
 956
/**
 *      netdev_features_change - device changes features
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed features.  Sends a
 *      %NETDEV_FEAT_CHANGE event for @dev through
 *      call_netdevice_notifiers(); the notifier result is not returned.
 */
void netdev_features_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
 968
 969 /**
 970  *      netdev_state_change - device changes state
 971  *      @dev: device to cause notification
 972  *
 973  *      Called to indicate a device has changed state. This function calls
 974  *      the notifier chains for netdev_chain and sends a NEWLINK message
 975  *      to the routing socket.
 976  */
 977 void netdev_state_change(struct net_device *dev)
 978 {
 979         if (dev->flags & IFF_UP) {
 980                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
 981                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 982         }
 983 }
 984
/**
 *      netdev_bonding_change - send a bonding failover notification
 *      @dev: device to cause notification
 *
 *      Sends a %NETDEV_BONDING_FAILOVER event for @dev through
 *      call_netdevice_notifiers(); the notifier result is not returned.
 */
void netdev_bonding_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
 990
 991 /**
 992  *      dev_load        - load a network module
 993  *      @net: the applicable net namespace
 994  *      @name: name of interface
 995  *
 996  *      If a network interface is not present and the process has suitable
 997  *      privileges this function loads the module. If module loading is not
 998  *      available in this kernel then it becomes a nop.
 999  */
1000
1001 void dev_load(struct net *net, const char *name)
1002 {
1003         struct net_device *dev;
1004
1005         read_lock(&dev_base_lock);
1006         dev = __dev_get_by_name(net, name);
1007         read_unlock(&dev_base_lock);
1008
1009         if (!dev && capable(CAP_SYS_MODULE))
1010                 request_module("%s", name);
1011 }
1012
1013 /**
1014  *      dev_open        - prepare an interface for use.
1015  *      @dev:   device to open
1016  *
1017  *      Takes a device from down to up state. The device's private open
1018  *      function is invoked and then the multicast lists are loaded. Finally
1019  *      the device is moved into the up state and a %NETDEV_UP message is
1020  *      sent to the netdev notifier chain.
1021  *
1022  *      Calling this function on an active interface is a nop. On a failure
1023  *      a negative errno code is returned.
1024  */
1025 int dev_open(struct net_device *dev)
1026 {
1027         int ret = 0;
1028
1029         ASSERT_RTNL();
1030
1031         /*
1032          *      Is it already up?
1033          */
1034
1035         if (dev->flags & IFF_UP)
1036                 return 0;
1037
1038         /*
1039          *      Is it even present?
1040          */
1041         if (!netif_device_present(dev))
1042                 return -ENODEV;
1043
1044         /*
1045          *      Call device private open method
1046          */
1047         set_bit(__LINK_STATE_START, &dev->state);
1048
1049         if (dev->validate_addr)
1050                 ret = dev->validate_addr(dev);
1051
1052         if (!ret && dev->open)
1053                 ret = dev->open(dev);
1054
1055         /*
1056          *      If it went open OK then:
1057          */
1058
1059         if (ret)
1060                 clear_bit(__LINK_STATE_START, &dev->state);
1061         else {
1062                 /*
1063                  *      Set the flags.
1064                  */
1065                 dev->flags |= IFF_UP;
1066
1067                 /*
1068                  *      Initialize multicasting status
1069                  */
1070                 dev_set_rx_mode(dev);
1071
1072                 /*
1073                  *      Wakeup transmit queue engine
1074                  */
1075                 dev_activate(dev);
1076
1077                 /*
1078                  *      ... and announce new interface.
1079                  */
1080                 call_netdevice_notifiers(NETDEV_UP, dev);
1081         }
1082
1083         return ret;
1084 }
1085
/**
 *      dev_close - shutdown an interface.
 *      @dev: device to shutdown
 *
 *      This function moves an active device into down state. A
 *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *      chain.
 *
 *      Must be called with the RTNL lock held and may sleep.  Always
 *      returns 0; calling it on a device that is not up is a nop.
 */
int dev_close(struct net_device *dev)
{
        ASSERT_RTNL();

        might_sleep();

        if (!(dev->flags & IFF_UP))
                return 0;

        /*
         *      Tell people we are going down, so that they can
         *      prepare for death while the device is still operating.
         */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch poll list,
         * it can be even on different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of its
         * napi_struct instances on this device.
         */
        smp_mb__after_clear_bit(); /* Commit netif_running(). */

        dev_deactivate(dev);

        /*
         *      Call the device specific close. This cannot fail.
         *      Only if device is UP
         *
         *      We allow it to be called even after a DETACH hot-plug
         *      event.
         */
        if (dev->stop)
                dev->stop(dev);

        /*
         *      Device is now down.
         */

        dev->flags &= ~IFF_UP;

        /*
         * Tell people we are down
         */
        call_netdevice_notifiers(NETDEV_DOWN, dev);

        return 0;
}
1145
1146
1147 /**
1148  *      dev_disable_lro - disable Large Receive Offload on a device
1149  *      @dev: device
1150  *
1151  *      Disable Large Receive Offload (LRO) on a net device.  Must be
1152  *      called under RTNL.  This is needed if received packets may be
1153  *      forwarded to another interface.
1154  */
1155 void dev_disable_lro(struct net_device *dev)
1156 {
1157         if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1158             dev->ethtool_ops->set_flags) {
1159                 u32 flags = dev->ethtool_ops->get_flags(dev);
1160                 if (flags & ETH_FLAG_LRO) {
1161                         flags &= ~ETH_FLAG_LRO;
1162                         dev->ethtool_ops->set_flags(dev, flags);
1163                 }
1164         }
1165         WARN_ON(dev->features & NETIF_F_LRO);
1166 }
1167 EXPORT_SYMBOL(dev_disable_lro);
1168
1169
/*
 *      Starts out as 1.  While it is non-zero,
 *      register_netdevice_notifier() does not replay register/up events
 *      to a newly added notifier.  NOTE(review): presumably cleared once
 *      boot-time initialisation has finished - confirm at the point
 *      where it is reset.
 */
static int dev_boot_phase = 1;
1171
1172 /*
1173  *      Device change register/unregister. These are not inline or static
1174  *      as we export them to the world.
1175  */
1176
1177 /**
1178  *      register_netdevice_notifier - register a network notifier block
1179  *      @nb: notifier
1180  *
1181  *      Register a notifier to be called when network device events occur.
1182  *      The notifier passed is linked into the kernel structures and must
1183  *      not be reused until it has been unregistered. A negative errno code
1184  *      is returned on a failure.
1185  *
1186  *      When registered all registration and up events are replayed
1187  *      to the new notifier to allow device to have a race free
1188  *      view of the network device list.
1189  */
1190
1191 int register_netdevice_notifier(struct notifier_block *nb)
1192 {
1193         struct net_device *dev;
1194         struct net_device *last;
1195         struct net *net;
1196         int err;
1197
1198         rtnl_lock();
1199         err = raw_notifier_chain_register(&netdev_chain, nb);
1200         if (err)
1201                 goto unlock;
1202         if (dev_boot_phase)
1203                 goto unlock;
1204         for_each_net(net) {
1205                 for_each_netdev(net, dev) {
1206                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1207                         err = notifier_to_errno(err);
1208                         if (err)
1209                                 goto rollback;
1210
1211                         if (!(dev->flags & IFF_UP))
1212                                 continue;
1213
1214                         nb->notifier_call(nb, NETDEV_UP, dev);
1215                 }
1216         }
1217
1218 unlock:
1219         rtnl_unlock();
1220         return err;
1221
1222 rollback:
1223         last = dev;
1224         for_each_net(net) {
1225                 for_each_netdev(net, dev) {
1226                         if (dev == last)
1227                                 break;
1228
1229                         if (dev->flags & IFF_UP) {
1230                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1231                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1232                         }
1233                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1234                 }
1235         }
1236
1237         raw_notifier_chain_unregister(&netdev_chain, nb);
1238         goto unlock;
1239 }
1240
/**
 *      unregister_netdevice_notifier - unregister a network notifier block
 *      @nb: notifier
 *
 *      Unregister a notifier previously registered by
 *      register_netdevice_notifier(). The notifier is unlinked from the
 *      kernel structures and may then be reused. A negative errno code
 *      is returned on a failure.
 *
 *      Takes and releases the RTNL lock around the removal.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
        int err;

        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
        rtnl_unlock();
        return err;
}
1260
/**
 *      call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @dev: net_device pointer passed unmodified to notifier function
 *
 *      Call all network notifier blocks on netdev_chain.  Parameters and
 *      return value are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
        return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1274
/*
 * When > 0 there are consumers of rx skb time stamps.  Incremented by
 * net_enable_timestamp() and decremented by net_disable_timestamp();
 * net_timestamp() only stamps an skb while this is non-zero.
 */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
1277
/* Register one more consumer of skb time stamps (bumps netstamp_needed). */
void net_enable_timestamp(void)
{
        atomic_inc(&netstamp_needed);
}
1282
/* Drop one consumer of skb time stamps (decrements netstamp_needed). */
void net_disable_timestamp(void)
{
        atomic_dec(&netstamp_needed);
}
1287
1288 static inline void net_timestamp(struct sk_buff *skb)
1289 {
1290         if (atomic_read(&netstamp_needed))
1291                 __net_timestamp(skb);
1292         else
1293                 skb->tstamp.tv64 = 0;
1294 }
1295
1296 /*
1297  *      Support routine. Sends outgoing frames to any network
1298  *      taps currently in use.
1299  */
1300
1301 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1302 {
1303         struct packet_type *ptype;
1304
1305         net_timestamp(skb);
1306
1307         rcu_read_lock();
1308         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1309                 /* Never send packets back to the socket
1310                  * they originated from - MvS (miquels@drinkel.ow.org)
1311                  */
1312                 if ((ptype->dev == dev || !ptype->dev) &&
1313                     (ptype->af_packet_priv == NULL ||
1314                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1315                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1316                         if (!skb2)
1317                                 break;
1318
1319                         /* skb->nh should be correctly
1320                            set by sender, so that the second statement is
1321                            just protection against buggy protocols.
1322                          */
1323                         skb_reset_mac_header(skb2);
1324
1325                         if (skb_network_header(skb2) < skb2->data ||
1326                             skb2->network_header > skb2->tail) {
1327                                 if (net_ratelimit())
1328                                         printk(KERN_CRIT "protocol %04x is "
1329                                                "buggy, dev %s\n",
1330                                                skb2->protocol, dev->name);
1331                                 skb_reset_network_header(skb2);
1332                         }
1333
1334                         skb2->transport_header = skb2->network_header;
1335                         skb2->pkt_type = PACKET_OUTGOING;
1336                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1337                 }
1338         }
1339         rcu_read_unlock();
1340 }
1341
1342
/*
 * Put @q at the head of this CPU's softnet_data output_queue (chained
 * through q->next_sched) and raise NET_TX_SOFTIRQ.  Local interrupts are
 * disabled around the list update.
 */
static inline void __netif_reschedule(struct Qdisc *q)
{
        struct softnet_data *sd;
        unsigned long flags;

        local_irq_save(flags);
        sd = &__get_cpu_var(softnet_data);
        q->next_sched = sd->output_queue;
        sd->output_queue = q;
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_restore(flags);
}
1355
/*
 * Schedule @q for the TX softirq unless it is already scheduled:
 * __QDISC_STATE_SCHED is atomically tested and set, and only the caller
 * that found it clear queues the qdisc via __netif_reschedule().
 */
void __netif_schedule(struct Qdisc *q)
{
        if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
                __netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);
1362
1363 void dev_kfree_skb_irq(struct sk_buff *skb)
1364 {
1365         if (atomic_dec_and_test(&skb->users)) {
1366                 struct softnet_data *sd;
1367                 unsigned long flags;
1368
1369                 local_irq_save(flags);
1370                 sd = &__get_cpu_var(softnet_data);
1371                 skb->next = sd->completion_queue;
1372                 sd->completion_queue = skb;
1373                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1374                 local_irq_restore(flags);
1375         }
1376 }
1377 EXPORT_SYMBOL(dev_kfree_skb_irq);
1378
/*
 * Free @skb from any context: dev_kfree_skb_irq() is used in hard
 * interrupt context or with interrupts disabled, dev_kfree_skb()
 * otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (!in_irq() && !irqs_disabled()) {
                dev_kfree_skb(skb);
                return;
        }

        dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1387
1388
1389 /**
1390  * netif_device_detach - mark device as removed
1391  * @dev: network device
1392  *
1393  * Mark device as removed from system and therefore no longer available.
1394  */
1395 void netif_device_detach(struct net_device *dev)
1396 {
1397         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1398             netif_running(dev)) {
1399                 netif_stop_queue(dev);
1400         }
1401 }
1402 EXPORT_SYMBOL(netif_device_detach);
1403
1404 /**
1405  * netif_device_attach - mark device as attached
1406  * @dev: network device
1407  *
1408  * Mark device as attached from system and restart if needed.
1409  */
1410 void netif_device_attach(struct net_device *dev)
1411 {
1412         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1413             netif_running(dev)) {
1414                 netif_wake_queue(dev);
1415                 __netdev_watchdog_up(dev);
1416         }
1417 }
1418 EXPORT_SYMBOL(netif_device_attach);
1419
1420 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1421 {
1422         return ((features & NETIF_F_GEN_CSUM) ||
1423                 ((features & NETIF_F_IP_CSUM) &&
1424                  protocol == htons(ETH_P_IP)) ||
1425                 ((features & NETIF_F_IPV6_CSUM) &&
1426                  protocol == htons(ETH_P_IPV6)));
1427 }
1428
1429 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1430 {
1431         if (can_checksum_protocol(dev->features, skb->protocol))
1432                 return true;
1433
1434         if (skb->protocol == htons(ETH_P_8021Q)) {
1435                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1436                 if (can_checksum_protocol(dev->features & dev->vlan_features,
1437                                           veh->h_vlan_encapsulated_proto))
1438                         return true;
1439         }
1440
1441         return false;
1442 }
1443
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 *
 * For a CHECKSUM_COMPLETE skb or a GSO skb only ip_summed is reset to
 * CHECKSUM_NONE.  Otherwise the checksum is computed in software from
 * csum_start to the end of the packet, folded and stored csum_offset
 * bytes further on, and ip_summed is set to CHECKSUM_NONE.
 *
 * Returns 0 on success, or the error from pskb_expand_head() when the
 * checksum field had to be made writable and that failed; ip_summed is
 * left unchanged in that case.
 */
int skb_checksum_help(struct sk_buff *skb)
{
        __wsum csum;
        int ret = 0, offset;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                goto out_set_summed;

        if (unlikely(skb_shinfo(skb)->gso_size)) {
                /* Let GSO fix up the checksum. */
                goto out_set_summed;
        }

        /* csum_start counts from skb->head; make it relative to skb->data */
        offset = skb->csum_start - skb_headroom(skb);
        BUG_ON(offset >= skb_headlen(skb));
        csum = skb_checksum(skb, offset, skb->len - offset, 0);

        /* offset now points at the checksum field, which must be linear */
        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

        /* Get a private copy of the header before writing into a clone */
        if (skb_cloned(skb) &&
            !skb_clone_writable(skb, offset + sizeof(__sum16))) {
                ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (ret)
                        goto out;
        }

        *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
out:
        return ret;
}
1481
/**
 *      skb_gso_segment - Perform segmentation on skb.
 *      @skb: buffer to segment
 *      @features: features for the output path (see dev->features)
 *
 *      This function segments the given skb and returns a list of segments.
 *
 *      It may return NULL if the skb requires no segmentation.  This is
 *      only possible when GSO is used for verifying header integrity.
 *
 *      An ERR_PTR() is returned on failure; it is -EPROTONOSUPPORT when
 *      no protocol handler with a gso_segment method is registered for
 *      skb->protocol.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        int err;

        BUG_ON(skb_shinfo(skb)->frag_list);

        /* Record the link level header length and step over that header */
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);

        /*
         * Anything but CHECKSUM_PARTIAL is unexpected here and triggers
         * a warning.  NOTE(review): the header is un-cloned in that case
         * presumably because gso_send_check() below writes to it -
         * confirm against the protocol handlers.
         */
        if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
                if (skb_header_cloned(skb) &&
                    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                        return ERR_PTR(err);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
                if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                                err = ptype->gso_send_check(skb);
                                /* NULL when err is 0, i.e. "no segmentation" */
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                /* Move skb->data back to the network header */
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
                        segs = ptype->gso_segment(skb, features);
                        break;
                }
        }
        rcu_read_unlock();

        /* Restore skb->data to the link level header */
        __skb_push(skb, skb->data - skb_mac_header(skb));

        return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1535
/*
 * Take action when hardware reception checksum errors are detected:
 * log the name of the device (or "<unknown>" when @dev is NULL) and dump
 * the stack, subject to net_ratelimit().
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        const char *who;

        if (!net_ratelimit())
                return;

        who = dev ? dev->name : "<unknown>";
        printk(KERN_ERR "%s: hw csum failure.\n", who);
        dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1548
/*
 * Returns 1 when @skb has a page fragment in high memory and @dev lacks
 * NETIF_F_HIGHDMA, 0 otherwise.  Always 0 without CONFIG_HIGHMEM.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        int frag = 0;

        if (dev->features & NETIF_F_HIGHDMA)
                return 0;

        while (frag < skb_shinfo(skb)->nr_frags) {
                if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
                        return 1;
                frag++;
        }

#endif
        return 0;
}
1569
/*
 * Per-skb state kept in skb->cb while software GSO segments hang off
 * skb->next: the destructor the skb had before dev_gso_segment()
 * installed dev_gso_skb_destructor().
 */
struct dev_gso_cb {
        void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1575
/*
 * Destructor installed by dev_gso_segment(): frees every segment still
 * chained on skb->next, then calls the original destructor saved in the
 * control buffer, if there was one.
 *
 * NOTE(review): the do/while dereferences skb->next before testing it,
 * so at least one segment is assumed to be queued on entry - confirm.
 */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
        struct dev_gso_cb *cb;

        do {
                struct sk_buff *nskb = skb->next;

                /* Unlink the first segment and free it */
                skb->next = nskb->next;
                nskb->next = NULL;
                kfree_skb(nskb);
        } while (skb->next);

        cb = DEV_GSO_CB(skb);
        if (cb->destructor)
                cb->destructor(skb);
}
1592
1593 /**
1594  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1595  *      @skb: buffer to segment
1596  *
1597  *      This function segments the given skb and stores the list of segments
1598  *      in skb->next.
1599  */
1600 static int dev_gso_segment(struct sk_buff *skb)
1601 {
1602         struct net_device *dev = skb->dev;
1603         struct sk_buff *segs;
1604         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1605                                          NETIF_F_SG : 0);
1606
1607         segs = skb_gso_segment(skb, features);
1608
1609         /* Verifying header integrity only. */
1610         if (!segs)
1611                 return 0;
1612
1613         if (IS_ERR(segs))
1614                 return PTR_ERR(segs);
1615
1616         skb->next = segs;
1617         DEV_GSO_CB(skb)->destructor = skb->destructor;
1618         skb->destructor = dev_gso_skb_destructor;
1619
1620         return 0;
1621 }
1622
/*
 * Hand @skb to the driver of @dev.
 *
 * An skb without pending segments (skb->next is NULL) is first shown to
 * the network taps, then segmented in software if the device needs
 * that, and otherwise passed straight to dev->hard_start_xmit().
 *
 * When segments are chained on skb->next they are transmitted one by
 * one.  A segment the driver refuses is put back at the head of the
 * chain and the driver's return code is passed on; NETDEV_TX_BUSY is
 * returned if @txq gets stopped while segments remain.  Once every
 * segment is sent the original destructor is restored and the now
 * empty @skb is freed.
 *
 * Returns 0 when the skb was consumed here (also when segmentation
 * failed and the skb was dropped), otherwise the driver's return code
 * or NETDEV_TX_BUSY.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
{
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);

                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
                        /* Segmentation produced a list: send it below */
                        if (skb->next)
                                goto gso;
                }

                return dev->hard_start_xmit(skb, dev);
        }

gso:
        do {
                struct sk_buff *nskb = skb->next;
                int rc;

                /* Detach the first pending segment and transmit it */
                skb->next = nskb->next;
                nskb->next = NULL;
                rc = dev->hard_start_xmit(nskb, dev);
                if (unlikely(rc)) {
                        /* Not sent: put it back at the head of the list */
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
                }
                if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);

        /* All segments are gone: undo the destructor override */
        skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
        kfree_skb(skb);
        return 0;
}
1663
/* Random seed for simple_tx_hash(), filled in on its first call */
static u32 simple_tx_hashrnd;
static int simple_tx_hashrnd_initialized = 0;

/*
 * Pick a transmit queue index for @skb by hashing two address words and,
 * for the protocols listed below, the first 32 bits that follow the
 * network header.  Only IPv4 and IPv6 packets are hashed; every other
 * protocol gets queue 0.
 *
 * The result lies in [0, dev->real_num_tx_queues).
 */
static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
{
        u32 addr1, addr2, ports;
        u32 hash, ihl;
        u8 ip_proto = 0;

        if (unlikely(!simple_tx_hashrnd_initialized)) {
                get_random_bytes(&simple_tx_hashrnd, 4);
                simple_tx_hashrnd_initialized = 1;
        }

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                /* Fragments keep ip_proto 0, so only addresses are hashed */
                if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
                        ip_proto = ip_hdr(skb)->protocol;
                addr1 = ip_hdr(skb)->saddr;
                addr2 = ip_hdr(skb)->daddr;
                ihl = ip_hdr(skb)->ihl;
                break;
        case __constant_htons(ETH_P_IPV6):
                ip_proto = ipv6_hdr(skb)->nexthdr;
                /* Only the last 32 bits of each address go into the hash */
                addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
                addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
                /* Fixed 40 byte header, counted in 32-bit words like ihl */
                ihl = (40 >> 2);
                break;
        default:
                return 0;
        }


        switch (ip_proto) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
        case IPPROTO_DCCP:
        case IPPROTO_ESP:
        case IPPROTO_AH:
        case IPPROTO_SCTP:
        case IPPROTO_UDPLITE:
                /*
                 * First 32 bits after the network header.
                 * NOTE(review): presumably the source/destination port
                 * pair (or SPI) for these protocols - confirm.
                 */
                ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
                break;

        default:
                ports = 0;
                break;
        }

        hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);

        /* Scale the 32-bit hash down to the number of real tx queues */
        return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
1717
1718 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1719                                         struct sk_buff *skb)
1720 {
1721         u16 queue_index = 0;
1722
1723         if (dev->select_queue)
1724                 queue_index = dev->select_queue(dev, skb);
1725         else if (dev->real_num_tx_queues > 1)
1726                 queue_index = simple_tx_hash(dev, skb);
1727
1728         skb_set_queue_mapping(skb, queue_index);
1729         return netdev_get_tx_queue(dev, queue_index);
1730 }
1731
1732 /**
1733  *      dev_queue_xmit - transmit a buffer
1734  *      @skb: buffer to transmit
1735  *
1736  *      Queue a buffer for transmission to a network device. The caller must
1737  *      have set the device and priority and built the buffer before calling
1738  *      this function. The function can be called from an interrupt.
1739  *
1740  *      A negative errno code is returned on a failure. A success does not
1741  *      guarantee the frame will be transmitted as it may be dropped due
1742  *      to congestion or traffic shaping.
1743  *
1744  *      Additional notes:
1745  *      This function can also return errors from the queue disciplines,
1746  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1747  *      be positive.
1748  *
1749  *      Regardless of the return value, the skb is consumed, so it is currently
1750  *      difficult to retry a send to this method.  (You can bump the ref count
1751  *      before sending to hold a reference for retry if you are careful.)
1752  *
1753  *      When calling this method, interrupts MUST be enabled.  This is because
1754  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1755  *          --BLG
1756  */
1757 int dev_queue_xmit(struct sk_buff *skb)
1758 {
1759         struct net_device *dev = skb->dev;
1760         struct netdev_queue *txq;
1761         struct Qdisc *q;
             /* Returned by the early failure paths that jump to out_kfree_skb. */
1762         int rc = -ENOMEM;
1763
1764         /* GSO will handle the following emulations directly. */
1765         if (netif_needs_gso(dev, skb))
1766                 goto gso;
1767
             /* A frag_list the device cannot handle must be linearized first. */
1768         if (skb_shinfo(skb)->frag_list &&
1769             !(dev->features & NETIF_F_FRAGLIST) &&
1770             __skb_linearize(skb))
1771                 goto out_kfree_skb;
1772
1773         /* Fragmented skb is linearized if device does not support SG,
1774          * or if at least one of fragments is in highmem and device
1775          * does not support DMA from it.
1776          */
1777         if (skb_shinfo(skb)->nr_frags &&
1778             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1779             __skb_linearize(skb))
1780                 goto out_kfree_skb;
1781
1782         /* If packet is not checksummed and device does not support
1783          * checksumming for this protocol, complete checksumming here.
1784          */
1785         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1786                 skb_set_transport_header(skb, skb->csum_start -
1787                                               skb_headroom(skb));
1788                 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1789                         goto out_kfree_skb;
1790         }
1791
1792 gso:
1793         /* Disable soft irqs for various locks below. Also
1794          * stops preemption for RCU.
1795          */
1796         rcu_read_lock_bh();
1797
             /* Select the TX queue (dev_pick_tx() also stores the index in the
              * skb) and fetch that queue's qdisc under RCU.
              */
1798         txq = dev_pick_tx(dev, skb);
1799         q = rcu_dereference(txq->qdisc);
1800
1801 #ifdef CONFIG_NET_CLS_ACT
1802         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1803 #endif
             /* Qdisc with an enqueue method: under the qdisc root lock, either
              * drop the packet (qdisc deactivated) or enqueue it and run the
              * qdisc.  rc carries the enqueue result or NET_XMIT_DROP.
              */
1804         if (q->enqueue) {
1805                 spinlock_t *root_lock = qdisc_lock(q);
1806
1807                 spin_lock(root_lock);
1808
1809                 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1810                         kfree_skb(skb);
1811                         rc = NET_XMIT_DROP;
1812                 } else {
1813                         rc = qdisc_enqueue_root(skb, q);
1814                         qdisc_run(q);
1815                 }
1816                 spin_unlock(root_lock);
1817
1818                 goto out;
1819         }
1820
1821         /* The device has no queue. Common case for software devices:
1822            loopback, all the sorts of tunnels...
1823
1824            Really, it is unlikely that netif_tx_lock protection is necessary
1825            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1826            counters.)
1827            However, it is possible, that they rely on protection
1828            made by us here.
1829
1830            Check this and shot the lock. It is not prone from deadlocks.
1831            Either shot noqueue qdisc, it is even simpler 8)
1832          */
1833         if (dev->flags & IFF_UP) {
1834                 int cpu = smp_processor_id(); /* ok because BHs are off */
1835
                     /* This CPU already owning the xmit lock means we re-entered
                      * the transmit path for this queue (handled in the else arm).
                      */
1836                 if (txq->xmit_lock_owner != cpu) {
1837
1838                         HARD_TX_LOCK(dev, txq, cpu);
1839
1840                         if (!netif_tx_queue_stopped(txq)) {
1841                                 rc = 0;
                                     /* A zero return from the driver ends the call
                                      * successfully with rc == 0.
                                      */
1842                                 if (!dev_hard_start_xmit(skb, dev, txq)) {
1843                                         HARD_TX_UNLOCK(dev, txq);
1844                                         goto out;
1845                                 }
1846                         }
1847                         HARD_TX_UNLOCK(dev, txq);
1848                         if (net_ratelimit())
1849                                 printk(KERN_CRIT "Virtual device %s asks to "
1850                                        "queue packet!\n", dev->name);
1851                 } else {
1852                         /* Recursion is detected! It is possible,
1853                          * unfortunately */
1854                         if (net_ratelimit())
1855                                 printk(KERN_CRIT "Dead loop on virtual device "
1856                                        "%s, fix it urgently!\n", dev->name);
1857                 }
1858         }
1859
             /* Reached when the device is not up, the queue is stopped, the
              * driver did not accept the packet, or recursion was detected:
              * the skb is freed below and -ENETDOWN is returned.
              */
1860         rc = -ENETDOWN;
1861         rcu_read_unlock_bh();
1862
             /* Also the target of the failure paths taken before
              * rcu_read_lock_bh(); those arrive with rc still -ENOMEM.
              */
1863 out_kfree_skb:
1864         kfree_skb(skb);
1865         return rc;
             /* Exit for paths that already disposed of or handed off the skb. */
1866 out:
1867         rcu_read_unlock_bh();
1868         return rc;
1869 }
1870
1871
1872 /*=======================================================================
1873                         Receiver routines
1874   =======================================================================*/
1875
     /* netif_rx() drops a packet when this CPU's input_pkt_queue is already
      * longer than this many packets.
      */
1876 int netdev_max_backlog __read_mostly = 1000;
     /* Starting budget for one run of net_rx_action(). */
1877 int netdev_budget __read_mostly = 300;
1878 int weight_p __read_mostly = 64;            /* old backlog weight; copied into napi->weight by process_backlog() */
1879
     /* Per-CPU receive counters; this file updates the total, dropped and
      * time_squeeze members.
      */
1880 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1881
1882
1883 /**
1884  *      netif_rx        -       post buffer to the network code
1885  *      @skb: buffer to post
1886  *
1887  *      This function receives a packet from a device driver and queues it for
1888  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1889  *      may be dropped during processing for congestion control or by the
1890  *      protocol layers.
1891  *
1892  *      return values:
1893  *      NET_RX_SUCCESS  (no congestion)
1894  *      NET_RX_DROP     (packet was dropped)
1895  *
1896  */
1897
1898 int netif_rx(struct sk_buff *skb)
1899 {
1900         struct softnet_data *queue;
1901         unsigned long flags;
1902
1903         /* if netpoll wants it, pretend we never saw it */
1904         if (netpoll_rx(skb))
1905                 return NET_RX_DROP;
1906
1907         if (!skb->tstamp.tv64)
1908                 net_timestamp(skb);
1909
1910         /*
1911          * The code is rearranged so that the path is the most
1912          * short when CPU is congested, but is still operating.
1913          */
1914         local_irq_save(flags);
1915         queue = &__get_cpu_var(softnet_data);
1916
1917         __get_cpu_var(netdev_rx_stat).total++;
1918         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1919                 if (queue->input_pkt_queue.qlen) {
1920 enqueue:
1921                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1922                         local_irq_restore(flags);
1923                         return NET_RX_SUCCESS;
1924                 }
1925
1926                 napi_schedule(&queue->backlog);
1927                 goto enqueue;
1928         }
1929
1930         __get_cpu_var(netdev_rx_stat).dropped++;
1931         local_irq_restore(flags);
1932
1933         kfree_skb(skb);
1934         return NET_RX_DROP;
1935 }
1936
/*
 * netif_rx_ni - netif_rx() followed by immediate softirq processing
 * @skb: buffer to post
 *
 * Calls netif_rx() with preemption disabled and, before re-enabling it,
 * runs do_softirq() if any softirq is pending on this CPU.
 * NOTE(review): presumably meant for callers that are not already in
 * interrupt/softirq context -- confirm against callers.
 *
 * Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
        int ret;

        preempt_disable();

        ret = netif_rx(skb);
        if (local_softirq_pending())
                do_softirq();

        preempt_enable();

        return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1951
1952 static void net_tx_action(struct softirq_action *h)
1953 {
1954         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1955
1956         if (sd->completion_queue) {
1957                 struct sk_buff *clist;
1958
1959                 local_irq_disable();
1960                 clist = sd->completion_queue;
1961                 sd->completion_queue = NULL;
1962                 local_irq_enable();
1963
1964                 while (clist) {
1965                         struct sk_buff *skb = clist;
1966                         clist = clist->next;
1967
1968                         WARN_ON(atomic_read(&skb->users));
1969                         __kfree_skb(skb);
1970                 }
1971         }
1972
1973         if (sd->output_queue) {
1974                 struct Qdisc *head;
1975
1976                 local_irq_disable();
1977                 head = sd->output_queue;
1978                 sd->output_queue = NULL;
1979                 local_irq_enable();
1980
1981                 while (head) {
1982                         struct Qdisc *q = head;
1983                         spinlock_t *root_lock;
1984
1985                         head = head->next_sched;
1986
1987                         root_lock = qdisc_lock(q);
1988                         if (spin_trylock(root_lock)) {
1989                                 smp_mb__before_clear_bit();
1990                                 clear_bit(__QDISC_STATE_SCHED,
1991                                           &q->state);
1992                                 qdisc_run(q);
1993                                 spin_unlock(root_lock);
1994                         } else {
1995                                 if (!test_bit(__QDISC_STATE_DEACTIVATED,
1996                                               &q->state)) {
1997                                         __netif_reschedule(q);
1998                                 } else {
1999                                         smp_mb__before_clear_bit();
2000                                         clear_bit(__QDISC_STATE_SCHED,
2001                                                   &q->state);
2002                                 }
2003                         }
2004                 }
2005         }
2006 }
2007
2008 static inline int deliver_skb(struct sk_buff *skb,
2009                               struct packet_type *pt_prev,
2010                               struct net_device *orig_dev)
2011 {
2012         atomic_inc(&skb->users);
2013         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2014 }
2015
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
                                                unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
                                        struct sk_buff *skb) __read_mostly;

/*
 * Pass @skb to the bridge hook when its device has a bridge port.
 *
 * Loopback packets and packets from devices without a br_port are returned
 * untouched.  Otherwise a handler still pending in *@pt_prev is delivered
 * first (result stored in *@ret, *@pt_prev reset to NULL) and the value of
 * br_handle_frame_hook() is returned.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
                                            struct packet_type **pt_prev, int *ret,
                                            struct net_device *orig_dev)
{
        struct net_bridge_port *port;

        if (skb->pkt_type == PACKET_LOOPBACK)
                return skb;

        port = rcu_dereference(skb->dev->br_port);
        if (!port)
                return skb;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }

        return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
#endif
2049
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Pass @skb to the macvlan hook when its device has a macvlan_port.
 *
 * Without a macvlan_port the skb is returned untouched.  Otherwise a
 * handler still pending in *@pt_prev is delivered first (result stored in
 * *@ret, *@pt_prev reset to NULL) and the value of
 * macvlan_handle_frame_hook() is returned.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
                                             struct packet_type **pt_prev,
                                             int *ret,
                                             struct net_device *orig_dev)
{
        if (!skb->dev->macvlan_port)
                return skb;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }

        return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
#endif
2071
2072 #ifdef CONFIG_NET_CLS_ACT
2073 /* TODO: Maybe we should just force sch_ingress to be compiled in
2074  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2075  * a compare and 2 stores extra right now if we dont have it on
2076  * but have CONFIG_NET_CLS_ACT
2077  * NOTE: This doesnt stop any functionality; if you dont have
2078  * the ingress scheduler, you just cant add policies on ingress.
2079  *
2080  */
2081 static int ing_filter(struct sk_buff *skb)
2082 {
2083         struct net_device *dev = skb->dev;
2084         u32 ttl = G_TC_RTTL(skb->tc_verd);
2085         struct netdev_queue *rxq;
2086         int result = TC_ACT_OK;
2087         struct Qdisc *q;
2088
2089         if (MAX_RED_LOOP < ttl++) {
2090                 printk(KERN_WARNING
2091                        "Redir loop detected Dropping packet (%d->%d)\n",
2092                        skb->iif, dev->ifindex);
2093                 return TC_ACT_SHOT;
2094         }
2095
2096         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2097         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2098
2099         rxq = &dev->rx_queue;
2100
2101         q = rxq->qdisc;
2102         if (q != &noop_qdisc) {
2103                 spin_lock(qdisc_lock(q));
2104                 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2105                         result = qdisc_enqueue_root(skb, q);
2106                 spin_unlock(qdisc_lock(q));
2107         }
2108
2109         return result;
2110 }
2111
2112 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2113                                          struct packet_type **pt_prev,
2114                                          int *ret, struct net_device *orig_dev)
2115 {
2116         if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2117                 goto out;
2118
2119         if (*pt_prev) {
2120                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2121                 *pt_prev = NULL;
2122         } else {
2123                 /* Huh? Why does turning on AF_PACKET affect this? */
2124                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2125         }
2126
2127         switch (ing_filter(skb)) {
2128         case TC_ACT_SHOT:
2129         case TC_ACT_STOLEN:
2130                 kfree_skb(skb);
2131                 return NULL;
2132         }
2133
2134 out:
2135         skb->tc_verd = 0;
2136         return skb;
2137 }
2138 #endif
2139
2140 /*
2141  *      netif_nit_deliver - deliver received packets to network taps
2142  *      @skb: buffer
2143  *
2144  *      This function is used to deliver incoming packets to network
2145  *      taps. It should be used when the normal netif_receive_skb path
2146  *      is bypassed, for example because of VLAN acceleration.
2147  */
2148 void netif_nit_deliver(struct sk_buff *skb)
2149 {
2150         struct packet_type *ptype;
2151
2152         if (list_empty(&ptype_all))
2153                 return;
2154
2155         skb_reset_network_header(skb);
2156         skb_reset_transport_header(skb);
2157         skb->mac_len = skb->network_header - skb->mac_header;
2158
2159         rcu_read_lock();
2160         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2161                 if (!ptype->dev || ptype->dev == skb->dev)
2162                         deliver_skb(skb, ptype, skb->dev);
2163         }
2164         rcu_read_unlock();
2165 }
2166
2167 /**
2168  *      netif_receive_skb - process receive buffer from network
2169  *      @skb: buffer to process
2170  *
2171  *      netif_receive_skb() is the main receive data processing function.
2172  *      It always succeeds. The buffer may be dropped during processing
2173  *      for congestion control or by the protocol layers.
2174  *
2175  *      This function may only be called from softirq context and interrupts
2176  *      should be enabled.
2177  *
2178  *      Return values (usually ignored):
2179  *      NET_RX_SUCCESS: no congestion
2180  *      NET_RX_DROP: packet was dropped
2181  */
2182 int netif_receive_skb(struct sk_buff *skb)
2183 {
2184         struct packet_type *ptype, *pt_prev;
2185         struct net_device *orig_dev;
2186         struct net_device *null_or_orig;
2187         int ret = NET_RX_DROP;
2188         __be16 type;
2189
2190         /* if we've gotten here through NAPI, check netpoll */
2191         if (netpoll_receive_skb(skb))
2192                 return NET_RX_DROP;
2193
2194         if (!skb->tstamp.tv64)
2195                 net_timestamp(skb);
2196
             /* Remember the receiving interface unless one is already set. */
2197         if (!skb->iif)
2198                 skb->iif = skb->dev->ifindex;
2199
             /* null_or_orig is compared against ptype->dev below: while it is
              * NULL, handlers bound to no device match; once it is set to
              * orig_dev, only handlers bound to a specific device can match.
              */
2200         null_or_orig = NULL;
2201         orig_dev = skb->dev;
2202         if (orig_dev->master) {
2203                 if (skb_bond_should_drop(skb))
2204                         null_or_orig = orig_dev; /* deliver only exact match */
2205                 else
2206                         skb->dev = orig_dev->master;
2207         }
2208
2209         __get_cpu_var(netdev_rx_stat).total++;
2210
2211         skb_reset_network_header(skb);
2212         skb_reset_transport_header(skb);
2213         skb->mac_len = skb->network_header - skb->mac_header;
2214
             /* Delivery is deferred by one handler: pt_prev holds the most
              * recent match and is only delivered (via deliver_skb(), which
              * takes an extra skb reference) once another match is found.
              * The last match is called directly at the end of the function.
              */
2215         pt_prev = NULL;
2216
2217         rcu_read_lock();
2218
2219         /* Don't receive packets in an exiting network namespace */
2220         if (!net_alive(dev_net(skb->dev)))
2221                 goto out;
2222
2223 #ifdef CONFIG_NET_CLS_ACT
             /* TC_NCLS set: clear it and skip the taps and ingress handling. */
2224         if (skb->tc_verd & TC_NCLS) {
2225                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2226                 goto ncls;
2227         }
2228 #endif
2229
             /* Handlers on ptype_all are matched by device only. */
2230         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2231                 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2232                     ptype->dev == orig_dev) {
2233                         if (pt_prev)
2234                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2235                         pt_prev = ptype;
2236                 }
2237         }
2238
2239 #ifdef CONFIG_NET_CLS_ACT
             /* handle_ing() returns NULL after freeing a packet that the
              * ingress filter shot or stole.
              */
2240         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2241         if (!skb)
2242                 goto out;
2243 ncls:
2244 #endif
2245
             /* Each hook may return NULL, which ends processing here. */
2246         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2247         if (!skb)
2248                 goto out;
2249         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2250         if (!skb)
2251                 goto out;
2252
             /* Protocol handlers: looked up in the ptype_base hash bucket for
              * skb->protocol and matched on both type and device.
              */
2253         type = skb->protocol;
2254         list_for_each_entry_rcu(ptype,
2255                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2256                 if (ptype->type == type &&
2257                     (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2258                      ptype->dev == orig_dev)) {
2259                         if (pt_prev)
2260                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2261                         pt_prev = ptype;
2262                 }
2263         }
2264
             /* The final handler is called without the extra reference that
              * deliver_skb() takes; with no handler left the skb is freed.
              */
2265         if (pt_prev) {
2266                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2267         } else {
2268                 kfree_skb(skb);
2269                 /* Jamal, now you will not able to escape explaining
2270                  * me how you were going to use this. :-)
2271                  */
2272                 ret = NET_RX_DROP;
2273         }
2274
2275 out:
2276         rcu_read_unlock();
2277         return ret;
2278 }
2279
2280 /* Network device is going away, flush any packets still pending  */
2281 static void flush_backlog(void *arg)
2282 {
2283         struct net_device *dev = arg;
2284         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2285         struct sk_buff *skb, *tmp;
2286
2287         skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2288                 if (skb->dev == dev) {
2289                         __skb_unlink(skb, &queue->input_pkt_queue);
2290                         kfree_skb(skb);
2291                 }
2292 }
2293
2294 static int process_backlog(struct napi_struct *napi, int quota)
2295 {
2296         int work = 0;
2297         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2298         unsigned long start_time = jiffies;
2299
2300         napi->weight = weight_p;
2301         do {
2302                 struct sk_buff *skb;
2303
2304                 local_irq_disable();
2305                 skb = __skb_dequeue(&queue->input_pkt_queue);
2306                 if (!skb) {
2307                         __napi_complete(napi);
2308                         local_irq_enable();
2309                         break;
2310                 }
2311                 local_irq_enable();
2312
2313                 netif_receive_skb(skb);
2314         } while (++work < quota && jiffies == start_time);
2315
2316         return work;
2317 }
2318
2319 /**
2320  * __napi_schedule - schedule for receive
2321  * @n: entry to schedule
2322  *
2323  * The entry's receive function will be scheduled to run
2324  */
2325 void __napi_schedule(struct napi_struct *n)
2326 {
2327         unsigned long flags;
2328
2329         local_irq_save(flags);
2330         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2331         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2332         local_irq_restore(flags);
2333 }
2334 EXPORT_SYMBOL(__napi_schedule);
2335
2336
     /*
      * net_rx_action - receive-side softirq work for the current CPU
      * @h: softirq descriptor (unused here)
      *
      * Polls the NAPI instances queued on this CPU's poll_list, head first,
      * until the list is empty, the budget (initially netdev_budget) is
      * used up, or jiffies has moved on since the function started.  In the
      * latter two cases time_squeeze is counted and NET_RX_SOFTIRQ is raised
      * again so that the remaining work is picked up later.
      */
2337 static void net_rx_action(struct softirq_action *h)
2338 {
2339         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2340         unsigned long start_time = jiffies;
2341         int budget = netdev_budget;
2342         void *have;
2343
2344         local_irq_disable();
2345
             /* The list is tested with interrupts disabled. */
2346         while (!list_empty(list)) {
2347                 struct napi_struct *n;
2348                 int work, weight;
2349
2350                 /* If softirq window is exhausted then punt.
2351                  *
2352                  * Note that this is a slight policy change from the
2353                  * previous NAPI code, which would allow up to 2
2354                  * jiffies to pass before breaking out.  The test
2355                  * used to be "jiffies - start_time > 1".
2356                  */
2357                 if (unlikely(budget <= 0 || jiffies != start_time))
2358                         goto softnet_break;
2359
2360                 local_irq_enable();
2361
2362                 /* Even though interrupts have been re-enabled, this
2363                  * access is safe because interrupts can only add new
2364                  * entries to the tail of this list, and only ->poll()
2365                  * calls can remove this head entry from the list.
2366                  */
2367                 n = list_entry(list->next, struct napi_struct, poll_list);
2368
2369                 have = netpoll_poll_lock(n);
2370
2371                 weight = n->weight;
2372
2373                 /* This NAPI_STATE_SCHED test is for avoiding a race
2374                  * with netpoll's poll_napi().  Only the entity which
2375                  * obtains the lock and sees NAPI_STATE_SCHED set will
2376                  * actually make the ->poll() call.  Therefore we avoid
2377                  * accidentally calling ->poll() when NAPI is not scheduled.
2378                  */
2379                 work = 0;
2380                 if (test_bit(NAPI_STATE_SCHED, &n->state))
2381                         work = n->poll(n, weight);
2382
                     /* A poll routine must not report more work than its weight. */
2383                 WARN_ON_ONCE(work > weight);
2384
2385                 budget -= work;
2386
2387                 local_irq_disable();
2388
2389                 /* Drivers must not modify the NAPI state if they
2390                  * consume the entire weight.  In such cases this code
2391                  * still "owns" the NAPI instance and therefore can
2392                  * move the instance around on the list at-will.
2393                  */
2394                 if (unlikely(work == weight)) {
2395                         if (unlikely(napi_disable_pending(n)))
2396                                 __napi_complete(n);
2397                         else
2398                                 list_move_tail(&n->poll_list, list);
2399                 }
2400
2401                 netpoll_poll_unlock(have);
2402         }
             /* Both the loop exit and softnet_break arrive here with
              * interrupts disabled.
              */
2403 out:
2404         local_irq_enable();
2405
2406 #ifdef CONFIG_NET_DMA
2407         /*
2408          * There may not be any more sk_buffs coming right now, so push
2409          * any pending DMA copies to hardware
2410          */
2411         if (!cpus_empty(net_dma.channel_mask)) {
2412                 int chan_idx;
2413                 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2414                         struct dma_chan *chan = net_dma.channels[chan_idx];
2415                         if (chan)
2416                                 dma_async_memcpy_issue_pending(chan);
2417                 }
2418         }
2419 #endif
2420
2421         return;
2422
             /* Out of budget or time: count the squeeze and re-raise the
              * softirq before leaving through the common exit.
              */
2423 softnet_break:
2424         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2425         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2426         goto out;
2427 }
2428
2429 static gifconf_func_t * gifconf_list [NPROTO];
2430
2431 /**
2432  *      register_gifconf        -       register a SIOCGIF handler
2433  *      @family: Address family
2434  *      @gifconf: Function handler
2435  *
2436  *      Register protocol dependent address dumping routines. The handler
2437  *      that is passed must not be freed or reused until it has been replaced
2438  *      by another handler.
2439  */
2440 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2441 {
2442         if (family >= NPROTO)
2443                 return -EINVAL;
2444         gifconf_list[family] = gifconf;
2445         return 0;
2446 }
2447
2448
2449 /*
2450  *      Map an interface index to its name (SIOCGIFNAME)
2451  */
2452
2453 /*
2454  *      We need this ioctl for efficient implementation of the
2455  *      if_indextoname() function required by the IPv6 API.  Without
2456  *      it, we would have to search all the interfaces to find a
2457  *      match.  --pb
2458  */
2459
2460 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2461 {
2462         struct net_device *dev;
2463         struct ifreq ifr;
2464
2465         /*
2466          *      Fetch the caller's info block.
2467          */
2468
2469         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2470                 return -EFAULT;
2471
2472         read_lock(&dev_base_lock);
2473         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2474         if (!dev) {
2475                 read_unlock(&dev_base_lock);
2476                 return -ENODEV;
2477         }
2478
2479         strcpy(ifr.ifr_name, dev->name);
2480         read_unlock(&dev_base_lock);
2481
2482         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2483                 return -EFAULT;
2484         return 0;
2485 }
2486
2487 /*
2488  *      Perform a SIOCGIFCONF call. This structure will change
2489  *      size eventually, and there is nothing I can do about it.
2490  *      Thus we will need a 'compatibility mode'.
2491  */
2492
2493 static int dev_ifconf(struct net *net, char __user *arg)
2494 {
2495         struct ifconf ifc;
2496         struct net_device *dev;
2497         char __user *pos;
2498         int len;
2499         int total;
2500         int i;
2501
2502         /*
2503          *      Fetch the caller's info block.
2504          */
2505
2506         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2507                 return -EFAULT;
2508
2509         pos = ifc.ifc_buf;
2510         len = ifc.ifc_len;
2511
2512         /*
2513          *      Loop over the interfaces, and write an info block for each.
2514          */
2515
2516         total = 0;
2517         for_each_netdev(net, dev) {
2518                 for (i = 0; i < NPROTO; i++) {
2519                         if (gifconf_list[i]) {
2520                                 int done;
2521                                 if (!pos)
2522                                         done = gifconf_list[i](dev, NULL, 0);
2523                                 else
2524                                         done = gifconf_list[i](dev, pos + total,
2525                                                                len - total);
2526                                 if (done < 0)
2527                                         return -EFAULT;
2528                                 total += done;
2529                         }
2530                 }
2531         }
2532
2533         /*
2534          *      All done.  Write the updated control block back to the caller.
2535          */
2536         ifc.ifc_len = total;
2537
2538         /*
2539          *      Both BSD and Solaris return 0 here, so we do too.
2540          */
2541         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2542 }
2543
2544 #ifdef CONFIG_PROC_FS
2545 /*
2546  *      This is invoked by the /proc filesystem handler to display a device
2547  *      in detail.
2548  */
2549 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2550         __acquires(dev_base_lock)
2551 {
2552         struct net *net = seq_file_net(seq);
2553         loff_t off;
2554         struct net_device *dev;
2555
2556         read_lock(&dev_base_lock);
2557         if (!*pos)
2558                 return SEQ_START_TOKEN;
2559
2560         off = 1;
2561         for_each_netdev(net, dev)
2562                 if (off++ == *pos)
2563                         return dev;
2564
2565         return NULL;
2566 }
2567
2568 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2569 {
2570         struct net *net = seq_file_net(seq);
2571         ++*pos;
2572         return v == SEQ_START_TOKEN ?
2573                 first_net_device(net) : next_net_device((struct net_device *)v);
2574 }
2575
2576 void dev_seq_stop(struct seq_file *seq, void *v)
2577         __releases(dev_base_lock)
2578 {
2579         read_unlock(&dev_base_lock);
2580 }
2581
2582 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2583 {
2584         struct net_device_stats *stats = dev->get_stats(dev);
2585
2586         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2587                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2588                    dev->name, stats->rx_bytes, stats->rx_packets,
2589                    stats->rx_errors,
2590                    stats->rx_dropped + stats->rx_missed_errors,
2591                    stats->rx_fifo_errors,
2592                    stats->rx_length_errors + stats->rx_over_errors +
2593                     stats->rx_crc_errors + stats->rx_frame_errors,
2594                    stats->rx_compressed, stats->multicast,
2595                    stats->tx_bytes, stats->tx_packets,
2596                    stats->tx_errors, stats->tx_dropped,
2597                    stats->tx_fifo_errors, stats->collisions,
2598                    stats->tx_carrier_errors +
2599                     stats->tx_aborted_errors +
2600                     stats->tx_window_errors +
2601                     stats->tx_heartbeat_errors,
2602                    stats->tx_compressed);
2603 }
2604
2605 /*
2606  *      Called from the PROCfs module. This now uses the new arbitrary sized
2607  *      /proc/net interface to create /proc/net/dev
2608  */
2609 static int dev_seq_show(struct seq_file *seq, void *v)
2610 {
2611         if (v == SEQ_START_TOKEN)
2612                 seq_puts(seq, "Inter-|   Receive                            "
2613                               "                    |  Transmit\n"
2614                               " face |bytes    packets errs drop fifo frame "
2615                               "compressed multicast|bytes    packets errs "
2616                               "drop fifo colls carrier compressed\n");
2617         else
2618                 dev_seq_printf_stats(seq, v);
2619         return 0;
2620 }
2621
2622 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2623 {
2624         struct netif_rx_stats *rc = NULL;
2625
2626         while (*pos < nr_cpu_ids)
2627                 if (cpu_online(*pos)) {
2628                         rc = &per_cpu(netdev_rx_stat, *pos);
2629                         break;
2630                 } else
2631                         ++*pos;
2632         return rc;
2633 }
2634
2635 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2636 {
2637         return softnet_get_online(pos);
2638 }
2639
2640 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2641 {
2642         ++*pos;
2643         return softnet_get_online(pos);
2644 }
2645
/* seq_file ->stop: softnet_seq_start() acquires nothing, so nothing to undo. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2649
2650 static int softnet_seq_show(struct seq_file *seq, void *v)
2651 {
2652         struct netif_rx_stats *s = v;
2653
2654         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2655                    s->total, s->dropped, s->time_squeeze, 0,
2656                    0, 0, 0, 0, /* was fastroute */
2657                    s->cpu_collision );
2658         return 0;
2659 }
2660
2661 static const struct seq_operations dev_seq_ops = {
2662         .start = dev_seq_start,
2663         .next  = dev_seq_next,
2664         .stop  = dev_seq_stop,
2665         .show  = dev_seq_show,
2666 };
2667
2668 static int dev_seq_open(struct inode *inode, struct file *file)
2669 {
2670         return seq_open_net(inode, file, &dev_seq_ops,
2671                             sizeof(struct seq_net_private));
2672 }
2673
2674 static const struct file_operations dev_seq_fops = {
2675         .owner   = THIS_MODULE,
2676         .open    = dev_seq_open,
2677         .read    = seq_read,
2678         .llseek  = seq_lseek,
2679         .release = seq_release_net,
2680 };
2681
2682 static const struct seq_operations softnet_seq_ops = {
2683         .start = softnet_seq_start,
2684         .next  = softnet_seq_next,
2685         .stop  = softnet_seq_stop,
2686         .show  = softnet_seq_show,
2687 };
2688
2689 static int softnet_seq_open(struct inode *inode, struct file *file)
2690 {
2691         return seq_open(file, &softnet_seq_ops);
2692 }
2693
2694 static const struct file_operations softnet_seq_fops = {
2695         .owner   = THIS_MODULE,
2696         .open    = softnet_seq_open,
2697         .read    = seq_read,
2698         .llseek  = seq_lseek,
2699         .release = seq_release,
2700 };
2701
2702 static void *ptype_get_idx(loff_t pos)
2703 {
2704         struct packet_type *pt = NULL;
2705         loff_t i = 0;
2706         int t;
2707
2708         list_for_each_entry_rcu(pt, &ptype_all, list) {
2709                 if (i == pos)
2710                         return pt;
2711                 ++i;
2712         }
2713
2714         for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2715                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2716                         if (i == pos)
2717                                 return pt;
2718                         ++i;
2719                 }
2720         }
2721         return NULL;
2722 }
2723
2724 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2725         __acquires(RCU)
2726 {
2727         rcu_read_lock();
2728         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2729 }
2730
2731 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2732 {
2733         struct packet_type *pt;
2734         struct list_head *nxt;
2735         int hash;
2736
2737         ++*pos;
2738         if (v == SEQ_START_TOKEN)
2739                 return ptype_get_idx(0);
2740
2741         pt = v;
2742         nxt = pt->list.next;
2743         if (pt->type == htons(ETH_P_ALL)) {
2744                 if (nxt != &ptype_all)
2745                         goto found;
2746                 hash = 0;
2747                 nxt = ptype_base[0].next;
2748         } else
2749                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2750
2751         while (nxt == &ptype_base[hash]) {
2752                 if (++hash >= PTYPE_HASH_SIZE)
2753                         return NULL;
2754                 nxt = ptype_base[hash].next;
2755         }
2756 found:
2757         return list_entry(nxt, struct packet_type, list);
2758 }
2759
2760 static void ptype_seq_stop(struct seq_file *seq, void *v)
2761         __releases(RCU)
2762 {
2763         rcu_read_unlock();
2764 }
2765
/*
 *	Print the address @sym in symbolic form.
 *
 *	With CONFIG_KALLSYMS, a successful kallsyms_lookup() is printed as
 *	":module:symbol+0xoffset", or "symbol+0xoffset" when no module name
 *	was reported.  Otherwise (no KALLSYMS, or lookup failed) the raw
 *	pointer is printed as "[%p]".
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		char *delim = ":";

		/* No module: drop both the name and its ':' separators. */
		if (!modname) {
			delim = "";
			modname = delim;
		}

		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2790
2791 static int ptype_seq_show(struct seq_file *seq, void *v)
2792 {
2793         struct packet_type *pt = v;
2794
2795         if (v == SEQ_START_TOKEN)
2796                 seq_puts(seq, "Type Device      Function\n");
2797         else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2798                 if (pt->type == htons(ETH_P_ALL))
2799                         seq_puts(seq, "ALL ");
2800                 else
2801                         seq_printf(seq, "%04x", ntohs(pt->type));
2802
2803                 seq_printf(seq, " %-8s ",
2804                            pt->dev ? pt->dev->name : "");
2805                 ptype_seq_decode(seq,  pt->func);
2806                 seq_putc(seq, '\n');
2807         }
2808
2809         return 0;
2810 }
2811
2812 static const struct seq_operations ptype_seq_ops = {
2813         .start = ptype_seq_start,
2814         .next  = ptype_seq_next,
2815         .stop  = ptype_seq_stop,
2816         .show  = ptype_seq_show,
2817 };
2818
2819 static int ptype_seq_open(struct inode *inode, struct file *file)
2820 {
2821         return seq_open_net(inode, file, &ptype_seq_ops,
2822                         sizeof(struct seq_net_private));
2823 }
2824
2825 static const struct file_operations ptype_seq_fops = {
2826         .owner   = THIS_MODULE,
2827         .open    = ptype_seq_open,
2828         .read    = seq_read,
2829         .llseek  = seq_lseek,
2830         .release = seq_release_net,
2831 };
2832
2833
2834 static int __net_init dev_proc_net_init(struct net *net)
2835 {
2836         int rc = -ENOMEM;
2837
2838         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2839                 goto out;
2840         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2841                 goto out_dev;
2842         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2843                 goto out_softnet;
2844
2845         if (wext_proc_init(net))
2846                 goto out_ptype;
2847         rc = 0;
2848 out:
2849         return rc;
2850 out_ptype:
2851         proc_net_remove(net, "ptype");
2852 out_softnet:
2853         proc_net_remove(net, "softnet_stat");
2854 out_dev:
2855         proc_net_remove(net, "dev");
2856         goto out;
2857 }
2858
2859 static void __net_exit dev_proc_net_exit(struct net *net)
2860 {
2861         wext_proc_exit(net);
2862
2863         proc_net_remove(net, "ptype");
2864         proc_net_remove(net, "softnet_stat");
2865         proc_net_remove(net, "dev");
2866 }
2867
2868 static struct pernet_operations __net_initdata dev_proc_ops = {
2869         .init = dev_proc_net_init,
2870         .exit = dev_proc_net_exit,
2871 };
2872
2873 static int __init dev_proc_init(void)
2874 {
2875         return register_pernet_subsys(&dev_proc_ops);
2876 }
2877 #else
2878 #define dev_proc_init() 0
2879 #endif  /* CONFIG_PROC_FS */
2880
2881
2882 /**
2883  *      netdev_set_master       -       set up master/slave pair
2884  *      @slave: slave device
2885  *      @master: new master device
2886  *
2887  *      Changes the master device of the slave. Pass %NULL to break the
2888  *      bonding. The caller must hold the RTNL semaphore. On a failure
2889  *      a negative errno code is returned. On success the reference counts
2890  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2891  *      function returns zero.
2892  */
2893 int netdev_set_master(struct net_device *slave, struct net_device *master)
2894 {
2895         struct net_device *old = slave->master;
2896
2897         ASSERT_RTNL();
2898
2899         if (master) {
2900                 if (old)
2901                         return -EBUSY;
2902                 dev_hold(master);
2903         }
2904
2905         slave->master = master;
2906
2907         synchronize_net();
2908
2909         if (old)
2910                 dev_put(old);
2911
2912         if (master)
2913                 slave->flags |= IFF_SLAVE;
2914         else
2915                 slave->flags &= ~IFF_SLAVE;
2916
2917         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2918         return 0;
2919 }
2920
2921 static void dev_change_rx_flags(struct net_device *dev, int flags)
2922 {
2923         if (dev->flags & IFF_UP && dev->change_rx_flags)
2924                 dev->change_rx_flags(dev, flags);
2925 }
2926
2927 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2928 {
2929         unsigned short old_flags = dev->flags;
2930
2931         ASSERT_RTNL();
2932
2933         dev->flags |= IFF_PROMISC;
2934         dev->promiscuity += inc;
2935         if (dev->promiscuity == 0) {
2936                 /*
2937                  * Avoid overflow.
2938                  * If inc causes overflow, untouch promisc and return error.
2939                  */
2940                 if (inc < 0)
2941                         dev->flags &= ~IFF_PROMISC;
2942                 else {
2943                         dev->promiscuity -= inc;
2944                         printk(KERN_WARNING "%s: promiscuity touches roof, "
2945                                 "set promiscuity failed, promiscuity feature "
2946                                 "of device might be broken.\n", dev->name);
2947                         return -EOVERFLOW;
2948                 }
2949         }
2950         if (dev->flags != old_flags) {
2951                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2952                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2953                                                                "left");
2954                 if (audit_enabled)
2955                         audit_log(current->audit_context, GFP_ATOMIC,
2956                                 AUDIT_ANOM_PROMISCUOUS,
2957                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2958                                 dev->name, (dev->flags & IFF_PROMISC),
2959                                 (old_flags & IFF_PROMISC),
2960                                 audit_get_loginuid(current),
2961                                 current->uid, current->gid,
2962                                 audit_get_sessionid(current));
2963
2964                 dev_change_rx_flags(dev, IFF_PROMISC);
2965         }
2966         return 0;
2967 }
2968
2969 /**
2970  *      dev_set_promiscuity     - update promiscuity count on a device
2971  *      @dev: device
2972  *      @inc: modifier
2973  *
2974  *      Add or remove promiscuity from a device. While the count in the device
2975  *      remains above zero the interface remains promiscuous. Once it hits zero
2976  *      the device reverts back to normal filtering operation. A negative inc
2977  *      value is used to drop promiscuity on the device.
2978  *      Return 0 if successful or a negative errno code on error.
2979  */
2980 int dev_set_promiscuity(struct net_device *dev, int inc)
2981 {
2982         unsigned short old_flags = dev->flags;
2983         int err;
2984
2985         err = __dev_set_promiscuity(dev, inc);
2986         if (err < 0)
2987                 return err;
2988         if (dev->flags != old_flags)
2989                 dev_set_rx_mode(dev);
2990         return err;
2991 }
2992
2993 /**
2994  *      dev_set_allmulti        - update allmulti count on a device
2995  *      @dev: device
2996  *      @inc: modifier
2997  *
2998  *      Add or remove reception of all multicast frames to a device. While the
2999  *      count in the device remains above zero the interface remains listening
3000  *      to all interfaces. Once it hits zero the device reverts back to normal
3001  *      filtering operation. A negative @inc value is used to drop the counter
3002  *      when releasing a resource needing all multicasts.
3003  *      Return 0 if successful or a negative errno code on error.
3004  */
3005
3006 int dev_set_allmulti(struct net_device *dev, int inc)
3007 {
3008         unsigned short old_flags = dev->flags;
3009
3010         ASSERT_RTNL();
3011
3012         dev->flags |= IFF_ALLMULTI;
3013         dev->allmulti += inc;
3014         if (dev->allmulti == 0) {
3015                 /*
3016                  * Avoid overflow.
3017                  * If inc causes overflow, untouch allmulti and return error.
3018                  */
3019                 if (inc < 0)
3020                         dev->flags &= ~IFF_ALLMULTI;
3021                 else {
3022                         dev->allmulti -= inc;
3023                         printk(KERN_WARNING "%s: allmulti touches roof, "
3024                                 "set allmulti failed, allmulti feature of "
3025                                 "device might be broken.\n", dev->name);
3026                         return -EOVERFLOW;
3027                 }
3028         }
3029         if (dev->flags ^ old_flags) {
3030                 dev_change_rx_flags(dev, IFF_ALLMULTI);
3031                 dev_set_rx_mode(dev);
3032         }
3033         return 0;
3034 }
3035
3036 /*
3037  *      Upload unicast and multicast address lists to device and
3038  *      configure RX filtering. When the device doesn't support unicast
3039  *      filtering it is put in promiscuous mode while unicast addresses
3040  *      are present.
3041  */
3042 void __dev_set_rx_mode(struct net_device *dev)
3043 {
3044         /* dev_open will call this function so the list will stay sane. */
3045         if (!(dev->flags&IFF_UP))
3046                 return;
3047
3048         if (!netif_device_present(dev))
3049                 return;
3050
3051         if (dev->set_rx_mode)
3052                 dev->set_rx_mode(dev);
3053         else {
3054                 /* Unicast addresses changes may only happen under the rtnl,
3055                  * therefore calling __dev_set_promiscuity here is safe.
3056                  */
3057                 if (dev->uc_count > 0 && !dev->uc_promisc) {
3058                         __dev_set_promiscuity(dev, 1);
3059                         dev->uc_promisc = 1;
3060                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3061                         __dev_set_promiscuity(dev, -1);
3062                         dev->uc_promisc = 0;
3063                 }
3064
3065                 if (dev->set_multicast_list)
3066                         dev->set_multicast_list(dev);
3067         }
3068 }
3069
/*
 *	Locked wrapper: run __dev_set_rx_mode() with the device's address
 *	lock held (bottom halves disabled for the duration).
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_addr_unlock_bh(dev);
}
3076
3077 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3078                       void *addr, int alen, int glbl)
3079 {
3080         struct dev_addr_list *da;
3081
3082         for (; (da = *list) != NULL; list = &da->next) {
3083                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3084                     alen == da->da_addrlen) {
3085                         if (glbl) {
3086                                 int old_glbl = da->da_gusers;
3087                                 da->da_gusers = 0;
3088                                 if (old_glbl == 0)
3089                                         break;
3090                         }
3091                         if (--da->da_users)
3092                                 return 0;
3093
3094                         *list = da->next;
3095                         kfree(da);
3096                         (*count)--;
3097                         return 0;
3098                 }
3099         }
3100         return -ENOENT;
3101 }
3102
3103 int __dev_addr_add(struct dev_addr_list **list, int *count,
3104                    void *addr, int alen, int glbl)
3105 {
3106         struct dev_addr_list *da;
3107
3108         for (da = *list; da != NULL; da = da->next) {
3109                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3110                     da->da_addrlen == alen) {
3111                         if (glbl) {
3112                                 int old_glbl = da->da_gusers;
3113                                 da->da_gusers = 1;
3114                                 if (old_glbl)
3115                                         return 0;
3116                         }
3117                         da->da_users++;
3118                         return 0;
3119                 }
3120         }
3121
3122         da = kzalloc(sizeof(*da), GFP_ATOMIC);
3123         if (da == NULL)
3124                 return -ENOMEM;
3125         memcpy(da->da_addr, addr, alen);
3126         da->da_addrlen = alen;
3127         da->da_users = 1;
3128         da->da_gusers = glbl ? 1 : 0;
3129         da->next = *list;
3130         *list = da;
3131         (*count)++;
3132         return 0;
3133 }
3134
3135 /**
3136  *      dev_unicast_delete      - Release secondary unicast address.
3137  *      @dev: device
3138  *      @addr: address to delete
3139  *      @alen: length of @addr
3140  *
3141  *      Release reference to a secondary unicast address and remove it
3142  *      from the device if the reference count drops to zero.
3143  *
3144  *      The caller must hold the rtnl_mutex.
3145  */
3146 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3147 {
3148         int err;
3149
3150         ASSERT_RTNL();
3151
3152         netif_addr_lock_bh(dev);
3153         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3154         if (!err)
3155                 __dev_set_rx_mode(dev);
3156         netif_addr_unlock_bh(dev);
3157         return err;
3158 }
3159 EXPORT_SYMBOL(dev_unicast_delete);
3160
3161 /**
3162  *      dev_unicast_add         - add a secondary unicast address
3163  *      @dev: device
3164  *      @addr: address to add
3165  *      @alen: length of @addr
3166  *
3167  *      Add a secondary unicast address to the device or increase
3168  *      the reference count if it already exists.
3169  *
3170  *      The caller must hold the rtnl_mutex.
3171  */
3172 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3173 {
3174         int err;
3175
3176         ASSERT_RTNL();
3177
3178         netif_addr_lock_bh(dev);
3179         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3180         if (!err)
3181                 __dev_set_rx_mode(dev);
3182         netif_addr_unlock_bh(dev);
3183         return err;
3184 }
3185 EXPORT_SYMBOL(dev_unicast_add);
3186
3187 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3188                     struct dev_addr_list **from, int *from_count)
3189 {
3190         struct dev_addr_list *da, *next;
3191         int err = 0;
3192
3193         da = *from;
3194         while (da != NULL) {
3195                 next = da->next;
3196                 if (!da->da_synced) {
3197                         err = __dev_addr_add(to, to_count,
3198                                              da->da_addr, da->da_addrlen, 0);
3199                         if (err < 0)
3200                                 break;
3201                         da->da_synced = 1;
3202                         da->da_users++;
3203                 } else if (da->da_users == 1) {
3204                         __dev_addr_delete(to, to_count,
3205                                           da->da_addr, da->da_addrlen, 0);
3206                         __dev_addr_delete(from, from_count,
3207                                           da->da_addr, da->da_addrlen, 0);
3208                 }
3209                 da = next;
3210         }
3211         return err;
3212 }
3213
3214 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3215                        struct dev_addr_list **from, int *from_count)
3216 {
3217         struct dev_addr_list *da, *next;
3218
3219         da = *from;
3220         while (da != NULL) {
3221                 next = da->next;
3222                 if (da->da_synced) {
3223                         __dev_addr_delete(to, to_count,
3224                                           da->da_addr, da->da_addrlen, 0);
3225                         da->da_synced = 0;
3226                         __dev_addr_delete(from, from_count,
3227                                           da->da_addr, da->da_addrlen, 0);
3228                 }
3229                 da = next;
3230         }
3231 }
3232
3233 /**
3234  *      dev_unicast_sync - Synchronize device's unicast list to another device
3235  *      @to: destination device
3236  *      @from: source device
3237  *
3238  *      Add newly added addresses to the destination device and release
3239  *      addresses that have no users left. The source device must be
3240  *      locked by netif_tx_lock_bh.
3241  *
3242  *      This function is intended to be called from the dev->set_rx_mode
3243  *      function of layered software devices.
3244  */
3245 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3246 {
3247         int err = 0;
3248
3249         netif_addr_lock_bh(to);
3250         err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3251                               &from->uc_list, &from->uc_count);
3252         if (!err)
3253                 __dev_set_rx_mode(to);
3254         netif_addr_unlock_bh(to);
3255         return err;
3256 }
3257 EXPORT_SYMBOL(dev_unicast_sync);
3258
3259 /**
3260  *      dev_unicast_unsync - Remove synchronized addresses from the destination device
3261  *      @to: destination device
3262  *      @from: source device
3263  *
3264  *      Remove all addresses that were added to the destination device by
3265  *      dev_unicast_sync(). This function is intended to be called from the
3266  *      dev->stop function of layered software devices.
3267  */
3268 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3269 {
3270         netif_addr_lock_bh(from);
3271         netif_addr_lock(to);
3272
3273         __dev_addr_unsync(&to->uc_list, &to->uc_count,
3274                           &from->uc_list, &from->uc_count);
3275         __dev_set_rx_mode(to);
3276
3277         netif_addr_unlock(to);
3278         netif_addr_unlock_bh(from);
3279 }
3280 EXPORT_SYMBOL(dev_unicast_unsync);
3281
3282 static void __dev_addr_discard(struct dev_addr_list **list)
3283 {
3284         struct dev_addr_list *tmp;
3285
3286         while (*list != NULL) {
3287                 tmp = *list;
3288                 *list = tmp->next;
3289                 if (tmp->da_users > tmp->da_gusers)
3290                         printk("__dev_addr_discard: address leakage! "
3291                                "da_users=%d\n", tmp->da_users);
3292                 kfree(tmp);
3293         }
3294 }
3295
3296 static void dev_addr_discard(struct net_device *dev)
3297 {
3298         netif_addr_lock_bh(dev);
3299
3300         __dev_addr_discard(&dev->uc_list);
3301         dev->uc_count = 0;
3302
3303         __dev_addr_discard(&dev->mc_list);
3304         dev->mc_count = 0;
3305
3306         netif_addr_unlock_bh(dev);
3307 }
3308
3309 unsigned dev_get_flags(const struct net_device *dev)
3310 {
3311         unsigned flags;
3312
3313         flags = (dev->flags & ~(IFF_PROMISC |
3314                                 IFF_ALLMULTI |
3315                                 IFF_RUNNING |
3316                                 IFF_LOWER_UP |
3317                                 IFF_DORMANT)) |
3318                 (dev->gflags & (IFF_PROMISC |
3319                                 IFF_ALLMULTI));
3320
3321         if (netif_running(dev)) {
3322                 if (netif_oper_up(dev))
3323                         flags |= IFF_RUNNING;
3324                 if (netif_carrier_ok(dev))
3325                         flags |= IFF_LOWER_UP;
3326                 if (netif_dormant(dev))
3327                         flags |= IFF_DORMANT;
3328         }
3329
3330         return flags;
3331 }
3332
3333 int dev_change_flags(struct net_device *dev, unsigned flags)
3334 {
3335         int ret, changes;
3336         int old_flags = dev->flags;
3337
3338         ASSERT_RTNL();
3339
3340         /*
3341          *      Set the flags on our device.
3342          */
3343
3344         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3345                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3346                                IFF_AUTOMEDIA)) |
3347                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3348                                     IFF_ALLMULTI));
3349
3350         /*
3351          *      Load in the correct multicast list now the flags have changed.
3352          */
3353
3354         if ((old_flags ^ flags) & IFF_MULTICAST)
3355                 dev_change_rx_flags(dev, IFF_MULTICAST);
3356
3357         dev_set_rx_mode(dev);
3358
3359         /*
3360          *      Have we downed the interface. We handle IFF_UP ourselves
3361          *      according to user attempts to set it, rather than blindly
3362          *      setting it.
3363          */
3364
3365         ret = 0;
3366         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3367                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3368
3369                 if (!ret)
3370                         dev_set_rx_mode(dev);
3371         }
3372
3373         if (dev->flags & IFF_UP &&
3374             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3375                                           IFF_VOLATILE)))
3376                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
3377
3378         if ((flags ^ dev->gflags) & IFF_PROMISC) {
3379                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3380                 dev->gflags ^= IFF_PROMISC;
3381                 dev_set_promiscuity(dev, inc);
3382         }
3383
3384         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3385            is important. Some (broken) drivers set IFF_PROMISC, when
3386            IFF_ALLMULTI is requested not asking us and not reporting.
3387          */
3388         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3389                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3390                 dev->gflags ^= IFF_ALLMULTI;
3391                 dev_set_allmulti(dev, inc);
3392         }
3393
3394         /* Exclude state transition flags, already notified */
3395         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3396         if (changes)
3397                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3398
3399         return ret;
3400 }
3401
3402 int dev_set_mtu(struct net_device *dev, int new_mtu)
3403 {
3404         int err;
3405
3406         if (new_mtu == dev->mtu)
3407                 return 0;
3408
3409         /*      MTU must be positive.    */
3410         if (new_mtu < 0)
3411                 return -EINVAL;
3412
3413         if (!netif_device_present(dev))
3414                 return -ENODEV;
3415
3416         err = 0;
3417         if (dev->change_mtu)
3418                 err = dev->change_mtu(dev, new_mtu);
3419         else
3420                 dev->mtu = new_mtu;
3421         if (!err && dev->flags & IFF_UP)
3422                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3423         return err;
3424 }
3425
3426 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3427 {
3428         int err;
3429
3430         if (!dev->set_mac_address)
3431                 return -EOPNOTSUPP;
3432         if (sa->sa_family != dev->type)
3433                 return -EINVAL;
3434         if (!netif_device_present(dev))
3435                 return -ENODEV;
3436         err = dev->set_mac_address(dev, sa);
3437         if (!err)
3438                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3439         return err;
3440 }
3441
3442 /*
3443  *      Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3444  */
3445 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3446 {
3447         int err;
3448         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3449
3450         if (!dev)
3451                 return -ENODEV;
3452
3453         switch (cmd) {
3454                 case SIOCGIFFLAGS:      /* Get interface flags */
3455                         ifr->ifr_flags = dev_get_flags(dev);
3456                         return 0;
3457
3458                 case SIOCGIFMETRIC:     /* Get the metric on the interface
3459                                            (currently unused) */
3460                         ifr->ifr_metric = 0;
3461                         return 0;
3462
3463                 case SIOCGIFMTU:        /* Get the MTU of a device */
3464                         ifr->ifr_mtu = dev->mtu;
3465                         return 0;
3466
3467                 case SIOCGIFHWADDR:
3468                         if (!dev->addr_len)
3469                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3470                         else
3471                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3472                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3473                         ifr->ifr_hwaddr.sa_family = dev->type;
3474                         return 0;
3475
3476                 case SIOCGIFSLAVE:
3477                         err = -EINVAL;
3478                         break;
3479
3480                 case SIOCGIFMAP:
3481                         ifr->ifr_map.mem_start = dev->mem_start;
3482                         ifr->ifr_map.mem_end   = dev->mem_end;
3483                         ifr->ifr_map.base_addr = dev->base_addr;
3484                         ifr->ifr_map.irq       = dev->irq;
3485                         ifr->ifr_map.dma       = dev->dma;
3486                         ifr->ifr_map.port      = dev->if_port;
3487                         return 0;
3488
3489                 case SIOCGIFINDEX:
3490                         ifr->ifr_ifindex = dev->ifindex;
3491                         return 0;
3492
3493                 case SIOCGIFTXQLEN:
3494                         ifr->ifr_qlen = dev->tx_queue_len;
3495                         return 0;
3496
3497                 default:
3498                         /* dev_ioctl() should ensure this case
3499                          * is never reached
3500                          */
3501                         WARN_ON(1);
3502                         err = -EINVAL;
3503                         break;
3504
3505         }
3506         return err;
3507 }
3508
3509 /*
3510  *      Perform the SIOCxIFxxx calls, inside rtnl_lock()
3511  */
3512 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3513 {
3514         int err;
3515         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3516
3517         if (!dev)
3518                 return -ENODEV;
3519
3520         switch (cmd) {
3521                 case SIOCSIFFLAGS:      /* Set interface flags */
3522                         return dev_change_flags(dev, ifr->ifr_flags);
3523
3524                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3525                                            (currently unused) */
3526                         return -EOPNOTSUPP;
3527
3528                 case SIOCSIFMTU:        /* Set the MTU of a device */
3529                         return dev_set_mtu(dev, ifr->ifr_mtu);
3530
3531                 case SIOCSIFHWADDR:
3532                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3533
3534                 case SIOCSIFHWBROADCAST:
3535                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3536                                 return -EINVAL;
3537                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3538                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3539                         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3540                         return 0;
3541
3542                 case SIOCSIFMAP:
3543                         if (dev->set_config) {
3544                                 if (!netif_device_present(dev))
3545                                         return -ENODEV;
3546                                 return dev->set_config(dev, &ifr->ifr_map);
3547                         }
3548                         return -EOPNOTSUPP;
3549
3550                 case SIOCADDMULTI:
3551                         if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3552                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3553                                 return -EINVAL;
3554                         if (!netif_device_present(dev))
3555                                 return -ENODEV;
3556                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3557                                           dev->addr_len, 1);
3558
3559                 case SIOCDELMULTI:
3560                         if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3561                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3562                                 return -EINVAL;
3563                         if (!netif_device_present(dev))
3564                                 return -ENODEV;
3565                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3566                                              dev->addr_len, 1);
3567
3568                 case SIOCSIFTXQLEN:
3569                         if (ifr->ifr_qlen < 0)
3570                                 return -EINVAL;
3571                         dev->tx_queue_len = ifr->ifr_qlen;
3572                         return 0;
3573
3574                 case SIOCSIFNAME:
3575                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3576                         return dev_change_name(dev, ifr->ifr_newname);
3577
3578                 /*
3579                  *      Unknown or private ioctl
3580                  */
3581
3582                 default:
3583                         if ((cmd >= SIOCDEVPRIVATE &&
3584                             cmd <= SIOCDEVPRIVATE + 15) ||
3585                             cmd == SIOCBONDENSLAVE ||
3586                             cmd == SIOCBONDRELEASE ||
3587                             cmd == SIOCBONDSETHWADDR ||
3588                             cmd == SIOCBONDSLAVEINFOQUERY ||
3589                             cmd == SIOCBONDINFOQUERY ||
3590                             cmd == SIOCBONDCHANGEACTIVE ||
3591                             cmd == SIOCGMIIPHY ||
3592                             cmd == SIOCGMIIREG ||
3593                             cmd == SIOCSMIIREG ||
3594                             cmd == SIOCBRADDIF ||
3595                             cmd == SIOCBRDELIF ||
3596                             cmd == SIOCWANDEV) {
3597                                 err = -EOPNOTSUPP;
3598                                 if (dev->do_ioctl) {
3599                                         if (netif_device_present(dev))
3600                                                 err = dev->do_ioctl(dev, ifr,
3601                                                                     cmd);
3602                                         else
3603                                                 err = -ENODEV;
3604                                 }
3605                         } else
3606                                 err = -EINVAL;
3607
3608         }
3609         return err;
3610 }
3611
3612 /*
3613  *      This function handles all "interface"-type I/O control requests. The actual
3614  *      'doing' part of this is dev_ifsioc above.
3615  */
3616
3617 /**
3618  *      dev_ioctl       -       network device ioctl
3619  *      @net: the applicable net namespace
3620  *      @cmd: command to issue
3621  *      @arg: pointer to a struct ifreq in user space
3622  *
3623  *      Issue ioctl functions to devices. This is normally called by the
3624  *      user space syscall interfaces but can sometimes be useful for
3625  *      other purposes. The return value is the return from the syscall if
3626  *      positive or a negative errno code on error.
3627  */
3628
3629 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3630 {
3631         struct ifreq ifr;
3632         int ret;
3633         char *colon;
3634
3635         /* One special case: SIOCGIFCONF takes ifconf argument
3636            and requires shared lock, because it sleeps writing
3637            to user space.
3638          */
3639
3640         if (cmd == SIOCGIFCONF) {
3641                 rtnl_lock();
3642                 ret = dev_ifconf(net, (char __user *) arg);
3643                 rtnl_unlock();
3644                 return ret;
3645         }
3646         if (cmd == SIOCGIFNAME)
3647                 return dev_ifname(net, (struct ifreq __user *)arg);
3648
3649         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3650                 return -EFAULT;
3651
3652         ifr.ifr_name[IFNAMSIZ-1] = 0;
3653
3654         colon = strchr(ifr.ifr_name, ':');
3655         if (colon)
3656                 *colon = 0;
3657
3658         /*
3659          *      See which interface the caller is talking about.
3660          */
3661
3662         switch (cmd) {
3663                 /*
3664                  *      These ioctl calls:
3665                  *      - can be done by all.
3666                  *      - atomic and do not require locking.
3667                  *      - return a value
3668                  */
3669                 case SIOCGIFFLAGS:
3670                 case SIOCGIFMETRIC:
3671                 case SIOCGIFMTU:
3672                 case SIOCGIFHWADDR:
3673                 case SIOCGIFSLAVE:
3674                 case SIOCGIFMAP:
3675                 case SIOCGIFINDEX:
3676                 case SIOCGIFTXQLEN:
3677                         dev_load(net, ifr.ifr_name);
3678                         read_lock(&dev_base_lock);
3679                         ret = dev_ifsioc_locked(net, &ifr, cmd);
3680                         read_unlock(&dev_base_lock);
3681                         if (!ret) {
3682                                 if (colon)
3683                                         *colon = ':';
3684                                 if (copy_to_user(arg, &ifr,
3685                                                  sizeof(struct ifreq)))
3686                                         ret = -EFAULT;
3687                         }
3688                         return ret;
3689
3690                 case SIOCETHTOOL:
3691                         dev_load(net, ifr.ifr_name);
3692                         rtnl_lock();
3693                         ret = dev_ethtool(net, &ifr);
3694                         rtnl_unlock();
3695                         if (!ret) {
3696                                 if (colon)
3697                                         *colon = ':';
3698                                 if (copy_to_user(arg, &ifr,
3699                                                  sizeof(struct ifreq)))
3700                                         ret = -EFAULT;
3701                         }
3702                         return ret;
3703
3704                 /*
3705                  *      These ioctl calls:
3706                  *      - require superuser power.
3707                  *      - require strict serialization.
3708                  *      - return a value
3709                  */
3710                 case SIOCGMIIPHY:
3711                 case SIOCGMIIREG:
3712                 case SIOCSIFNAME:
3713                         if (!capable(CAP_NET_ADMIN))
3714                                 return -EPERM;
3715                         dev_load(net, ifr.ifr_name);
3716                         rtnl_lock();
3717                         ret = dev_ifsioc(net, &ifr, cmd);
3718                         rtnl_unlock();
3719                         if (!ret) {
3720                                 if (colon)
3721                                         *colon = ':';
3722                                 if (copy_to_user(arg, &ifr,
3723                                                  sizeof(struct ifreq)))
3724                                         ret = -EFAULT;
3725                         }
3726                         return ret;
3727
3728                 /*
3729                  *      These ioctl calls:
3730                  *      - require superuser power.
3731                  *      - require strict serialization.
3732                  *      - do not return a value
3733                  */
3734                 case SIOCSIFFLAGS:
3735                 case SIOCSIFMETRIC:
3736                 case SIOCSIFMTU:
3737                 case SIOCSIFMAP:
3738                 case SIOCSIFHWADDR:
3739                 case SIOCSIFSLAVE:
3740                 case SIOCADDMULTI:
3741                 case SIOCDELMULTI:
3742                 case SIOCSIFHWBROADCAST:
3743                 case SIOCSIFTXQLEN:
3744                 case SIOCSMIIREG:
3745                 case SIOCBONDENSLAVE:
3746                 case SIOCBONDRELEASE:
3747                 case SIOCBONDSETHWADDR:
3748                 case SIOCBONDCHANGEACTIVE:
3749                 case SIOCBRADDIF:
3750                 case SIOCBRDELIF:
3751                         if (!capable(CAP_NET_ADMIN))
3752                                 return -EPERM;
3753                         /* fall through */
3754                 case SIOCBONDSLAVEINFOQUERY:
3755                 case SIOCBONDINFOQUERY:
3756                         dev_load(net, ifr.ifr_name);
3757                         rtnl_lock();
3758                         ret = dev_ifsioc(net, &ifr, cmd);
3759                         rtnl_unlock();
3760                         return ret;
3761
3762                 case SIOCGIFMEM:
3763                         /* Get the per device memory space. We can add this but
3764                          * currently do not support it */
3765                 case SIOCSIFMEM:
3766                         /* Set the per device memory buffer space.
3767                          * Not applicable in our case */
3768                 case SIOCSIFLINK:
3769                         return -EINVAL;
3770
3771                 /*
3772                  *      Unknown or private ioctl.
3773                  */
3774                 default:
3775                         if (cmd == SIOCWANDEV ||
3776                             (cmd >= SIOCDEVPRIVATE &&
3777                              cmd <= SIOCDEVPRIVATE + 15)) {
3778                                 dev_load(net, ifr.ifr_name);
3779                                 rtnl_lock();
3780                                 ret = dev_ifsioc(net, &ifr, cmd);
3781                                 rtnl_unlock();
3782                                 if (!ret && copy_to_user(arg, &ifr,
3783                                                          sizeof(struct ifreq)))
3784                                         ret = -EFAULT;
3785                                 return ret;
3786                         }
3787                         /* Take care of Wireless Extensions */
3788                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3789                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
3790                         return -EINVAL;
3791         }
3792 }
3793
3794
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	Candidates are taken from a function-static counter that is
 *	advanced on every attempt; values that are not positive are
 *	replaced by 1, and a candidate is returned as soon as
 *	__dev_get_by_index() finds no device using it in @net.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	for (;;) {
		/*
		 * Advance in unsigned arithmetic: incrementing a signed int
		 * that already holds its maximum value is undefined in ISO C,
		 * whereas the unsigned addition wraps and the result is then
		 * caught by the "<= 0" test below.
		 */
		ifindex = (int)((unsigned int)ifindex + 1U);
		if (ifindex <= 0)
			ifindex = 1;
		if (!__dev_get_by_index(net, ifindex))
			return ifindex;
	}
}
3813
3814 /* Delayed registration/unregisteration */
3815 static DEFINE_SPINLOCK(net_todo_list_lock);
3816 static LIST_HEAD(net_todo_list);
3817
3818 static void net_set_todo(struct net_device *dev)
3819 {
3820         spin_lock(&net_todo_list_lock);
3821         list_add_tail(&dev->todo_list, &net_todo_list);
3822         spin_unlock(&net_todo_list_lock);
3823 }
3824
3825 static void rollback_registered(struct net_device *dev)
3826 {
3827         BUG_ON(dev_boot_phase);
3828         ASSERT_RTNL();
3829
3830         /* Some devices call without registering for initialization unwind. */
3831         if (dev->reg_state == NETREG_UNINITIALIZED) {
3832                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3833                                   "was registered\n", dev->name, dev);
3834
3835                 WARN_ON(1);
3836                 return;
3837         }
3838
3839         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3840
3841         /* If device is running, close it first. */
3842         dev_close(dev);
3843
3844         /* And unlink it from device chain. */
3845         unlist_netdevice(dev);
3846
3847         dev->reg_state = NETREG_UNREGISTERING;
3848
3849         synchronize_net();
3850
3851         /* Shutdown queueing discipline. */
3852         dev_shutdown(dev);
3853
3854
3855         /* Notify protocols, that we are about to destroy
3856            this device. They should clean all the things.
3857         */
3858         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3859
3860         /*
3861          *      Flush the unicast and multicast chains
3862          */
3863         dev_addr_discard(dev);
3864
3865         if (dev->uninit)
3866                 dev->uninit(dev);
3867
3868         /* Notifier chain MUST detach us from master device. */
3869         WARN_ON(dev->master);
3870
3871         /* Remove entries from kobject tree */
3872         netdev_unregister_kobject(dev);
3873
3874         synchronize_net();
3875
3876         dev_put(dev);
3877 }
3878
3879 static void __netdev_init_queue_locks_one(struct net_device *dev,
3880                                           struct netdev_queue *dev_queue,
3881                                           void *_unused)
3882 {
3883         spin_lock_init(&dev_queue->_xmit_lock);
3884         netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3885         dev_queue->xmit_lock_owner = -1;
3886 }
3887
3888 static void netdev_init_queue_locks(struct net_device *dev)
3889 {
3890         netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3891         __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3892 }
3893
3894 /**
3895  *      register_netdevice      - register a network device
3896  *      @dev: device to register
3897  *
3898  *      Take a completed network device structure and add it to the kernel
3899  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3900  *      chain. 0 is returned on success. A negative errno code is returned
3901  *      on a failure to set up the device, or if the name is a duplicate.
3902  *
3903  *      Callers must hold the rtnl semaphore. You may want
3904  *      register_netdev() instead of this.
3905  *
3906  *      BUGS:
3907  *      The locking appears insufficient to guarantee two parallel registers
3908  *      will not get the same name.
3909  */
3910
3911 int register_netdevice(struct net_device *dev)
3912 {
3913         struct hlist_head *head;
3914         struct hlist_node *p;
3915         int ret;
3916         struct net *net;
3917
3918         BUG_ON(dev_boot_phase);
3919         ASSERT_RTNL();
3920
3921         might_sleep();
3922
3923         /* When net_device's are persistent, this will be fatal. */
3924         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3925         BUG_ON(!dev_net(dev));
3926         net = dev_net(dev);
3927
3928         spin_lock_init(&dev->addr_list_lock);
3929         netdev_set_addr_lockdep_class(dev);
3930         netdev_init_queue_locks(dev);
3931
3932         dev->iflink = -1;
3933
3934         /* Init, if this function is available */
3935         if (dev->init) {
3936                 ret = dev->init(dev);
3937                 if (ret) {
3938                         if (ret > 0)
3939                                 ret = -EIO;
3940                         goto out;
3941                 }
3942         }
3943
3944         if (!dev_valid_name(dev->name)) {
3945                 ret = -EINVAL;
3946                 goto err_uninit;
3947         }
3948
3949         dev->ifindex = dev_new_index(net);
3950         if (dev->iflink == -1)
3951                 dev->iflink = dev->ifindex;
3952
3953         /* Check for existence of name */
3954         head = dev_name_hash(net, dev->name);
3955         hlist_for_each(p, head) {
3956                 struct net_device *d
3957                         = hlist_entry(p, struct net_device, name_hlist);
3958                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3959                         ret = -EEXIST;
3960                         goto err_uninit;
3961                 }
3962         }
3963
3964         /* Fix illegal checksum combinations */
3965         if ((dev->features & NETIF_F_HW_CSUM) &&
3966             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3967                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3968                        dev->name);
3969                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3970         }
3971
3972         if ((dev->features & NETIF_F_NO_CSUM) &&
3973             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3974                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3975                        dev->name);
3976                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3977         }
3978
3979
3980         /* Fix illegal SG+CSUM combinations. */
3981         if ((dev->features & NETIF_F_SG) &&
3982             !(dev->features & NETIF_F_ALL_CSUM)) {
3983                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3984                        dev->name);
3985                 dev->features &= ~NETIF_F_SG;
3986         }
3987
3988         /* TSO requires that SG is present as well. */
3989         if ((dev->features & NETIF_F_TSO) &&
3990             !(dev->features & NETIF_F_SG)) {
3991                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3992                        dev->name);
3993                 dev->features &= ~NETIF_F_TSO;
3994         }
3995         if (dev->features & NETIF_F_UFO) {
3996                 if (!(dev->features & NETIF_F_HW_CSUM)) {
3997                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3998                                         "NETIF_F_HW_CSUM feature.\n",
3999                                                         dev->name);
4000                         dev->features &= ~NETIF_F_UFO;
4001                 }
4002                 if (!(dev->features & NETIF_F_SG)) {
4003                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
4004                                         "NETIF_F_SG feature.\n",
4005                                         dev->name);
4006                         dev->features &= ~NETIF_F_UFO;
4007                 }
4008         }
4009
4010         /* Enable software GSO if SG is supported. */
4011         if (dev->features & NETIF_F_SG)
4012                 dev->features |= NETIF_F_GSO;
4013
4014         netdev_initialize_kobject(dev);
4015         ret = netdev_register_kobject(dev);
4016         if (ret)
4017                 goto err_uninit;
4018         dev->reg_state = NETREG_REGISTERED;
4019
4020         /*
4021          *      Default initial state at registry is that the
4022          *      device is present.
4023          */
4024
4025         set_bit(__LINK_STATE_PRESENT, &dev->state);
4026
4027         dev_init_scheduler(dev);
4028         dev_hold(dev);
4029         list_netdevice(dev);
4030
4031         /* Notify protocols, that a new device appeared. */
4032         ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4033         ret = notifier_to_errno(ret);
4034         if (ret) {
4035                 rollback_registered(dev);
4036                 dev->reg_state = NETREG_UNREGISTERED;
4037         }
4038
4039 out:
4040         return ret;
4041
4042 err_uninit:
4043         if (dev->uninit)
4044                 dev->uninit(dev);
4045         goto out;
4046 }
4047
4048 /**
4049  *      register_netdev - register a network device
4050  *      @dev: device to register
4051  *
4052  *      Take a completed network device structure and add it to the kernel
4053  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4054  *      chain. 0 is returned on success. A negative errno code is returned
4055  *      on a failure to set up the device, or if the name is a duplicate.
4056  *
4057  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
4058  *      and expands the device name if you passed a format string to
4059  *      alloc_netdev.
4060  */
4061 int register_netdev(struct net_device *dev)
4062 {
4063         int err;
4064
4065         rtnl_lock();
4066
4067         /*
4068          * If the name is a format string the caller wants us to do a
4069          * name allocation.
4070          */
4071         if (strchr(dev->name, '%')) {
4072                 err = dev_alloc_name(dev, dev->name);
4073                 if (err < 0)
4074                         goto out;
4075         }
4076
4077         err = register_netdevice(dev);
4078 out:
4079         rtnl_unlock();
4080         return err;
4081 }
4082 EXPORT_SYMBOL(register_netdev);
4083
4084 /*
4085  * netdev_wait_allrefs - wait until all references are gone.
4086  *
4087  * This is called when unregistering network devices.
4088  *
4089  * Any protocol or device that holds a reference should register
4090  * for netdevice notification, and cleanup and put back the
4091  * reference if they receive an UNREGISTER event.
4092  * We can get stuck here if buggy protocols don't correctly
4093  * call dev_put.
4094  */
4095 static void netdev_wait_allrefs(struct net_device *dev)
4096 {
4097         unsigned long rebroadcast_time, warning_time;
4098
4099         rebroadcast_time = warning_time = jiffies;
4100         while (atomic_read(&dev->refcnt) != 0) {
4101                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4102                         rtnl_lock();
4103
4104                         /* Rebroadcast unregister notification */
4105                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4106
4107                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4108                                      &dev->state)) {
4109                                 /* We must not have linkwatch events
4110                                  * pending on unregister. If this
4111                                  * happens, we simply run the queue
4112                                  * unscheduled, resulting in a noop
4113                                  * for this device.
4114                                  */
4115                                 linkwatch_run_queue();
4116                         }
4117
4118                         __rtnl_unlock();
4119
4120                         rebroadcast_time = jiffies;
4121                 }
4122
4123                 msleep(250);
4124
4125                 if (time_after(jiffies, warning_time + 10 * HZ)) {
4126                         printk(KERN_EMERG "unregister_netdevice: "
4127                                "waiting for %s to become free. Usage "
4128                                "count = %d\n",
4129                                dev->name, atomic_read(&dev->refcnt));
4130                         warning_time = jiffies;
4131                 }
4132         }
4133 }
4134
4135 /* The sequence is:
4136  *
4137  *      rtnl_lock();
4138  *      ...
4139  *      register_netdevice(x1);
4140  *      register_netdevice(x2);
4141  *      ...
4142  *      unregister_netdevice(y1);
4143  *      unregister_netdevice(y2);
4144  *      ...
4145  *      rtnl_unlock();
4146  *      free_netdev(y1);
4147  *      free_netdev(y2);
4148  *
4149  * We are invoked by rtnl_unlock() after it drops the semaphore.
4150  * This allows us to deal with problems:
4151  * 1) We can delete sysfs objects which invoke hotplug
4152  *    without deadlocking with linkwatch via keventd.
4153  * 2) Since we run with the RTNL semaphore not held, we can sleep
4154  *    safely in order to wait for the netdev refcnt to drop to zero.
4155  */
4156 static DEFINE_MUTEX(net_todo_run_mutex);
4157 void netdev_run_todo(void)
4158 {
4159         struct list_head list;
4160
4161         /* Need to guard against multiple cpu's getting out of order. */
4162         mutex_lock(&net_todo_run_mutex);
4163
4164         /* Not safe to do outside the semaphore.  We must not return
4165          * until all unregister events invoked by the local processor
4166          * have been completed (either by this todo run, or one on
4167          * another cpu).
4168          */
4169         if (list_empty(&net_todo_list))
4170                 goto out;
4171
4172         /* Snapshot list, allow later requests */
4173         spin_lock(&net_todo_list_lock);
4174         list_replace_init(&net_todo_list, &list);
4175         spin_unlock(&net_todo_list_lock);
4176
4177         while (!list_empty(&list)) {
4178                 struct net_device *dev
4179                         = list_entry(list.next, struct net_device, todo_list);
4180                 list_del(&dev->todo_list);
4181
4182                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4183                         printk(KERN_ERR "network todo '%s' but state %d\n",
4184                                dev->name, dev->reg_state);
4185                         dump_stack();
4186                         continue;
4187                 }
4188
4189                 dev->reg_state = NETREG_UNREGISTERED;
4190
4191                 on_each_cpu(flush_backlog, dev, 1);
4192
4193                 netdev_wait_allrefs(dev);
4194
4195                 /* paranoia */
4196                 BUG_ON(atomic_read(&dev->refcnt));
4197                 WARN_ON(dev->ip_ptr);
4198                 WARN_ON(dev->ip6_ptr);
4199                 WARN_ON(dev->dn_ptr);
4200
4201                 if (dev->destructor)
4202                         dev->destructor(dev);
4203
4204                 /* Free network device */
4205                 kobject_put(&dev->dev.kobj);
4206         }
4207
4208 out:
4209         mutex_unlock(&net_todo_run_mutex);
4210 }
4211
4212 static struct net_device_stats *internal_stats(struct net_device *dev)
4213 {
4214         return &dev->stats;
4215 }
4216
4217 static void netdev_init_one_queue(struct net_device *dev,
4218                                   struct netdev_queue *queue,
4219                                   void *_unused)
4220 {
4221         queue->dev = dev;
4222 }
4223
4224 static void netdev_init_queues(struct net_device *dev)
4225 {
4226         netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4227         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4228         spin_lock_init(&dev->tx_global_lock);
4229 }
4230
4231 /**
4232  *      alloc_netdev_mq - allocate network device
4233  *      @sizeof_priv:   size of private data to allocate space for
4234  *      @name:          device name format string
4235  *      @setup:         callback to initialize device
4236  *      @queue_count:   the number of subqueues to allocate
4237  *
4238  *      Allocates a struct net_device with private data area for driver use
4239  *      and performs basic initialization.  Also allocates subquue structs
4240  *      for each queue on the device at the end of the netdevice.
4241  */
4242 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4243                 void (*setup)(struct net_device *), unsigned int queue_count)
4244 {
4245         struct netdev_queue *tx;
4246         struct net_device *dev;
4247         size_t alloc_size;
4248         void *p;
4249
4250         BUG_ON(strlen(name) >= sizeof(dev->name));
4251
4252         alloc_size = sizeof(struct net_device);
4253         if (sizeof_priv) {
4254                 /* ensure 32-byte alignment of private area */
4255                 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4256                 alloc_size += sizeof_priv;
4257         }
4258         /* ensure 32-byte alignment of whole construct */
4259         alloc_size += NETDEV_ALIGN_CONST;
4260
4261         p = kzalloc(alloc_size, GFP_KERNEL);
4262         if (!p) {
4263                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4264                 return NULL;
4265         }
4266
4267         tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4268         if (!tx) {
4269                 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4270                        "tx qdiscs.\n");
4271                 kfree(p);
4272                 return NULL;
4273         }
4274
4275         dev = (struct net_device *)
4276                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4277         dev->padded = (char *)dev - (char *)p;
4278         dev_net_set(dev, &init_net);
4279
4280         dev->_tx = tx;
4281         dev->num_tx_queues = queue_count;
4282         dev->real_num_tx_queues = queue_count;
4283
4284         if (sizeof_priv) {
4285                 dev->priv = ((char *)dev +
4286                              ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4287                               & ~NETDEV_ALIGN_CONST));
4288         }
4289
4290         dev->gso_max_size = GSO_MAX_SIZE;
4291
4292         netdev_init_queues(dev);
4293
4294         dev->get_stats = internal_stats;
4295         netpoll_netdev_init(dev);
4296         setup(dev);
4297         strcpy(dev->name, name);
4298         return dev;
4299 }
4300 EXPORT_SYMBOL(alloc_netdev_mq);
4301
4302 /**
4303  *      free_netdev - free network device
4304  *      @dev: device
4305  *
4306  *      This function does the last stage of destroying an allocated device
4307  *      interface. The reference to the device object is released.
4308  *      If this is the last reference then it will be freed.
4309  */
4310 void free_netdev(struct net_device *dev)
4311 {
4312         release_net(dev_net(dev));
4313
4314         kfree(dev->_tx);
4315
4316         /*  Compatibility with error handling in drivers */
4317         if (dev->reg_state == NETREG_UNINITIALIZED) {
4318                 kfree((char *)dev - dev->padded);
4319                 return;
4320         }
4321
4322         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4323         dev->reg_state = NETREG_RELEASED;
4324
4325         /* will free via device release */
4326         put_device(&dev->dev);
4327 }
4328
/*
 * Synchronize with packet receive processing by waiting in
 * synchronize_rcu().  May sleep.
 */
void synchronize_net(void)
{
        might_sleep();
        synchronize_rcu();
}
4335
/**
 *      unregister_netdevice - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.  The remainder of the unregister work is queued and
 *      finished after the rtnl semaphore has been released.
 *
 *      Callers must hold the rtnl semaphore.  You may want
 *      unregister_netdev() instead of this.
 */
void unregister_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        rollback_registered(dev);

        /* Finish processing unregister after unlock */
        net_set_todo(dev);
}
4355
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.
 *
 *      This is unregister_netdevice() wrapped in rtnl_lock() and
 *      rtnl_unlock().  In general you want to use this and not
 *      unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();
        unregister_netdevice(dev);
        rtnl_unlock();
}
4373
4374 EXPORT_SYMBOL(unregister_netdev);
4375
4376 /**
4377  *      dev_change_net_namespace - move device to different nethost namespace
4378  *      @dev: device
4379  *      @net: network namespace
4380  *      @pat: If not NULL name pattern to try if the current device name
4381  *            is already taken in the destination network namespace.
4382  *
4383  *      This function shuts down a device interface and moves it
4384  *      to a new network namespace. On success 0 is returned, on
4385  *      a failure a netagive errno code is returned.
4386  *
4387  *      Callers must hold the rtnl semaphore.
4388  */
4389
4390 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4391 {
4392         char buf[IFNAMSIZ];
4393         const char *destname;
4394         int err;
4395
4396         ASSERT_RTNL();
4397
4398         /* Don't allow namespace local devices to be moved. */
4399         err = -EINVAL;
4400         if (dev->features & NETIF_F_NETNS_LOCAL)
4401                 goto out;
4402
4403         /* Ensure the device has been registrered */
4404         err = -EINVAL;
4405         if (dev->reg_state != NETREG_REGISTERED)
4406                 goto out;
4407
4408         /* Get out if there is nothing todo */
4409         err = 0;
4410         if (net_eq(dev_net(dev), net))
4411                 goto out;
4412
4413         /* Pick the destination device name, and ensure
4414          * we can use it in the destination network namespace.
4415          */
4416         err = -EEXIST;
4417         destname = dev->name;
4418         if (__dev_get_by_name(net, destname)) {
4419                 /* We get here if we can't use the current device name */
4420                 if (!pat)
4421                         goto out;
4422                 if (!dev_valid_name(pat))
4423                         goto out;
4424                 if (strchr(pat, '%')) {
4425                         if (__dev_alloc_name(net, pat, buf) < 0)
4426                                 goto out;
4427                         destname = buf;
4428                 } else
4429                         destname = pat;
4430                 if (__dev_get_by_name(net, destname))
4431                         goto out;
4432         }
4433
4434         /*
4435          * And now a mini version of register_netdevice unregister_netdevice.
4436          */
4437
4438         /* If device is running close it first. */
4439         dev_close(dev);
4440
4441         /* And unlink it from device chain */
4442         err = -ENODEV;
4443         unlist_netdevice(dev);
4444
4445         synchronize_net();
4446
4447         /* Shutdown queueing discipline. */
4448         dev_shutdown(dev);
4449
4450         /* Notify protocols, that we are about to destroy
4451            this device. They should clean all the things.
4452         */
4453         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4454
4455         /*
4456          *      Flush the unicast and multicast chains
4457          */
4458         dev_addr_discard(dev);
4459
4460         /* Actually switch the network namespace */
4461         dev_net_set(dev, net);
4462
4463         /* Assign the new device name */
4464         if (destname != dev->name)
4465                 strcpy(dev->name, destname);
4466
4467         /* If there is an ifindex conflict assign a new one */
4468         if (__dev_get_by_index(net, dev->ifindex)) {
4469                 int iflink = (dev->iflink == dev->ifindex);
4470                 dev->ifindex = dev_new_index(net);
4471                 if (iflink)
4472                         dev->iflink = dev->ifindex;
4473         }
4474
4475         /* Fixup kobjects */
4476         netdev_unregister_kobject(dev);
4477         err = netdev_register_kobject(dev);
4478         WARN_ON(err);
4479
4480         /* Add the device back in the hashes */
4481         list_netdevice(dev);
4482
4483         /* Notify protocols, that a new device appeared. */
4484         call_netdevice_notifiers(NETDEV_REGISTER, dev);
4485
4486         synchronize_net();
4487         err = 0;
4488 out:
4489         return err;
4490 }
4491
4492 static int dev_cpu_callback(struct notifier_block *nfb,
4493                             unsigned long action,
4494                             void *ocpu)
4495 {
4496         struct sk_buff **list_skb;
4497         struct Qdisc **list_net;
4498         struct sk_buff *skb;
4499         unsigned int cpu, oldcpu = (unsigned long)ocpu;
4500         struct softnet_data *sd, *oldsd;
4501
4502         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4503                 return NOTIFY_OK;
4504
4505         local_irq_disable();
4506         cpu = smp_processor_id();
4507         sd = &per_cpu(softnet_data, cpu);
4508         oldsd = &per_cpu(softnet_data, oldcpu);
4509
4510         /* Find end of our completion_queue. */
4511         list_skb = &sd->completion_queue;
4512         while (*list_skb)
4513                 list_skb = &(*list_skb)->next;
4514         /* Append completion queue from offline CPU. */
4515         *list_skb = oldsd->completion_queue;
4516         oldsd->completion_queue = NULL;
4517
4518         /* Find end of our output_queue. */
4519         list_net = &sd->output_queue;
4520         while (*list_net)
4521                 list_net = &(*list_net)->next_sched;
4522         /* Append output queue from offline CPU. */
4523         *list_net = oldsd->output_queue;
4524         oldsd->output_queue = NULL;
4525
4526         raise_softirq_irqoff(NET_TX_SOFTIRQ);
4527         local_irq_enable();
4528
4529         /* Process offline CPU's input_pkt_queue */
4530         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4531                 netif_rx(skb);
4532
4533         return NOTIFY_OK;
4534 }
4535
4536 #ifdef CONFIG_NET_DMA
4537 /**
4538  * net_dma_rebalance - try to maintain one DMA channel per CPU
4539  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4540  *
4541  * This is called when the number of channels allocated to the net_dma client
4542  * changes.  The net_dma client tries to have one DMA channel per CPU.
4543  */
4544
4545 static void net_dma_rebalance(struct net_dma *net_dma)
4546 {
4547         unsigned int cpu, i, n, chan_idx;
4548         struct dma_chan *chan;
4549
4550         if (cpus_empty(net_dma->channel_mask)) {
4551                 for_each_online_cpu(cpu)
4552                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4553                 return;
4554         }
4555
4556         i = 0;
4557         cpu = first_cpu(cpu_online_map);
4558
4559         for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4560                 chan = net_dma->channels[chan_idx];
4561
4562                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4563                    + (i < (num_online_cpus() %
4564                         cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4565
4566                 while(n) {
4567                         per_cpu(softnet_data, cpu).net_dma = chan;
4568                         cpu = next_cpu(cpu, cpu_online_map);
4569                         n--;
4570                 }
4571                 i++;
4572         }
4573 }
4574
4575 /**
4576  * netdev_dma_event - event callback for the net_dma_client
4577  * @client: should always be net_dma_client
4578  * @chan: DMA channel for the event
4579  * @state: DMA state to be handled
4580  */
4581 static enum dma_state_client
4582 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4583         enum dma_state state)
4584 {
4585         int i, found = 0, pos = -1;
4586         struct net_dma *net_dma =
4587                 container_of(client, struct net_dma, client);
4588         enum dma_state_client ack = DMA_DUP; /* default: take no action */
4589
4590         spin_lock(&net_dma->lock);
4591         switch (state) {
4592         case DMA_RESOURCE_AVAILABLE:
4593                 for (i = 0; i < nr_cpu_ids; i++)
4594                         if (net_dma->channels[i] == chan) {
4595                                 found = 1;
4596                                 break;
4597                         } else if (net_dma->channels[i] == NULL && pos < 0)
4598                                 pos = i;
4599
4600                 if (!found && pos >= 0) {
4601                         ack = DMA_ACK;
4602                         net_dma->channels[pos] = chan;
4603                         cpu_set(pos, net_dma->channel_mask);
4604                         net_dma_rebalance(net_dma);
4605                 }
4606                 break;
4607         case DMA_RESOURCE_REMOVED:
4608                 for (i = 0; i < nr_cpu_ids; i++)
4609                         if (net_dma->channels[i] == chan) {
4610                                 found = 1;
4611                                 pos = i;
4612                                 break;
4613                         }
4614
4615                 if (found) {
4616                         ack = DMA_ACK;
4617                         cpu_clear(pos, net_dma->channel_mask);
4618                         net_dma->channels[i] = NULL;
4619                         net_dma_rebalance(net_dma);
4620                 }
4621                 break;
4622         default:
4623                 break;
4624         }
4625         spin_unlock(&net_dma->lock);
4626
4627         return ack;
4628 }
4629
4630 /**
4631  * netdev_dma_regiser - register the networking subsystem as a DMA client
4632  */
4633 static int __init netdev_dma_register(void)
4634 {
4635         net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4636                                                                 GFP_KERNEL);
4637         if (unlikely(!net_dma.channels)) {
4638                 printk(KERN_NOTICE
4639                                 "netdev_dma: no memory for net_dma.channels\n");
4640                 return -ENOMEM;
4641         }
4642         spin_lock_init(&net_dma.lock);
4643         dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4644         dma_async_client_register(&net_dma.client);
4645         dma_async_client_chan_request(&net_dma.client);
4646         return 0;
4647 }
4648
4649 #else
4650 static int __init netdev_dma_register(void) { return -ENODEV; }
4651 #endif /* CONFIG_NET_DMA */
4652
4653 /**
4654  *      netdev_compute_feature - compute conjunction of two feature sets
4655  *      @all: first feature set
4656  *      @one: second feature set
4657  *
4658  *      Computes a new feature set after adding a device with feature set
4659  *      @one to the master device with current feature set @all.  Returns
4660  *      the new feature set.
4661  */
4662 int netdev_compute_features(unsigned long all, unsigned long one)
4663 {
4664         /* if device needs checksumming, downgrade to hw checksumming */
4665         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4666                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4667
4668         /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4669         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4670                 all ^= NETIF_F_HW_CSUM
4671                         | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4672
4673         if (one & NETIF_F_GSO)
4674                 one |= NETIF_F_GSO_SOFTWARE;
4675         one |= NETIF_F_GSO;
4676
4677         /* If even one device supports robust GSO, enable it for all. */
4678         if (one & NETIF_F_GSO_ROBUST)
4679                 all |= NETIF_F_GSO_ROBUST;
4680
4681         all &= one | NETIF_F_LLTX;
4682
4683         if (!(all & NETIF_F_ALL_CSUM))
4684                 all &= ~NETIF_F_SG;
4685         if (!(all & NETIF_F_SG))
4686                 all &= ~NETIF_F_GSO_MASK;
4687
4688         return all;
4689 }
4690 EXPORT_SYMBOL(netdev_compute_features);
4691
4692 static struct hlist_head *netdev_create_hash(void)
4693 {
4694         int i;
4695         struct hlist_head *hash;
4696
4697         hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4698         if (hash != NULL)
4699                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4700                         INIT_HLIST_HEAD(&hash[i]);
4701
4702         return hash;
4703 }
4704
4705 /* Initialize per network namespace state */
4706 static int __net_init netdev_init(struct net *net)
4707 {
4708         INIT_LIST_HEAD(&net->dev_base_head);
4709
4710         net->dev_name_head = netdev_create_hash();
4711         if (net->dev_name_head == NULL)
4712                 goto err_name;
4713
4714         net->dev_index_head = netdev_create_hash();
4715         if (net->dev_index_head == NULL)
4716                 goto err_idx;
4717
4718         return 0;
4719
4720 err_idx:
4721         kfree(net->dev_name_head);
4722 err_name:
4723         return -ENOMEM;
4724 }
4725
4726 char *netdev_drivername(struct net_device *dev, char *buffer, int len)
4727 {
4728         struct device_driver *driver;
4729         struct device *parent;
4730
4731         if (len <= 0 || !buffer)
4732                 return buffer;
4733         buffer[0] = 0;
4734
4735         parent = dev->dev.parent;
4736
4737         if (!parent)
4738                 return buffer;
4739
4740         driver = parent->driver;
4741         if (driver && driver->name)
4742                 strlcpy(buffer, driver->name, len);
4743         return buffer;
4744 }
4745
4746 static void __net_exit netdev_exit(struct net *net)
4747 {
4748         kfree(net->dev_name_head);
4749         kfree(net->dev_index_head);
4750 }
4751
4752 static struct pernet_operations __net_initdata netdev_net_ops = {
4753         .init = netdev_init,
4754         .exit = netdev_exit,
4755 };
4756
4757 static void __net_exit default_device_exit(struct net *net)
4758 {
4759         struct net_device *dev, *next;
4760         /*
4761          * Push all migratable of the network devices back to the
4762          * initial network namespace
4763          */
4764         rtnl_lock();
4765         for_each_netdev_safe(net, dev, next) {
4766                 int err;
4767                 char fb_name[IFNAMSIZ];
4768
4769                 /* Ignore unmoveable devices (i.e. loopback) */
4770                 if (dev->features & NETIF_F_NETNS_LOCAL)
4771                         continue;
4772
4773                 /* Push remaing network devices to init_net */
4774                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4775                 err = dev_change_net_namespace(dev, &init_net, fb_name);
4776                 if (err) {
4777                         printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4778                                 __func__, dev->name, err);
4779                         BUG();
4780                 }
4781         }
4782         rtnl_unlock();
4783 }
4784
4785 static struct pernet_operations __net_initdata default_device_ops = {
4786         .exit = default_device_exit,
4787 };
4788
4789 /*
4790  *      Initialize the DEV module. At boot time this walks the device list and
4791  *      unhooks any devices that fail to initialise (normally hardware not
4792  *      present) and leaves us with a valid list of present and active devices.
4793  *
4794  */
4795
4796 /*
4797  *       This is called single threaded during boot, so no need
4798  *       to take the rtnl semaphore.
4799  */
4800 static int __init net_dev_init(void)
4801 {
4802         int i, rc = -ENOMEM;
4803
4804         BUG_ON(!dev_boot_phase);
4805
4806         if (dev_proc_init())
4807                 goto out;
4808
4809         if (netdev_kobject_init())
4810                 goto out;
4811
4812         INIT_LIST_HEAD(&ptype_all);
4813         for (i = 0; i < PTYPE_HASH_SIZE; i++)
4814                 INIT_LIST_HEAD(&ptype_base[i]);
4815
4816         if (register_pernet_subsys(&netdev_net_ops))
4817                 goto out;
4818
4819         if (register_pernet_device(&default_device_ops))
4820                 goto out;
4821
4822         /*
4823          *      Initialise the packet receive queues.
4824          */
4825
4826         for_each_possible_cpu(i) {
4827                 struct softnet_data *queue;
4828
4829                 queue = &per_cpu(softnet_data, i);
4830                 skb_queue_head_init(&queue->input_pkt_queue);
4831                 queue->completion_queue = NULL;
4832                 INIT_LIST_HEAD(&queue->poll_list);
4833
4834                 queue->backlog.poll = process_backlog;
4835                 queue->backlog.weight = weight_p;
4836         }
4837
4838         netdev_dma_register();
4839
4840         dev_boot_phase = 0;
4841
4842         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4843         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4844
4845         hotcpu_notifier(dev_cpu_callback, 0);
4846         dst_init();
4847         dev_mcast_init();
4848         rc = 0;
4849 out:
4850         return rc;
4851 }
4852
4853 subsys_initcall(net_dev_init);
4854
4855 EXPORT_SYMBOL(__dev_get_by_index);
4856 EXPORT_SYMBOL(__dev_get_by_name);
4857 EXPORT_SYMBOL(__dev_remove_pack);
4858 EXPORT_SYMBOL(dev_valid_name);
4859 EXPORT_SYMBOL(dev_add_pack);
4860 EXPORT_SYMBOL(dev_alloc_name);
4861 EXPORT_SYMBOL(dev_close);
4862 EXPORT_SYMBOL(dev_get_by_flags);
4863 EXPORT_SYMBOL(dev_get_by_index);
4864 EXPORT_SYMBOL(dev_get_by_name);
4865 EXPORT_SYMBOL(dev_open);
4866 EXPORT_SYMBOL(dev_queue_xmit);
4867 EXPORT_SYMBOL(dev_remove_pack);
4868 EXPORT_SYMBOL(dev_set_allmulti);
4869 EXPORT_SYMBOL(dev_set_promiscuity);
4870 EXPORT_SYMBOL(dev_change_flags);
4871 EXPORT_SYMBOL(dev_set_mtu);
4872 EXPORT_SYMBOL(dev_set_mac_address);
4873 EXPORT_SYMBOL(free_netdev);
4874 EXPORT_SYMBOL(netdev_boot_setup_check);
4875 EXPORT_SYMBOL(netdev_set_master);
4876 EXPORT_SYMBOL(netdev_state_change);
4877 EXPORT_SYMBOL(netif_receive_skb);
4878 EXPORT_SYMBOL(netif_rx);
4879 EXPORT_SYMBOL(register_gifconf);
4880 EXPORT_SYMBOL(register_netdevice);
4881 EXPORT_SYMBOL(register_netdevice_notifier);
4882 EXPORT_SYMBOL(skb_checksum_help);
4883 EXPORT_SYMBOL(synchronize_net);
4884 EXPORT_SYMBOL(unregister_netdevice);
4885 EXPORT_SYMBOL(unregister_netdevice_notifier);
4886 EXPORT_SYMBOL(net_enable_timestamp);
4887 EXPORT_SYMBOL(net_disable_timestamp);
4888 EXPORT_SYMBOL(dev_get_flags);
4889
4890 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4891 EXPORT_SYMBOL(br_handle_frame_hook);
4892 EXPORT_SYMBOL(br_fdb_get_hook);
4893 EXPORT_SYMBOL(br_fdb_put_hook);
4894 #endif
4895
4896 #ifdef CONFIG_KMOD
4897 EXPORT_SYMBOL(dev_load);
4898 #endif
4899
4900 EXPORT_PER_CPU_SYMBOL(softnet_data);