www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/net/mv643xx_eth.c
Merge git://git.infradead.org/mtd-2.6
[linux-2.6-omap-h63xx.git] / drivers / net / mv643xx_eth.c
index 0a18b9e96da152db6c55eaa0026cc578863226eb..b9dcdbd369f87b87e5e9e12650e50d7ada6d9465 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/etherdevice.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
-#include <linux/mii.h>
+#include <linux/phy.h>
 #include <linux/mv643xx_eth.h>
 #include <asm/io.h>
 #include <asm/types.h>
 #include <asm/system.h>
 
 static char mv643xx_eth_driver_name[] = "mv643xx_eth";
-static char mv643xx_eth_driver_version[] = "1.3";
+static char mv643xx_eth_driver_version[] = "1.4";
 
-#define MV643XX_ETH_CHECKSUM_OFFLOAD_TX
-#define MV643XX_ETH_NAPI
-#define MV643XX_ETH_TX_FAST_REFILL
-
-#ifdef MV643XX_ETH_CHECKSUM_OFFLOAD_TX
-#define MAX_DESCS_PER_SKB      (MAX_SKB_FRAGS + 1)
-#else
-#define MAX_DESCS_PER_SKB      1
-#endif
 
 /*
  * Registers shared between all ports.
  */
 #define PHY_ADDR                       0x0000
 #define SMI_REG                                0x0004
+#define  SMI_BUSY                      0x10000000
+#define  SMI_READ_VALID                        0x08000000
+#define  SMI_OPCODE_READ               0x04000000
+#define  SMI_OPCODE_WRITE              0x00000000
+#define ERR_INT_CAUSE                  0x0080
+#define  ERR_INT_SMI_DONE              0x00000010
+#define ERR_INT_MASK                   0x0084
 #define WINDOW_BASE(w)                 (0x0200 + ((w) << 3))
 #define WINDOW_SIZE(w)                 (0x0204 + ((w) << 3))
 #define WINDOW_REMAP_HIGH(w)           (0x0280 + ((w) << 2))
@@ -104,16 +103,12 @@ static char mv643xx_eth_driver_version[] = "1.3";
 #define TX_BW_MTU(p)                   (0x0458 + ((p) << 10))
 #define TX_BW_BURST(p)                 (0x045c + ((p) << 10))
 #define INT_CAUSE(p)                   (0x0460 + ((p) << 10))
-#define  INT_TX_END_0                  0x00080000
 #define  INT_TX_END                    0x07f80000
-#define  INT_RX                                0x0007fbfc
+#define  INT_RX                                0x000003fc
 #define  INT_EXT                       0x00000002
 #define INT_CAUSE_EXT(p)               (0x0464 + ((p) << 10))
-#define  INT_EXT_LINK                  0x00100000
-#define  INT_EXT_PHY                   0x00010000
-#define  INT_EXT_TX_ERROR_0            0x00000100
-#define  INT_EXT_TX_0                  0x00000001
-#define  INT_EXT_TX                    0x0000ffff
+#define  INT_EXT_LINK_PHY              0x00110000
+#define  INT_EXT_TX                    0x000000ff
 #define INT_MASK(p)                    (0x0468 + ((p) << 10))
 #define INT_MASK_EXT(p)                        (0x046c + ((p) << 10))
 #define TX_FIFO_URGENT_THRESHOLD(p)    (0x0474 + ((p) << 10))
@@ -171,8 +166,8 @@ static char mv643xx_eth_driver_version[] = "1.3";
 #define FORCE_LINK_PASS                                (1 << 1)
 #define SERIAL_PORT_ENABLE                     (1 << 0)
 
-#define DEFAULT_RX_QUEUE_SIZE          400
-#define DEFAULT_TX_QUEUE_SIZE          800
+#define DEFAULT_RX_QUEUE_SIZE          128
+#define DEFAULT_TX_QUEUE_SIZE          256
 
 
 /*
@@ -249,9 +244,23 @@ struct mv643xx_eth_shared_private {
        void __iomem *base;
 
        /*
-        * Protects access to SMI_REG, which is shared between ports.
+        * Points at the right SMI instance to use.
+        */
+       struct mv643xx_eth_shared_private *smi;
+
+       /*
+        * Provides access to local SMI interface.
         */
-       spinlock_t phy_lock;
+       struct mii_bus *smi_bus;
+
+       /*
+        * If we have access to the error interrupt pin (which is
+        * somewhat misnamed as it not only reflects internal errors
+        * but also reflects SMI completion), use that to wait for
+        * SMI access completion instead of polling the SMI busy bit.
+        */
+       int err_interrupt;
+       wait_queue_head_t smi_busy_wait;
 
        /*
         * Per-port MBUS window access register value.
@@ -263,9 +272,13 @@ struct mv643xx_eth_shared_private {
         */
        unsigned int t_clk;
        int extended_rx_coal_limit;
-       int tx_bw_control_moved;
+       int tx_bw_control;
 };
 
+#define TX_BW_CONTROL_ABSENT           0
+#define TX_BW_CONTROL_OLD_LAYOUT       1
+#define TX_BW_CONTROL_NEW_LAYOUT       2
+
 
 /* per-port *****************************************************************/
 struct mib_counters {
@@ -314,8 +327,6 @@ struct rx_queue {
        dma_addr_t rx_desc_dma;
        int rx_desc_area_size;
        struct sk_buff **rx_skb;
-
-       struct timer_list rx_oom;
 };
 
 struct tx_queue {
@@ -330,7 +341,12 @@ struct tx_queue {
        struct tx_desc *tx_desc_area;
        dma_addr_t tx_desc_dma;
        int tx_desc_area_size;
-       struct sk_buff **tx_skb;
+
+       struct sk_buff_head tx_skb;
+
+       unsigned long tx_packets;
+       unsigned long tx_bytes;
+       unsigned long tx_dropped;
 };
 
 struct mv643xx_eth_private {
@@ -339,14 +355,24 @@ struct mv643xx_eth_private {
 
        struct net_device *dev;
 
-       struct mv643xx_eth_shared_private *shared_smi;
-       int phy_addr;
-
-       spinlock_t lock;
+       struct phy_device *phy;
 
+       struct timer_list mib_counters_timer;
+       spinlock_t mib_counters_lock;
        struct mib_counters mib_counters;
+
        struct work_struct tx_timeout_task;
-       struct mii_if_info mii;
+
+       struct napi_struct napi;
+       u8 work_link;
+       u8 work_tx;
+       u8 work_tx_end;
+       u8 work_rx;
+       u8 work_rx_refill;
+       u8 work_rx_oom;
+
+       int skb_size;
+       struct sk_buff_head rx_recycle;
 
        /*
         * RX state.
@@ -354,9 +380,8 @@ struct mv643xx_eth_private {
        int default_rx_ring_size;
        unsigned long rx_desc_sram_addr;
        int rx_desc_sram_size;
-       u8 rxq_mask;
-       int rxq_primary;
-       struct napi_struct napi;
+       int rxq_count;
+       struct timer_list rx_oom;
        struct rx_queue rxq[8];
 
        /*
@@ -365,12 +390,8 @@ struct mv643xx_eth_private {
        int default_tx_ring_size;
        unsigned long tx_desc_sram_addr;
        int tx_desc_sram_size;
-       u8 txq_mask;
-       int txq_primary;
+       int txq_count;
        struct tx_queue txq[8];
-#ifdef MV643XX_ETH_TX_FAST_REFILL
-       int tx_clean_threshold;
-#endif
 };
 
 
@@ -440,94 +461,21 @@ static void txq_disable(struct tx_queue *txq)
                udelay(10);
 }
 
-static void __txq_maybe_wake(struct tx_queue *txq)
+static void txq_maybe_wake(struct tx_queue *txq)
 {
        struct mv643xx_eth_private *mp = txq_to_mp(txq);
+       struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index);
 
-       /*
-        * netif_{stop,wake}_queue() flow control only applies to
-        * the primary queue.
-        */
-       BUG_ON(txq->index != mp->txq_primary);
-
-       if (txq->tx_ring_size - txq->tx_desc_count >= MAX_DESCS_PER_SKB)
-               netif_wake_queue(mp->dev);
-}
-
-
-/* rx ***********************************************************************/
-static void txq_reclaim(struct tx_queue *txq, int force);
-
-static void rxq_refill(struct rx_queue *rxq)
-{
-       struct mv643xx_eth_private *mp = rxq_to_mp(rxq);
-       unsigned long flags;
-
-       spin_lock_irqsave(&mp->lock, flags);
-
-       while (rxq->rx_desc_count < rxq->rx_ring_size) {
-               int skb_size;
-               struct sk_buff *skb;
-               int unaligned;
-               int rx;
-
-               /*
-                * Reserve 2+14 bytes for an ethernet header (the
-                * hardware automatically prepends 2 bytes of dummy
-                * data to each received packet), 16 bytes for up to
-                * four VLAN tags, and 4 bytes for the trailing FCS
-                * -- 36 bytes total.
-                */
-               skb_size = mp->dev->mtu + 36;
-
-               /*
-                * Make sure that the skb size is a multiple of 8
-                * bytes, as the lower three bits of the receive
-                * descriptor's buffer size field are ignored by
-                * the hardware.
-                */
-               skb_size = (skb_size + 7) & ~7;
-
-               skb = dev_alloc_skb(skb_size + dma_get_cache_alignment() - 1);
-               if (skb == NULL)
-                       break;
-
-               unaligned = (u32)skb->data & (dma_get_cache_alignment() - 1);
-               if (unaligned)
-                       skb_reserve(skb, dma_get_cache_alignment() - unaligned);
-
-               rxq->rx_desc_count++;
-               rx = rxq->rx_used_desc;
-               rxq->rx_used_desc = (rx + 1) % rxq->rx_ring_size;
-
-               rxq->rx_desc_area[rx].buf_ptr = dma_map_single(NULL, skb->data,
-                                               skb_size, DMA_FROM_DEVICE);
-               rxq->rx_desc_area[rx].buf_size = skb_size;
-               rxq->rx_skb[rx] = skb;
-               wmb();
-               rxq->rx_desc_area[rx].cmd_sts = BUFFER_OWNED_BY_DMA |
-                                               RX_ENABLE_INTERRUPT;
-               wmb();
-
-               /*
-                * The hardware automatically prepends 2 bytes of
-                * dummy data to each received packet, so that the
-                * IP header ends up 16-byte aligned.
-                */
-               skb_reserve(skb, 2);
+       if (netif_tx_queue_stopped(nq)) {
+               __netif_tx_lock(nq, smp_processor_id());
+               if (txq->tx_ring_size - txq->tx_desc_count >= MAX_SKB_FRAGS + 1)
+                       netif_tx_wake_queue(nq);
+               __netif_tx_unlock(nq);
        }
-
-       if (rxq->rx_desc_count != rxq->rx_ring_size)
-               mod_timer(&rxq->rx_oom, jiffies + (HZ / 10));
-
-       spin_unlock_irqrestore(&mp->lock, flags);
 }
 
-static inline void rxq_refill_timer_wrapper(unsigned long data)
-{
-       rxq_refill((struct rx_queue *)data);
-}
 
+/* rx napi ******************************************************************/
 static int rxq_process(struct rx_queue *rxq, int budget)
 {
        struct mv643xx_eth_private *mp = rxq_to_mp(rxq);
@@ -539,31 +487,31 @@ static int rxq_process(struct rx_queue *rxq, int budget)
                struct rx_desc *rx_desc;
                unsigned int cmd_sts;
                struct sk_buff *skb;
-               unsigned long flags;
-
-               spin_lock_irqsave(&mp->lock, flags);
+               u16 byte_cnt;
 
                rx_desc = &rxq->rx_desc_area[rxq->rx_curr_desc];
 
                cmd_sts = rx_desc->cmd_sts;
-               if (cmd_sts & BUFFER_OWNED_BY_DMA) {
-                       spin_unlock_irqrestore(&mp->lock, flags);
+               if (cmd_sts & BUFFER_OWNED_BY_DMA)
                        break;
-               }
                rmb();
 
                skb = rxq->rx_skb[rxq->rx_curr_desc];
                rxq->rx_skb[rxq->rx_curr_desc] = NULL;
 
-               rxq->rx_curr_desc = (rxq->rx_curr_desc + 1) % rxq->rx_ring_size;
+               rxq->rx_curr_desc++;
+               if (rxq->rx_curr_desc == rxq->rx_ring_size)
+                       rxq->rx_curr_desc = 0;
 
-               spin_unlock_irqrestore(&mp->lock, flags);
-
-               dma_unmap_single(NULL, rx_desc->buf_ptr + 2,
+               dma_unmap_single(NULL, rx_desc->buf_ptr,
                                 rx_desc->buf_size, DMA_FROM_DEVICE);
                rxq->rx_desc_count--;
                rx++;
 
+               mp->work_rx_refill |= 1 << rxq->index;
+
+               byte_cnt = rx_desc->byte_cnt;
+
                /*
                 * Update statistics.
                 *
@@ -573,7 +521,7 @@ static int rxq_process(struct rx_queue *rxq, int budget)
                 * byte CRC at the end of the packet (which we do count).
                 */
                stats->rx_packets++;
-               stats->rx_bytes += rx_desc->byte_cnt - 2;
+               stats->rx_bytes += byte_cnt - 2;
 
                /*
                 * In case we received a packet without first / last bits
@@ -596,72 +544,84 @@ static int rxq_process(struct rx_queue *rxq, int budget)
                        if (cmd_sts & ERROR_SUMMARY)
                                stats->rx_errors++;
 
-                       dev_kfree_skb_irq(skb);
+                       dev_kfree_skb(skb);
                } else {
                        /*
                         * The -4 is for the CRC in the trailer of the
                         * received packet
                         */
-                       skb_put(skb, rx_desc->byte_cnt - 2 - 4);
+                       skb_put(skb, byte_cnt - 2 - 4);
 
-                       if (cmd_sts & LAYER_4_CHECKSUM_OK) {
+                       if (cmd_sts & LAYER_4_CHECKSUM_OK)
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
-                               skb->csum = htons(
-                                       (cmd_sts & 0x0007fff8) >> 3);
-                       }
                        skb->protocol = eth_type_trans(skb, mp->dev);
-#ifdef MV643XX_ETH_NAPI
                        netif_receive_skb(skb);
-#else
-                       netif_rx(skb);
-#endif
                }
 
                mp->dev->last_rx = jiffies;
        }
 
-       rxq_refill(rxq);
+       if (rx < budget)
+               mp->work_rx &= ~(1 << rxq->index);
 
        return rx;
 }
 
-#ifdef MV643XX_ETH_NAPI
-static int mv643xx_eth_poll(struct napi_struct *napi, int budget)
+static int rxq_refill(struct rx_queue *rxq, int budget)
 {
-       struct mv643xx_eth_private *mp;
-       int rx;
-       int i;
+       struct mv643xx_eth_private *mp = rxq_to_mp(rxq);
+       int refilled;
 
-       mp = container_of(napi, struct mv643xx_eth_private, napi);
+       refilled = 0;
+       while (refilled < budget && rxq->rx_desc_count < rxq->rx_ring_size) {
+               struct sk_buff *skb;
+               int unaligned;
+               int rx;
+
+               skb = __skb_dequeue(&mp->rx_recycle);
+               if (skb == NULL)
+                       skb = dev_alloc_skb(mp->skb_size +
+                                           dma_get_cache_alignment() - 1);
 
-#ifdef MV643XX_ETH_TX_FAST_REFILL
-       if (++mp->tx_clean_threshold > 5) {
-               mp->tx_clean_threshold = 0;
-               for (i = 0; i < 8; i++)
-                       if (mp->txq_mask & (1 << i))
-                               txq_reclaim(mp->txq + i, 0);
-
-               if (netif_carrier_ok(mp->dev)) {
-                       spin_lock_irq(&mp->lock);
-                       __txq_maybe_wake(mp->txq + mp->txq_primary);
-                       spin_unlock_irq(&mp->lock);
+               if (skb == NULL) {
+                       mp->work_rx_oom |= 1 << rxq->index;
+                       goto oom;
                }
-       }
-#endif
 
-       rx = 0;
-       for (i = 7; rx < budget && i >= 0; i--)
-               if (mp->rxq_mask & (1 << i))
-                       rx += rxq_process(mp->rxq + i, budget - rx);
+               unaligned = (u32)skb->data & (dma_get_cache_alignment() - 1);
+               if (unaligned)
+                       skb_reserve(skb, dma_get_cache_alignment() - unaligned);
 
-       if (rx < budget) {
-               netif_rx_complete(mp->dev, napi);
-               wrl(mp, INT_MASK(mp->port_num), INT_TX_END | INT_RX | INT_EXT);
+               refilled++;
+               rxq->rx_desc_count++;
+
+               rx = rxq->rx_used_desc++;
+               if (rxq->rx_used_desc == rxq->rx_ring_size)
+                       rxq->rx_used_desc = 0;
+
+               rxq->rx_desc_area[rx].buf_ptr = dma_map_single(NULL, skb->data,
+                                               mp->skb_size, DMA_FROM_DEVICE);
+               rxq->rx_desc_area[rx].buf_size = mp->skb_size;
+               rxq->rx_skb[rx] = skb;
+               wmb();
+               rxq->rx_desc_area[rx].cmd_sts = BUFFER_OWNED_BY_DMA |
+                                               RX_ENABLE_INTERRUPT;
+               wmb();
+
+               /*
+                * The hardware automatically prepends 2 bytes of
+                * dummy data to each received packet, so that the
+                * IP header ends up 16-byte aligned.
+                */
+               skb_reserve(skb, 2);
        }
 
-       return rx;
+       if (refilled < budget)
+               mp->work_rx_refill &= ~(1 << rxq->index);
+
+oom:
+       return refilled;
 }
-#endif
 
 
 /* tx ***********************************************************************/
@@ -684,8 +644,9 @@ static int txq_alloc_desc_index(struct tx_queue *txq)
 
        BUG_ON(txq->tx_desc_count >= txq->tx_ring_size);
 
-       tx_desc_curr = txq->tx_curr_desc;
-       txq->tx_curr_desc = (tx_desc_curr + 1) % txq->tx_ring_size;
+       tx_desc_curr = txq->tx_curr_desc++;
+       if (txq->tx_curr_desc == txq->tx_ring_size)
+               txq->tx_curr_desc = 0;
 
        BUG_ON(txq->tx_curr_desc == txq->tx_used_desc);
 
@@ -714,10 +675,8 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
                        desc->cmd_sts = BUFFER_OWNED_BY_DMA |
                                        ZERO_PADDING | TX_LAST_DESC |
                                        TX_ENABLE_INTERRUPT;
-                       txq->tx_skb[tx_index] = skb;
                } else {
                        desc->cmd_sts = BUFFER_OWNED_BY_DMA;
-                       txq->tx_skb[tx_index] = NULL;
                }
 
                desc->l4i_chk = 0;
@@ -734,144 +693,228 @@ static inline __be16 sum16_as_be(__sum16 sum)
        return (__force __be16)sum;
 }
 
-static void txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
+static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 {
        struct mv643xx_eth_private *mp = txq_to_mp(txq);
        int nr_frags = skb_shinfo(skb)->nr_frags;
        int tx_index;
        struct tx_desc *desc;
        u32 cmd_sts;
+       u16 l4i_chk;
        int length;
 
        cmd_sts = TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
-
-       tx_index = txq_alloc_desc_index(txq);
-       desc = &txq->tx_desc_area[tx_index];
-
-       if (nr_frags) {
-               txq_submit_frag_skb(txq, skb);
-
-               length = skb_headlen(skb);
-               txq->tx_skb[tx_index] = NULL;
-       } else {
-               cmd_sts |= ZERO_PADDING | TX_LAST_DESC | TX_ENABLE_INTERRUPT;
-               length = skb->len;
-               txq->tx_skb[tx_index] = skb;
-       }
-
-       desc->byte_cnt = length;
-       desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
+       l4i_chk = 0;
 
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
-               int mac_hdr_len;
+               int tag_bytes;
 
                BUG_ON(skb->protocol != htons(ETH_P_IP) &&
                       skb->protocol != htons(ETH_P_8021Q));
 
-               cmd_sts |= GEN_TCP_UDP_CHECKSUM |
-                          GEN_IP_V4_CHECKSUM   |
-                          ip_hdr(skb)->ihl << TX_IHL_SHIFT;
+               tag_bytes = (void *)ip_hdr(skb) - (void *)skb->data - ETH_HLEN;
+               if (unlikely(tag_bytes & ~12)) {
+                       if (skb_checksum_help(skb) == 0)
+                               goto no_csum;
+                       kfree_skb(skb);
+                       return 1;
+               }
 
-               mac_hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
-               switch (mac_hdr_len - ETH_HLEN) {
-               case 0:
-                       break;
-               case 4:
+               if (tag_bytes & 4)
                        cmd_sts |= MAC_HDR_EXTRA_4_BYTES;
-                       break;
-               case 8:
+               if (tag_bytes & 8)
                        cmd_sts |= MAC_HDR_EXTRA_8_BYTES;
-                       break;
-               case 12:
-                       cmd_sts |= MAC_HDR_EXTRA_4_BYTES;
-                       cmd_sts |= MAC_HDR_EXTRA_8_BYTES;
-                       break;
-               default:
-                       if (net_ratelimit())
-                               dev_printk(KERN_ERR, &txq_to_mp(txq)->dev->dev,
-                                  "mac header length is %d?!\n", mac_hdr_len);
-                       break;
-               }
+
+               cmd_sts |= GEN_TCP_UDP_CHECKSUM |
+                          GEN_IP_V4_CHECKSUM   |
+                          ip_hdr(skb)->ihl << TX_IHL_SHIFT;
 
                switch (ip_hdr(skb)->protocol) {
                case IPPROTO_UDP:
                        cmd_sts |= UDP_FRAME;
-                       desc->l4i_chk = ntohs(sum16_as_be(udp_hdr(skb)->check));
+                       l4i_chk = ntohs(sum16_as_be(udp_hdr(skb)->check));
                        break;
                case IPPROTO_TCP:
-                       desc->l4i_chk = ntohs(sum16_as_be(tcp_hdr(skb)->check));
+                       l4i_chk = ntohs(sum16_as_be(tcp_hdr(skb)->check));
                        break;
                default:
                        BUG();
                }
        } else {
+no_csum:
                /* Errata BTS #50, IHL must be 5 if no HW checksum */
                cmd_sts |= 5 << TX_IHL_SHIFT;
-               desc->l4i_chk = 0;
        }
 
+       tx_index = txq_alloc_desc_index(txq);
+       desc = &txq->tx_desc_area[tx_index];
+
+       if (nr_frags) {
+               txq_submit_frag_skb(txq, skb);
+               length = skb_headlen(skb);
+       } else {
+               cmd_sts |= ZERO_PADDING | TX_LAST_DESC | TX_ENABLE_INTERRUPT;
+               length = skb->len;
+       }
+
+       desc->l4i_chk = l4i_chk;
+       desc->byte_cnt = length;
+       desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
+
+       __skb_queue_tail(&txq->tx_skb, skb);
+
        /* ensure all other descriptors are written before first cmd_sts */
        wmb();
        desc->cmd_sts = cmd_sts;
 
-       /* clear TX_END interrupt status */
-       wrl(mp, INT_CAUSE(mp->port_num), ~(INT_TX_END_0 << txq->index));
-       rdl(mp, INT_CAUSE(mp->port_num));
+       /* clear TX_END status */
+       mp->work_tx_end &= ~(1 << txq->index);
 
        /* ensure all descriptors are written before poking hardware */
        wmb();
        txq_enable(txq);
 
        txq->tx_desc_count += nr_frags + 1;
+
+       return 0;
 }
 
 static int mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
-       struct net_device_stats *stats = &dev->stats;
+       int queue;
        struct tx_queue *txq;
-       unsigned long flags;
+       struct netdev_queue *nq;
+
+       queue = skb_get_queue_mapping(skb);
+       txq = mp->txq + queue;
+       nq = netdev_get_tx_queue(dev, queue);
 
        if (has_tiny_unaligned_frags(skb) && __skb_linearize(skb)) {
-               stats->tx_dropped++;
+               txq->tx_dropped++;
                dev_printk(KERN_DEBUG, &dev->dev,
                           "failed to linearize skb with tiny "
                           "unaligned fragment\n");
                return NETDEV_TX_BUSY;
        }
 
-       spin_lock_irqsave(&mp->lock, flags);
-
-       txq = mp->txq + mp->txq_primary;
-
-       if (txq->tx_ring_size - txq->tx_desc_count < MAX_DESCS_PER_SKB) {
-               spin_unlock_irqrestore(&mp->lock, flags);
-               if (txq->index == mp->txq_primary && net_ratelimit())
-                       dev_printk(KERN_ERR, &dev->dev,
-                                  "primary tx queue full?!\n");
+       if (txq->tx_ring_size - txq->tx_desc_count < MAX_SKB_FRAGS + 1) {
+               if (net_ratelimit())
+                       dev_printk(KERN_ERR, &dev->dev, "tx queue full?!\n");
                kfree_skb(skb);
                return NETDEV_TX_OK;
        }
 
-       txq_submit_skb(txq, skb);
-       stats->tx_bytes += skb->len;
-       stats->tx_packets++;
-       dev->trans_start = jiffies;
-
-       if (txq->index == mp->txq_primary) {
+       if (!txq_submit_skb(txq, skb)) {
                int entries_left;
 
+               txq->tx_bytes += skb->len;
+               txq->tx_packets++;
+               dev->trans_start = jiffies;
+
                entries_left = txq->tx_ring_size - txq->tx_desc_count;
-               if (entries_left < MAX_DESCS_PER_SKB)
-                       netif_stop_queue(dev);
+               if (entries_left < MAX_SKB_FRAGS + 1)
+                       netif_tx_stop_queue(nq);
        }
 
-       spin_unlock_irqrestore(&mp->lock, flags);
-
        return NETDEV_TX_OK;
 }
 
 
+/* tx napi ******************************************************************/
+static void txq_kick(struct tx_queue *txq)
+{
+       struct mv643xx_eth_private *mp = txq_to_mp(txq);
+       struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index);
+       u32 hw_desc_ptr;
+       u32 expected_ptr;
+
+       __netif_tx_lock(nq, smp_processor_id());
+
+       if (rdl(mp, TXQ_COMMAND(mp->port_num)) & (1 << txq->index))
+               goto out;
+
+       hw_desc_ptr = rdl(mp, TXQ_CURRENT_DESC_PTR(mp->port_num, txq->index));
+       expected_ptr = (u32)txq->tx_desc_dma +
+                               txq->tx_curr_desc * sizeof(struct tx_desc);
+
+       if (hw_desc_ptr != expected_ptr)
+               txq_enable(txq);
+
+out:
+       __netif_tx_unlock(nq);
+
+       mp->work_tx_end &= ~(1 << txq->index);
+}
+
+static int txq_reclaim(struct tx_queue *txq, int budget, int force)
+{
+       struct mv643xx_eth_private *mp = txq_to_mp(txq);
+       struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index);
+       int reclaimed;
+
+       __netif_tx_lock(nq, smp_processor_id());
+
+       reclaimed = 0;
+       while (reclaimed < budget && txq->tx_desc_count > 0) {
+               int tx_index;
+               struct tx_desc *desc;
+               u32 cmd_sts;
+               struct sk_buff *skb;
+
+               tx_index = txq->tx_used_desc;
+               desc = &txq->tx_desc_area[tx_index];
+               cmd_sts = desc->cmd_sts;
+
+               if (cmd_sts & BUFFER_OWNED_BY_DMA) {
+                       if (!force)
+                               break;
+                       desc->cmd_sts = cmd_sts & ~BUFFER_OWNED_BY_DMA;
+               }
+
+               txq->tx_used_desc = tx_index + 1;
+               if (txq->tx_used_desc == txq->tx_ring_size)
+                       txq->tx_used_desc = 0;
+
+               reclaimed++;
+               txq->tx_desc_count--;
+
+               skb = NULL;
+               if (cmd_sts & TX_LAST_DESC)
+                       skb = __skb_dequeue(&txq->tx_skb);
+
+               if (cmd_sts & ERROR_SUMMARY) {
+                       dev_printk(KERN_INFO, &mp->dev->dev, "tx error\n");
+                       mp->dev->stats.tx_errors++;
+               }
+
+               if (cmd_sts & TX_FIRST_DESC) {
+                       dma_unmap_single(NULL, desc->buf_ptr,
+                                        desc->byte_cnt, DMA_TO_DEVICE);
+               } else {
+                       dma_unmap_page(NULL, desc->buf_ptr,
+                                      desc->byte_cnt, DMA_TO_DEVICE);
+               }
+
+               if (skb != NULL) {
+                       if (skb_queue_len(&mp->rx_recycle) <
+                                       mp->default_rx_ring_size &&
+                           skb_recycle_check(skb, mp->skb_size))
+                               __skb_queue_head(&mp->rx_recycle, skb);
+                       else
+                               dev_kfree_skb(skb);
+               }
+       }
+
+       __netif_tx_unlock(nq);
+
+       if (reclaimed < budget)
+               mp->work_tx &= ~(1 << txq->index);
+
+       return reclaimed;
+}
+
+
 /* tx rate control **********************************************************/
 /*
  * Set total maximum TX rate (shared by all TX queues for this port)
@@ -895,14 +938,17 @@ static void tx_set_rate(struct mv643xx_eth_private *mp, int rate, int burst)
        if (bucket_size > 65535)
                bucket_size = 65535;
 
-       if (mp->shared->tx_bw_control_moved) {
-               wrl(mp, TX_BW_RATE_MOVED(mp->port_num), token_rate);
-               wrl(mp, TX_BW_MTU_MOVED(mp->port_num), mtu);
-               wrl(mp, TX_BW_BURST_MOVED(mp->port_num), bucket_size);
-       } else {
+       switch (mp->shared->tx_bw_control) {
+       case TX_BW_CONTROL_OLD_LAYOUT:
                wrl(mp, TX_BW_RATE(mp->port_num), token_rate);
                wrl(mp, TX_BW_MTU(mp->port_num), mtu);
                wrl(mp, TX_BW_BURST(mp->port_num), bucket_size);
+               break;
+       case TX_BW_CONTROL_NEW_LAYOUT:
+               wrl(mp, TX_BW_RATE_MOVED(mp->port_num), token_rate);
+               wrl(mp, TX_BW_MTU_MOVED(mp->port_num), mtu);
+               wrl(mp, TX_BW_BURST_MOVED(mp->port_num), bucket_size);
+               break;
        }
 }
 
@@ -934,14 +980,21 @@ static void txq_set_fixed_prio_mode(struct tx_queue *txq)
        /*
         * Turn on fixed priority mode.
         */
-       if (mp->shared->tx_bw_control_moved)
-               off = TXQ_FIX_PRIO_CONF_MOVED(mp->port_num);
-       else
+       off = 0;
+       switch (mp->shared->tx_bw_control) {
+       case TX_BW_CONTROL_OLD_LAYOUT:
                off = TXQ_FIX_PRIO_CONF(mp->port_num);
+               break;
+       case TX_BW_CONTROL_NEW_LAYOUT:
+               off = TXQ_FIX_PRIO_CONF_MOVED(mp->port_num);
+               break;
+       }
 
-       val = rdl(mp, off);
-       val |= 1 << txq->index;
-       wrl(mp, off, val);
+       if (off) {
+               val = rdl(mp, off);
+               val |= 1 << txq->index;
+               wrl(mp, off, val);
+       }
 }
 
 static void txq_set_wrr(struct tx_queue *txq, int weight)
@@ -953,95 +1006,150 @@ static void txq_set_wrr(struct tx_queue *txq, int weight)
        /*
         * Turn off fixed priority mode.
         */
-       if (mp->shared->tx_bw_control_moved)
-               off = TXQ_FIX_PRIO_CONF_MOVED(mp->port_num);
-       else
+       off = 0;
+       switch (mp->shared->tx_bw_control) {
+       case TX_BW_CONTROL_OLD_LAYOUT:
                off = TXQ_FIX_PRIO_CONF(mp->port_num);
+               break;
+       case TX_BW_CONTROL_NEW_LAYOUT:
+               off = TXQ_FIX_PRIO_CONF_MOVED(mp->port_num);
+               break;
+       }
 
-       val = rdl(mp, off);
-       val &= ~(1 << txq->index);
-       wrl(mp, off, val);
+       if (off) {
+               val = rdl(mp, off);
+               val &= ~(1 << txq->index);
+               wrl(mp, off, val);
 
-       /*
-        * Configure WRR weight for this queue.
-        */
-       off = TXQ_BW_WRR_CONF(mp->port_num, txq->index);
+               /*
+                * Configure WRR weight for this queue.
+                */
+               off = TXQ_BW_WRR_CONF(mp->port_num, txq->index);
 
-       val = rdl(mp, off);
-       val = (val & ~0xff) | (weight & 0xff);
-       wrl(mp, off, val);
+               val = rdl(mp, off);
+               val = (val & ~0xff) | (weight & 0xff);
+               wrl(mp, off, val);
+       }
 }
 
 
 /* mii management interface *************************************************/
-#define SMI_BUSY               0x10000000
-#define SMI_READ_VALID         0x08000000
-#define SMI_OPCODE_READ                0x04000000
-#define SMI_OPCODE_WRITE       0x00000000
+static irqreturn_t mv643xx_eth_err_irq(int irq, void *dev_id)
+{
+       struct mv643xx_eth_shared_private *msp = dev_id;
 
-static void smi_reg_read(struct mv643xx_eth_private *mp, unsigned int addr,
-                        unsigned int reg, unsigned int *value)
+       if (readl(msp->base + ERR_INT_CAUSE) & ERR_INT_SMI_DONE) {
+               writel(~ERR_INT_SMI_DONE, msp->base + ERR_INT_CAUSE);
+               wake_up(&msp->smi_busy_wait);
+               return IRQ_HANDLED;
+       }
+
+       return IRQ_NONE;
+}
+
+static int smi_is_done(struct mv643xx_eth_shared_private *msp)
 {
-       void __iomem *smi_reg = mp->shared_smi->base + SMI_REG;
-       unsigned long flags;
-       int i;
+       return !(readl(msp->base + SMI_REG) & SMI_BUSY);
+}
 
-       /* the SMI register is a shared resource */
-       spin_lock_irqsave(&mp->shared_smi->phy_lock, flags);
+static int smi_wait_ready(struct mv643xx_eth_shared_private *msp)
+{
+       if (msp->err_interrupt == NO_IRQ) {
+               int i;
 
-       /* wait for the SMI register to become available */
-       for (i = 0; readl(smi_reg) & SMI_BUSY; i++) {
-               if (i == 1000) {
-                       printk("%s: PHY busy timeout\n", mp->dev->name);
-                       goto out;
+               for (i = 0; !smi_is_done(msp); i++) {
+                       if (i == 10)
+                               return -ETIMEDOUT;
+                       msleep(10);
                }
-               udelay(10);
+
+               return 0;
+       }
+
+       if (!smi_is_done(msp)) {
+               wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp),
+                                  msecs_to_jiffies(100));
+               if (!smi_is_done(msp))
+                       return -ETIMEDOUT;
+       }
+
+       return 0;
+}
+
+static int smi_bus_read(struct mii_bus *bus, int addr, int reg)
+{
+       struct mv643xx_eth_shared_private *msp = bus->priv;
+       void __iomem *smi_reg = msp->base + SMI_REG;
+       int ret;
+
+       if (smi_wait_ready(msp)) {
+               printk("mv643xx_eth: SMI bus busy timeout\n");
+               return -ETIMEDOUT;
        }
 
        writel(SMI_OPCODE_READ | (reg << 21) | (addr << 16), smi_reg);
 
-       /* now wait for the data to be valid */
-       for (i = 0; !(readl(smi_reg) & SMI_READ_VALID); i++) {
-               if (i == 1000) {
-                       printk("%s: PHY read timeout\n", mp->dev->name);
-                       goto out;
-               }
-               udelay(10);
+       if (smi_wait_ready(msp)) {
+               printk("mv643xx_eth: SMI bus busy timeout\n");
+               return -ETIMEDOUT;
        }
 
-       *value = readl(smi_reg) & 0xffff;
-out:
-       spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags);
+       ret = readl(smi_reg);
+       if (!(ret & SMI_READ_VALID)) {
+               printk("mv643xx_eth: SMI bus read not valid\n");
+               return -ENODEV;
+       }
+
+       return ret & 0xffff;
 }
 
-static void smi_reg_write(struct mv643xx_eth_private *mp,
-                         unsigned int addr,
-                         unsigned int reg, unsigned int value)
+static int smi_bus_write(struct mii_bus *bus, int addr, int reg, u16 val)
 {
-       void __iomem *smi_reg = mp->shared_smi->base + SMI_REG;
-       unsigned long flags;
-       int i;
+       struct mv643xx_eth_shared_private *msp = bus->priv;
+       void __iomem *smi_reg = msp->base + SMI_REG;
 
-       /* the SMI register is a shared resource */
-       spin_lock_irqsave(&mp->shared_smi->phy_lock, flags);
-
-       /* wait for the SMI register to become available */
-       for (i = 0; readl(smi_reg) & SMI_BUSY; i++) {
-               if (i == 1000) {
-                       printk("%s: PHY busy timeout\n", mp->dev->name);
-                       goto out;
-               }
-               udelay(10);
+       if (smi_wait_ready(msp)) {
+               printk("mv643xx_eth: SMI bus busy timeout\n");
+               return -ETIMEDOUT;
        }
 
        writel(SMI_OPCODE_WRITE | (reg << 21) |
-               (addr << 16) | (value & 0xffff), smi_reg);
-out:
-       spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags);
+               (addr << 16) | (val & 0xffff), smi_reg);
+
+       if (smi_wait_ready(msp)) {
+               printk("mv643xx_eth: SMI bus busy timeout\n");
+               return -ETIMEDOUT;
+       }
+
+       return 0;
 }
 
 
-/* mib counters *************************************************************/
+/* statistics ***************************************************************/
+static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *dev)
+{
+       struct mv643xx_eth_private *mp = netdev_priv(dev);
+       struct net_device_stats *stats = &dev->stats;
+       unsigned long tx_packets = 0;
+       unsigned long tx_bytes = 0;
+       unsigned long tx_dropped = 0;
+       int i;
+
+       for (i = 0; i < mp->txq_count; i++) {
+               struct tx_queue *txq = mp->txq + i;
+
+               tx_packets += txq->tx_packets;
+               tx_bytes += txq->tx_bytes;
+               tx_dropped += txq->tx_dropped;
+       }
+
+       stats->tx_packets = tx_packets;
+       stats->tx_bytes = tx_bytes;
+       stats->tx_dropped = tx_dropped;
+
+       return stats;
+}
+
 static inline u32 mib_read(struct mv643xx_eth_private *mp, int offset)
 {
        return rdl(mp, MIB_COUNTERS(mp->port_num) + offset);
@@ -1059,6 +1167,7 @@ static void mib_counters_update(struct mv643xx_eth_private *mp)
 {
        struct mib_counters *p = &mp->mib_counters;
 
+       spin_lock(&mp->mib_counters_lock);
        p->good_octets_received += mib_read(mp, 0x00);
        p->good_octets_received += (u64)mib_read(mp, 0x04) << 32;
        p->bad_octets_received += mib_read(mp, 0x08);
@@ -1091,6 +1200,16 @@ static void mib_counters_update(struct mv643xx_eth_private *mp)
        p->bad_crc_event += mib_read(mp, 0x74);
        p->collision += mib_read(mp, 0x78);
        p->late_collision += mib_read(mp, 0x7c);
+       spin_unlock(&mp->mib_counters_lock);
+
+       mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ);
+}
+
+static void mib_counters_timer_wrapper(unsigned long _mp)
+{
+       struct mv643xx_eth_private *mp = (void *)_mp;
+
+       mib_counters_update(mp);
 }
 
 
@@ -1156,9 +1275,9 @@ static int mv643xx_eth_get_settings(struct net_device *dev, struct ethtool_cmd *
        struct mv643xx_eth_private *mp = netdev_priv(dev);
        int err;
 
-       spin_lock_irq(&mp->lock);
-       err = mii_ethtool_gset(&mp->mii, cmd);
-       spin_unlock_irq(&mp->lock);
+       err = phy_read_status(mp->phy);
+       if (err == 0)
+               err = phy_ethtool_gset(mp->phy, cmd);
 
        /*
         * The MAC does not support 1000baseT_Half.
@@ -1206,18 +1325,13 @@ static int mv643xx_eth_get_settings_phyless(struct net_device *dev, struct ethto
 static int mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
-       int err;
 
        /*
         * The MAC does not support 1000baseT_Half.
         */
        cmd->advertising &= ~ADVERTISED_1000baseT_Half;
 
-       spin_lock_irq(&mp->lock);
-       err = mii_ethtool_sset(&mp->mii, cmd);
-       spin_unlock_irq(&mp->lock);
-
-       return err;
+       return phy_ethtool_sset(mp->phy, cmd);
 }
 
 static int mv643xx_eth_set_settings_phyless(struct net_device *dev, struct ethtool_cmd *cmd)
@@ -1239,7 +1353,7 @@ static int mv643xx_eth_nway_reset(struct net_device *dev)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
 
-       return mii_nway_restart(&mp->mii);
+       return genphy_restart_aneg(mp->phy);
 }
 
 static int mv643xx_eth_nway_reset_phyless(struct net_device *dev)
@@ -1249,14 +1363,7 @@ static int mv643xx_eth_nway_reset_phyless(struct net_device *dev)
 
 static u32 mv643xx_eth_get_link(struct net_device *dev)
 {
-       struct mv643xx_eth_private *mp = netdev_priv(dev);
-
-       return mii_link_ok(&mp->mii);
-}
-
-static u32 mv643xx_eth_get_link_phyless(struct net_device *dev)
-{
-       return 1;
+       return !!netif_carrier_ok(dev);
 }
 
 static void mv643xx_eth_get_strings(struct net_device *dev,
@@ -1277,9 +1384,10 @@ static void mv643xx_eth_get_ethtool_stats(struct net_device *dev,
                                          struct ethtool_stats *stats,
                                          uint64_t *data)
 {
-       struct mv643xx_eth_private *mp = dev->priv;
+       struct mv643xx_eth_private *mp = netdev_priv(dev);
        int i;
 
+       mv643xx_eth_get_stats(dev);
        mib_counters_update(mp);
 
        for (i = 0; i < ARRAY_SIZE(mv643xx_eth_stats); i++) {
@@ -1323,7 +1431,7 @@ static const struct ethtool_ops mv643xx_eth_ethtool_ops_phyless = {
        .set_settings           = mv643xx_eth_set_settings_phyless,
        .get_drvinfo            = mv643xx_eth_get_drvinfo,
        .nway_reset             = mv643xx_eth_nway_reset_phyless,
-       .get_link               = mv643xx_eth_get_link_phyless,
+       .get_link               = mv643xx_eth_get_link,
        .set_sg                 = ethtool_op_set_sg,
        .get_strings            = mv643xx_eth_get_strings,
        .get_ethtool_stats      = mv643xx_eth_get_ethtool_stats,
@@ -1487,7 +1595,7 @@ static int rxq_init(struct mv643xx_eth_private *mp, int index)
 
        size = rxq->rx_ring_size * sizeof(struct rx_desc);
 
-       if (index == mp->rxq_primary && size <= mp->rx_desc_sram_size) {
+       if (index == 0 && size <= mp->rx_desc_sram_size) {
                rxq->rx_desc_area = ioremap(mp->rx_desc_sram_addr,
                                                mp->rx_desc_sram_size);
                rxq->rx_desc_dma = mp->rx_desc_sram_addr;
@@ -1515,20 +1623,21 @@ static int rxq_init(struct mv643xx_eth_private *mp, int index)
 
        rx_desc = (struct rx_desc *)rxq->rx_desc_area;
        for (i = 0; i < rxq->rx_ring_size; i++) {
-               int nexti = (i + 1) % rxq->rx_ring_size;
+               int nexti;
+
+               nexti = i + 1;
+               if (nexti == rxq->rx_ring_size)
+                       nexti = 0;
+
                rx_desc[i].next_desc_ptr = rxq->rx_desc_dma +
                                        nexti * sizeof(struct rx_desc);
        }
 
-       init_timer(&rxq->rx_oom);
-       rxq->rx_oom.data = (unsigned long)rxq;
-       rxq->rx_oom.function = rxq_refill_timer_wrapper;
-
        return 0;
 
 
 out_free:
-       if (index == mp->rxq_primary && size <= mp->rx_desc_sram_size)
+       if (index == 0 && size <= mp->rx_desc_sram_size)
                iounmap(rxq->rx_desc_area);
        else
                dma_free_coherent(NULL, size,
@@ -1546,8 +1655,6 @@ static void rxq_deinit(struct rx_queue *rxq)
 
        rxq_disable(rxq);
 
-       del_timer_sync(&rxq->rx_oom);
-
        for (i = 0; i < rxq->rx_ring_size; i++) {
                if (rxq->rx_skb[i]) {
                        dev_kfree_skb(rxq->rx_skb[i]);
@@ -1561,7 +1668,7 @@ static void rxq_deinit(struct rx_queue *rxq)
                           rxq->rx_desc_count);
        }
 
-       if (rxq->index == mp->rxq_primary &&
+       if (rxq->index == 0 &&
            rxq->rx_desc_area_size <= mp->rx_desc_sram_size)
                iounmap(rxq->rx_desc_area);
        else
@@ -1588,7 +1695,7 @@ static int txq_init(struct mv643xx_eth_private *mp, int index)
 
        size = txq->tx_ring_size * sizeof(struct tx_desc);
 
-       if (index == mp->txq_primary && size <= mp->tx_desc_sram_size) {
+       if (index == 0 && size <= mp->tx_desc_sram_size) {
                txq->tx_desc_area = ioremap(mp->tx_desc_sram_addr,
                                                mp->tx_desc_sram_size);
                txq->tx_desc_dma = mp->tx_desc_sram_addr;
@@ -1601,120 +1708,97 @@ static int txq_init(struct mv643xx_eth_private *mp, int index)
        if (txq->tx_desc_area == NULL) {
                dev_printk(KERN_ERR, &mp->dev->dev,
                           "can't allocate tx ring (%d bytes)\n", size);
-               goto out;
+               return -ENOMEM;
        }
        memset(txq->tx_desc_area, 0, size);
 
        txq->tx_desc_area_size = size;
-       txq->tx_skb = kmalloc(txq->tx_ring_size * sizeof(*txq->tx_skb),
-                                                               GFP_KERNEL);
-       if (txq->tx_skb == NULL) {
-               dev_printk(KERN_ERR, &mp->dev->dev,
-                          "can't allocate tx skb ring\n");
-               goto out_free;
-       }
 
        tx_desc = (struct tx_desc *)txq->tx_desc_area;
        for (i = 0; i < txq->tx_ring_size; i++) {
                struct tx_desc *txd = tx_desc + i;
-               int nexti = (i + 1) % txq->tx_ring_size;
+               int nexti;
+
+               nexti = i + 1;
+               if (nexti == txq->tx_ring_size)
+                       nexti = 0;
 
                txd->cmd_sts = 0;
                txd->next_desc_ptr = txq->tx_desc_dma +
                                        nexti * sizeof(struct tx_desc);
        }
 
-       return 0;
-
-
-out_free:
-       if (index == mp->txq_primary && size <= mp->tx_desc_sram_size)
-               iounmap(txq->tx_desc_area);
-       else
-               dma_free_coherent(NULL, size,
-                                 txq->tx_desc_area,
-                                 txq->tx_desc_dma);
+       skb_queue_head_init(&txq->tx_skb);
 
-out:
-       return -ENOMEM;
+       return 0;
 }
 
-static void txq_reclaim(struct tx_queue *txq, int force)
+static void txq_deinit(struct tx_queue *txq)
 {
        struct mv643xx_eth_private *mp = txq_to_mp(txq);
-       unsigned long flags;
-
-       spin_lock_irqsave(&mp->lock, flags);
-       while (txq->tx_desc_count > 0) {
-               int tx_index;
-               struct tx_desc *desc;
-               u32 cmd_sts;
-               struct sk_buff *skb;
-               dma_addr_t addr;
-               int count;
 
-               tx_index = txq->tx_used_desc;
-               desc = &txq->tx_desc_area[tx_index];
-               cmd_sts = desc->cmd_sts;
+       txq_disable(txq);
+       txq_reclaim(txq, txq->tx_ring_size, 1);
 
-               if (cmd_sts & BUFFER_OWNED_BY_DMA) {
-                       if (!force)
-                               break;
-                       desc->cmd_sts = cmd_sts & ~BUFFER_OWNED_BY_DMA;
-               }
+       BUG_ON(txq->tx_used_desc != txq->tx_curr_desc);
 
-               txq->tx_used_desc = (tx_index + 1) % txq->tx_ring_size;
-               txq->tx_desc_count--;
+       if (txq->index == 0 &&
+           txq->tx_desc_area_size <= mp->tx_desc_sram_size)
+               iounmap(txq->tx_desc_area);
+       else
+               dma_free_coherent(NULL, txq->tx_desc_area_size,
+                                 txq->tx_desc_area, txq->tx_desc_dma);
+}
 
-               addr = desc->buf_ptr;
-               count = desc->byte_cnt;
-               skb = txq->tx_skb[tx_index];
-               txq->tx_skb[tx_index] = NULL;
 
-               if (cmd_sts & ERROR_SUMMARY) {
-                       dev_printk(KERN_INFO, &mp->dev->dev, "tx error\n");
-                       mp->dev->stats.tx_errors++;
-               }
+/* netdev ops and related ***************************************************/
+static int mv643xx_eth_collect_events(struct mv643xx_eth_private *mp)
+{
+       u32 int_cause;
+       u32 int_cause_ext;
 
-               /*
-                * Drop mp->lock while we free the skb.
-                */
-               spin_unlock_irqrestore(&mp->lock, flags);
+       int_cause = rdl(mp, INT_CAUSE(mp->port_num)) &
+                       (INT_TX_END | INT_RX | INT_EXT);
+       if (int_cause == 0)
+               return 0;
 
-               if (cmd_sts & TX_FIRST_DESC)
-                       dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(NULL, addr, count, DMA_TO_DEVICE);
+       int_cause_ext = 0;
+       if (int_cause & INT_EXT)
+               int_cause_ext = rdl(mp, INT_CAUSE_EXT(mp->port_num));
 
-               if (skb)
-                       dev_kfree_skb_irq(skb);
+       int_cause &= INT_TX_END | INT_RX;
+       if (int_cause) {
+               wrl(mp, INT_CAUSE(mp->port_num), ~int_cause);
+               mp->work_tx_end |= ((int_cause & INT_TX_END) >> 19) &
+                               ~(rdl(mp, TXQ_COMMAND(mp->port_num)) & 0xff);
+               mp->work_rx |= (int_cause & INT_RX) >> 2;
+       }
 
-               spin_lock_irqsave(&mp->lock, flags);
+       int_cause_ext &= INT_EXT_LINK_PHY | INT_EXT_TX;
+       if (int_cause_ext) {
+               wrl(mp, INT_CAUSE_EXT(mp->port_num), ~int_cause_ext);
+               if (int_cause_ext & INT_EXT_LINK_PHY)
+                       mp->work_link = 1;
+               mp->work_tx |= int_cause_ext & INT_EXT_TX;
        }
-       spin_unlock_irqrestore(&mp->lock, flags);
+
+       return 1;
 }
 
-static void txq_deinit(struct tx_queue *txq)
+static irqreturn_t mv643xx_eth_irq(int irq, void *dev_id)
 {
-       struct mv643xx_eth_private *mp = txq_to_mp(txq);
-
-       txq_disable(txq);
-       txq_reclaim(txq, 1);
+       struct net_device *dev = (struct net_device *)dev_id;
+       struct mv643xx_eth_private *mp = netdev_priv(dev);
 
-       BUG_ON(txq->tx_used_desc != txq->tx_curr_desc);
+       if (unlikely(!mv643xx_eth_collect_events(mp)))
+               return IRQ_NONE;
 
-       if (txq->index == mp->txq_primary &&
-           txq->tx_desc_area_size <= mp->tx_desc_sram_size)
-               iounmap(txq->tx_desc_area);
-       else
-               dma_free_coherent(NULL, txq->tx_desc_area_size,
-                                 txq->tx_desc_area, txq->tx_desc_dma);
+       wrl(mp, INT_MASK(mp->port_num), 0);
+       napi_schedule(&mp->napi);
 
-       kfree(txq->tx_skb);
+       return IRQ_HANDLED;
 }
 
-
-/* netdev ops and related ***************************************************/
 static void handle_link_event(struct mv643xx_eth_private *mp)
 {
        struct net_device *dev = mp->dev;
@@ -1731,15 +1815,12 @@ static void handle_link_event(struct mv643xx_eth_private *mp)
                        printk(KERN_INFO "%s: link down\n", dev->name);
 
                        netif_carrier_off(dev);
-                       netif_stop_queue(dev);
 
-                       for (i = 0; i < 8; i++) {
+                       for (i = 0; i < mp->txq_count; i++) {
                                struct tx_queue *txq = mp->txq + i;
 
-                               if (mp->txq_mask & (1 << i)) {
-                                       txq_reclaim(txq, 1);
-                                       txq_reset_hw_ptr(txq);
-                               }
+                               txq_reclaim(txq, txq->tx_ring_size, 1);
+                               txq_reset_hw_ptr(txq);
                        }
                }
                return;
@@ -1767,119 +1848,93 @@ static void handle_link_event(struct mv643xx_eth_private *mp)
                         speed, duplex ? "full" : "half",
                         fc ? "en" : "dis");
 
-       if (!netif_carrier_ok(dev)) {
+       if (!netif_carrier_ok(dev))
                netif_carrier_on(dev);
-               netif_wake_queue(dev);
-       }
 }
 
-static irqreturn_t mv643xx_eth_irq(int irq, void *dev_id)
+static int mv643xx_eth_poll(struct napi_struct *napi, int budget)
 {
-       struct net_device *dev = (struct net_device *)dev_id;
-       struct mv643xx_eth_private *mp = netdev_priv(dev);
-       u32 int_cause;
-       u32 int_cause_ext;
-
-       int_cause = rdl(mp, INT_CAUSE(mp->port_num)) &
-                       (INT_TX_END | INT_RX | INT_EXT);
-       if (int_cause == 0)
-               return IRQ_NONE;
-
-       int_cause_ext = 0;
-       if (int_cause & INT_EXT) {
-               int_cause_ext = rdl(mp, INT_CAUSE_EXT(mp->port_num))
-                               & (INT_EXT_LINK | INT_EXT_PHY | INT_EXT_TX);
-               wrl(mp, INT_CAUSE_EXT(mp->port_num), ~int_cause_ext);
-       }
-
-       if (int_cause_ext & (INT_EXT_PHY | INT_EXT_LINK))
-               handle_link_event(mp);
+       struct mv643xx_eth_private *mp;
+       int work_done;
 
-       /*
-        * RxBuffer or RxError set for any of the 8 queues?
-        */
-#ifdef MV643XX_ETH_NAPI
-       if (int_cause & INT_RX) {
-               wrl(mp, INT_CAUSE(mp->port_num), ~(int_cause & INT_RX));
-               wrl(mp, INT_MASK(mp->port_num), 0x00000000);
-               rdl(mp, INT_MASK(mp->port_num));
+       mp = container_of(napi, struct mv643xx_eth_private, napi);
 
-               netif_rx_schedule(dev, &mp->napi);
-       }
-#else
-       if (int_cause & INT_RX) {
-               int i;
+       mp->work_rx_refill |= mp->work_rx_oom;
+       mp->work_rx_oom = 0;
 
-               for (i = 7; i >= 0; i--)
-                       if (mp->rxq_mask & (1 << i))
-                               rxq_process(mp->rxq + i, INT_MAX);
-       }
-#endif
+       work_done = 0;
+       while (work_done < budget) {
+               u8 queue_mask;
+               int queue;
+               int work_tbd;
 
-       /*
-        * TxBuffer or TxError set for any of the 8 queues?
-        */
-       if (int_cause_ext & INT_EXT_TX) {
-               int i;
+               if (mp->work_link) {
+                       mp->work_link = 0;
+                       handle_link_event(mp);
+                       continue;
+               }
 
-               for (i = 0; i < 8; i++)
-                       if (mp->txq_mask & (1 << i))
-                               txq_reclaim(mp->txq + i, 0);
+               queue_mask = mp->work_tx | mp->work_tx_end |
+                               mp->work_rx | mp->work_rx_refill;
+               if (!queue_mask) {
+                       if (mv643xx_eth_collect_events(mp))
+                               continue;
+                       break;
+               }
 
-               /*
-                * Enough space again in the primary TX queue for a
-                * full packet?
-                */
-               if (netif_carrier_ok(dev)) {
-                       spin_lock(&mp->lock);
-                       __txq_maybe_wake(mp->txq + mp->txq_primary);
-                       spin_unlock(&mp->lock);
+               queue = fls(queue_mask) - 1;
+               queue_mask = 1 << queue;
+
+               work_tbd = budget - work_done;
+               if (work_tbd > 16)
+                       work_tbd = 16;
+
+               if (mp->work_tx_end & queue_mask) {
+                       txq_kick(mp->txq + queue);
+               } else if (mp->work_tx & queue_mask) {
+                       work_done += txq_reclaim(mp->txq + queue, work_tbd, 0);
+                       txq_maybe_wake(mp->txq + queue);
+               } else if (mp->work_rx & queue_mask) {
+                       work_done += rxq_process(mp->rxq + queue, work_tbd);
+               } else if (mp->work_rx_refill & queue_mask) {
+                       work_done += rxq_refill(mp->rxq + queue, work_tbd);
+               } else {
+                       BUG();
                }
        }
 
-       /*
-        * Any TxEnd interrupts?
-        */
-       if (int_cause & INT_TX_END) {
-               int i;
-
-               wrl(mp, INT_CAUSE(mp->port_num), ~(int_cause & INT_TX_END));
-
-               spin_lock(&mp->lock);
-               for (i = 0; i < 8; i++) {
-                       struct tx_queue *txq = mp->txq + i;
-                       u32 hw_desc_ptr;
-                       u32 expected_ptr;
-
-                       if ((int_cause & (INT_TX_END_0 << i)) == 0)
-                               continue;
+       if (work_done < budget) {
+               if (mp->work_rx_oom)
+                       mod_timer(&mp->rx_oom, jiffies + (HZ / 10));
+               napi_complete(napi);
+               wrl(mp, INT_MASK(mp->port_num), INT_TX_END | INT_RX | INT_EXT);
+       }
 
-                       hw_desc_ptr =
-                               rdl(mp, TXQ_CURRENT_DESC_PTR(mp->port_num, i));
-                       expected_ptr = (u32)txq->tx_desc_dma +
-                               txq->tx_curr_desc * sizeof(struct tx_desc);
+       return work_done;
+}
 
-                       if (hw_desc_ptr != expected_ptr)
-                               txq_enable(txq);
-               }
-               spin_unlock(&mp->lock);
-       }
+static inline void oom_timer_wrapper(unsigned long data)
+{
+       struct mv643xx_eth_private *mp = (void *)data;
 
-       return IRQ_HANDLED;
+       napi_schedule(&mp->napi);
 }
 
 static void phy_reset(struct mv643xx_eth_private *mp)
 {
-       unsigned int data;
+       int data;
+
+       data = phy_read(mp->phy, MII_BMCR);
+       if (data < 0)
+               return;
 
-       smi_reg_read(mp, mp->phy_addr, MII_BMCR, &data);
        data |= BMCR_RESET;
-       smi_reg_write(mp, mp->phy_addr, MII_BMCR, data);
+       if (phy_write(mp->phy, MII_BMCR, data) < 0)
+               return;
 
        do {
-               udelay(1);
-               smi_reg_read(mp, mp->phy_addr, MII_BMCR, &data);
-       } while (data & BMCR_RESET);
+               data = phy_read(mp->phy, MII_BMCR);
+       } while (data >= 0 && data & BMCR_RESET);
 }
 
 static void port_start(struct mv643xx_eth_private *mp)
@@ -1890,7 +1945,7 @@ static void port_start(struct mv643xx_eth_private *mp)
        /*
         * Perform PHY reset, if there is a PHY.
         */
-       if (mp->phy_addr != -1) {
+       if (mp->phy != NULL) {
                struct ethtool_cmd cmd;
 
                mv643xx_eth_get_settings(mp->dev, &cmd);
@@ -1907,7 +1962,7 @@ static void port_start(struct mv643xx_eth_private *mp)
        wrl(mp, PORT_SERIAL_CONTROL(mp->port_num), pscr);
 
        pscr |= DO_NOT_FORCE_LINK_FAIL;
-       if (mp->phy_addr == -1)
+       if (mp->phy == NULL)
                pscr |= FORCE_LINK_PASS;
        wrl(mp, PORT_SERIAL_CONTROL(mp->port_num), pscr);
 
@@ -1917,12 +1972,9 @@ static void port_start(struct mv643xx_eth_private *mp)
         * Configure TX path and queues.
         */
        tx_set_rate(mp, 1000000000, 16777216);
-       for (i = 0; i < 8; i++) {
+       for (i = 0; i < mp->txq_count; i++) {
                struct tx_queue *txq = mp->txq + i;
 
-               if ((mp->txq_mask & (1 << i)) == 0)
-                       continue;
-
                txq_reset_hw_ptr(txq);
                txq_set_rate(txq, 1000000000, 16777216);
                txq_set_fixed_prio_mode(txq);
@@ -1935,9 +1987,10 @@ static void port_start(struct mv643xx_eth_private *mp)
 
        /*
         * Receive all unmatched unicast, TCP, UDP, BPDU and broadcast
-        * frames to RX queue #0.
+        * frames to RX queue #0, and include the pseudo-header when
+        * calculating receive checksums.
         */
-       wrl(mp, PORT_CONFIG(mp->port_num), 0x00000000);
+       wrl(mp, PORT_CONFIG(mp->port_num), 0x02000000);
 
        /*
         * Treat BPDUs as normal multicasts, and disable partition mode.
@@ -1947,14 +2000,11 @@ static void port_start(struct mv643xx_eth_private *mp)
        /*
         * Enable the receive queues.
         */
-       for (i = 0; i < 8; i++) {
+       for (i = 0; i < mp->rxq_count; i++) {
                struct rx_queue *rxq = mp->rxq + i;
                int off = RXQ_CURRENT_DESC_PTR(mp->port_num, i);
                u32 addr;
 
-               if ((mp->rxq_mask & (1 << i)) == 0)
-                       continue;
-
                addr = (u32)rxq->rx_desc_dma;
                addr += rxq->rx_curr_desc * sizeof(struct rx_desc);
                wrl(mp, off, addr);
@@ -1993,6 +2043,26 @@ static void set_tx_coal(struct mv643xx_eth_private *mp, unsigned int delay)
        wrl(mp, TX_FIFO_URGENT_THRESHOLD(mp->port_num), (coal & 0x3fff) << 4);
 }
 
+static void mv643xx_eth_recalc_skb_size(struct mv643xx_eth_private *mp)
+{
+       int skb_size;
+
+       /*
+        * Reserve 2+14 bytes for an ethernet header (the hardware
+        * automatically prepends 2 bytes of dummy data to each
+        * received packet), 16 bytes for up to four VLAN tags, and
+        * 4 bytes for the trailing FCS -- 36 bytes total.
+        */
+       skb_size = mp->dev->mtu + 36;
+
+       /*
+        * Make sure that the skb size is a multiple of 8 bytes, as
+        * the lower three bits of the receive descriptor's buffer
+        * size field are ignored by the hardware.
+        */
+       mp->skb_size = (skb_size + 7) & ~7;
+}
+
 static int mv643xx_eth_open(struct net_device *dev)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
@@ -2004,8 +2074,7 @@ static int mv643xx_eth_open(struct net_device *dev)
        rdl(mp, INT_CAUSE_EXT(mp->port_num));
 
        err = request_irq(dev->irq, mv643xx_eth_irq,
-                         IRQF_SHARED | IRQF_SAMPLE_RANDOM,
-                         dev->name, dev);
+                         IRQF_SHARED, dev->name, dev);
        if (err) {
                dev_printk(KERN_ERR, &dev->dev, "can't assign irq\n");
                return -EAGAIN;
@@ -2013,58 +2082,53 @@ static int mv643xx_eth_open(struct net_device *dev)
 
        init_mac_tables(mp);
 
-       for (i = 0; i < 8; i++) {
-               if ((mp->rxq_mask & (1 << i)) == 0)
-                       continue;
+       mv643xx_eth_recalc_skb_size(mp);
+
+       napi_enable(&mp->napi);
+
+       skb_queue_head_init(&mp->rx_recycle);
 
+       for (i = 0; i < mp->rxq_count; i++) {
                err = rxq_init(mp, i);
                if (err) {
                        while (--i >= 0)
-                               if (mp->rxq_mask & (1 << i))
-                                       rxq_deinit(mp->rxq + i);
+                               rxq_deinit(mp->rxq + i);
                        goto out;
                }
 
-               rxq_refill(mp->rxq + i);
+               rxq_refill(mp->rxq + i, INT_MAX);
        }
 
-       for (i = 0; i < 8; i++) {
-               if ((mp->txq_mask & (1 << i)) == 0)
-                       continue;
+       if (mp->work_rx_oom) {
+               mp->rx_oom.expires = jiffies + (HZ / 10);
+               add_timer(&mp->rx_oom);
+       }
 
+       for (i = 0; i < mp->txq_count; i++) {
                err = txq_init(mp, i);
                if (err) {
                        while (--i >= 0)
-                               if (mp->txq_mask & (1 << i))
-                                       txq_deinit(mp->txq + i);
+                               txq_deinit(mp->txq + i);
                        goto out_free;
                }
        }
 
-#ifdef MV643XX_ETH_NAPI
-       napi_enable(&mp->napi);
-#endif
-
        netif_carrier_off(dev);
-       netif_stop_queue(dev);
 
        port_start(mp);
 
        set_rx_coal(mp, 0);
        set_tx_coal(mp, 0);
 
-       wrl(mp, INT_MASK_EXT(mp->port_num),
-           INT_EXT_LINK | INT_EXT_PHY | INT_EXT_TX);
-
+       wrl(mp, INT_MASK_EXT(mp->port_num), INT_EXT_LINK_PHY | INT_EXT_TX);
        wrl(mp, INT_MASK(mp->port_num), INT_TX_END | INT_RX | INT_EXT);
 
        return 0;
 
 
 out_free:
-       for (i = 0; i < 8; i++)
-               if (mp->rxq_mask & (1 << i))
-                       rxq_deinit(mp->rxq + i);
+       for (i = 0; i < mp->rxq_count; i++)
+               rxq_deinit(mp->rxq + i);
 out:
        free_irq(dev->irq, dev);
 
@@ -2076,12 +2140,10 @@ static void port_reset(struct mv643xx_eth_private *mp)
        unsigned int data;
        int i;
 
-       for (i = 0; i < 8; i++) {
-               if (mp->rxq_mask & (1 << i))
-                       rxq_disable(mp->rxq + i);
-               if (mp->txq_mask & (1 << i))
-                       txq_disable(mp->txq + i);
-       }
+       for (i = 0; i < mp->rxq_count; i++)
+               rxq_disable(mp->rxq + i);
+       for (i = 0; i < mp->txq_count; i++)
+               txq_disable(mp->txq + i);
 
        while (1) {
                u32 ps = rdl(mp, PORT_STATUS(mp->port_num));
@@ -2107,23 +2169,26 @@ static int mv643xx_eth_stop(struct net_device *dev)
        wrl(mp, INT_MASK(mp->port_num), 0x00000000);
        rdl(mp, INT_MASK(mp->port_num));
 
-#ifdef MV643XX_ETH_NAPI
+       del_timer_sync(&mp->mib_counters_timer);
+
        napi_disable(&mp->napi);
-#endif
+
+       del_timer_sync(&mp->rx_oom);
+
        netif_carrier_off(dev);
-       netif_stop_queue(dev);
 
        free_irq(dev->irq, dev);
 
        port_reset(mp);
+       mv643xx_eth_get_stats(dev);
        mib_counters_update(mp);
 
-       for (i = 0; i < 8; i++) {
-               if (mp->rxq_mask & (1 << i))
-                       rxq_deinit(mp->rxq + i);
-               if (mp->txq_mask & (1 << i))
-                       txq_deinit(mp->txq + i);
-       }
+       skb_queue_purge(&mp->rx_recycle);
+
+       for (i = 0; i < mp->rxq_count; i++)
+               rxq_deinit(mp->rxq + i);
+       for (i = 0; i < mp->txq_count; i++)
+               txq_deinit(mp->txq + i);
 
        return 0;
 }
@@ -2132,8 +2197,8 @@ static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
 
-       if (mp->phy_addr != -1)
-               return generic_mii_ioctl(&mp->mii, if_mii(ifr), cmd, NULL);
+       if (mp->phy != NULL)
+               return phy_mii_ioctl(mp->phy, if_mii(ifr), cmd);
 
        return -EOPNOTSUPP;
 }
@@ -2146,6 +2211,7 @@ static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu)
                return -EINVAL;
 
        dev->mtu = new_mtu;
+       mv643xx_eth_recalc_skb_size(mp);
        tx_set_rate(mp, 1000000000, 16777216);
 
        if (!netif_running(dev))
@@ -2173,12 +2239,10 @@ static void tx_timeout_task(struct work_struct *ugly)
 
        mp = container_of(ugly, struct mv643xx_eth_private, tx_timeout_task);
        if (netif_running(mp->dev)) {
-               netif_stop_queue(mp->dev);
-
+               netif_tx_stop_all_queues(mp->dev);
                port_reset(mp);
                port_start(mp);
-
-               __txq_maybe_wake(mp->txq + mp->txq_primary);
+               netif_tx_wake_all_queues(mp->dev);
        }
 }
 
@@ -2205,22 +2269,6 @@ static void mv643xx_eth_netpoll(struct net_device *dev)
 }
 #endif
 
-static int mv643xx_eth_mdio_read(struct net_device *dev, int addr, int reg)
-{
-       struct mv643xx_eth_private *mp = netdev_priv(dev);
-       int val;
-
-       smi_reg_read(mp, addr, reg, &val);
-
-       return val;
-}
-
-static void mv643xx_eth_mdio_write(struct net_device *dev, int addr, int reg, int val)
-{
-       struct mv643xx_eth_private *mp = netdev_priv(dev);
-       smi_reg_write(mp, addr, reg, val);
-}
-
 
 /* platform glue ************************************************************/
 static void
@@ -2272,14 +2320,20 @@ static void infer_hw_params(struct mv643xx_eth_shared_private *msp)
                msp->extended_rx_coal_limit = 0;
 
        /*
-        * Check whether the TX rate control registers are in the
-        * old or the new place.
+        * Check whether the MAC supports TX rate control, and if
+        * yes, whether its associated registers are in the old or
+        * the new place.
         */
        writel(1, msp->base + TX_BW_MTU_MOVED(0));
-       if (readl(msp->base + TX_BW_MTU_MOVED(0)) & 1)
-               msp->tx_bw_control_moved = 1;
-       else
-               msp->tx_bw_control_moved = 0;
+       if (readl(msp->base + TX_BW_MTU_MOVED(0)) & 1) {
+               msp->tx_bw_control = TX_BW_CONTROL_NEW_LAYOUT;
+       } else {
+               writel(7, msp->base + TX_BW_RATE(0));
+               if (readl(msp->base + TX_BW_RATE(0)) & 7)
+                       msp->tx_bw_control = TX_BW_CONTROL_OLD_LAYOUT;
+               else
+                       msp->tx_bw_control = TX_BW_CONTROL_ABSENT;
+       }
 }
 
 static int mv643xx_eth_shared_probe(struct platform_device *pdev)
@@ -2309,7 +2363,45 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
        if (msp->base == NULL)
                goto out_free;
 
-       spin_lock_init(&msp->phy_lock);
+       /*
+        * Set up and register SMI bus.
+        */
+       if (pd == NULL || pd->shared_smi == NULL) {
+               msp->smi_bus = mdiobus_alloc();
+               if (msp->smi_bus == NULL)
+                       goto out_unmap;
+
+               msp->smi_bus->priv = msp;
+               msp->smi_bus->name = "mv643xx_eth smi";
+               msp->smi_bus->read = smi_bus_read;
+               msp->smi_bus->write = smi_bus_write;
+               snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id);
+               msp->smi_bus->parent = &pdev->dev;
+               msp->smi_bus->phy_mask = 0xffffffff;
+               if (mdiobus_register(msp->smi_bus) < 0)
+                       goto out_free_mii_bus;
+               msp->smi = msp;
+       } else {
+               msp->smi = platform_get_drvdata(pd->shared_smi);
+       }
+
+       msp->err_interrupt = NO_IRQ;
+       init_waitqueue_head(&msp->smi_busy_wait);
+
+       /*
+        * Check whether the error interrupt is hooked up.
+        */
+       res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (res != NULL) {
+               int err;
+
+               err = request_irq(res->start, mv643xx_eth_err_irq,
+                                 IRQF_SHARED, "mv643xx_eth", msp);
+               if (!err) {
+                       writel(ERR_INT_SMI_DONE, msp->base + ERR_INT_MASK);
+                       msp->err_interrupt = res->start;
+               }
+       }
 
        /*
         * (Re-)program MBUS remapping windows if we are asked to.
@@ -2327,6 +2419,10 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 
        return 0;
 
+out_free_mii_bus:
+       mdiobus_free(msp->smi_bus);
+out_unmap:
+       iounmap(msp->base);
 out_free:
        kfree(msp);
 out:
@@ -2336,7 +2432,14 @@ out:
 static int mv643xx_eth_shared_remove(struct platform_device *pdev)
 {
        struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev);
+       struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data;
 
+       if (pd == NULL || pd->shared_smi == NULL) {
+               mdiobus_unregister(msp->smi_bus); /* must precede the free */
+               mdiobus_free(msp->smi_bus);
+       }
+       if (msp->err_interrupt != NO_IRQ)
+               free_irq(msp->err_interrupt, msp);
        iounmap(msp->base);
        kfree(msp);
 
@@ -2382,33 +2485,13 @@ static void set_params(struct mv643xx_eth_private *mp,
        else
                uc_addr_get(mp, dev->dev_addr);
 
-       if (pd->phy_addr == -1) {
-               mp->shared_smi = NULL;
-               mp->phy_addr = -1;
-       } else {
-               mp->shared_smi = mp->shared;
-               if (pd->shared_smi != NULL)
-                       mp->shared_smi = platform_get_drvdata(pd->shared_smi);
-
-               if (pd->force_phy_addr || pd->phy_addr) {
-                       mp->phy_addr = pd->phy_addr & 0x3f;
-                       phy_addr_set(mp, mp->phy_addr);
-               } else {
-                       mp->phy_addr = phy_addr_get(mp);
-               }
-       }
-
        mp->default_rx_ring_size = DEFAULT_RX_QUEUE_SIZE;
        if (pd->rx_queue_size)
                mp->default_rx_ring_size = pd->rx_queue_size;
        mp->rx_desc_sram_addr = pd->rx_sram_addr;
        mp->rx_desc_sram_size = pd->rx_sram_size;
 
-       if (pd->rx_queue_mask)
-               mp->rxq_mask = pd->rx_queue_mask;
-       else
-               mp->rxq_mask = 0x01;
-       mp->rxq_primary = fls(mp->rxq_mask) - 1;
+       mp->rxq_count = pd->rx_queue_count ? : 1;
 
        mp->default_tx_ring_size = DEFAULT_TX_QUEUE_SIZE;
        if (pd->tx_queue_size)
@@ -2416,76 +2499,63 @@ static void set_params(struct mv643xx_eth_private *mp,
        mp->tx_desc_sram_addr = pd->tx_sram_addr;
        mp->tx_desc_sram_size = pd->tx_sram_size;
 
-       if (pd->tx_queue_mask)
-               mp->txq_mask = pd->tx_queue_mask;
-       else
-               mp->txq_mask = 0x01;
-       mp->txq_primary = fls(mp->txq_mask) - 1;
+       mp->txq_count = pd->tx_queue_count ? : 1;
 }
 
-static int phy_detect(struct mv643xx_eth_private *mp)
+static struct phy_device *phy_scan(struct mv643xx_eth_private *mp,
+                                  int phy_addr)
 {
-       unsigned int data;
-       unsigned int data2;
+       struct mii_bus *bus = mp->shared->smi->smi_bus;
+       struct phy_device *phydev;
+       int start;
+       int num;
+       int i;
 
-       smi_reg_read(mp, mp->phy_addr, MII_BMCR, &data);
-       smi_reg_write(mp, mp->phy_addr, MII_BMCR, data ^ BMCR_ANENABLE);
+       if (phy_addr == MV643XX_ETH_PHY_ADDR_DEFAULT) {
+               start = phy_addr_get(mp) & 0x1f;
+               num = 32;
+       } else {
+               start = phy_addr & 0x1f;
+               num = 1;
+       }
 
-       smi_reg_read(mp, mp->phy_addr, MII_BMCR, &data2);
-       if (((data ^ data2) & BMCR_ANENABLE) == 0)
-               return -ENODEV;
+       phydev = NULL;
+       for (i = 0; i < num; i++) {
+               int addr = (start + i) & 0x1f;
 
-       smi_reg_write(mp, mp->phy_addr, MII_BMCR, data);
+               if (bus->phy_map[addr] == NULL)
+                       mdiobus_scan(bus, addr);
 
-       return 0;
+               if (phydev == NULL) {
+                       phydev = bus->phy_map[addr];
+                       if (phydev != NULL)
+                               phy_addr_set(mp, addr);
+               }
+       }
+
+       return phydev;
 }
 
-static int phy_init(struct mv643xx_eth_private *mp,
-                   struct mv643xx_eth_platform_data *pd)
+static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex)
 {
-       struct ethtool_cmd cmd;
-       int err;
+       struct phy_device *phy = mp->phy;
 
-       err = phy_detect(mp);
-       if (err) {
-               dev_printk(KERN_INFO, &mp->dev->dev,
-                          "no PHY detected at addr %d\n", mp->phy_addr);
-               return err;
-       }
        phy_reset(mp);
 
-       mp->mii.phy_id = mp->phy_addr;
-       mp->mii.phy_id_mask = 0x3f;
-       mp->mii.reg_num_mask = 0x1f;
-       mp->mii.dev = mp->dev;
-       mp->mii.mdio_read = mv643xx_eth_mdio_read;
-       mp->mii.mdio_write = mv643xx_eth_mdio_write;
-
-       mp->mii.supports_gmii = mii_check_gmii_support(&mp->mii);
-
-       memset(&cmd, 0, sizeof(cmd));
-
-       cmd.port = PORT_MII;
-       cmd.transceiver = XCVR_INTERNAL;
-       cmd.phy_address = mp->phy_addr;
-       if (pd->speed == 0) {
-               cmd.autoneg = AUTONEG_ENABLE;
-               cmd.speed = SPEED_100;
-               cmd.advertising = ADVERTISED_10baseT_Half  |
-                                 ADVERTISED_10baseT_Full  |
-                                 ADVERTISED_100baseT_Half |
-                                 ADVERTISED_100baseT_Full;
-               if (mp->mii.supports_gmii)
-                       cmd.advertising |= ADVERTISED_1000baseT_Full;
+       phy_attach(mp->dev, phy->dev.bus_id, 0, PHY_INTERFACE_MODE_GMII);
+
+       if (speed == 0) {
+               phy->autoneg = AUTONEG_ENABLE;
+               phy->speed = 0;
+               phy->duplex = 0;
+               phy->advertising = phy->supported | ADVERTISED_Autoneg;
        } else {
-               cmd.autoneg = AUTONEG_DISABLE;
-               cmd.speed = pd->speed;
-               cmd.duplex = pd->duplex;
+               phy->autoneg = AUTONEG_DISABLE;
+               phy->advertising = 0;
+               phy->speed = speed;
+               phy->duplex = duplex;
        }
-
-       mv643xx_eth_set_settings(mp->dev, &cmd);
-
-       return 0;
+       phy_start_aneg(phy);
 }
 
 static void init_pscr(struct mv643xx_eth_private *mp, int speed, int duplex)
@@ -2499,7 +2569,7 @@ static void init_pscr(struct mv643xx_eth_private *mp, int speed, int duplex)
        }
 
        pscr = MAX_RX_PACKET_9700BYTE | SERIAL_PORT_CONTROL_RESERVED;
-       if (mp->phy_addr == -1) {
+       if (mp->phy == NULL) {
                pscr |= DISABLE_AUTO_NEG_SPEED_GMII;
                if (speed == SPEED_1000)
                        pscr |= SET_GMII_SPEED_TO_1000;
@@ -2538,7 +2608,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
                return -ENODEV;
        }
 
-       dev = alloc_etherdev(sizeof(struct mv643xx_eth_private));
+       dev = alloc_etherdev_mq(sizeof(struct mv643xx_eth_private), 8);
        if (!dev)
                return -ENOMEM;
 
@@ -2549,33 +2619,47 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
        mp->port_num = pd->port_number;
 
        mp->dev = dev;
-#ifdef MV643XX_ETH_NAPI
-       netif_napi_add(dev, &mp->napi, mv643xx_eth_poll, 64);
-#endif
 
        set_params(mp, pd);
+       dev->real_num_tx_queues = mp->txq_count;
 
-       spin_lock_init(&mp->lock);
-
-       mib_counters_clear(mp);
-       INIT_WORK(&mp->tx_timeout_task, tx_timeout_task);
-
-       if (mp->phy_addr != -1) {
-               err = phy_init(mp, pd);
-               if (err)
-                       goto out;
+       if (pd->phy_addr != MV643XX_ETH_PHY_NONE)
+               mp->phy = phy_scan(mp, pd->phy_addr);
 
+       if (mp->phy != NULL) {
+               phy_init(mp, pd->speed, pd->duplex);
                SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops);
        } else {
                SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops_phyless);
        }
+
        init_pscr(mp, pd->speed, pd->duplex);
 
 
+       mib_counters_clear(mp);
+
+       init_timer(&mp->mib_counters_timer);
+       mp->mib_counters_timer.data = (unsigned long)mp;
+       mp->mib_counters_timer.function = mib_counters_timer_wrapper;
+       mp->mib_counters_timer.expires = jiffies + 30 * HZ;
+       add_timer(&mp->mib_counters_timer);
+
+       spin_lock_init(&mp->mib_counters_lock);
+
+       INIT_WORK(&mp->tx_timeout_task, tx_timeout_task);
+
+       netif_napi_add(dev, &mp->napi, mv643xx_eth_poll, 128);
+
+       init_timer(&mp->rx_oom);
+       mp->rx_oom.data = (unsigned long)mp;
+       mp->rx_oom.function = oom_timer_wrapper;
+
+
        res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
        BUG_ON(!res);
        dev->irq = res->start;
 
+       dev->get_stats = mv643xx_eth_get_stats;
        dev->hard_start_xmit = mv643xx_eth_xmit;
        dev->open = mv643xx_eth_open;
        dev->stop = mv643xx_eth_stop;
@@ -2590,14 +2674,8 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
        dev->watchdog_timeo = 2 * HZ;
        dev->base_addr = 0;
 
-#ifdef MV643XX_ETH_CHECKSUM_OFFLOAD_TX
-       /*
-        * Zero copy can only work if we use Discovery II memory. Else, we will
-        * have to map the buffers to ISA memory which is only 16 MB
-        */
        dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
        dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM;
-#endif
 
        SET_NETDEV_DEV(dev, &pdev->dev);
 
@@ -2611,16 +2689,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
        dev_printk(KERN_NOTICE, &dev->dev, "port %d with MAC address %s\n",
                   mp->port_num, print_mac(mac, dev->dev_addr));
 
-       if (dev->features & NETIF_F_SG)
-               dev_printk(KERN_NOTICE, &dev->dev, "scatter/gather enabled\n");
-
-       if (dev->features & NETIF_F_IP_CSUM)
-               dev_printk(KERN_NOTICE, &dev->dev, "tx checksum offload\n");
-
-#ifdef MV643XX_ETH_NAPI
-       dev_printk(KERN_NOTICE, &dev->dev, "napi enabled\n");
-#endif
-
        if (mp->tx_desc_sram_size > 0)
                dev_printk(KERN_NOTICE, &dev->dev, "configured with sram\n");
 
@@ -2637,6 +2705,8 @@ static int mv643xx_eth_remove(struct platform_device *pdev)
        struct mv643xx_eth_private *mp = platform_get_drvdata(pdev);
 
        unregister_netdev(mp->dev);
+       if (mp->phy != NULL)
+               phy_detach(mp->phy);
        flush_scheduled_work();
        free_netdev(mp->dev);