]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branches 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'mad', 'misc', 'mlx4', ...
author Roland Dreier <rolandd@cisco.com>
Fri, 10 Oct 2008 00:41:15 +0000 (17:41 -0700)
committer Roland Dreier <rolandd@cisco.com>
Fri, 10 Oct 2008 00:41:15 +0000 (17:41 -0700)
26 files changed:
drivers/infiniband/core/cm.c
drivers/infiniband/core/mad.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_cq.c
drivers/infiniband/hw/ehca/ehca_iverbs.h
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_catas.c
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/nes/nes.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_hw.c
drivers/infiniband/hw/nes/nes_hw.h
drivers/infiniband/hw/nes/nes_nic.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c

index 922d35f4fc08aa07d69355a812577f9534e82567..3cab0cedfca21f54d83027a78f1f6c6a3c1a1b8e 100644 (file)
@@ -3748,6 +3748,7 @@ error1:
                cm_remove_port_fs(port);
        }
        device_unregister(cm_dev->device);
+       kfree(cm_dev);
 }
 
 static void cm_remove_one(struct ib_device *ib_device)
@@ -3776,6 +3777,7 @@ static void cm_remove_one(struct ib_device *ib_device)
                cm_remove_port_fs(port);
        }
        device_unregister(cm_dev->device);
+       kfree(cm_dev);
 }
 
 static int __init ib_cm_init(void)
index 1adf2efd3cb392b84800241ccca3fbc4d1ffb4ac..49c45feccd5b4148d2e73afff3c846185b9ed75b 100644 (file)
@@ -1697,9 +1697,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
        u8 port_num = mad_agent_priv->agent.port_num;
        u8 lmc;
 
-       send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
-                    mad_hdr.method & IB_MGMT_METHOD_RESP;
-       rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
+       send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
+       rcv_resp = ib_response_mad(rwc->recv_buf.mad);
 
        if (send_resp == rcv_resp)
                /* both requests, or both responses. GIDs different */
index eb778bfd6f66e306e48133cf806e6544ae529594..ecff98043589c5e15646d28f028e81858d3a6562 100644 (file)
@@ -1155,13 +1155,11 @@ static int iwch_query_port(struct ib_device *ibdev,
                           u8 port, struct ib_port_attr *props)
 {
        PDBG("%s ibdev %p\n", __func__, ibdev);
+
+       memset(props, 0, sizeof(struct ib_port_attr));
        props->max_mtu = IB_MTU_4096;
-       props->lid = 0;
-       props->lmc = 0;
-       props->sm_lid = 0;
-       props->sm_sl = 0;
+       props->active_mtu = IB_MTU_2048;
        props->state = IB_PORT_ACTIVE;
-       props->phys_state = 0;
        props->port_cap_flags =
            IB_PORT_CM_SUP |
            IB_PORT_SNMP_TUNNEL_SUP |
@@ -1170,7 +1168,6 @@ static int iwch_query_port(struct ib_device *ibdev,
            IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
        props->gid_tbl_len = 1;
        props->pkey_tbl_len = 1;
-       props->qkey_viol_cntr = 0;
        props->active_width = 2;
        props->active_speed = 2;
        props->max_msg_sz = -1;
index 1ab919f836a8c516c797619e565cc091e5ee04d9..5d7b7855afb9d2c8fe9178e11d9dcf60cc41096d 100644 (file)
@@ -164,6 +164,13 @@ struct ehca_qmap_entry {
        u16 reported;
 };
 
+struct ehca_queue_map {                /* per-queue WQE bookkeeping */
+       struct ehca_qmap_entry *map;    /* array of 'entries' elements */
+       unsigned int entries;           /* queue_length / qe_size */
+       unsigned int tail;              /* advances (and wraps) per reported completion */
+       unsigned int left_to_poll;      /* CQEs still to poll before flush CQEs are generated */
+};
+
 struct ehca_qp {
        union {
                struct ib_qp ib_qp;
@@ -173,8 +180,9 @@ struct ehca_qp {
        enum ehca_ext_qp_type ext_type;
        enum ib_qp_state state;
        struct ipz_queue ipz_squeue;
-       struct ehca_qmap_entry *sq_map;
+       struct ehca_queue_map sq_map;
        struct ipz_queue ipz_rqueue;
+       struct ehca_queue_map rq_map;
        struct h_galpas galpas;
        u32 qkey;
        u32 real_qp_num;
@@ -204,6 +212,8 @@ struct ehca_qp {
        atomic_t nr_events; /* events seen */
        wait_queue_head_t wait_completion;
        int mig_armed;
+       struct list_head sq_err_node;
+       struct list_head rq_err_node;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -233,6 +243,8 @@ struct ehca_cq {
        /* mmap counter for resources mapped into user space */
        u32 mm_count_queue;
        u32 mm_count_galpa;
+       struct list_head sqp_err_list;
+       struct list_head rqp_err_list;
 };
 
 enum ehca_mr_flag {
index 5540b276a33cab4877bdf56833f16fc0c316fb22..33647a95eb9a03f1ff5a47c760fb6d3fc5c2d483 100644 (file)
@@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
        for (i = 0; i < QP_HASHTAB_LEN; i++)
                INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
 
+       INIT_LIST_HEAD(&my_cq->sqp_err_list);
+       INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
        if (context) {
                struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
                struct ehca_create_cq_resp resp;
index a8a2ea585d2f93c038300652e4a329f55ede6f27..8f7f282ead65c01ab05259a5a851968ef05c9eee 100644 (file)
@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
 int ehca_calc_ipd(struct ehca_shca *shca, int port,
                  enum ib_rate path_rate, u32 *ipd);
 
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
 #ifdef CONFIG_PPC_64K_PAGES
 void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);
index b6bcee036734ff6ee0ff063196463a9b90cb7c40..4dbe2870e0145fac40e3e31e3d8a01298fa10f17 100644 (file)
@@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
        queue->is_small = (queue->page_size != 0);
 }
 
+/*
+ * Queue @qp on the error list of one of its CQs: the send CQ's
+ * sqp_err_list if @on_sq is non-zero, the receive CQ's rqp_err_list
+ * otherwise. A QP whose list node is already queued is left where it is,
+ * and low latency QPs are never queued.
+ *
+ * Needs to be called with the spinlock of that CQ held.
+ */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+       struct list_head *err_list, *err_node;
+
+       /* TODO: support low latency QPs */
+       if (qp->ext_type == EQPT_LLQP)
+               return;
+
+       err_list = on_sq ? &qp->send_cq->sqp_err_list :
+                          &qp->recv_cq->rqp_err_list;
+       err_node = on_sq ? &qp->sq_err_node : &qp->rq_err_node;
+
+       /* an empty (self-linked) node means the QP is not queued yet */
+       if (list_empty(err_node))
+               list_add_tail(err_node, err_list);
+}
+
+/*
+ * Take @node off the error list it is queued on, if any. The spinlock of
+ * @cq is acquired (irqsave) around the check and the removal.
+ */
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&cq->spinlock, irq_flags);
+       /* a self-linked node is not on any list; leave it untouched */
+       if (!list_empty(node))
+               list_del_init(node);
+       spin_unlock_irqrestore(&cq->spinlock, irq_flags);
+}
+
+/*
+ * Put @qmap back into its initial state: the tail points at slot 0 and
+ * every slot is marked as reported, so that no flush CQEs are generated
+ * for slots that never held a posted work request.
+ */
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+       unsigned int i; /* unsigned, like qmap->entries it is compared to */
+
+       qmap->tail = 0;
+       for (i = 0; i < qmap->entries; i++)
+               qmap->map[i].reported = 1;
+}
+
 /*
  * Create an ib_qp struct that is either a QP or an SRQ, depending on
  * the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_qp(
        struct ib_srq_init_attr *srq_init_attr,
        struct ib_udata *udata, int is_srq)
 {
-       struct ehca_qp *my_qp;
+       struct ehca_qp *my_qp, *my_srq = NULL;
        struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
        struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
                                              ib_device);
        struct ib_ucontext *context = NULL;
-       u32 nr_qes;
        u64 h_ret;
        int is_llqp = 0, has_srq = 0;
        int qp_type, max_send_sge, max_recv_sge, ret;
@@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_qp(
 
        /* handle SRQ base QPs */
        if (init_attr->srq) {
-               struct ehca_qp *my_srq =
-                       container_of(init_attr->srq, struct ehca_qp, ib_srq);
+               my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
 
                has_srq = 1;
                parms.ext_type = EQPT_SRQBASE;
@@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_qp(
                                 "and pages ret=%i", ret);
                        goto create_qp_exit2;
                }
-               nr_qes = my_qp->ipz_squeue.queue_length /
+
+               my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
                         my_qp->ipz_squeue.qe_size;
-               my_qp->sq_map = vmalloc(nr_qes *
+               my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
                                        sizeof(struct ehca_qmap_entry));
-               if (!my_qp->sq_map) {
+               if (!my_qp->sq_map.map) {
                        ehca_err(pd->device, "Couldn't allocate squeue "
                                 "map ret=%i", ret);
                        goto create_qp_exit3;
                }
+               INIT_LIST_HEAD(&my_qp->sq_err_node);
+               /* to avoid the generation of bogus flush CQEs */
+               reset_queue_map(&my_qp->sq_map);
        }
 
        if (HAS_RQ(my_qp)) {
@@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_qp(
                                 "and pages ret=%i", ret);
                        goto create_qp_exit4;
                }
+
+               my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+                       my_qp->ipz_rqueue.qe_size;
+               my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+                               sizeof(struct ehca_qmap_entry));
+               if (!my_qp->rq_map.map) {
+                       ehca_err(pd->device, "Couldn't allocate squeue "
+                                       "map ret=%i", ret);
+                       goto create_qp_exit5;
+               }
+               INIT_LIST_HEAD(&my_qp->rq_err_node);
+               /* to avoid the generation of bogus flush CQEs */
+               reset_queue_map(&my_qp->rq_map);
+       } else if (init_attr->srq) {
+               /* this is a base QP, use the queue map of the SRQ */
+               my_qp->rq_map = my_srq->rq_map;
+               INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+               my_qp->ipz_rqueue = my_srq->ipz_rqueue;
        }
 
        if (is_srq) {
@@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_qp(
                if (ret) {
                        ehca_err(pd->device,
                                 "Couldn't assign qp to send_cq ret=%i", ret);
-                       goto create_qp_exit6;
+                       goto create_qp_exit7;
                }
        }
 
@@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_qp(
                if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
                        ehca_err(pd->device, "Copy to udata failed");
                        ret = -EINVAL;
-                       goto create_qp_exit7;
+                       goto create_qp_exit8;
                }
        }
 
        return my_qp;
 
-create_qp_exit7:
+create_qp_exit8:
        ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
 
-create_qp_exit6:
+create_qp_exit7:
        kfree(my_qp->mod_qp_parm);
 
+create_qp_exit6:
+       if (HAS_RQ(my_qp))
+               vfree(my_qp->rq_map.map);
+
 create_qp_exit5:
        if (HAS_RQ(my_qp))
                ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit4:
        if (HAS_SQ(my_qp))
-               vfree(my_qp->sq_map);
+               vfree(my_qp->sq_map.map);
 
 create_qp_exit3:
        if (HAS_SQ(my_qp))
@@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
        return 0;
 }
 
+/*
+ * Compute how many CQEs are still to be polled for one queue of a QP.
+ *
+ * @wqe_p:     WQE address as returned by hipz_h_disable_and_get_wqe()
+ * @ipz_queue: the queue the WQE belongs to
+ * @qmap:      queue map of that queue; its left_to_poll field is updated
+ *
+ * left_to_poll becomes the distance (in queue entries, with wrap-around)
+ * from qmap->tail to the index of the WQE at @wqe_p.
+ * Callers take the spinlock of the corresponding CQ around this call.
+ *
+ * Returns 0 on success or -EFAULT if ipz_queue_abs_to_offset() cannot
+ * translate @wqe_p into an offset within @ipz_queue.
+ */
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+                         struct ehca_queue_map *qmap)
+{
+       void *wqe_v;
+       u64 q_ofs;
+       u32 wqe_idx;
+
+       /* convert real to abs address (1ULL: bit 63 needs a 64-bit type) */
+       wqe_p = wqe_p & (~(1ULL << 63));
+
+       wqe_v = abs_to_virt(wqe_p);
+
+       if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+               /* u64 is not 'unsigned long' everywhere: cast for printing */
+               ehca_gen_err("Invalid offset for calculating left cqes "
+                               "wqe_p=%#llx wqe_v=%p\n",
+                               (unsigned long long)wqe_p, wqe_v);
+               return -EFAULT;
+       }
+
+       wqe_idx = q_ofs / ipz_queue->qe_size;
+       if (wqe_idx < qmap->tail)
+               /* the WQE index has wrapped around relative to the tail */
+               qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
+       else
+               qmap->left_to_poll = wqe_idx - qmap->tail;
+
+       return 0;
+}
+
+/*
+ * Called when @my_qp is moved into the error state: set left_to_poll of
+ * both queue maps and, if neither queue has CQEs left to poll, put the QP
+ * on the error lists of its CQs right away.
+ *
+ * For SRQ base QPs left_to_poll is simply cleared; for all other QPs it is
+ * derived from the WQE pointers returned by hipz_h_disable_and_get_wqe().
+ *
+ * Returns 0 on success, the translated hcall return code if the hcall
+ * fails, or the error reported by calc_left_cqes().
+ */
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+       unsigned long irq_flags;
+       void *sq_wqe, *rq_wqe;
+       int qp_num = my_qp->ib_qp.qp_num;
+       u64 hrc;
+       int rc;
+
+       if (my_qp->ext_type == EQPT_SRQBASE) {
+               /* the hcall used below is not supported on base QPs */
+               spin_lock_irqsave(&my_qp->send_cq->spinlock, irq_flags);
+               my_qp->sq_map.left_to_poll = 0;
+               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, irq_flags);
+
+               spin_lock_irqsave(&my_qp->recv_cq->spinlock, irq_flags);
+               my_qp->rq_map.left_to_poll = 0;
+               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, irq_flags);
+       } else {
+               /* get send and receive wqe pointer */
+               hrc = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+                               my_qp->ipz_qp_handle, &my_qp->pf,
+                               &sq_wqe, &rq_wqe, 4);
+               if (hrc != H_SUCCESS) {
+                       ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+                                "failed ehca_qp=%p qp_num=%x h_ret=%li",
+                                my_qp, qp_num, hrc);
+                       return ehca2ib_return_code(hrc);
+               }
+
+               /*
+                * acquire lock to ensure that nobody is polling the cq which
+                * could mean that the qmap->tail pointer is in an
+                * inconsistent state.
+                */
+               spin_lock_irqsave(&my_qp->send_cq->spinlock, irq_flags);
+               rc = calc_left_cqes((u64)sq_wqe, &my_qp->ipz_squeue,
+                               &my_qp->sq_map);
+               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, irq_flags);
+               if (rc)
+                       return rc;
+
+               spin_lock_irqsave(&my_qp->recv_cq->spinlock, irq_flags);
+               rc = calc_left_cqes((u64)rq_wqe, &my_qp->ipz_rqueue,
+                               &my_qp->rq_map);
+               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, irq_flags);
+               if (rc)
+                       return rc;
+       }
+
+       /* this assures flush cqes being generated only for pending wqes */
+       if (my_qp->sq_map.left_to_poll || my_qp->rq_map.left_to_poll)
+               return 0;
+
+       spin_lock_irqsave(&my_qp->send_cq->spinlock, irq_flags);
+       ehca_add_to_err_list(my_qp, 1);
+       spin_unlock_irqrestore(&my_qp->send_cq->spinlock, irq_flags);
+
+       if (HAS_RQ(my_qp)) {
+               spin_lock_irqsave(&my_qp->recv_cq->spinlock, irq_flags);
+               ehca_add_to_err_list(my_qp, 0);
+               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, irq_flags);
+       }
+
+       return 0;
+}
+
 /*
  * internal_modify_qp with circumvention to handle aqp0 properly
  * smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                        goto modify_qp_exit2;
                }
        }
+       if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+               ret = check_for_left_cqes(my_qp, shca);
+               if (ret)
+                       goto modify_qp_exit2;
+       }
 
        if (statetrans == IB_QPST_ANY2RESET) {
                ipz_qeit_reset(&my_qp->ipz_rqueue);
                ipz_qeit_reset(&my_qp->ipz_squeue);
+
+               if (qp_cur_state == IB_QPS_ERR) {
+                       del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+                       if (HAS_RQ(my_qp))
+                               del_from_err_list(my_qp->recv_cq,
+                                                 &my_qp->rq_err_node);
+               }
+               reset_queue_map(&my_qp->sq_map);
+
+               if (HAS_RQ(my_qp))
+                       reset_queue_map(&my_qp->rq_map);
        }
 
        if (attr_mask & IB_QP_QKEY)
@@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
        idr_remove(&ehca_qp_idr, my_qp->token);
        write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
+       /*
+        * SRQs will never get into an error list and do not have a recv_cq,
+        * so we need to skip them here.
+        */
+       if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+               del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+       if (HAS_SQ(my_qp))
+               del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
        /* now wait until all pending events have completed */
        wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
 
@@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
        if (qp_type == IB_QPT_GSI) {
                struct ib_event event;
                ehca_info(dev, "device %s: port %x is inactive.",
-                         shca->ib_device.name, port_num);
+                               shca->ib_device.name, port_num);
                event.device = &shca->ib_device;
                event.event = IB_EVENT_PORT_ERR;
                event.element.port_num = port_num;
@@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
                ib_dispatch_event(&event);
        }
 
-       if (HAS_RQ(my_qp))
+       if (HAS_RQ(my_qp)) {
                ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+               vfree(my_qp->rq_map.map);
+       }
        if (HAS_SQ(my_qp)) {
                ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-               vfree(my_qp->sq_map);
+
+               vfree(my_qp->sq_map.map);
        }
        kmem_cache_free(qp_cache, my_qp);
        atomic_dec(&shca->num_qps);
index 4426d82fe7988c32a2da9b471d2e86a5ead90c68..64928079eafa93bc3a73b13aed73bd6ee57b80bb 100644 (file)
 /* in RC traffic, insert an empty RDMA READ every this many packets */
 #define ACK_CIRC_THRESHOLD 2000000
 
+/*
+ * Return @wr_id with the bits covered by QMAP_IDX_MASK replaced by the
+ * corresponding bits of @idx; all other bits of @wr_id are kept.
+ */
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+       u64 kept = wr_id & ~QMAP_IDX_MASK;
+       u64 swapped_in = idx & QMAP_IDX_MASK;
+
+       return kept | swapped_in;
+}
+
+/*
+ * Return the bits of @wr_id that are covered by QMAP_IDX_MASK.
+ *
+ * Callers use this both to save the application's bits of a wr_id in a
+ * queue map entry and to read the queue map index back out of a CQE's
+ * work_request_id.
+ */
+static u16 get_app_wr_id(u64 wr_id)
+{
+       u64 masked = wr_id & QMAP_IDX_MASK;
+
+       return masked;
+}
+
 static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
                                  struct ehca_wqe *wqe_p,
-                                 struct ib_recv_wr *recv_wr)
+                                 struct ib_recv_wr *recv_wr,
+                                 u32 rq_map_idx)
 {
        u8 cnt_ds;
        if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
        /* clear wqe header until sglist */
        memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
 
-       wqe_p->work_request_id = recv_wr->wr_id;
+       wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
        wqe_p->nr_of_data_seg = recv_wr->num_sge;
 
        for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
        u64 dma_length;
        struct ehca_av *my_av;
        u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+       struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
 
        if (unlikely((send_wr->num_sge < 0) ||
                     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
        /* clear wqe header until sglist */
        memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
 
-       wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK;
-       wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
+       wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
 
-       qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK;
-       qp->sq_map[sq_map_idx].reported = 0;
+       qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+       qmap_entry->reported = 0;
 
        switch (send_wr->opcode) {
        case IB_WR_SEND:
@@ -496,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp,
        struct ehca_wqe *wqe_p;
        int wqe_cnt = 0;
        int ret = 0;
+       u32 rq_map_idx;
        unsigned long flags;
+       struct ehca_qmap_entry *qmap_entry;
 
        if (unlikely(!HAS_RQ(my_qp))) {
                ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
@@ -524,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp,
                        }
                        goto post_recv_exit0;
                }
+               /*
+                * Get the index of the WQE in the recv queue. The same index
+                * is used for writing into the rq_map.
+                */
+               rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
                /* write a RECV WQE into the QUEUE */
-               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+                               rq_map_idx);
                /*
                 * if something failed,
                 * reset the free entry pointer to the start value
@@ -540,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp,
                        }
                        goto post_recv_exit0;
                }
+
+               qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+               qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+               qmap_entry->reported = 0;
+
                wqe_cnt++;
        } /* eof for cur_recv_wr */
 
@@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
 /* internal function to poll one entry of cq */
 static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
 {
-       int ret = 0;
+       int ret = 0, qmap_tail_idx;
        struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
        struct ehca_cqe *cqe;
        struct ehca_qp *my_qp;
+       struct ehca_qmap_entry *qmap_entry;
+       struct ehca_queue_map *qmap;
        int cqe_count = 0, is_error;
 
 repoll:
@@ -674,27 +706,52 @@ repoll:
                goto repoll;
        wc->qp = &my_qp->ib_qp;
 
-       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) {
-               struct ehca_qmap_entry *qmap_entry;
+       if (is_error) {
                /*
-                * We got a send completion and need to restore the original
-                * wr_id.
+                * set left_to_poll to 0 because in error state, we will not
+                * get any additional CQEs
                 */
-               qmap_entry = &my_qp->sq_map[cqe->work_request_id &
-                                           QMAP_IDX_MASK];
+               ehca_add_to_err_list(my_qp, 1);
+               my_qp->sq_map.left_to_poll = 0;
 
-               if (qmap_entry->reported) {
-                       ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-                                 my_qp->real_qp_num);
-                       /* found a double cqe, discard it and read next one */
-                       goto repoll;
-               }
-               wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK;
-               wc->wr_id |= qmap_entry->app_wr_id;
-               qmap_entry->reported = 1;
-       else
+               if (HAS_RQ(my_qp))
+                       ehca_add_to_err_list(my_qp, 0);
+               my_qp->rq_map.left_to_poll = 0;
+       }
+
+       qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+               /* We got a send completion. */
+               qmap = &my_qp->sq_map;
+       else
                /* We got a receive completion. */
-               wc->wr_id = cqe->work_request_id;
+               qmap = &my_qp->rq_map;
+
+       qmap_entry = &qmap->map[qmap_tail_idx];
+       if (qmap_entry->reported) {
+               ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+                               my_qp->real_qp_num);
+               /* found a double cqe, discard it and read next one */
+               goto repoll;
+       }
+
+       wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+       qmap_entry->reported = 1;
+
+       /* this is a proper completion, we need to advance the tail pointer */
+       if (++qmap->tail == qmap->entries)
+               qmap->tail = 0;
+
+       /* if left_to_poll is decremented to 0, add the QP to the error list */
+       if (qmap->left_to_poll > 0) {
+               qmap->left_to_poll--;
+               if ((my_qp->sq_map.left_to_poll == 0) &&
+                               (my_qp->rq_map.left_to_poll == 0)) {
+                       ehca_add_to_err_list(my_qp, 1);
+                       if (HAS_RQ(my_qp))
+                               ehca_add_to_err_list(my_qp, 0);
+               }
+       }
 
        /* eval ib_wc_opcode */
        wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -733,13 +790,88 @@ poll_cq_one_exit0:
        return ret;
 }
 
+/*
+ * Generate flush CQEs for the work requests of @my_qp that are still
+ * pending on its send queue (@on_sq != 0) or receive queue (@on_sq == 0).
+ *
+ * Starting at the tail of the queue map, every entry not yet reported gets
+ * a work completion with status IB_WC_WR_FLUSH_ERR written to @wc; the
+ * entry is then marked as reported and the tail advances (with wrap-around).
+ * The walk stops at the first already reported entry, after @num_entries
+ * completions, or when a WQE cannot be looked up or has an unknown optype.
+ *
+ * @ipz_queue is the queue matching @on_sq; it is used to locate the WQE
+ * that belongs to a queue map slot.
+ *
+ * Returns the number of work completions written to @wc.
+ * ehca_poll_cq() calls this with the spinlock of @cq held.
+ */
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+                              struct ib_wc *wc, int num_entries,
+                              struct ipz_queue *ipz_queue, int on_sq)
+{
+       int nr = 0;
+       struct ehca_wqe *wqe;
+       u64 offset;
+       struct ehca_queue_map *qmap;
+       struct ehca_qmap_entry *qmap_entry;
+
+       if (on_sq)
+               qmap = &my_qp->sq_map;
+       else
+               qmap = &my_qp->rq_map;
+
+       qmap_entry = &qmap->map[qmap->tail];
+
+       while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+               /* generate flush CQE */
+               memset(wc, 0, sizeof(*wc));
+
+               offset = qmap->tail * ipz_queue->qe_size;
+               wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+               if (!wqe) {
+                       /* u64 is not 'unsigned long' everywhere: cast it */
+                       ehca_err(cq->device, "Invalid wqe offset=%#llx on "
+                                "qp_num=%#x", (unsigned long long)offset,
+                                my_qp->real_qp_num);
+                       return nr;
+               }
+
+               /* restore the bits of the wr_id that were saved in the map */
+               wc->wr_id = replace_wr_id(wqe->work_request_id,
+                                         qmap_entry->app_wr_id);
+
+               if (on_sq) {
+                       switch (wqe->optype) {
+                       case WQE_OPTYPE_SEND:
+                               wc->opcode = IB_WC_SEND;
+                               break;
+                       case WQE_OPTYPE_RDMAWRITE:
+                               wc->opcode = IB_WC_RDMA_WRITE;
+                               break;
+                       case WQE_OPTYPE_RDMAREAD:
+                               wc->opcode = IB_WC_RDMA_READ;
+                               break;
+                       default:
+                               ehca_err(cq->device, "Invalid optype=%x",
+                                               wqe->optype);
+                               return nr;
+                       }
+               } else
+                       wc->opcode = IB_WC_RECV;
+
+               if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+                       wc->ex.imm_data = wqe->immediate_data;
+                       wc->wc_flags |= IB_WC_WITH_IMM;
+               }
+
+               wc->status = IB_WC_WR_FLUSH_ERR;
+
+               wc->qp = &my_qp->ib_qp;
+
+               /* mark as reported and advance tail pointer */
+               qmap_entry->reported = 1;
+               if (++qmap->tail == qmap->entries)
+                       qmap->tail = 0;
+               qmap_entry = &qmap->map[qmap->tail];
+
+               wc++;
+               nr++;
+       }
+
+       return nr;
+}
+
 int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
 {
        struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
        int nr;
+       struct ehca_qp *err_qp;
        struct ib_wc *current_wc = wc;
        int ret = 0;
        unsigned long flags;
+       int entries_left = num_entries;
 
        if (num_entries < 1) {
                ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
        }
 
        spin_lock_irqsave(&my_cq->spinlock, flags);
-       for (nr = 0; nr < num_entries; nr++) {
+
+       /* generate flush cqes for send queues */
+       list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+                               &err_qp->ipz_squeue, 1);
+               entries_left -= nr;
+               current_wc += nr;
+
+               if (entries_left == 0)
+                       break;
+       }
+
+       /* generate flush cqes for receive queues */
+       list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+                               &err_qp->ipz_rqueue, 0);
+               entries_left -= nr;
+               current_wc += nr;
+
+               if (entries_left == 0)
+                       break;
+       }
+
+       for (nr = 0; nr < entries_left; nr++) {
                ret = ehca_poll_cq_one(cq, current_wc);
                if (ret)
                        break;
                current_wc++;
        } /* eof for nr */
+       entries_left -= nr;
+
        spin_unlock_irqrestore(&my_cq->spinlock, flags);
        if (ret == -EAGAIN  || !ret)
-               ret = nr;
+               ret = num_entries - entries_left;
 
 poll_cq_exit0:
        return ret;
index 97710522624dfe16218e36b09197177a456dba87..7b93cda1a4bdcf9681c652fa525058078b48f570 100644 (file)
@@ -675,7 +675,8 @@ static void send_rc_ack(struct ipath_qp *qp)
        hdr.lrh[0] = cpu_to_be16(lrh0);
        hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
        hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-       hdr.lrh[3] = cpu_to_be16(dd->ipath_lid);
+       hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
+                                qp->remote_ah_attr.src_path_bits);
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
        ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
index af051f7576638197fb9633879f08e36d78ae3620..fc0f6d9e6030f04f5412975f4d2102c67267cbd7 100644 (file)
@@ -618,7 +618,8 @@ void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
        qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
        qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
        qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
+                                      qp->remote_ah_attr.src_path_bits);
        bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
        bth0 |= extra_bytes << 20;
        ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
index b766e40e9ebf601624e3e33eb072e6a9f9e327b6..eabc4247860b429db0d6603bba4e16c943d0af7c 100644 (file)
@@ -340,9 +340,16 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
        int acc;
        int ret;
        unsigned long flags;
+       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
+       if (qp->ibqp.qp_type != IB_QPT_SMI &&
+           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+               ret = -ENETDOWN;
+               goto bail;
+       }
+
        /* Check that state is OK to post send. */
        if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
                goto bail_inval;
index 9559248f265b80090aeecd8c693059e9e43bdad0..baa01deb2436eb5e453a7784c867a3c7105f97d7 100644 (file)
@@ -1058,6 +1058,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        else
                sqd_event = 0;
 
+       if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+               context->rlkey |= (1 << 4);
+
        /*
         * Before passing a kernel QP to the HW, make sure that the
         * ownership bits of the send queue are set and the SQ
index cc440f90000becfd62928c8aee85e07d4ef30653..65ad359fdf164e506ec9a5726ba12c5a5ee882f2 100644 (file)
@@ -149,18 +149,10 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
                ((pci_resource_len(dev->pdev, 0) - 1) &
                 dev->catas_err.addr);
 
-       if (!request_mem_region(addr, dev->catas_err.size * 4,
-                               DRV_NAME)) {
-               mthca_warn(dev, "couldn't request catastrophic error region "
-                          "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
-               return;
-       }
-
        dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
        if (!dev->catas_err.map) {
                mthca_warn(dev, "couldn't map catastrophic error region "
                           "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
-               release_mem_region(addr, dev->catas_err.size * 4);
                return;
        }
 
@@ -175,13 +167,8 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
 {
        del_timer_sync(&dev->catas_err.timer);
 
-       if (dev->catas_err.map) {
+       if (dev->catas_err.map)
                iounmap(dev->catas_err.map);
-               release_mem_region(pci_resource_start(dev->pdev, 0) +
-                                  ((pci_resource_len(dev->pdev, 0) - 1) &
-                                   dev->catas_err.addr),
-                                  dev->catas_err.size * 4);
-       }
 
        spin_lock_irq(&catas_lock);
        list_del(&dev->catas_err.list);
index cc6858f0b65bb1d6b6d168cb2ad8aa73315fb817..28f0e0c40d7dfa87c8668c6b8264e8a17859b240 100644 (file)
@@ -652,27 +652,13 @@ static int mthca_map_reg(struct mthca_dev *dev,
 {
        unsigned long base = pci_resource_start(dev->pdev, 0);
 
-       if (!request_mem_region(base + offset, size, DRV_NAME))
-               return -EBUSY;
-
        *map = ioremap(base + offset, size);
-       if (!*map) {
-               release_mem_region(base + offset, size);
+       if (!*map)
                return -ENOMEM;
-       }
 
        return 0;
 }
 
-static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
-                           unsigned long size, void __iomem *map)
-{
-       unsigned long base = pci_resource_start(dev->pdev, 0);
-
-       release_mem_region(base + offset, size);
-       iounmap(map);
-}
-
 static int mthca_map_eq_regs(struct mthca_dev *dev)
 {
        if (mthca_is_memfree(dev)) {
@@ -699,9 +685,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
                                        dev->fw.arbel.eq_arm_base) + 4, 4,
                                  &dev->eq_regs.arbel.eq_arm)) {
                        mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
-                       mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
-                                       dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
-                                       dev->clr_base);
+                       iounmap(dev->clr_base);
                        return -ENOMEM;
                }
 
@@ -710,12 +694,8 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
                                  MTHCA_EQ_SET_CI_SIZE,
                                  &dev->eq_regs.arbel.eq_set_ci_base)) {
                        mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
-                       mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
-                                             dev->fw.arbel.eq_arm_base) + 4, 4,
-                                       dev->eq_regs.arbel.eq_arm);
-                       mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
-                                       dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
-                                       dev->clr_base);
+                       iounmap(dev->eq_regs.arbel.eq_arm);
+                       iounmap(dev->clr_base);
                        return -ENOMEM;
                }
        } else {
@@ -731,8 +711,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
                                  &dev->eq_regs.tavor.ecr_base)) {
                        mthca_err(dev, "Couldn't map ecr register, "
                                  "aborting.\n");
-                       mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
-                                       dev->clr_base);
+                       iounmap(dev->clr_base);
                        return -ENOMEM;
                }
        }
@@ -744,22 +723,12 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
 static void mthca_unmap_eq_regs(struct mthca_dev *dev)
 {
        if (mthca_is_memfree(dev)) {
-               mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
-                               dev->fw.arbel.eq_set_ci_base,
-                               MTHCA_EQ_SET_CI_SIZE,
-                               dev->eq_regs.arbel.eq_set_ci_base);
-               mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
-                                     dev->fw.arbel.eq_arm_base) + 4, 4,
-                               dev->eq_regs.arbel.eq_arm);
-               mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
-                               dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
-                               dev->clr_base);
+               iounmap(dev->eq_regs.arbel.eq_set_ci_base);
+               iounmap(dev->eq_regs.arbel.eq_arm);
+               iounmap(dev->clr_base);
        } else {
-               mthca_unmap_reg(dev, MTHCA_ECR_BASE,
-                               MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
-                               dev->eq_regs.tavor.ecr_base);
-               mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
-                               dev->clr_base);
+               iounmap(dev->eq_regs.tavor.ecr_base);
+               iounmap(dev->clr_base);
        }
 }
 
index fb9f91b60f30e401cdb24aad39410db6aae0b0f3..52f60f4eea0070a2f763a2bcb8d3a5373f919963 100644 (file)
@@ -921,58 +921,6 @@ err_uar_table_free:
        return err;
 }
 
-static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden)
-{
-       int err;
-
-       /*
-        * We can't just use pci_request_regions() because the MSI-X
-        * table is right in the middle of the first BAR.  If we did
-        * pci_request_region and grab all of the first BAR, then
-        * setting up MSI-X would fail, since the PCI core wants to do
-        * request_mem_region on the MSI-X vector table.
-        *
-        * So just request what we need right now, and request any
-        * other regions we need when setting up EQs.
-        */
-       if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
-                               MTHCA_HCR_SIZE, DRV_NAME))
-               return -EBUSY;
-
-       err = pci_request_region(pdev, 2, DRV_NAME);
-       if (err)
-               goto err_bar2_failed;
-
-       if (!ddr_hidden) {
-               err = pci_request_region(pdev, 4, DRV_NAME);
-               if (err)
-                       goto err_bar4_failed;
-       }
-
-       return 0;
-
-err_bar4_failed:
-       pci_release_region(pdev, 2);
-
-err_bar2_failed:
-       release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
-                          MTHCA_HCR_SIZE);
-
-       return err;
-}
-
-static void mthca_release_regions(struct pci_dev *pdev,
-                                 int ddr_hidden)
-{
-       if (!ddr_hidden)
-               pci_release_region(pdev, 4);
-
-       pci_release_region(pdev, 2);
-
-       release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
-                          MTHCA_HCR_SIZE);
-}
-
 static int mthca_enable_msi_x(struct mthca_dev *mdev)
 {
        struct msix_entry entries[3];
@@ -1059,7 +1007,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
        if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
                ddr_hidden = 1;
 
-       err = mthca_request_regions(pdev, ddr_hidden);
+       err = pci_request_regions(pdev, DRV_NAME);
        if (err) {
                dev_err(&pdev->dev, "Cannot obtain PCI resources, "
                        "aborting.\n");
@@ -1196,7 +1144,7 @@ err_free_dev:
        ib_dealloc_device(&mdev->ib_dev);
 
 err_free_res:
-       mthca_release_regions(pdev, ddr_hidden);
+       pci_release_regions(pdev);
 
 err_disable_pdev:
        pci_disable_device(pdev);
@@ -1240,8 +1188,7 @@ static void __mthca_remove_one(struct pci_dev *pdev)
                        pci_disable_msix(pdev);
 
                ib_dealloc_device(&mdev->ib_dev);
-               mthca_release_regions(pdev, mdev->mthca_flags &
-                                     MTHCA_FLAG_DDR_HIDDEN);
+               pci_release_regions(pdev);
                pci_disable_device(pdev);
                pci_set_drvdata(pdev, NULL);
        }
index b0cab64e5e3db0b45e6837a98a9bcfef6fe8e4c4..a2b04d62b1a46d56efd056ad223ccdbc4e77abad 100644 (file)
@@ -70,27 +70,31 @@ int interrupt_mod_interval = 0;
 
 /* Interoperability */
 int mpa_version = 1;
-module_param(mpa_version, int, 0);
+module_param(mpa_version, int, 0644);
 MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)");
 
 /* Interoperability */
 int disable_mpa_crc = 0;
-module_param(disable_mpa_crc, int, 0);
+module_param(disable_mpa_crc, int, 0644);
 MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
 
 unsigned int send_first = 0;
-module_param(send_first, int, 0);
+module_param(send_first, int, 0644);
 MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
 
 
 unsigned int nes_drv_opt = 0;
-module_param(nes_drv_opt, int, 0);
+module_param(nes_drv_opt, int, 0644);
 MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
 
 unsigned int nes_debug_level = 0;
 module_param_named(debug_level, nes_debug_level, uint, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug output level");
 
+unsigned int wqm_quanta = 0x10000; /* default copied to each adapter at probe */
+module_param(wqm_quanta, uint, 0644);
+MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
+
 LIST_HEAD(nes_adapter_list);
 static LIST_HEAD(nes_dev_list);
 
@@ -557,12 +561,32 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
                goto bail5;
        }
        nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+       nesdev->nesadapter->wqm_quanta = wqm_quanta;
 
        /* nesdev->base_doorbell_index =
                        nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */
        nesdev->base_doorbell_index = 1;
        nesdev->doorbell_start = nesdev->nesadapter->doorbell_start;
-       nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count;
+       if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+               switch (PCI_FUNC(nesdev->pcidev->devfn) %
+                       nesdev->nesadapter->port_count) {
+               case 1:
+                       nesdev->mac_index = 2;
+                       break;
+               case 2:
+                       nesdev->mac_index = 1;
+                       break;
+               case 3:
+                       nesdev->mac_index = 3;
+                       break;
+               case 0:
+               default:
+                       nesdev->mac_index = 0;
+               }
+       } else {
+               nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) %
+                                               nesdev->nesadapter->port_count;
+       }
 
        tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
 
@@ -581,7 +605,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
        nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
                        (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
        if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
-               nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24));
+               nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24));
        }
 
        /* TODO: This really should be the first driver to load, not function 0 */
@@ -772,14 +796,14 @@ static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
 
        list_for_each_entry(nesdev, &nes_dev_list, list) {
                if (i == ee_flsh_adapter) {
-                       devfn      = nesdev->nesadapter->devfn;
-                       bus_number = nesdev->nesadapter->bus_number;
+                       devfn = nesdev->pcidev->devfn;
+                       bus_number = nesdev->pcidev->bus->number;
                        break;
                }
                i++;
        }
 
-       return snprintf(buf, PAGE_SIZE, "%x:%x", bus_number, devfn);
+       return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn);
 }
 
 static ssize_t nes_store_adapter(struct device_driver *ddp,
@@ -1050,6 +1074,55 @@ static ssize_t nes_store_idx_data(struct device_driver *ddp,
        return strnlen(buf, count);
 }
 
+/**
+ * nes_show_wqm_quanta - print wqm_quanta of the adapter at index
+ * ee_flsh_adapter in nes_dev_list (0xDEAD if there is no such device)
+ */
+static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf)
+{
+       u32 wqm_quanta_value = 0xdead;
+       u32 i = 0;
+       struct nes_device *nesdev;
+
+       list_for_each_entry(nesdev, &nes_dev_list, list) {
+               if (i == ee_flsh_adapter) {
+                       wqm_quanta_value = nesdev->nesadapter->wqm_quanta;
+                       break;
+               }
+               i++;
+       }
+
+       return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value);
+}
+
+/**
+ * nes_store_wqm_quanta - set WQM quanta; returns -EINVAL on unparsable input
+ */
+static ssize_t nes_store_wqm_quanta(struct device_driver *ddp,
+                                       const char *buf, size_t count)
+{
+       unsigned long wqm_quanta_value;
+       u32 wqm_config1;
+       u32 i = 0;
+       struct nes_device *nesdev;
+
+       if (strict_strtoul(buf, 0, &wqm_quanta_value))
+               return -EINVAL;
+       list_for_each_entry(nesdev, &nes_dev_list, list) {
+               if (i == ee_flsh_adapter) {
+                       nesdev->nesadapter->wqm_quanta = wqm_quanta_value;
+                       wqm_config1 = nes_read_indexed(nesdev,
+                                               NES_IDX_WQM_CONFIG1);
+                       nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1,
+                                       ((wqm_quanta_value << 1) |
+                                       (wqm_config1 & 0x00000001)));
+                       break;
+               }
+               i++;
+       }
+       return strnlen(buf, count);
+}
+
 static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR,
                   nes_show_adapter, nes_store_adapter);
 static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR,
@@ -1068,6 +1141,8 @@ static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR,
                   nes_show_idx_addr, nes_store_idx_addr);
 static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR,
                   nes_show_idx_data, nes_store_idx_data);
+static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR,
+                  nes_show_wqm_quanta, nes_store_wqm_quanta);
 
 static int nes_create_driver_sysfs(struct pci_driver *drv)
 {
@@ -1081,6 +1156,7 @@ static int nes_create_driver_sysfs(struct pci_driver *drv)
        error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data);
        error |= driver_create_file(&drv->driver, &driver_attr_idx_addr);
        error |= driver_create_file(&drv->driver, &driver_attr_idx_data);
+       error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta);
        return error;
 }
 
@@ -1095,6 +1171,7 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv)
        driver_remove_file(&drv->driver, &driver_attr_nonidx_data);
        driver_remove_file(&drv->driver, &driver_attr_idx_addr);
        driver_remove_file(&drv->driver, &driver_attr_idx_data);
+       driver_remove_file(&drv->driver, &driver_attr_wqm_quanta);
 }
 
 /**
index 8eb7ae96974d0846fe52f28a04f579ce12edd28b..1595dc7bba9dcd6f58d1ff9134f6002ebf9b31b7 100644 (file)
@@ -169,7 +169,7 @@ extern int disable_mpa_crc;
 extern unsigned int send_first;
 extern unsigned int nes_drv_opt;
 extern unsigned int nes_debug_level;
-
+extern unsigned int wqm_quanta;
 extern struct list_head nes_adapter_list;
 
 extern atomic_t cm_connects;
index 499d3cf83e1f28e2ee263ed3e92e2fdb18d1f59b..2caf9da81ad50d6db5db8bce1c24cb21c2bf46d5 100644 (file)
@@ -52,7 +52,7 @@
 #include <linux/random.h>
 #include <linux/list.h>
 #include <linux/threads.h>
-
+#include <net/arp.h>
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
@@ -1019,23 +1019,43 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
 
 
 /**
- * nes_addr_send_arp
+ * nes_addr_resolve_neigh
  */
-static void nes_addr_send_arp(u32 dst_ip)
+static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
 {
        struct rtable *rt;
        struct flowi fl;
+       struct neighbour *neigh;
+       int rc = -1;
+       DECLARE_MAC_BUF(mac);
 
        memset(&fl, 0, sizeof fl);
        fl.nl_u.ip4_u.daddr = htonl(dst_ip);
        if (ip_route_output_key(&init_net, &rt, &fl)) {
                printk("%s: ip_route_output_key failed for 0x%08X\n",
                                __func__, dst_ip);
-               return;
+               return rc;
+       }
+
+       neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, nesvnic->netdev);
+       if (neigh) {
+               if (neigh->nud_state & NUD_VALID) {
+                       nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
+                                 " is %s, Gateway is 0x%08X \n", dst_ip,
+                                 print_mac(mac, neigh->ha), ntohl(rt->rt_gateway));
+                       nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
+                                            dst_ip, NES_ARP_ADD);
+                       rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
+                                          NES_ARP_RESOLVE);
+               }
+               neigh_release(neigh);
        }
 
-       neigh_event_send(rt->u.dst.neighbour, NULL);
+       if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID)))
+               neigh_event_send(rt->u.dst.neighbour, NULL);
+
        ip_rt_put(rt);
+       return rc;
 }
 
 
@@ -1108,9 +1128,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        /* get the mac addr for the remote node */
        arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
        if (arpindex < 0) {
-               kfree(cm_node);
-               nes_addr_send_arp(cm_info->rem_addr);
-               return NULL;
+               arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
+               if (arpindex < 0) {
+                       kfree(cm_node);
+                       return NULL;
+               }
        }
 
        /* copy the mac addr to node context */
@@ -1826,7 +1848,7 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
 /**
  * mini_cm_connect - make a connection node with params
  */
-struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
        struct nes_vnic *nesvnic, u16 private_data_len,
        void *private_data, struct nes_cm_info *cm_info)
 {
@@ -2007,7 +2029,6 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
                ret = rem_ref_cm_node(cm_core, cm_node);
                break;
        }
-       cm_node->cm_id = NULL;
        return ret;
 }
 
index 1513d4066f1b965b2dc55a878f7cb28353c3e91f..7c49cc882d75c580c0c31e20b987cf177d04fc9c 100644 (file)
@@ -55,18 +55,19 @@ u32 int_mod_cq_depth_24;
 u32 int_mod_cq_depth_16;
 u32 int_mod_cq_depth_4;
 u32 int_mod_cq_depth_1;
-
+static const u8 nes_max_critical_error_count = 100;
 #include "nes_cm.h"
 
 static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq);
 static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count);
 static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
-                          u8 OneG_Mode);
+                               struct nes_adapter *nesadapter, u8  OneG_Mode);
 static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
 static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq);
 static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq);
 static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                                   struct nes_hw_aeqe *aeqe);
+static void process_critical_error(struct nes_device *nesdev);
 static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
 static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
 
@@ -222,11 +223,10 @@ static void nes_nic_tune_timer(struct nes_device *nesdev)
        }
 
        /* boundary checking */
-       if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH)
-               shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH;
-       else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) {
-               shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW;
-       }
+       if (shared_timer->timer_in_use > shared_timer->threshold_high)
+               shared_timer->timer_in_use = shared_timer->threshold_high;
+       else if (shared_timer->timer_in_use < shared_timer->threshold_low)
+               shared_timer->timer_in_use = shared_timer->threshold_low;
 
        nesdev->currcq_count = 0;
 
@@ -292,9 +292,6 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
 
        if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0)
                return NULL;
-       if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode))
-               return NULL;
-       nes_init_csr_ne020(nesdev, hw_rev, port_count);
 
        max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
        nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp);
@@ -353,6 +350,22 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
        nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
                        nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
 
+       if (nes_read_eeprom_values(nesdev, nesadapter)) {
+               printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
+               kfree(nesadapter);
+               return NULL;
+       }
+
+       if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter,
+                                                       OneG_Mode)) {
+               kfree(nesadapter);
+               return NULL;
+       }
+       nes_init_csr_ne020(nesdev, hw_rev, port_count);
+
+       memset(nesadapter->pft_mcast_map, 255,
+              sizeof nesadapter->pft_mcast_map);
+
        /* populate the new nesadapter */
        nesadapter->devfn = nesdev->pcidev->devfn;
        nesadapter->bus_number = nesdev->pcidev->bus->number;
@@ -468,20 +481,25 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
 
        /* setup port configuration */
        if (nesadapter->port_count == 1) {
-               u32temp = 0x00000000;
+               nesadapter->log_port = 0x00000000;
                if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT)
                        nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
                else
                        nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
        } else {
-               if (nesadapter->port_count == 2)
-                       u32temp = 0x00000044;
-               else
-                       u32temp = 0x000000e4;
+               if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+                       nesadapter->log_port = 0x000000D8;
+               } else {
+                       if (nesadapter->port_count == 2)
+                               nesadapter->log_port = 0x00000044;
+                       else
+                               nesadapter->log_port = 0x000000e4;
+               }
                nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
        }
 
-       nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp);
+       nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT,
+                                               nesadapter->log_port);
        nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n",
                        nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
 
@@ -706,23 +724,43 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
  * nes_init_serdes
  */
 static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
-                          u8 OneG_Mode)
+                               struct nes_adapter *nesadapter, u8  OneG_Mode)
 {
        int i;
        u32 u32temp;
+       u32 serdes_common_control;
 
        if (hw_rev != NE020_REV) {
                /* init serdes 0 */
 
                nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
-               if (!OneG_Mode)
+               if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+                       serdes_common_control = nes_read_indexed(nesdev,
+                                       NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+                       serdes_common_control |= 0x000000100;
+                       nes_write_indexed(nesdev,
+                                       NES_IDX_ETH_SERDES_COMMON_CONTROL0,
+                                       serdes_common_control);
+               } else if (!OneG_Mode) {
                        nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
-               if (port_count > 1) {
+               }
+               if (((port_count > 1) &&
+                       (nesadapter->phy_type[0] != NES_PHY_TYPE_PUMA_1G)) ||
+                       ((port_count > 2) &&
+                       (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G))) {
                        /* init serdes 1 */
                        nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
-                       if (!OneG_Mode)
+                       if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+                               serdes_common_control = nes_read_indexed(nesdev,
+                                       NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+                               serdes_common_control |= 0x000000100;
+                               nes_write_indexed(nesdev,
+                                       NES_IDX_ETH_SERDES_COMMON_CONTROL1,
+                                       serdes_common_control);
+                       } else if (!OneG_Mode) {
                                nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
                        }
+               }
        } else {
                /* init serdes 0 */
                nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
@@ -826,7 +864,8 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou
 
        nes_write_indexed(nesdev, 0x00005000, 0x00018000);
        /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */
-       nes_write_indexed(nesdev, 0x00005004, 0x00020001);
+       nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) |
+                                                        0x00000001);
        nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);
        nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);
        nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);
@@ -1226,6 +1265,7 @@ int nes_init_phy(struct nes_device *nesdev)
                if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
                        printk(PFX "%s: Programming mdc config for 1G\n", __func__);
                        tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+                       tx_config &= 0xFFFFFFE3;
                        tx_config |= 0x04;
                        nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
                }
@@ -1291,7 +1331,8 @@ int nes_init_phy(struct nes_device *nesdev)
                    (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
                        /* setup 10G MDIO operation */
                        tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
-                       tx_config |= 0x14;
+                       tx_config &= 0xFFFFFFE3;
+                       tx_config |= 0x15;
                        nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
                }
                if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
@@ -1315,7 +1356,7 @@ int nes_init_phy(struct nes_device *nesdev)
                                nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
                                nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
                                nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
-                               nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000);
+                               nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0001);
                                nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
 
                                /*
@@ -1759,9 +1800,14 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
  */
 void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
 {
+       u64 u64temp;
+       dma_addr_t bus_address;
        struct nes_device *nesdev = nesvnic->nesdev;
        struct nes_hw_cqp_wqe *cqp_wqe;
+       struct nes_hw_nic_sq_wqe *nic_sqe;
        struct nes_hw_nic_rq_wqe *nic_rqe;
+       __le16 *wqe_fragment_length;
+       u16  wqe_fragment_index;
        u64 wqe_frag;
        u32 cqp_head;
        unsigned long flags;
@@ -1770,14 +1816,69 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
        /* Free remaining NIC receive buffers */
        while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
                nic_rqe   = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
-               wqe_frag  = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
-               wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
+               wqe_frag  = (u64)le32_to_cpu(
+                       nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+               wqe_frag |= ((u64)le32_to_cpu(
+                       nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32;
                pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
                                nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
                dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
                nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
        }
 
+       /* Free remaining NIC transmit buffers */
+       while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) {
+               nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail];
+               wqe_fragment_index = 1;
+               wqe_fragment_length = (__le16 *)
+                       &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+               /* bump past the vlan tag */
+               wqe_fragment_length++;
+               if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
+                       u64temp = (u64)le32_to_cpu(
+                               nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
+                               wqe_fragment_index*2]);
+                       u64temp += ((u64)le32_to_cpu(
+                               nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
+                               + wqe_fragment_index*2]))<<32;
+                       bus_address = (dma_addr_t)u64temp;
+                       if (test_and_clear_bit(nesvnic->nic.sq_tail,
+                                       nesvnic->nic.first_frag_overflow)) {
+                               pci_unmap_single(nesdev->pcidev,
+                                               bus_address,
+                                               le16_to_cpu(wqe_fragment_length[
+                                                       wqe_fragment_index++]),
+                                               PCI_DMA_TODEVICE);
+                       }
+                       for (; wqe_fragment_index < 5; wqe_fragment_index++) {
+                               if (wqe_fragment_length[wqe_fragment_index]) {
+                                       u64temp = le32_to_cpu(
+                                               nic_sqe->wqe_words[
+                                               NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
+                                               wqe_fragment_index*2]);
+                                       u64temp += ((u64)le32_to_cpu(
+                                               nic_sqe->wqe_words[
+                                               NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+
+                                               wqe_fragment_index*2]))<<32;
+                                       bus_address = (dma_addr_t)u64temp;
+                                       pci_unmap_page(nesdev->pcidev,
+                                                       bus_address,
+                                                       le16_to_cpu(
+                                                       wqe_fragment_length[
+                                                       wqe_fragment_index]),
+                                                       PCI_DMA_TODEVICE);
+                               } else
+                                       break;
+                       }
+               }
+               if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail])
+                       dev_kfree_skb(
+                               nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]);
+
+               nesvnic->nic.sq_tail = (++nesvnic->nic.sq_tail)
+                                       & (nesvnic->nic.sq_size - 1);
+       }
+
        spin_lock_irqsave(&nesdev->cqp.lock, flags);
 
        /* Destroy NIC QP */
@@ -1894,7 +1995,42 @@ int nes_napi_isr(struct nes_device *nesdev)
        }
 }
 
-
+/**
+ * process_critical_error
+ *
+ * Handle a critical-error interface interrupt for @nesdev: log the low 16
+ * bits of NES_IDX_DEBUG_ERROR_CONTROL_STATUS, write them back ORed with
+ * 0x01010000, and write the debug error mask registers once errors repeat
+ * too often, both globally (crit_err_count) and per reporting module (bits
+ * 12:8 of the status word, counted in nesadapter->crit_error_count[]).
+ */
+static void process_critical_error(struct nes_device *nesdev)
+{
+       u32 debug_error;
+       u32 nes_idx_debug_error_masks0 = 0;
+       u16 error_module = 0;
+
+       debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
+       printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
+                       (u16)debug_error);
+       nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
+                       0x01010000 | (debug_error & 0x0000ffff));
+       if (crit_err_count++ > 10)
+               nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
+       error_module = (u16) (debug_error & 0x1F00) >> 8;
+       /*
+        * The counters are indexed by error_module-1, so a module field of
+        * zero has no slot; skip it instead of writing before the array.
+        */
+       if (error_module == 0)
+               return;
+       if (++nesdev->nesadapter->crit_error_count[error_module-1] >=
+                       nes_max_critical_error_count) {
+               printk(KERN_ERR PFX "Masking off critical error for module "
+                       "0x%02X\n", (u16)error_module);
+               nes_idx_debug_error_masks0 = nes_read_indexed(nesdev,
+                       NES_IDX_DEBUG_ERROR_MASKS0);
+               nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0,
+                       nes_idx_debug_error_masks0 | (1 << error_module));
+       }
+}
 /**
  * nes_dpc
  */
@@ -1909,7 +2033,6 @@ void nes_dpc(unsigned long param)
        u32 timer_stat;
        u32 temp_int_stat;
        u32 intf_int_stat;
-       u32 debug_error;
        u32 processed_intf_int = 0;
        u16 processed_timer_int = 0;
        u16 completion_ints = 0;
@@ -1987,14 +2110,7 @@ void nes_dpc(unsigned long param)
                                intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
                                intf_int_stat &= nesdev->intf_int_req;
                                if (NES_INTF_INT_CRITERR & intf_int_stat) {
-                                       debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
-                                       printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
-                                                       (u16)debug_error);
-                                       nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
-                                                       0x01010000 | (debug_error & 0x0000ffff));
-                                       /* BUG(); */
-                                       if (crit_err_count++ > 10)
-                                               nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
+                                       process_critical_error(nesdev);
                                }
                                if (NES_INTF_INT_PCIERR & intf_int_stat) {
                                        printk(KERN_ERR PFX "PCI Error reported by device!!!\n");
@@ -2258,7 +2374,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
                        spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
                }
                /* read the PHY interrupt status register */
-               if (nesadapter->OneG_Mode) {
+               if ((nesadapter->OneG_Mode) &&
+               (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
                        do {
                                nes_read_1G_phy_reg(nesdev, 0x1a,
                                                nesadapter->phy_index[mac_index], &phy_data);
@@ -3077,6 +3194,22 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                        nes_cm_disconn(nesqp);
                        break;
                        /* TODO: additional AEs need to be here */
+               case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+                       nesqp = *((struct nes_qp **)&context);
+                       spin_lock_irqsave(&nesqp->lock, flags);
+                       nesqp->hw_iwarp_state = iwarp_state;
+                       nesqp->hw_tcp_state = tcp_state;
+                       nesqp->last_aeq = async_event_id;
+                       spin_unlock_irqrestore(&nesqp->lock, flags);
+                       if (nesqp->ibqp.event_handler) {
+                               ibevent.device = nesqp->ibqp.device;
+                               ibevent.element.qp = &nesqp->ibqp;
+                               ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+                               nesqp->ibqp.event_handler(&ibevent,
+                                               nesqp->ibqp.qp_context);
+                       }
+                       nes_cm_disconn(nesqp);
+                       break;
                default:
                        nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
                                        async_event_id);
index 7b81e0ae00760eedf15e58ce12deffbb9daf7490..610b9d859597d2a1ecc9dca6ed54192025b6565c 100644 (file)
@@ -156,6 +156,7 @@ enum indexed_regs {
        NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004,
        NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008,
        NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c,
+       NES_IDX_WQM_CONFIG1 = 0x5004,
        NES_IDX_CM_CONFIG = 0x5100,
        NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000,
        NES_IDX_NIC_PHYPORT_TO_USW = 0x6008,
@@ -967,6 +968,7 @@ struct nes_arp_entry {
 #define DEFAULT_JUMBO_NES_QL_TARGET 40
 #define DEFAULT_JUMBO_NES_QL_HIGH   128
 #define NES_NIC_CQ_DOWNWARD_TREND   16
+#define NES_PFT_SIZE               48
 
 struct nes_hw_tune_timer {
     /* u16 cq_count; */
@@ -1079,6 +1081,7 @@ struct nes_adapter {
        u32 et_rx_max_coalesced_frames_high;
        u32 et_rate_sample_interval;
        u32 timer_int_limit;
+       u32 wqm_quanta;
 
        /* Adapter base MAC address */
        u32 mac_addr_low;
@@ -1094,12 +1097,14 @@ struct nes_adapter {
        u16 pd_config_base[4];
 
        u16 link_interrupt_count[4];
+       u8 crit_error_count[32];
 
        /* the phy index for each port */
        u8  phy_index[4];
        u8  mac_sw_state[4];
        u8  mac_link_down[4];
        u8  phy_type[4];
+       u8  log_port;
 
        /* PCI information */
        unsigned int  devfn;
@@ -1113,6 +1118,7 @@ struct nes_adapter {
        u8            virtwq;
        u8            et_use_adaptive_rx_coalesce;
        u8            adapter_fcn_count;
+       u8 pft_mcast_map[NES_PFT_SIZE];
 };
 
 struct nes_pbl {
index 1b0938c87774e54017dd22364520778547b40dee..730358637bb62603111e936f02ac1ad5c336e780 100644 (file)
@@ -91,6 +91,7 @@ static struct nic_qp_map *nic_qp_mapping_per_function[] = {
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
                | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
 static int debug = -1;
+static int nics_per_function = 1;
 
 /**
  * nes_netdev_poll
@@ -201,7 +202,8 @@ static int nes_netdev_open(struct net_device *netdev)
                nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW"
                                " (Addr:%08X) = %08X, HIGH = %08X.\n",
                                i, nesvnic->qp_nic_index[i],
-                               NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8),
+                               NES_IDX_PERFECT_FILTER_LOW+
+                                       (nesvnic->qp_nic_index[i] * 8),
                                macaddr_low,
                                (u32)macaddr_high | NES_MAC_ADDR_VALID |
                                ((((u32)nesvnic->nic_index) << 16)));
@@ -272,14 +274,18 @@ static int nes_netdev_stop(struct net_device *netdev)
                        break;
        }
 
-       if (first_nesvnic->netdev_open == 0)
+       if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic)  &&
+               (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) !=
+               PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
+                       nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+
+                               (0x200*nesdev->mac_index), 0xffffffff);
+                       nes_write_indexed(first_nesvnic->nesdev,
+                               NES_IDX_MAC_INT_MASK+
+                               (0x200*first_nesvnic->nesdev->mac_index),
+                       ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
+                       NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
+       } else {
                nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff);
-       else if ((first_nesvnic != nesvnic) &&
-                (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
-               nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff);
-               nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index),
-                               ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
-                               NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
        }
 
        nic_active_mask = ~((u32)(1 << nesvnic->nic_index));
@@ -437,7 +443,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
        struct nes_hw_nic_sq_wqe *nic_sqe;
        struct tcphdr *tcph;
        /* struct udphdr *udph; */
-#define NES_MAX_TSO_FRAGS 18
+#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS
        /* 64K segment plus overflow on each side */
        dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS];
        dma_addr_t bus_address;
@@ -605,6 +611,8 @@ tso_sq_no_longer_full:
                                        wqe_fragment_length[wqe_fragment_index] = 0;
                                        set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
                                                                        bus_address);
+                                       tso_wqe_length += skb_headlen(skb) -
+                                                       original_first_length;
                                }
                                while (wqe_fragment_index < 5) {
                                        wqe_fragment_length[wqe_fragment_index] =
@@ -827,6 +835,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 {
        struct nes_vnic *nesvnic = netdev_priv(netdev);
        struct nes_device *nesdev = nesvnic->nesdev;
+       struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
        struct dev_mc_list *multicast_addr;
        u32 nic_active_bit;
        u32 nic_active;
@@ -836,7 +845,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
        u8 mc_all_on = 0;
        u8 mc_index;
        int mc_nic_index = -1;
+       u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count *
+                                       nics_per_function, 4);
+       u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated;
+       unsigned long flags;
 
+       spin_lock_irqsave(&nesadapter->resource_lock, flags);
        nic_active_bit = 1 << nesvnic->nic_index;
 
        if (netdev->flags & IFF_PROMISC) {
@@ -847,7 +861,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
                nic_active |= nic_active_bit;
                nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
                mc_all_on = 1;
-       } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) ||
+       } else if ((netdev->flags & IFF_ALLMULTI) ||
                           (nesvnic->nic_index > 3)) {
                nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
                nic_active |= nic_active_bit;
@@ -866,17 +880,34 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
        }
 
        nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n",
-                         netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0,
-                         (netdev->flags & IFF_ALLMULTI)?1:0);
+                 netdev->mc_count, !!(netdev->flags & IFF_PROMISC),
+                 !!(netdev->flags & IFF_ALLMULTI));
        if (!mc_all_on) {
                multicast_addr = netdev->mc_list;
-               perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80;
-               perfect_filter_register_address += nesvnic->nic_index*0x40;
-               for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) {
-                       while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0))
+               perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
+                                               pft_entries_preallocated * 0x8;
+               for (mc_index = 0; mc_index < max_pft_entries_avaiable;
+               mc_index++) {
+                       while (multicast_addr && nesvnic->mcrq_mcast_filter &&
+                       ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic,
+                                       multicast_addr->dmi_addr)) == 0)) {
                                multicast_addr = multicast_addr->next;
+                       }
                        if (mc_nic_index < 0)
                                mc_nic_index = nesvnic->nic_index;
+                       while (nesadapter->pft_mcast_map[mc_index] < 16 &&
+                               nesadapter->pft_mcast_map[mc_index] !=
+                                       nesvnic->nic_index &&
+                                       mc_index < max_pft_entries_avaiable) {
+                                               nes_debug(NES_DBG_NIC_RX,
+                                       "mc_index=%d skipping nic_index=%d,\
+                                       used for=%d \n", mc_index,
+                                       nesvnic->nic_index,
+                                       nesadapter->pft_mcast_map[mc_index]);
+                               mc_index++;
+                       }
+                       if (mc_index >= max_pft_entries_avaiable)
+                               break;
                        if (multicast_addr) {
                                DECLARE_MAC_BUF(mac);
                                nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n",
@@ -897,15 +928,33 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
                                                (u32)macaddr_high | NES_MAC_ADDR_VALID |
                                                ((((u32)(1<<mc_nic_index)) << 16)));
                                multicast_addr = multicast_addr->next;
+                               nesadapter->pft_mcast_map[mc_index] =
+                                                       nesvnic->nic_index;
                        } else {
                                nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n",
                                                  perfect_filter_register_address+(mc_index * 8));
                                nes_write_indexed(nesdev,
                                                perfect_filter_register_address+4+(mc_index * 8),
                                                0);
+                               nesadapter->pft_mcast_map[mc_index] = 255;
                        }
                }
+               /* PFT is not large enough */
+               if (multicast_addr && multicast_addr->next) {
+                       nic_active = nes_read_indexed(nesdev,
+                                               NES_IDX_NIC_MULTICAST_ALL);
+                       nic_active |= nic_active_bit;
+                       nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
+                                                               nic_active);
+                       nic_active = nes_read_indexed(nesdev,
+                                               NES_IDX_NIC_UNICAST_ALL);
+                       nic_active &= ~nic_active_bit;
+                       nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL,
+                                                               nic_active);
+               }
        }
+
+       spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
 }
 
 
@@ -918,6 +967,10 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
        struct nes_device *nesdev = nesvnic->nesdev;
        int ret = 0;
        u8 jumbomode = 0;
+       u32 nic_active;
+       u32 nic_active_bit;
+       u32 uc_all_active;
+       u32 mc_all_active;
 
        if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
                return -EINVAL;
@@ -931,8 +984,24 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
        nes_nic_init_timer_defaults(nesdev, jumbomode);
 
        if (netif_running(netdev)) {
+               nic_active_bit = 1 << nesvnic->nic_index;
+               mc_all_active = nes_read_indexed(nesdev,
+                               NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit;
+               uc_all_active = nes_read_indexed(nesdev,
+                               NES_IDX_NIC_UNICAST_ALL)  & nic_active_bit;
+
                nes_netdev_stop(netdev);
                nes_netdev_open(netdev);
+
+               nic_active = nes_read_indexed(nesdev,
+                                       NES_IDX_NIC_MULTICAST_ALL);
+               nic_active |= mc_all_active;
+               nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
+                                                       nic_active);
+
+               nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+               nic_active |= uc_all_active;
+               nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
        }
 
        return ret;
@@ -1208,10 +1277,12 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev,
                struct ethtool_drvinfo *drvinfo)
 {
        struct nes_vnic *nesvnic = netdev_priv(netdev);
+       struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
 
        strcpy(drvinfo->driver, DRV_NAME);
        strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev));
-       strcpy(drvinfo->fw_version, "TBD");
+       sprintf(drvinfo->fw_version, "%u.%u", nesadapter->firmware_version>>16,
+                               nesadapter->firmware_version & 0x000000ff);
        strcpy(drvinfo->version, DRV_VERSION);
        drvinfo->n_stats = nes_netdev_get_stats_count(netdev);
        drvinfo->testinfo_len = 0;
@@ -1587,7 +1658,9 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
                        nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id,
                        nesvnic->nic_index, nesvnic->logical_port,  nesdev->mac_index);
 
-       if (nesvnic->nesdev->nesadapter->port_count == 1) {
+       if (nesvnic->nesdev->nesadapter->port_count == 1 &&
+               nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) {
+
                nesvnic->qp_nic_index[0] = nesvnic->nic_index;
                nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1;
                if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
@@ -1598,11 +1671,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
                        nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3;
                }
        } else {
-               if (nesvnic->nesdev->nesadapter->port_count == 2) {
-                       nesvnic->qp_nic_index[0] = nesvnic->nic_index;
-                       nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2;
-                       nesvnic->qp_nic_index[2] = 0xf;
-                       nesvnic->qp_nic_index[3] = 0xf;
+               if (nesvnic->nesdev->nesadapter->port_count == 2 ||
+                       (nesvnic->nesdev->nesadapter->port_count == 1 &&
+                       nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) {
+                               nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+                               nesvnic->qp_nic_index[1] = nesvnic->nic_index
+                                                                       + 2;
+                               nesvnic->qp_nic_index[2] = 0xf;
+                               nesvnic->qp_nic_index[3] = 0xf;
                } else {
                        nesvnic->qp_nic_index[0] = nesvnic->nic_index;
                        nesvnic->qp_nic_index[1] = 0xf;
index 05eb41b8ab631692fcd46686eedc0940443236c9..68ba5c3482e47097b50998db96dc8941b37363d2 100644 (file)
@@ -268,10 +268,9 @@ struct ipoib_lro {
 };
 
 /*
- * Device private locking: tx_lock protects members used in TX fast
- * path (and we use LLTX so upper layers don't do extra locking).
- * lock protects everything else.  lock nests inside of tx_lock (ie
- * tx_lock must be acquired first if needed).
+ * Device private locking: network stack tx_lock protects members used
+ * in TX fast path, lock protects everything else.  lock nests inside
+ * of tx_lock (ie tx_lock must be acquired first if needed).
  */
 struct ipoib_dev_priv {
        spinlock_t lock;
@@ -320,7 +319,6 @@ struct ipoib_dev_priv {
 
        struct ipoib_rx_buf *rx_ring;
 
-       spinlock_t           tx_lock;
        struct ipoib_tx_buf *tx_ring;
        unsigned             tx_head;
        unsigned             tx_tail;
index 341ffedafed6704a9cb9995f49582e66c88c4505..7b14c2c395008fc2acc3b38505c81cbeb3111929 100644 (file)
@@ -786,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
        dev_kfree_skb_any(tx_req->skb);
 
-       spin_lock_irqsave(&priv->tx_lock, flags);
+       netif_tx_lock(dev);
+
        ++tx->tx_tail;
        if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
            netif_queue_stopped(dev) &&
@@ -801,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                           "(status=%d, wrid=%d vend_err %x)\n",
                           wc->status, wr_id, wc->vendor_err);
 
-               spin_lock(&priv->lock);
+               spin_lock_irqsave(&priv->lock, flags);
                neigh = tx->neigh;
 
                if (neigh) {
@@ -821,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
                clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
 
-               spin_unlock(&priv->lock);
+               spin_unlock_irqrestore(&priv->lock, flags);
        }
 
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
+       netif_tx_unlock(dev);
 }
 
 int ipoib_cm_dev_open(struct net_device *dev)
@@ -1149,7 +1150,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 {
        struct ipoib_dev_priv *priv = netdev_priv(p->dev);
        struct ipoib_cm_tx_buf *tx_req;
-       unsigned long flags;
        unsigned long begin;
 
        ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1180,12 +1180,12 @@ timeout:
                                    DMA_TO_DEVICE);
                dev_kfree_skb_any(tx_req->skb);
                ++p->tx_tail;
-               spin_lock_irqsave(&priv->tx_lock, flags);
+               netif_tx_lock_bh(p->dev);
                if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
                    netif_queue_stopped(p->dev) &&
                    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                        netif_wake_queue(p->dev);
-               spin_unlock_irqrestore(&priv->tx_lock, flags);
+               netif_tx_unlock_bh(p->dev);
        }
 
        if (p->qp)
@@ -1202,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
        struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
        struct net_device *dev = priv->dev;
        struct ipoib_neigh *neigh;
+       unsigned long flags;
        int ret;
 
        switch (event->event) {
@@ -1220,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
        case IB_CM_REJ_RECEIVED:
        case IB_CM_TIMEWAIT_EXIT:
                ipoib_dbg(priv, "CM error %d.\n", event->event);
-               spin_lock_irq(&priv->tx_lock);
-               spin_lock(&priv->lock);
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
                neigh = tx->neigh;
 
                if (neigh) {
@@ -1239,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
                        queue_work(ipoib_workqueue, &priv->cm.reap_task);
                }
 
-               spin_unlock(&priv->lock);
-               spin_unlock_irq(&priv->tx_lock);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
                break;
        default:
                break;
@@ -1294,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work)
        struct ib_sa_path_rec pathrec;
        u32 qpn;
 
-       spin_lock_irqsave(&priv->tx_lock, flags);
-       spin_lock(&priv->lock);
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
+
        while (!list_empty(&priv->cm.start_list)) {
                p = list_entry(priv->cm.start_list.next, typeof(*p), list);
                list_del_init(&p->list);
                neigh = p->neigh;
                qpn = IPOIB_QPN(neigh->neighbour->ha);
                memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
-               spin_unlock(&priv->lock);
-               spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
+
                ret = ipoib_cm_tx_init(p, qpn, &pathrec);
-               spin_lock_irqsave(&priv->tx_lock, flags);
-               spin_lock(&priv->lock);
+
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
+
                if (ret) {
                        neigh = p->neigh;
                        if (neigh) {
@@ -1320,44 +1326,52 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                        kfree(p);
                }
        }
-       spin_unlock(&priv->lock);
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
 }
 
 static void ipoib_cm_tx_reap(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   cm.reap_task);
+       struct net_device *dev = priv->dev;
        struct ipoib_cm_tx *p;
+       unsigned long flags;
+
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
 
-       spin_lock_irq(&priv->tx_lock);
-       spin_lock(&priv->lock);
        while (!list_empty(&priv->cm.reap_list)) {
                p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
                list_del(&p->list);
-               spin_unlock(&priv->lock);
-               spin_unlock_irq(&priv->tx_lock);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
                ipoib_cm_tx_destroy(p);
-               spin_lock_irq(&priv->tx_lock);
-               spin_lock(&priv->lock);
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
        }
-       spin_unlock(&priv->lock);
-       spin_unlock_irq(&priv->tx_lock);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
 }
 
 static void ipoib_cm_skb_reap(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   cm.skb_task);
+       struct net_device *dev = priv->dev;
        struct sk_buff *skb;
-
+       unsigned long flags;
        unsigned mtu = priv->mcast_mtu;
 
-       spin_lock_irq(&priv->tx_lock);
-       spin_lock(&priv->lock);
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
+
        while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
-               spin_unlock(&priv->lock);
-               spin_unlock_irq(&priv->tx_lock);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
+
                if (skb->protocol == htons(ETH_P_IP))
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -1365,11 +1379,13 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
 #endif
                dev_kfree_skb_any(skb);
-               spin_lock_irq(&priv->tx_lock);
-               spin_lock(&priv->lock);
+
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
        }
-       spin_unlock(&priv->lock);
-       spin_unlock_irq(&priv->tx_lock);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
 }
 
 void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
index 66cafa20c246cea2a024fdaf1bbdaf5b6b26279d..0e748aeeae99fe830eb3c9e1c1e8bd96461ba30a 100644 (file)
@@ -468,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 static void drain_tx_cq(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       unsigned long flags;
 
-       spin_lock_irqsave(&priv->tx_lock, flags);
+       netif_tx_lock(dev);
        while (poll_tx(priv))
                ; /* nothing */
 
        if (netif_queue_stopped(dev))
                mod_timer(&priv->poll_timer, jiffies + 1);
 
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
+       netif_tx_unlock(dev);
 }
 
 void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
 {
-       drain_tx_cq((struct net_device *)dev_ptr);
+       struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+
+       mod_timer(&priv->poll_timer, jiffies);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,
@@ -614,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_ah *ah, *tah;
        LIST_HEAD(remove_list);
+       unsigned long flags;
+
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
 
-       spin_lock_irq(&priv->tx_lock);
-       spin_lock(&priv->lock);
        list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
                if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
                        list_del(&ah->list);
                        ib_destroy_ah(ah->ah);
                        kfree(ah);
                }
-       spin_unlock(&priv->lock);
-       spin_unlock_irq(&priv->tx_lock);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
 }
 
 void ipoib_reap_ah(struct work_struct *work)
@@ -761,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int i, n;
+
+       /*
+        * We call completion handling routines that expect to be
+        * called from the BH-disabled NAPI poll context, so disable
+        * BHs here too.
+        */
+       local_bh_disable();
+
        do {
                n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
                for (i = 0; i < n; ++i) {
@@ -784,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev)
 
        while (poll_tx(priv))
                ; /* nothing */
+
+       local_bh_enable();
 }
 
 int ipoib_ib_dev_stop(struct net_device *dev, int flush)
index 1b1df5cc4113f7487e54876afd8aa5f44eaab456..c0ee514396dfa7a18842d0503d2c01641661d261 100644 (file)
@@ -373,9 +373,10 @@ void ipoib_flush_paths(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_path *path, *tp;
        LIST_HEAD(remove_list);
+       unsigned long flags;
 
-       spin_lock_irq(&priv->tx_lock);
-       spin_lock(&priv->lock);
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
 
        list_splice_init(&priv->path_list, &remove_list);
 
@@ -385,15 +386,16 @@ void ipoib_flush_paths(struct net_device *dev)
        list_for_each_entry_safe(path, tp, &remove_list, list) {
                if (path->query)
                        ib_sa_cancel_query(path->query_id, path->query);
-               spin_unlock(&priv->lock);
-               spin_unlock_irq(&priv->tx_lock);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
                wait_for_completion(&path->done);
                path_free(dev, path);
-               spin_lock_irq(&priv->tx_lock);
-               spin_lock(&priv->lock);
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
        }
-       spin_unlock(&priv->lock);
-       spin_unlock_irq(&priv->tx_lock);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
 }
 
 static void path_rec_completion(int status,
@@ -404,7 +406,7 @@ static void path_rec_completion(int status,
        struct net_device *dev = path->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_ah *ah = NULL;
-       struct ipoib_ah *old_ah;
+       struct ipoib_ah *old_ah = NULL;
        struct ipoib_neigh *neigh, *tn;
        struct sk_buff_head skqueue;
        struct sk_buff *skb;
@@ -428,12 +430,12 @@ static void path_rec_completion(int status,
 
        spin_lock_irqsave(&priv->lock, flags);
 
-       old_ah   = path->ah;
-       path->ah = ah;
-
        if (ah) {
                path->pathrec = *pathrec;
 
+               old_ah   = path->ah;
+               path->ah = ah;
+
                ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
                          ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
 
@@ -555,6 +557,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_path *path;
        struct ipoib_neigh *neigh;
+       unsigned long flags;
 
        neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
        if (!neigh) {
@@ -563,11 +566,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
                return;
        }
 
-       /*
-        * We can only be called from ipoib_start_xmit, so we're
-        * inside tx_lock -- no need to save/restore flags.
-        */
-       spin_lock(&priv->lock);
+       spin_lock_irqsave(&priv->lock, flags);
 
        path = __path_find(dev, skb->dst->neighbour->ha + 4);
        if (!path) {
@@ -614,7 +613,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
                __skb_queue_tail(&neigh->queue, skb);
        }
 
-       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->lock, flags);
        return;
 
 err_list:
@@ -626,7 +625,7 @@ err_drop:
        ++dev->stats.tx_dropped;
        dev_kfree_skb_any(skb);
 
-       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
@@ -650,12 +649,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_path *path;
+       unsigned long flags;
 
-       /*
-        * We can only be called from ipoib_start_xmit, so we're
-        * inside tx_lock -- no need to save/restore flags.
-        */
-       spin_lock(&priv->lock);
+       spin_lock_irqsave(&priv->lock, flags);
 
        path = __path_find(dev, phdr->hwaddr + 4);
        if (!path || !path->valid) {
@@ -667,7 +663,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                        __skb_queue_tail(&path->queue, skb);
 
                        if (path_rec_start(dev, path)) {
-                               spin_unlock(&priv->lock);
+                               spin_unlock_irqrestore(&priv->lock, flags);
                                path_free(dev, path);
                                return;
                        } else
@@ -677,7 +673,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                        dev_kfree_skb_any(skb);
                }
 
-               spin_unlock(&priv->lock);
+               spin_unlock_irqrestore(&priv->lock, flags);
                return;
        }
 
@@ -696,7 +692,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                dev_kfree_skb_any(skb);
        }
 
-       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -705,13 +701,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct ipoib_neigh *neigh;
        unsigned long flags;
 
-       if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
-               return NETDEV_TX_LOCKED;
-
        if (likely(skb->dst && skb->dst->neighbour)) {
                if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
                        ipoib_path_lookup(skb, dev);
-                       goto out;
+                       return NETDEV_TX_OK;
                }
 
                neigh = *to_ipoib_neigh(skb->dst->neighbour);
@@ -721,7 +714,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
                                            skb->dst->neighbour->ha + 4,
                                            sizeof(union ib_gid))) ||
                                         (neigh->dev != dev))) {
-                               spin_lock(&priv->lock);
+                               spin_lock_irqsave(&priv->lock, flags);
                                /*
                                 * It's safe to call ipoib_put_ah() inside
                                 * priv->lock here, because we know that
@@ -732,25 +725,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
                                ipoib_put_ah(neigh->ah);
                                list_del(&neigh->list);
                                ipoib_neigh_free(dev, neigh);
-                               spin_unlock(&priv->lock);
+                               spin_unlock_irqrestore(&priv->lock, flags);
                                ipoib_path_lookup(skb, dev);
-                               goto out;
+                               return NETDEV_TX_OK;
                        }
 
                if (ipoib_cm_get(neigh)) {
                        if (ipoib_cm_up(neigh)) {
                                ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
-                               goto out;
+                               return NETDEV_TX_OK;
                        }
                } else if (neigh->ah) {
                        ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
-                       goto out;
+                       return NETDEV_TX_OK;
                }
 
                if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-                       spin_lock(&priv->lock);
+                       spin_lock_irqsave(&priv->lock, flags);
                        __skb_queue_tail(&neigh->queue, skb);
-                       spin_unlock(&priv->lock);
+                       spin_unlock_irqrestore(&priv->lock, flags);
                } else {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
@@ -779,16 +772,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
                                           IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
                                dev_kfree_skb_any(skb);
                                ++dev->stats.tx_dropped;
-                               goto out;
+                               return NETDEV_TX_OK;
                        }
 
                        unicast_arp_send(skb, dev, phdr);
                }
        }
 
-out:
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
-
        return NETDEV_TX_OK;
 }
 
@@ -1052,7 +1042,6 @@ static void ipoib_setup(struct net_device *dev)
        dev->type                = ARPHRD_INFINIBAND;
        dev->tx_queue_len        = ipoib_sendq_size * 2;
        dev->features            = (NETIF_F_VLAN_CHALLENGED     |
-                                   NETIF_F_LLTX                |
                                    NETIF_F_HIGHDMA);
 
        memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
@@ -1064,7 +1053,6 @@ static void ipoib_setup(struct net_device *dev)
        ipoib_lro_setup(priv);
 
        spin_lock_init(&priv->lock);
-       spin_lock_init(&priv->tx_lock);
 
        mutex_init(&priv->vlan_mutex);
 
index aae28620a6e5f01a3650ce59b5c8d35f65b4ca28..d9d1223c3fd5f7dc1609764c9a1db16e3a20018f 100644 (file)
@@ -69,14 +69,13 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_neigh *neigh, *tmp;
-       unsigned long flags;
        int tx_dropped = 0;
 
        ipoib_dbg_mcast(netdev_priv(dev),
                        "deleting multicast group " IPOIB_GID_FMT "\n",
                        IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-       spin_lock_irqsave(&priv->lock, flags);
+       spin_lock_irq(&priv->lock);
 
        list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
                /*
@@ -90,7 +89,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
                ipoib_neigh_free(dev, neigh);
        }
 
-       spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock_irq(&priv->lock);
 
        if (mcast->ah)
                ipoib_put_ah(mcast->ah);
@@ -100,9 +99,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
                dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
        }
 
-       spin_lock_irqsave(&priv->tx_lock, flags);
+       netif_tx_lock_bh(dev);
        dev->stats.tx_dropped += tx_dropped;
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
+       netif_tx_unlock_bh(dev);
 
        kfree(mcast);
 }
@@ -259,10 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
        }
 
        /* actually send any queued packets */
-       spin_lock_irq(&priv->tx_lock);
+       netif_tx_lock_bh(dev);
        while (!skb_queue_empty(&mcast->pkt_queue)) {
                struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-               spin_unlock_irq(&priv->tx_lock);
+               netif_tx_unlock_bh(dev);
 
                skb->dev = dev;
 
@@ -273,9 +272,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
                if (dev_queue_xmit(skb))
                        ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
-               spin_lock_irq(&priv->tx_lock);
+               netif_tx_lock_bh(dev);
        }
-       spin_unlock_irq(&priv->tx_lock);
+       netif_tx_unlock_bh(dev);
 
        return 0;
 }
@@ -286,7 +285,6 @@ ipoib_mcast_sendonly_join_complete(int status,
 {
        struct ipoib_mcast *mcast = multicast->context;
        struct net_device *dev = mcast->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
 
        /* We trap for port events ourselves. */
        if (status == -ENETRESET)
@@ -302,12 +300,12 @@ ipoib_mcast_sendonly_join_complete(int status,
                                        IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
                /* Flush out any queued packets */
-               spin_lock_irq(&priv->tx_lock);
+               netif_tx_lock_bh(dev);
                while (!skb_queue_empty(&mcast->pkt_queue)) {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
                }
-               spin_unlock_irq(&priv->tx_lock);
+               netif_tx_unlock_bh(dev);
 
                /* Clear the busy flag so we try again */
                status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
@@ -662,12 +660,9 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_mcast *mcast;
+       unsigned long flags;
 
-       /*
-        * We can only be called from ipoib_start_xmit, so we're
-        * inside tx_lock -- no need to save/restore flags.
-        */
-       spin_lock(&priv->lock);
+       spin_lock_irqsave(&priv->lock, flags);
 
        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)         ||
            !priv->broadcast                                    ||
@@ -738,7 +733,7 @@ out:
        }
 
 unlock:
-       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 void ipoib_mcast_dev_flush(struct net_device *dev)