drivers/net/mlx4/main.c
mlx4_core: Support multiple pre-reserved QP regions
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static char mlx4_version[] __devinitdata =
        DRV_NAME ": Mellanox ConnectX core driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";

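/*
 * Default resource limits used to size the HCA's context memory
 * (ICM).  These are upper bounds: mlx4_make_profile() trims them
 * against what QUERY_DEV_CAP reports before the ICM layout is built.
 */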
static struct mlx4_profile default_profile = {
        .num_qp         = 1 << 17,
        .num_srq        = 1 << 16,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 16,
        .num_mcg        = 1 << 13,
        .num_mpt        = 1 << 17,
        .num_mtt        = 1 << 20,
};

static int log_num_mac = 2;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");

static int use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
                  "(0/1, default 0)");

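/*
 * Query device limits with QUERY_DEV_CAP, sanity-check them against
 * what this kernel and driver can handle, and fill in dev->caps,
 * including the sizes of the pre-reserved QP regions.
 */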
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err;
        int i;

        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                return err;
        }

        if (dev_cap->min_page_sz > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than "
                         "kernel PAGE_SIZE of %ld, aborting.\n",
                         dev_cap->min_page_sz, PAGE_SIZE);
                return -ENODEV;
        }
        if (dev_cap->num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, "
                         "aborting.\n",
                         dev_cap->num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
                         "PCI resource 2 size of 0x%llx, aborting.\n",
                         dev_cap->uar_size,
                         (unsigned long long) pci_resource_len(dev->pdev, 2));
                return -ENODEV;
        }

        dev->caps.num_ports          = dev_cap->num_ports;
        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.vl_cap[i]         = dev_cap->max_vl[i];
                dev->caps.mtu_cap[i]        = dev_cap->max_mtu[i];
                dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
                dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
                dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
        }

        dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
        dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
        dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
        dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
        dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
        dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
        dev->caps.max_wqes           = dev_cap->max_qp_sz;
        dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
        dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
        dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
        dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
        dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
        dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
        dev->caps.num_qp_per_mgm     = MLX4_QP_PER_MGM;
        /*
         * Subtract 1 from the limit because we need to allocate a
         * spare CQE so the HCA HW can tell the difference between an
         * empty CQ and a full CQ.
         */
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
        dev->caps.reserved_mtts      = DIV_ROUND_UP(dev_cap->reserved_mtts,
                                                    MLX4_MTT_ENTRY_PER_SEG);
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
        dev->caps.reserved_uars      = dev_cap->reserved_uars;
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.mtt_entry_sz       = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.bmme_flags         = dev_cap->bmme_flags;
        dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;

        dev->caps.log_num_macs  = log_num_mac;
        dev->caps.log_num_vlans = log_num_vlan;
        dev->caps.log_num_prios = use_prio ? 3 : 0;

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
                        dev->caps.log_num_macs = dev_cap->log_max_macs[i];
                        mlx4_warn(dev, "Requested number of MACs is too high "
                                  "for port %d, reducing to %d.\n",
                                  i, 1 << dev->caps.log_num_macs);
                }
                if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
                        dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
                        mlx4_warn(dev, "Requested number of VLANs is too high "
                                  "for port %d, reducing to %d.\n",
                                  i, 1 << dev->caps.log_num_vlans);
                }
        }

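        /*
         * Carve the low end of the QP number space into pre-reserved
         * regions: QPs reserved by the firmware itself, per-port
         * Ethernet/FC address-steering QPs (one per MAC/VLAN/priority
         * combination), and FC exchange QPs.  With the default
         * log_num_mac=2, log_num_vlan=0 and use_prio=0, for example,
         * each address region on a two-port HCA comes to
         * 4 * 1 * 1 * 2 = 8 QPs.
         */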
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
                (1 << dev->caps.log_num_macs) *
                (1 << dev->caps.log_num_vlans) *
                (1 << dev->caps.log_num_prios) *
                dev->caps.num_ports;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

        dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

        return 0;
}

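/*
 * Allocate the firmware area in host memory and hand it to the HCA
 * with MAP_FA, then start the firmware with RUN_FW.  fw_pages was
 * reported by the earlier QUERY_FW command.
 */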
static int mlx4_load_fw(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
                                         GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.fw_icm) {
                mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
        if (err) {
                mlx4_err(dev, "MAP_FA command failed, aborting.\n");
                goto err_free;
        }

        err = mlx4_RUN_FW(dev);
        if (err) {
                mlx4_err(dev, "RUN_FW command failed, aborting.\n");
                goto err_unmap_fa;
        }

        return 0;

err_unmap_fa:
        mlx4_UNMAP_FA(dev);

err_free:
        mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        return err;
}

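/*
 * Map the central MPT (cMPT) context tables.  There is one region
 * per object type (QP, SRQ, CQ, EQ); each type's region starts at
 * cmpt_base plus the type index times cmpt_entry_sz, shifted left
 * by MLX4_CMPT_SHIFT.
 */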
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                int cmpt_entry_sz)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_QP *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err)
                goto err;

        err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_SRQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err)
                goto err_qp;

        err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_CQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err)
                goto err_srq;

        err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_EQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz,
                                  roundup_pow_of_two(MLX4_NUM_EQ +
                                                     dev->caps.reserved_eqs),
                                  MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
        if (err)
                goto err_cq;

        return 0;

err_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
        return err;
}

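/*
 * Size and map all of the HCA's context memory: first the auxiliary
 * ICM area (SET_ICM_SIZE/MAP_ICM_AUX), then one ICM table per object
 * type at the offsets that mlx4_make_profile() chose in init_hca.
 */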
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
                         struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 aux_pages;
        int err;

        err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
        if (err) {
                mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
                return err;
        }

        mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
                 (unsigned long long) icm_size >> 10,
                 (unsigned long long) aux_pages << 2);

        priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.aux_icm) {
                mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
        if (err) {
                mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
                goto err_free_aux;
        }

        err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
        if (err) {
                mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
                goto err_unmap_aux;
        }

        err = mlx4_map_eq_icm(dev, init_hca->eqc_base);
        if (err) {
                mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
                goto err_unmap_cmpt;
        }

        /*
         * Reserved MTT entries must be aligned up to a cacheline
         * boundary, since the FW will write to them, while the driver
         * writes to all other MTT entries. (The variable
         * dev->caps.mtt_entry_sz below is really the MTT segment
         * size, not the raw entry size)
         */
        dev->caps.reserved_mtts =
                ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
                      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
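        /*
         * (For illustration: assuming the usual 8-byte MTT entries and
         * 8-entry segments, a segment is 64 bytes, so on a machine with
         * 64-byte cachelines the rounding above is a no-op.  Nothing
         * here relies on those particular sizes.)
         */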

        err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
                                  init_hca->mtt_base,
                                  dev->caps.mtt_entry_sz,
                                  dev->caps.num_mtt_segs,
                                  dev->caps.reserved_mtts, 1, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
                goto err_unmap_eq;
        }

        err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
                                  init_hca->dmpt_base,
                                  dev_cap->dmpt_entry_sz,
                                  dev->caps.num_mpts,
                                  dev->caps.reserved_mrws, 1, 1);
        if (err) {
                mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
                goto err_unmap_mtt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
                                  init_hca->qpc_base,
                                  dev_cap->qpc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
                goto err_unmap_dmpt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
                                  init_hca->auxc_base,
                                  dev_cap->aux_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
                goto err_unmap_qp;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
                                  init_hca->altc_base,
                                  dev_cap->altc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
                goto err_unmap_auxc;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
                                  init_hca->rdmarc_base,
                                  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map RDMARC context memory, aborting.\n");
                goto err_unmap_altc;
        }

        err = mlx4_init_icm_table(dev, &priv->cq_table.table,
                                  init_hca->cqc_base,
                                  dev_cap->cqc_entry_sz,
                                  dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
                goto err_unmap_rdmarc;
        }

        err = mlx4_init_icm_table(dev, &priv->srq_table.table,
                                  init_hca->srqc_base,
                                  dev_cap->srq_entry_sz,
                                  dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
                goto err_unmap_cq;
        }

        /*
         * It's not strictly required, but for simplicity just map the
         * whole multicast group table now.  The table isn't very big
         * and it's a lot easier than trying to track ref counts.
         */
        err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
                                  init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
                goto err_unmap_srq;
        }

        return 0;

err_unmap_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
        mlx4_unmap_eq_icm(dev);

err_unmap_cmpt:
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
        mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);

        return err;
}

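/*
 * Tear down everything mlx4_init_icm() set up, in the reverse order
 * of initialization.
 */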
static void mlx4_free_icms(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);

        mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
        mlx4_unmap_eq_icm(dev);

        mlx4_UNMAP_ICM_AUX(dev);
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
        mlx4_CLOSE_HCA(dev, 0);
        mlx4_free_icms(dev);
        mlx4_UNMAP_FA(dev);
        mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
}

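/*
 * Bring the HCA to a running state: query and boot the firmware,
 * optionally override its log_pg_sz via MOD_STAT_CFG, turn the
 * default profile into an ICM layout, map the context memory and
 * issue INIT_HCA, then read back adapter info with QUERY_ADAPTER.
 */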
static int mlx4_init_hca(struct mlx4_dev *dev)
{
        struct mlx4_priv          *priv = mlx4_priv(dev);
        struct mlx4_adapter        adapter;
        struct mlx4_dev_cap        dev_cap;
        struct mlx4_mod_stat_cfg   mlx4_cfg;
        struct mlx4_profile        profile;
        struct mlx4_init_hca_param init_hca;
        u64 icm_size;
        int err;

        err = mlx4_QUERY_FW(dev);
        if (err) {
                mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
                return err;
        }

        err = mlx4_load_fw(dev);
        if (err) {
                mlx4_err(dev, "Failed to start FW, aborting.\n");
                return err;
        }

        mlx4_cfg.log_pg_sz_m = 1;
        mlx4_cfg.log_pg_sz = 0;
        err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
        if (err)
                mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

        err = mlx4_dev_cap(dev, &dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                goto err_stop_fw;
        }

        profile = default_profile;

        icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
        if ((long long) icm_size < 0) {
                err = icm_size;
                goto err_stop_fw;
        }

        init_hca.log_uar_sz = ilog2(dev->caps.num_uars);

        err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
        if (err)
                goto err_stop_fw;

        err = mlx4_INIT_HCA(dev, &init_hca);
        if (err) {
                mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
                goto err_free_icm;
        }

        err = mlx4_QUERY_ADAPTER(dev, &adapter);
        if (err) {
                mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
                goto err_close;
        }

        priv->eq_table.inta_pin = adapter.inta_pin;
        memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);

        return 0;

err_close:
        mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
        mlx4_free_icms(dev);

err_stop_fw:
        mlx4_UNMAP_FA(dev);
        mlx4_free_icm(dev, priv->fw.fw_icm, 0);

        return err;
}

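/*
 * Set up the driver-side resource tables (UARs, PDs, MRs, EQs, CQs,
 * SRQs, QPs, multicast groups) and switch firmware commands from
 * polling to event-driven completion, using a NOP command to verify
 * that interrupt delivery actually works.
 */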
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        err = mlx4_init_uar_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "user access region table, aborting.\n");
                return err;
        }

        err = mlx4_uar_alloc(dev, &priv->driver_uar);
        if (err) {
                mlx4_err(dev, "Failed to allocate driver access region, "
                         "aborting.\n");
                goto err_uar_table_free;
        }

        priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
        if (!priv->kar) {
                mlx4_err(dev, "Couldn't map kernel access region, "
                         "aborting.\n");
                err = -ENOMEM;
                goto err_uar_free;
        }

        err = mlx4_init_pd_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "protection domain table, aborting.\n");
                goto err_kar_unmap;
        }

        err = mlx4_init_mr_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "memory region table, aborting.\n");
                goto err_pd_table_free;
        }

        err = mlx4_init_eq_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "event queue table, aborting.\n");
                goto err_mr_table_free;
        }

        err = mlx4_cmd_use_events(dev);
        if (err) {
                mlx4_err(dev, "Failed to switch to event-driven "
                         "firmware commands, aborting.\n");
                goto err_eq_table_free;
        }

        err = mlx4_NOP(dev);
        if (err) {
                if (dev->flags & MLX4_FLAG_MSI_X) {
                        mlx4_warn(dev, "NOP command failed to generate MSI-X "
                                  "interrupt (IRQ %d).\n",
                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
                        mlx4_warn(dev, "Trying again without MSI-X.\n");
                } else {
                        mlx4_err(dev, "NOP command failed to generate interrupt "
                                 "(IRQ %d), aborting.\n",
                                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
                        mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
                }

                goto err_cmd_poll;
        }

        mlx4_dbg(dev, "NOP command IRQ test passed\n");

        err = mlx4_init_cq_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "completion queue table, aborting.\n");
                goto err_cmd_poll;
        }

        err = mlx4_init_srq_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "shared receive queue table, aborting.\n");
                goto err_cq_table_free;
        }

        err = mlx4_init_qp_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "queue pair table, aborting.\n");
                goto err_srq_table_free;
        }

        err = mlx4_init_mcg_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "multicast group table, aborting.\n");
                goto err_qp_table_free;
        }

        return 0;

err_qp_table_free:
        mlx4_cleanup_qp_table(dev);

err_srq_table_free:
        mlx4_cleanup_srq_table(dev);

err_cq_table_free:
        mlx4_cleanup_cq_table(dev);

err_cmd_poll:
        mlx4_cmd_use_polling(dev);

err_eq_table_free:
        mlx4_cleanup_eq_table(dev);

err_mr_table_free:
        mlx4_cleanup_mr_table(dev);

err_pd_table_free:
        mlx4_cleanup_pd_table(dev);

err_kar_unmap:
        iounmap(priv->kar);

err_uar_free:
        mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
        mlx4_cleanup_uar_table(dev);
        return err;
}

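/*
 * Try to allocate one MSI-X vector per EQ.  On this kernel,
 * pci_enable_msix() returns a positive count when fewer vectors are
 * available than requested; in that case (or on any other failure)
 * fall back to sharing the legacy INTx interrupt across all EQs.
 */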
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct msix_entry entries[MLX4_NUM_EQ];
        int err;
        int i;

        if (msi_x) {
                for (i = 0; i < MLX4_NUM_EQ; ++i)
                        entries[i].entry = i;

                err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
                if (err) {
                        if (err > 0)
                                mlx4_info(dev, "Only %d MSI-X vectors available, "
                                          "not using MSI-X\n", err);
                        goto no_msi;
                }

                for (i = 0; i < MLX4_NUM_EQ; ++i)
                        priv->eq_table.eq[i].irq = entries[i].vector;

                dev->flags |= MLX4_FLAG_MSI_X;
                return;
        }

no_msi:
        for (i = 0; i < MLX4_NUM_EQ; ++i)
                priv->eq_table.eq[i].irq = dev->pdev->irq;
}

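/*
 * PCI bring-up for one ConnectX function: enable the device, claim
 * BAR 0 (device control space, expected to be 1MB) and BAR 2 (UARs),
 * set the DMA masks, reset and initialize the HCA, and finally
 * register the device with the mlx4 interface layer.
 */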
static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct mlx4_priv *priv;
        struct mlx4_dev *dev;
        int err;

        printk(KERN_INFO PFX "Initializing %s\n",
               pci_name(pdev));

        err = pci_enable_device(pdev);
        if (err) {
                dev_err(&pdev->dev, "Cannot enable PCI device, "
                        "aborting.\n");
                return err;
        }

        /*
         * Check for BARs.  We expect 0: 1MB
         */
        if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
            pci_resource_len(pdev, 0) != 1 << 20) {
                dev_err(&pdev->dev, "Missing DCS, aborting.\n");
                err = -ENODEV;
                goto err_disable_pdev;
        }
        if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
                dev_err(&pdev->dev, "Missing UAR, aborting.\n");
                err = -ENODEV;
                goto err_disable_pdev;
        }

        err = pci_request_region(pdev, 0, DRV_NAME);
        if (err) {
                dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
                goto err_disable_pdev;
        }

        err = pci_request_region(pdev, 2, DRV_NAME);
        if (err) {
                dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
                goto err_release_bar0;
        }

        pci_set_master(pdev);

        err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
        if (err) {
                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
                err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
                if (err) {
                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
                        goto err_release_bar2;
                }
        }
        err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
        if (err) {
                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
                         "consistent PCI DMA mask.\n");
                err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
                if (err) {
                        dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
                                "aborting.\n");
                        goto err_release_bar2;
                }
        }

        priv = kzalloc(sizeof *priv, GFP_KERNEL);
        if (!priv) {
                dev_err(&pdev->dev, "Device struct alloc failed, "
                        "aborting.\n");
                err = -ENOMEM;
                goto err_release_bar2;
        }

        dev       = &priv->dev;
        dev->pdev = pdev;
        INIT_LIST_HEAD(&priv->ctx_list);
        spin_lock_init(&priv->ctx_lock);

        INIT_LIST_HEAD(&priv->pgdir_list);
        mutex_init(&priv->pgdir_mutex);

        /*
         * Now reset the HCA before we touch the PCI capabilities or
         * attempt a firmware command, since a boot ROM may have left
         * the HCA in an undefined state.
         */
        err = mlx4_reset(dev);
        if (err) {
                mlx4_err(dev, "Failed to reset HCA, aborting.\n");
                goto err_free_dev;
        }

        err = mlx4_cmd_init(dev);
        if (err) {
                mlx4_err(dev, "Failed to init command interface, aborting.\n");
                goto err_free_dev;
        }

        err = mlx4_init_hca(dev);
        if (err)
                goto err_cmd;

        mlx4_enable_msi_x(dev);

        err = mlx4_setup_hca(dev);
        if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
                dev->flags &= ~MLX4_FLAG_MSI_X;
                pci_disable_msix(pdev);
                err = mlx4_setup_hca(dev);
        }

        if (err)
                goto err_close;

        err = mlx4_register_device(dev);
        if (err)
                goto err_cleanup;

        pci_set_drvdata(pdev, dev);

        return 0;

err_cleanup:
        mlx4_cleanup_mcg_table(dev);
        mlx4_cleanup_qp_table(dev);
        mlx4_cleanup_srq_table(dev);
        mlx4_cleanup_cq_table(dev);
        mlx4_cmd_use_polling(dev);
        mlx4_cleanup_eq_table(dev);
        mlx4_cleanup_mr_table(dev);
        mlx4_cleanup_pd_table(dev);
        mlx4_cleanup_uar_table(dev);

err_close:
        if (dev->flags & MLX4_FLAG_MSI_X)
                pci_disable_msix(pdev);

        mlx4_close_hca(dev);

err_cmd:
        mlx4_cmd_cleanup(dev);

err_free_dev:
        kfree(priv);

err_release_bar2:
        pci_release_region(pdev, 2);

err_release_bar0:
        pci_release_region(pdev, 0);

err_disable_pdev:
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
        return err;
}

static int __devinit mlx4_init_one(struct pci_dev *pdev,
                                   const struct pci_device_id *id)
{
        static int mlx4_version_printed;

        if (!mlx4_version_printed) {
                printk(KERN_INFO "%s", mlx4_version);
                ++mlx4_version_printed;
        }

        return __mlx4_init_one(pdev, id);
}

static void mlx4_remove_one(struct pci_dev *pdev)
{
        struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
        struct mlx4_priv *priv = mlx4_priv(dev);
        int p;

        if (dev) {
                mlx4_unregister_device(dev);

                for (p = 1; p <= dev->caps.num_ports; ++p)
                        mlx4_CLOSE_PORT(dev, p);

                mlx4_cleanup_mcg_table(dev);
                mlx4_cleanup_qp_table(dev);
                mlx4_cleanup_srq_table(dev);
                mlx4_cleanup_cq_table(dev);
                mlx4_cmd_use_polling(dev);
                mlx4_cleanup_eq_table(dev);
                mlx4_cleanup_mr_table(dev);
                mlx4_cleanup_pd_table(dev);

                iounmap(priv->kar);
                mlx4_uar_free(dev, &priv->driver_uar);
                mlx4_cleanup_uar_table(dev);
                mlx4_close_hca(dev);
                mlx4_cmd_cleanup(dev);

                if (dev->flags & MLX4_FLAG_MSI_X)
                        pci_disable_msix(pdev);

                kfree(priv);
                pci_release_region(pdev, 2);
                pci_release_region(pdev, 0);
                pci_disable_device(pdev);
                pci_set_drvdata(pdev, NULL);
        }
}

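/*
 * Full remove/probe cycle on one device; used by the catastrophic
 * error recovery path (mlx4_catas) to restart a wedged HCA.
 */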
int mlx4_restart_one(struct pci_dev *pdev)
{
        mlx4_remove_one(pdev);
        return __mlx4_init_one(pdev, NULL);
}

static struct pci_device_id mlx4_pci_table[] = {
        { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
        { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
        { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
        { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
        { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
        { 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

static struct pci_driver mlx4_driver = {
        .name           = DRV_NAME,
        .id_table       = mlx4_pci_table,
        .probe          = mlx4_init_one,
        .remove         = __devexit_p(mlx4_remove_one)
};

static int __init mlx4_init(void)
{
        int ret;

        ret = mlx4_catas_init();
        if (ret)
                return ret;

        ret = pci_register_driver(&mlx4_driver);
        return ret < 0 ? ret : 0;
}

static void __exit mlx4_cleanup(void)
{
        pci_unregister_driver(&mlx4_driver);
        mlx4_catas_cleanup();
}

module_init(mlx4_init);
module_exit(mlx4_cleanup);