]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - drivers/infiniband/hw/ehca/ehca_irq.c
IB/ehca: Path migration support
[linux-2.6-omap-h63xx.git] / drivers / infiniband / hw / ehca / ehca_irq.c
1 /*
2  *  IBM eServer eHCA Infiniband device driver for Linux on POWER
3  *
4  *  Functions for EQs, NEQs and interrupts
5  *
6  *  Authors: Heiko J Schick <schickhj@de.ibm.com>
7  *           Khadija Souissi <souissi@de.ibm.com>
8  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9  *           Joachim Fenkes <fenkes@de.ibm.com>
10  *
11  *  Copyright (c) 2005 IBM Corporation
12  *
13  *  All rights reserved.
14  *
15  *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
16  *  BSD.
17  *
18  * OpenIB BSD License
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions are met:
22  *
23  * Redistributions of source code must retain the above copyright notice, this
24  * list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright notice,
27  * this list of conditions and the following disclaimer in the documentation
28  * and/or other materials
29  * provided with the distribution.
30  *
31  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41  * POSSIBILITY OF SUCH DAMAGE.
42  */
43
#include <linux/err.h>

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"
51
/* Bit-field masks for entries read from the event queue (EQ) */
#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1, 1)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2, 7)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8, 31)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8, 31)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)

/* Bit-field masks for entries read from the notification event queue (NEQ) */
#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1, 1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2, 7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)

/* Bit-field masks applied to words of the error data block (rblock) */
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0, 7)
#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
68
/* Completion task pool; allocated in ehca_create_comp_pool() */
static struct ehca_comp_pool *pool;
#ifdef CONFIG_HOTPLUG_CPU
/* CPU hotplug notifier, registered in ehca_create_comp_pool() */
static struct notifier_block comp_pool_callback_nb;
#endif

/* Forward declaration: used by the EQ processing code before its definition */
static void queue_comp_task(struct ehca_cq *__cq);
75
76 static inline void comp_event_callback(struct ehca_cq *cq)
77 {
78         if (!cq->ib_cq.comp_handler)
79                 return;
80
81         spin_lock(&cq->cb_lock);
82         cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
83         spin_unlock(&cq->cb_lock);
84
85         return;
86 }
87
88 static void print_error_data(struct ehca_shca *shca, void *data,
89                              u64 *rblock, int length)
90 {
91         u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
92         u64 resource = rblock[1];
93
94         switch (type) {
95         case 0x1: /* Queue Pair */
96         {
97                 struct ehca_qp *qp = (struct ehca_qp *)data;
98
99                 /* only print error data if AER is set */
100                 if (rblock[6] == 0)
101                         return;
102
103                 ehca_err(&shca->ib_device,
104                          "QP 0x%x (resource=%lx) has errors.",
105                          qp->ib_qp.qp_num, resource);
106                 break;
107         }
108         case 0x4: /* Completion Queue */
109         {
110                 struct ehca_cq *cq = (struct ehca_cq *)data;
111
112                 ehca_err(&shca->ib_device,
113                          "CQ 0x%x (resource=%lx) has errors.",
114                          cq->cq_number, resource);
115                 break;
116         }
117         default:
118                 ehca_err(&shca->ib_device,
119                          "Unknown error type: %lx on %s.",
120                          type, shca->ib_device.name);
121                 break;
122         }
123
124         ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
125         ehca_err(&shca->ib_device, "EHCA ----- error data begin "
126                  "---------------------------------------------------");
127         ehca_dmp(rblock, length, "resource=%lx", resource);
128         ehca_err(&shca->ib_device, "EHCA ----- error data end "
129                  "----------------------------------------------------");
130
131         return;
132 }
133
134 int ehca_error_data(struct ehca_shca *shca, void *data,
135                     u64 resource)
136 {
137
138         unsigned long ret;
139         u64 *rblock;
140         unsigned long block_count;
141
142         rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
143         if (!rblock) {
144                 ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
145                 ret = -ENOMEM;
146                 goto error_data1;
147         }
148
149         /* rblock must be 4K aligned and should be 4K large */
150         ret = hipz_h_error_data(shca->ipz_hca_handle,
151                                 resource,
152                                 rblock,
153                                 &block_count);
154
155         if (ret == H_R_STATE)
156                 ehca_err(&shca->ib_device,
157                          "No error data is available: %lx.", resource);
158         else if (ret == H_SUCCESS) {
159                 int length;
160
161                 length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
162
163                 if (length > EHCA_PAGESIZE)
164                         length = EHCA_PAGESIZE;
165
166                 print_error_data(shca, data, rblock, length);
167         } else
168                 ehca_err(&shca->ib_device,
169                          "Error data could not be fetched: %lx", resource);
170
171         ehca_free_fw_ctrlblock(rblock);
172
173 error_data1:
174         return ret;
175
176 }
177
178 static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
179                               enum ib_event_type event_type)
180 {
181         struct ib_event event;
182
183         event.device = &shca->ib_device;
184         event.event = event_type;
185
186         if (qp->ext_type == EQPT_SRQ) {
187                 if (!qp->ib_srq.event_handler)
188                         return;
189
190                 event.element.srq = &qp->ib_srq;
191                 qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
192         } else {
193                 if (!qp->ib_qp.event_handler)
194                         return;
195
196                 event.element.qp = &qp->ib_qp;
197                 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
198         }
199 }
200
201 static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
202                               enum ib_event_type event_type, int fatal)
203 {
204         struct ehca_qp *qp;
205         u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
206
207         read_lock(&ehca_qp_idr_lock);
208         qp = idr_find(&ehca_qp_idr, token);
209         read_unlock(&ehca_qp_idr_lock);
210
211         if (!qp)
212                 return;
213
214         if (fatal)
215                 ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
216
217         dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
218                           IB_EVENT_SRQ_ERR : event_type);
219
220         /*
221          * eHCA only processes one WQE at a time for SRQ base QPs,
222          * so the last WQE has been processed as soon as the QP enters
223          * error state.
224          */
225         if (fatal && qp->ext_type == EQPT_SRQBASE)
226                 dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
227
228         return;
229 }
230
231 static void cq_event_callback(struct ehca_shca *shca,
232                               u64 eqe)
233 {
234         struct ehca_cq *cq;
235         u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
236
237         read_lock(&ehca_cq_idr_lock);
238         cq = idr_find(&ehca_cq_idr, token);
239         if (cq)
240                 atomic_inc(&cq->nr_events);
241         read_unlock(&ehca_cq_idr_lock);
242
243         if (!cq)
244                 return;
245
246         ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
247
248         if (atomic_dec_and_test(&cq->nr_events))
249                 wake_up(&cq->wait_completion);
250
251         return;
252 }
253
254 static void parse_identifier(struct ehca_shca *shca, u64 eqe)
255 {
256         u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
257
258         switch (identifier) {
259         case 0x02: /* path migrated */
260                 qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
261                 break;
262         case 0x03: /* communication established */
263                 qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
264                 break;
265         case 0x04: /* send queue drained */
266                 qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
267                 break;
268         case 0x05: /* QP error */
269         case 0x06: /* QP error */
270                 qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
271                 break;
272         case 0x07: /* CQ error */
273         case 0x08: /* CQ error */
274                 cq_event_callback(shca, eqe);
275                 break;
276         case 0x09: /* MRMWPTE error */
277                 ehca_err(&shca->ib_device, "MRMWPTE error.");
278                 break;
279         case 0x0A: /* port event */
280                 ehca_err(&shca->ib_device, "Port event.");
281                 break;
282         case 0x0B: /* MR access error */
283                 ehca_err(&shca->ib_device, "MR access error.");
284                 break;
285         case 0x0C: /* EQ error */
286                 ehca_err(&shca->ib_device, "EQ error.");
287                 break;
288         case 0x0D: /* P/Q_Key mismatch */
289                 ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
290                 break;
291         case 0x10: /* sampling complete */
292                 ehca_err(&shca->ib_device, "Sampling complete.");
293                 break;
294         case 0x11: /* unaffiliated access error */
295                 ehca_err(&shca->ib_device, "Unaffiliated access error.");
296                 break;
297         case 0x12: /* path migrating */
298                 ehca_err(&shca->ib_device, "Path migrating.");
299                 break;
300         case 0x13: /* interface trace stopped */
301                 ehca_err(&shca->ib_device, "Interface trace stopped.");
302                 break;
303         case 0x14: /* first error capture info available */
304                 ehca_info(&shca->ib_device, "First error capture available");
305                 break;
306         case 0x15: /* SRQ limit reached */
307                 qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
308                 break;
309         default:
310                 ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
311                          identifier, shca->ib_device.name);
312                 break;
313         }
314
315         return;
316 }
317
318 static void dispatch_port_event(struct ehca_shca *shca, int port_num,
319                                 enum ib_event_type type, const char *msg)
320 {
321         struct ib_event event;
322
323         ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
324         event.device = &shca->ib_device;
325         event.event = type;
326         event.element.port_num = port_num;
327         ib_dispatch_event(&event);
328 }
329
330 static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
331 {
332         struct ehca_sma_attr  new_attr;
333         struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
334
335         ehca_query_sma_attr(shca, port_num, &new_attr);
336
337         if (new_attr.sm_sl  != old_attr->sm_sl ||
338             new_attr.sm_lid != old_attr->sm_lid)
339                 dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
340                                     "SM changed");
341
342         if (new_attr.lid != old_attr->lid ||
343             new_attr.lmc != old_attr->lmc)
344                 dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
345                                     "LID changed");
346
347         if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
348             memcmp(new_attr.pkeys, old_attr->pkeys,
349                    sizeof(u16) * new_attr.pkey_tbl_len))
350                 dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
351                                     "P_Key changed");
352
353         *old_attr = new_attr;
354 }
355
356 static void parse_ec(struct ehca_shca *shca, u64 eqe)
357 {
358         u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
359         u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
360
361         switch (ec) {
362         case 0x30: /* port availability change */
363                 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
364                         shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
365                         dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
366                                             "is active");
367                         ehca_query_sma_attr(shca, port,
368                                             &shca->sport[port - 1].saved_attr);
369                 } else {
370                         shca->sport[port - 1].port_state = IB_PORT_DOWN;
371                         dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
372                                             "is inactive");
373                 }
374                 break;
375         case 0x31:
376                 /* port configuration change
377                  * disruptive change is caused by
378                  * LID, PKEY or SM change
379                  */
380                 if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
381                         ehca_warn(&shca->ib_device, "disruptive port "
382                                   "%d configuration change", port);
383
384                         shca->sport[port - 1].port_state = IB_PORT_DOWN;
385                         dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
386                                             "is inactive");
387
388                         shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
389                         dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
390                                             "is active");
391                 } else
392                         notify_port_conf_change(shca, port);
393                 break;
394         case 0x32: /* adapter malfunction */
395                 ehca_err(&shca->ib_device, "Adapter malfunction.");
396                 break;
397         case 0x33:  /* trace stopped */
398                 ehca_err(&shca->ib_device, "Traced stopped.");
399                 break;
400         default:
401                 ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
402                          ec, shca->ib_device.name);
403                 break;
404         }
405
406         return;
407 }
408
409 static inline void reset_eq_pending(struct ehca_cq *cq)
410 {
411         u64 CQx_EP;
412         struct h_galpa gal = cq->galpas.kernel;
413
414         hipz_galpa_store_cq(gal, cqx_ep, 0x0);
415         CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
416
417         return;
418 }
419
420 irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
421 {
422         struct ehca_shca *shca = (struct ehca_shca*)dev_id;
423
424         tasklet_hi_schedule(&shca->neq.interrupt_task);
425
426         return IRQ_HANDLED;
427 }
428
429 void ehca_tasklet_neq(unsigned long data)
430 {
431         struct ehca_shca *shca = (struct ehca_shca*)data;
432         struct ehca_eqe *eqe;
433         u64 ret;
434
435         eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
436
437         while (eqe) {
438                 if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
439                         parse_ec(shca, eqe->entry);
440
441                 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
442         }
443
444         ret = hipz_h_reset_event(shca->ipz_hca_handle,
445                                  shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
446
447         if (ret != H_SUCCESS)
448                 ehca_err(&shca->ib_device, "Can't clear notification events.");
449
450         return;
451 }
452
453 irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
454 {
455         struct ehca_shca *shca = (struct ehca_shca*)dev_id;
456
457         tasklet_hi_schedule(&shca->eq.interrupt_task);
458
459         return IRQ_HANDLED;
460 }
461
462
463 static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
464 {
465         u64 eqe_value;
466         u32 token;
467         struct ehca_cq *cq;
468
469         eqe_value = eqe->entry;
470         ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
471         if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
472                 ehca_dbg(&shca->ib_device, "Got completion event");
473                 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
474                 read_lock(&ehca_cq_idr_lock);
475                 cq = idr_find(&ehca_cq_idr, token);
476                 if (cq)
477                         atomic_inc(&cq->nr_events);
478                 read_unlock(&ehca_cq_idr_lock);
479                 if (cq == NULL) {
480                         ehca_err(&shca->ib_device,
481                                  "Invalid eqe for non-existing cq token=%x",
482                                  token);
483                         return;
484                 }
485                 reset_eq_pending(cq);
486                 if (ehca_scaling_code)
487                         queue_comp_task(cq);
488                 else {
489                         comp_event_callback(cq);
490                         if (atomic_dec_and_test(&cq->nr_events))
491                                 wake_up(&cq->wait_completion);
492                 }
493         } else {
494                 ehca_dbg(&shca->ib_device, "Got non completion event");
495                 parse_identifier(shca, eqe_value);
496         }
497 }
498
499 void ehca_process_eq(struct ehca_shca *shca, int is_irq)
500 {
501         struct ehca_eq *eq = &shca->eq;
502         struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
503         u64 eqe_value;
504         unsigned long flags;
505         int eqe_cnt, i;
506         int eq_empty = 0;
507
508         spin_lock_irqsave(&eq->irq_spinlock, flags);
509         if (is_irq) {
510                 const int max_query_cnt = 100;
511                 int query_cnt = 0;
512                 int int_state = 1;
513                 do {
514                         int_state = hipz_h_query_int_state(
515                                 shca->ipz_hca_handle, eq->ist);
516                         query_cnt++;
517                         iosync();
518                 } while (int_state && query_cnt < max_query_cnt);
519                 if (unlikely((query_cnt == max_query_cnt)))
520                         ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
521                                  int_state, query_cnt);
522         }
523
524         /* read out all eqes */
525         eqe_cnt = 0;
526         do {
527                 u32 token;
528                 eqe_cache[eqe_cnt].eqe =
529                         (struct ehca_eqe *)ehca_poll_eq(shca, eq);
530                 if (!eqe_cache[eqe_cnt].eqe)
531                         break;
532                 eqe_value = eqe_cache[eqe_cnt].eqe->entry;
533                 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
534                         token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
535                         read_lock(&ehca_cq_idr_lock);
536                         eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
537                         if (eqe_cache[eqe_cnt].cq)
538                                 atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
539                         read_unlock(&ehca_cq_idr_lock);
540                         if (!eqe_cache[eqe_cnt].cq) {
541                                 ehca_err(&shca->ib_device,
542                                          "Invalid eqe for non-existing cq "
543                                          "token=%x", token);
544                                 continue;
545                         }
546                 } else
547                         eqe_cache[eqe_cnt].cq = NULL;
548                 eqe_cnt++;
549         } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
550         if (!eqe_cnt) {
551                 if (is_irq)
552                         ehca_dbg(&shca->ib_device,
553                                  "No eqe found for irq event");
554                 goto unlock_irq_spinlock;
555         } else if (!is_irq)
556                 ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
557         if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
558                 ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
559         /* enable irq for new packets */
560         for (i = 0; i < eqe_cnt; i++) {
561                 if (eq->eqe_cache[i].cq)
562                         reset_eq_pending(eq->eqe_cache[i].cq);
563         }
564         /* check eq */
565         spin_lock(&eq->spinlock);
566         eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
567         spin_unlock(&eq->spinlock);
568         /* call completion handler for cached eqes */
569         for (i = 0; i < eqe_cnt; i++)
570                 if (eq->eqe_cache[i].cq) {
571                         if (ehca_scaling_code)
572                                 queue_comp_task(eq->eqe_cache[i].cq);
573                         else {
574                                 struct ehca_cq *cq = eq->eqe_cache[i].cq;
575                                 comp_event_callback(cq);
576                                 if (atomic_dec_and_test(&cq->nr_events))
577                                         wake_up(&cq->wait_completion);
578                         }
579                 } else {
580                         ehca_dbg(&shca->ib_device, "Got non completion event");
581                         parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
582                 }
583         /* poll eq if not empty */
584         if (eq_empty)
585                 goto unlock_irq_spinlock;
586         do {
587                 struct ehca_eqe *eqe;
588                 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
589                 if (!eqe)
590                         break;
591                 process_eqe(shca, eqe);
592         } while (1);
593
594 unlock_irq_spinlock:
595         spin_unlock_irqrestore(&eq->irq_spinlock, flags);
596 }
597
/*
 * Tasklet for the event queue: processes the EQ in interrupt mode.
 * @data is the struct ehca_shca pointer cast to unsigned long.
 */
void ehca_tasklet_eq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca *)data;

	ehca_process_eq(shca, 1);
}
602
603 static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
604 {
605         int cpu;
606         unsigned long flags;
607
608         WARN_ON_ONCE(!in_interrupt());
609         if (ehca_debug_level)
610                 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
611
612         spin_lock_irqsave(&pool->last_cpu_lock, flags);
613         cpu = next_cpu(pool->last_cpu, cpu_online_map);
614         if (cpu == NR_CPUS)
615                 cpu = first_cpu(cpu_online_map);
616         pool->last_cpu = cpu;
617         spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
618
619         return cpu;
620 }
621
622 static void __queue_comp_task(struct ehca_cq *__cq,
623                               struct ehca_cpu_comp_task *cct)
624 {
625         unsigned long flags;
626
627         spin_lock_irqsave(&cct->task_lock, flags);
628         spin_lock(&__cq->task_lock);
629
630         if (__cq->nr_callbacks == 0) {
631                 __cq->nr_callbacks++;
632                 list_add_tail(&__cq->entry, &cct->cq_list);
633                 cct->cq_jobs++;
634                 wake_up(&cct->wait_queue);
635         } else
636                 __cq->nr_callbacks++;
637
638         spin_unlock(&__cq->task_lock);
639         spin_unlock_irqrestore(&cct->task_lock, flags);
640 }
641
642 static void queue_comp_task(struct ehca_cq *__cq)
643 {
644         int cpu_id;
645         struct ehca_cpu_comp_task *cct;
646         int cq_jobs;
647         unsigned long flags;
648
649         cpu_id = find_next_online_cpu(pool);
650         BUG_ON(!cpu_online(cpu_id));
651
652         cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
653         BUG_ON(!cct);
654
655         spin_lock_irqsave(&cct->task_lock, flags);
656         cq_jobs = cct->cq_jobs;
657         spin_unlock_irqrestore(&cct->task_lock, flags);
658         if (cq_jobs > 0) {
659                 cpu_id = find_next_online_cpu(pool);
660                 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
661                 BUG_ON(!cct);
662         }
663
664         __queue_comp_task(__cq, cct);
665 }
666
667 static void run_comp_task(struct ehca_cpu_comp_task *cct)
668 {
669         struct ehca_cq *cq;
670         unsigned long flags;
671
672         spin_lock_irqsave(&cct->task_lock, flags);
673
674         while (!list_empty(&cct->cq_list)) {
675                 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
676                 spin_unlock_irqrestore(&cct->task_lock, flags);
677
678                 comp_event_callback(cq);
679                 if (atomic_dec_and_test(&cq->nr_events))
680                         wake_up(&cq->wait_completion);
681
682                 spin_lock_irqsave(&cct->task_lock, flags);
683                 spin_lock(&cq->task_lock);
684                 cq->nr_callbacks--;
685                 if (!cq->nr_callbacks) {
686                         list_del_init(cct->cq_list.next);
687                         cct->cq_jobs--;
688                 }
689                 spin_unlock(&cq->task_lock);
690         }
691
692         spin_unlock_irqrestore(&cct->task_lock, flags);
693 }
694
695 static int comp_task(void *__cct)
696 {
697         struct ehca_cpu_comp_task *cct = __cct;
698         int cql_empty;
699         DECLARE_WAITQUEUE(wait, current);
700
701         set_current_state(TASK_INTERRUPTIBLE);
702         while (!kthread_should_stop()) {
703                 add_wait_queue(&cct->wait_queue, &wait);
704
705                 spin_lock_irq(&cct->task_lock);
706                 cql_empty = list_empty(&cct->cq_list);
707                 spin_unlock_irq(&cct->task_lock);
708                 if (cql_empty)
709                         schedule();
710                 else
711                         __set_current_state(TASK_RUNNING);
712
713                 remove_wait_queue(&cct->wait_queue, &wait);
714
715                 spin_lock_irq(&cct->task_lock);
716                 cql_empty = list_empty(&cct->cq_list);
717                 spin_unlock_irq(&cct->task_lock);
718                 if (!cql_empty)
719                         run_comp_task(__cct);
720
721                 set_current_state(TASK_INTERRUPTIBLE);
722         }
723         __set_current_state(TASK_RUNNING);
724
725         return 0;
726 }
727
728 static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
729                                             int cpu)
730 {
731         struct ehca_cpu_comp_task *cct;
732
733         cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
734         spin_lock_init(&cct->task_lock);
735         INIT_LIST_HEAD(&cct->cq_list);
736         init_waitqueue_head(&cct->wait_queue);
737         cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
738
739         return cct->task;
740 }
741
742 static void destroy_comp_task(struct ehca_comp_pool *pool,
743                               int cpu)
744 {
745         struct ehca_cpu_comp_task *cct;
746         struct task_struct *task;
747         unsigned long flags_cct;
748
749         cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
750
751         spin_lock_irqsave(&cct->task_lock, flags_cct);
752
753         task = cct->task;
754         cct->task = NULL;
755         cct->cq_jobs = 0;
756
757         spin_unlock_irqrestore(&cct->task_lock, flags_cct);
758
759         if (task)
760                 kthread_stop(task);
761 }
762
763 #ifdef CONFIG_HOTPLUG_CPU
764 static void take_over_work(struct ehca_comp_pool *pool,
765                            int cpu)
766 {
767         struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
768         LIST_HEAD(list);
769         struct ehca_cq *cq;
770         unsigned long flags_cct;
771
772         spin_lock_irqsave(&cct->task_lock, flags_cct);
773
774         list_splice_init(&cct->cq_list, &list);
775
776         while (!list_empty(&list)) {
777                 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
778
779                 list_del(&cq->entry);
780                 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
781                                                   smp_processor_id()));
782         }
783
784         spin_unlock_irqrestore(&cct->task_lock, flags_cct);
785
786 }
787
788 static int comp_pool_callback(struct notifier_block *nfb,
789                               unsigned long action,
790                               void *hcpu)
791 {
792         unsigned int cpu = (unsigned long)hcpu;
793         struct ehca_cpu_comp_task *cct;
794
795         switch (action) {
796         case CPU_UP_PREPARE:
797         case CPU_UP_PREPARE_FROZEN:
798                 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
799                 if (!create_comp_task(pool, cpu)) {
800                         ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
801                         return NOTIFY_BAD;
802                 }
803                 break;
804         case CPU_UP_CANCELED:
805         case CPU_UP_CANCELED_FROZEN:
806                 ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
807                 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
808                 kthread_bind(cct->task, any_online_cpu(cpu_online_map));
809                 destroy_comp_task(pool, cpu);
810                 break;
811         case CPU_ONLINE:
812         case CPU_ONLINE_FROZEN:
813                 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
814                 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
815                 kthread_bind(cct->task, cpu);
816                 wake_up_process(cct->task);
817                 break;
818         case CPU_DOWN_PREPARE:
819         case CPU_DOWN_PREPARE_FROZEN:
820                 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
821                 break;
822         case CPU_DOWN_FAILED:
823         case CPU_DOWN_FAILED_FROZEN:
824                 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
825                 break;
826         case CPU_DEAD:
827         case CPU_DEAD_FROZEN:
828                 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
829                 destroy_comp_task(pool, cpu);
830                 take_over_work(pool, cpu);
831                 break;
832         }
833
834         return NOTIFY_OK;
835 }
836 #endif
837
838 int ehca_create_comp_pool(void)
839 {
840         int cpu;
841         struct task_struct *task;
842
843         if (!ehca_scaling_code)
844                 return 0;
845
846         pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
847         if (pool == NULL)
848                 return -ENOMEM;
849
850         spin_lock_init(&pool->last_cpu_lock);
851         pool->last_cpu = any_online_cpu(cpu_online_map);
852
853         pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
854         if (pool->cpu_comp_tasks == NULL) {
855                 kfree(pool);
856                 return -EINVAL;
857         }
858
859         for_each_online_cpu(cpu) {
860                 task = create_comp_task(pool, cpu);
861                 if (task) {
862                         kthread_bind(task, cpu);
863                         wake_up_process(task);
864                 }
865         }
866
867 #ifdef CONFIG_HOTPLUG_CPU
868         comp_pool_callback_nb.notifier_call = comp_pool_callback;
869         comp_pool_callback_nb.priority = 0;
870         register_cpu_notifier(&comp_pool_callback_nb);
871 #endif
872
873         printk(KERN_INFO "eHCA scaling code enabled\n");
874
875         return 0;
876 }
877
878 void ehca_destroy_comp_pool(void)
879 {
880         int i;
881
882         if (!ehca_scaling_code)
883                 return;
884
885 #ifdef CONFIG_HOTPLUG_CPU
886         unregister_cpu_notifier(&comp_pool_callback_nb);
887 #endif
888
889         for (i = 0; i < NR_CPUS; i++) {
890                 if (cpu_online(i))
891                         destroy_comp_task(pool, i);
892         }
893         free_percpu(pool->cpu_comp_tasks);
894         kfree(pool);
895 }