]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - arch/ia64/sn/kernel/xpc_partition.c
958488f5569939e75592ae83823efc3fb81b6e77
[linux-2.6-omap-h63xx.git] / arch / ia64 / sn / kernel / xpc_partition.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9
10 /*
11  * Cross Partition Communication (XPC) partition support.
12  *
13  *      This is the part of XPC that detects the presence/absence of
14  *      other partitions. It provides a heartbeat and monitors the
15  *      heartbeats of other partitions.
16  *
17  */
18
19
20 #include <linux/kernel.h>
21 #include <linux/sysctl.h>
22 #include <linux/cache.h>
23 #include <linux/mmzone.h>
24 #include <linux/nodemask.h>
25 #include <asm/uncached.h>
26 #include <asm/sn/bte.h>
27 #include <asm/sn/intr.h>
28 #include <asm/sn/sn_sal.h>
29 #include <asm/sn/nodepda.h>
30 #include <asm/sn/addrs.h>
31 #include "xpc.h"
32
33
34 /* XPC is exiting flag; the partition scan loops in this file stop early once it is set */
35 int xpc_exiting;
36
37
38 /* SH_IPI_ACCESS shub register value on startup (saved by xpc_allow_IPI_ops(), written back by xpc_restrict_IPI_ops()) */
39 static u64 xpc_sh1_IPI_access;
40 static u64 xpc_sh2_IPI_access0;
41 static u64 xpc_sh2_IPI_access1;
42 static u64 xpc_sh2_IPI_access2;
43 static u64 xpc_sh2_IPI_access3;
44
45
46 /* original protection values for each node (SH1_MD_DQLP_MMR_DIR_PRIVEC0, saved on Shub 1.1 systems only) */
47 u64 xpc_prot_vec[MAX_COMPACT_NODES];
48
49
50 /* this partition's reserved page */
51 struct xpc_rsvd_page *xpc_rsvd_page;
52
53 /* this partition's XPC variables (within the reserved page); set up by xpc_rsvd_page_init() */
54 struct xpc_vars *xpc_vars;
55 struct xpc_vars_part *xpc_vars_part;
56
57
58 /*
59  * For performance reasons, each entry of xpc_partitions[] is cacheline
60  * aligned. And xpc_partitions[] is padded with an additional entry at the
61  * end so that the last legitimate entry doesn't share its cacheline with
62  * another variable.
63  */
64 struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
65
66
67 /*
68  * Generic buffer used to store a local copy of the remote partition's
69  * reserved page or XPC variables.
70  *
71  * xpc_discovery runs only once and is a separate thread that is
72  * very likely going to be processing in parallel with receiving
73  * interrupts.
74  */
75 char ____cacheline_aligned
76                 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
77
78
79 /*
80  * Given a nasid, get the physical address of the  partition's reserved page
81  * for that nasid. This function returns 0 on any error.
82  */
83 static u64
84 xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
85 {
86         bte_result_t bte_res;
87         s64 status;
88         u64 cookie = 0;
89         u64 rp_pa = nasid;      /* seed with nasid */
90         u64 len = 0;
91
92
93         while (1) {
94
95                 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
96                                                                 &len);
97
98                 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
99                         "0x%016lx, address=0x%016lx, len=0x%016lx\n",
100                         status, cookie, rp_pa, len);
101
102                 if (status != SALRET_MORE_PASSES) {
103                         break;
104                 }
105
106                 if (len > buf_size) {
107                         dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
108                         status = SALRET_ERROR;
109                         break;
110                 }
111
112                 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
113                                         (BTE_NOTIFY | BTE_WACQUIRE), NULL);
114                 if (bte_res != BTE_SUCCESS) {
115                         dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
116                         status = SALRET_ERROR;
117                         break;
118                 }
119         }
120
121         if (status != SALRET_OK) {
122                 rp_pa = 0;
123         }
124         dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
125         return rp_pa;
126 }
127
128
129 /*
130  * Fill the partition reserved page with the information needed by
131  * other partitions to discover we are alive and establish initial
132  * communications.
133  */
134 struct xpc_rsvd_page *
135 xpc_rsvd_page_init(void)
136 {
137         struct xpc_rsvd_page *rp;
138         AMO_t *amos_page;
139         u64 rp_pa, next_cl, nasid_array = 0;
140         int i, ret;
141
142
143         /* get the local reserved page's address */
144
145         rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
146                                         (u64) xpc_remote_copy_buffer,
147                                                 XPC_RSVD_PAGE_ALIGNED_SIZE);
148         if (rp_pa == 0) {
149                 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
150                 return NULL;
151         }
152         rp = (struct xpc_rsvd_page *) __va(rp_pa);
153
154         if (rp->partid != sn_partition_id) {
155                 dev_err(xpc_part, "the reserved page's partid of %d should be "
156                         "%d\n", rp->partid, sn_partition_id);
157                 return NULL;
158         }
159
160         rp->version = XPC_RP_VERSION;
161
162         /*
163          * Place the XPC variables on the cache line following the
164          * reserved page structure.
165          */
166         next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
167         xpc_vars = (struct xpc_vars *) next_cl;
168
169         /*
170          * Before clearing xpc_vars, see if a page of AMOs had been previously
171          * allocated. If not we'll need to allocate one and set permissions
172          * so that cross-partition AMOs are allowed.
173          *
174          * The allocated AMO page needs MCA reporting to remain disabled after
175          * XPC has unloaded.  To make this work, we keep a copy of the pointer
176          * to this page (i.e., amos_page) in the struct xpc_vars structure,
177          * which is pointed to by the reserved page, and re-use that saved copy
178          * on subsequent loads of XPC. This AMO page is never freed, and its
179          * memory protections are never restricted.
180          */
181         if ((amos_page = xpc_vars->amos_page) == NULL) {
182                 amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
183                 if (amos_page == NULL) {
184                         dev_err(xpc_part, "can't allocate page of AMOs\n");
185                         return NULL;
186                 }
187
188                 /*
189                  * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
190                  * when xpc_allow_IPI_ops() is called via xpc_hb_init().
191                  */
192                 if (!enable_shub_wars_1_1()) {
193                         ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
194                                         PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
195                                         &nasid_array);
196                         if (ret != 0) {
197                                 dev_err(xpc_part, "can't change memory "
198                                         "protections\n");
199                                 uncached_free_page(__IA64_UNCACHED_OFFSET |
200                                                    TO_PHYS((u64) amos_page));
201                                 return NULL;
202                         }
203                 }
204         } else if (!IS_AMO_ADDRESS((u64) amos_page)) {
205                 /*
206                  * EFI's XPBOOT can also set amos_page in the reserved page,
207                  * but it happens to leave it as an uncached physical address
208                  * and we need it to be an uncached virtual, so we'll have to
209                  * convert it.
210                  */
211                 if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
212                         dev_err(xpc_part, "previously used amos_page address "
213                                 "is bad = 0x%p\n", (void *) amos_page);
214                         return NULL;
215                 }
216                 amos_page = (AMO_t *) TO_AMO((u64) amos_page);
217         }
218
219         memset(xpc_vars, 0, sizeof(struct xpc_vars));
220
221         /*
222          * Place the XPC per partition specific variables on the cache line
223          * following the XPC variables structure.
224          */
225         next_cl += XPC_VARS_ALIGNED_SIZE;
226         memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
227                                                         XP_MAX_PARTITIONS);
228         xpc_vars_part = (struct xpc_vars_part *) next_cl;
229         xpc_vars->vars_part_pa = __pa(next_cl);
230
231         xpc_vars->version = XPC_V_VERSION;
232         xpc_vars->act_nasid = cpuid_to_nasid(0);
233         xpc_vars->act_phys_cpuid = cpu_physical_id(0);
234         xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
235
236
237         /* initialize the activate IRQ related AMO variables */
238         for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
239                 (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
240         }
241
242         /* initialize the engaged remote partitions related AMO variables */
243         (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
244         (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
245
246         /* export AMO page's physical address to other partitions */
247         xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
248
249         /* timestamp of when reserved page was initialized */
250         rp->stamp = CURRENT_TIME;
251
252         /*
253          * This signifies to the remote partition that our reserved
254          * page is initialized.
255          */
256         rp->vars_pa = __pa(xpc_vars);
257
258         return rp;
259 }
260
261
262 /*
263  * Change protections to allow IPI operations (and AMO operations on
264  * Shub 1.1 systems).
265  */
266 void
267 xpc_allow_IPI_ops(void)
268 {
269         int node;
270         int nasid;
271
272
273         // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
274
275         if (is_shub2()) {
276                 xpc_sh2_IPI_access0 =
277                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
278                 xpc_sh2_IPI_access1 =
279                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
280                 xpc_sh2_IPI_access2 =
281                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
282                 xpc_sh2_IPI_access3 =
283                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
284
285                 for_each_online_node(node) {
286                         nasid = cnodeid_to_nasid(node);
287                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
288                                                                 -1UL);
289                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
290                                                                 -1UL);
291                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
292                                                                 -1UL);
293                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
294                                                                 -1UL);
295                 }
296
297         } else {
298                 xpc_sh1_IPI_access =
299                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
300
301                 for_each_online_node(node) {
302                         nasid = cnodeid_to_nasid(node);
303                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
304                                                                 -1UL);
305
306                         /*
307                          * Since the BIST collides with memory operations on
308                          * SHUB 1.1 sn_change_memprotect() cannot be used.
309                          */
310                         if (enable_shub_wars_1_1()) {
311                                 /* open up everything */
312                                 xpc_prot_vec[node] = (u64) HUB_L((u64 *)
313                                                 GLOBAL_MMR_ADDR(nasid,
314                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
315                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
316                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
317                                                                 -1UL);
318                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
319                                                 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
320                                                                 -1UL);
321                         }
322                 }
323         }
324 }
325
326
327 /*
328  * Restrict protections to disallow IPI operations (and AMO operations on
329  * Shub 1.1 systems).
330  */
331 void
332 xpc_restrict_IPI_ops(void)
333 {
334         int node;
335         int nasid;
336
337
338         // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
339
340         if (is_shub2()) {
341
342                 for_each_online_node(node) {
343                         nasid = cnodeid_to_nasid(node);
344                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
345                                                         xpc_sh2_IPI_access0);
346                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
347                                                         xpc_sh2_IPI_access1);
348                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
349                                                         xpc_sh2_IPI_access2);
350                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
351                                                         xpc_sh2_IPI_access3);
352                 }
353
354         } else {
355
356                 for_each_online_node(node) {
357                         nasid = cnodeid_to_nasid(node);
358                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
359                                                         xpc_sh1_IPI_access);
360
361                         if (enable_shub_wars_1_1()) {
362                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
363                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
364                                                         xpc_prot_vec[node]);
365                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
366                                                 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
367                                                         xpc_prot_vec[node]);
368                         }
369                 }
370         }
371 }
372
373
374 /*
375  * At periodic intervals, scan through all active partitions and ensure
376  * their heartbeat is still active.  If not, the partition is deactivated.
377  */
378 void
379 xpc_check_remote_hb(void)
380 {
381         struct xpc_vars *remote_vars;
382         struct xpc_partition *part;
383         partid_t partid;
384         bte_result_t bres;
385
386
387         remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
388
389         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
390
391                 if (xpc_exiting) {
392                         break;
393                 }
394
395                 if (partid == sn_partition_id) {
396                         continue;
397                 }
398
399                 part = &xpc_partitions[partid];
400
401                 if (part->act_state == XPC_P_INACTIVE ||
402                                 part->act_state == XPC_P_DEACTIVATING) {
403                         continue;
404                 }
405
406                 /* pull the remote_hb cache line */
407                 bres = xp_bte_copy(part->remote_vars_pa,
408                                         ia64_tpa((u64) remote_vars),
409                                         XPC_VARS_ALIGNED_SIZE,
410                                         (BTE_NOTIFY | BTE_WACQUIRE), NULL);
411                 if (bres != BTE_SUCCESS) {
412                         XPC_DEACTIVATE_PARTITION(part,
413                                                 xpc_map_bte_errors(bres));
414                         continue;
415                 }
416
417                 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
418                         " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
419                         remote_vars->heartbeat, part->last_heartbeat,
420                         remote_vars->kdb_status,
421                         remote_vars->heartbeating_to_mask);
422
423                 if (((remote_vars->heartbeat == part->last_heartbeat) &&
424                         (remote_vars->kdb_status == 0)) ||
425                              !xpc_hb_allowed(sn_partition_id, remote_vars)) {
426
427                         XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
428                         continue;
429                 }
430
431                 part->last_heartbeat = remote_vars->heartbeat;
432         }
433 }
434
435
436 /*
437  * Get a copy of the remote partition's rsvd page.
438  *
439  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
440  * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
441  */
442 static enum xpc_retval
443 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
444                 struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
445 {
446         int bres, i;
447
448
449         /* get the reserved page's physical address */
450
451         *remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
452                                                 XPC_RSVD_PAGE_ALIGNED_SIZE);
453         if (*remote_rp_pa == 0) {
454                 return xpcNoRsvdPageAddr;
455         }
456
457
458         /* pull over the reserved page structure */
459
460         bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
461                                 XPC_RSVD_PAGE_ALIGNED_SIZE,
462                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
463         if (bres != BTE_SUCCESS) {
464                 return xpc_map_bte_errors(bres);
465         }
466
467
468         if (discovered_nasids != NULL) {
469                 for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
470                         discovered_nasids[i] |= remote_rp->part_nasids[i];
471                 }
472         }
473
474
475         /* check that the partid is for another partition */
476
477         if (remote_rp->partid < 1 ||
478                                 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
479                 return xpcInvalidPartid;
480         }
481
482         if (remote_rp->partid == sn_partition_id) {
483                 return xpcLocalPartid;
484         }
485
486
487         if (XPC_VERSION_MAJOR(remote_rp->version) !=
488                                         XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
489                 return xpcBadVersion;
490         }
491
492         return xpcSuccess;
493 }
494
495
496 /*
497  * Get a copy of the remote partition's XPC variables.
498  *
499  * remote_vars points to a buffer that is cacheline aligned for BTE copies and
500  * assumed to be of size XPC_VARS_ALIGNED_SIZE.
501  */
502 static enum xpc_retval
503 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
504 {
505         int bres;
506
507
508         if (remote_vars_pa == 0) {
509                 return xpcVarsNotSet;
510         }
511
512
513         /* pull over the cross partition variables */
514
515         bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
516                                 XPC_VARS_ALIGNED_SIZE,
517                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
518         if (bres != BTE_SUCCESS) {
519                 return xpc_map_bte_errors(bres);
520         }
521
522         if (XPC_VERSION_MAJOR(remote_vars->version) !=
523                                         XPC_VERSION_MAJOR(XPC_V_VERSION)) {
524                 return xpcBadVersion;
525         }
526
527         return xpcSuccess;
528 }
529
530
531 /*
532  * Update the remote partition's info.
533  */
534 static void
535 xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
536                 struct timespec *remote_rp_stamp, u64 remote_rp_pa,
537                 u64 remote_vars_pa, struct xpc_vars *remote_vars)
538 {
539         part->remote_rp_version = remote_rp_version;
540         dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
541                 part->remote_rp_version);
542
543         part->remote_rp_stamp = *remote_rp_stamp;
544         dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
545                 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
546
547         part->remote_rp_pa = remote_rp_pa;
548         dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
549
550         part->remote_vars_pa = remote_vars_pa;
551         dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
552                 part->remote_vars_pa);
553
554         part->last_heartbeat = remote_vars->heartbeat;
555         dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
556                 part->last_heartbeat);
557
558         part->remote_vars_part_pa = remote_vars->vars_part_pa;
559         dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
560                 part->remote_vars_part_pa);
561
562         part->remote_act_nasid = remote_vars->act_nasid;
563         dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
564                 part->remote_act_nasid);
565
566         part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
567         dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
568                 part->remote_act_phys_cpuid);
569
570         part->remote_amos_page_pa = remote_vars->amos_page_pa;
571         dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
572                 part->remote_amos_page_pa);
573
574         part->remote_vars_version = remote_vars->version;
575         dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
576                 part->remote_vars_version);
577 }
578
579
580 /*
581  * Prior code has determined the nasid which generated an IPI.  Inspect
582  * that nasid to determine if its partition needs to be activated or
583  * deactivated.
584  *
585  * A partition is consider "awaiting activation" if our partition
586  * flags indicate it is not active and it has a heartbeat.  A
587  * partition is considered "awaiting deactivation" if our partition
588  * flags indicate it is active but it has no heartbeat or it is not
589  * sending its heartbeat to us.
590  *
591  * To determine the heartbeat, the remote nasid must have a properly
592  * initialized reserved page.
593  */
594 static void
595 xpc_identify_act_IRQ_req(int nasid)
596 {
597         struct xpc_rsvd_page *remote_rp;
598         struct xpc_vars *remote_vars;
599         u64 remote_rp_pa;
600         u64 remote_vars_pa;
601         int remote_rp_version;
602         int reactivate = 0;
603         int stamp_diff;
604         struct timespec remote_rp_stamp = { 0, 0 };
605         partid_t partid;
606         struct xpc_partition *part;
607         enum xpc_retval ret;
608
609
610         /* pull over the reserved page structure */
611
612         remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
613
614         ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
615         if (ret != xpcSuccess) {
616                 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
617                         "which sent interrupt, reason=%d\n", nasid, ret);
618                 return;
619         }
620
621         remote_vars_pa = remote_rp->vars_pa;
622         remote_rp_version = remote_rp->version;
623         if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
624                 remote_rp_stamp = remote_rp->stamp;
625         }
626         partid = remote_rp->partid;
627         part = &xpc_partitions[partid];
628
629
630         /* pull over the cross partition variables */
631
632         remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
633
634         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
635         if (ret != xpcSuccess) {
636
637                 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
638                         "which sent interrupt, reason=%d\n", nasid, ret);
639
640                 XPC_DEACTIVATE_PARTITION(part, ret);
641                 return;
642         }
643
644
645         part->act_IRQ_rcvd++;
646
647         dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
648                 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
649                 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
650
651         if (xpc_partition_disengaged(part) &&
652                                         part->act_state == XPC_P_INACTIVE) {
653
654                 xpc_update_partition_info(part, remote_rp_version,
655                                         &remote_rp_stamp, remote_rp_pa,
656                                         remote_vars_pa, remote_vars);
657
658                 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
659                         if (xpc_partition_disengage_requested(1UL << partid)) {
660                                 /*
661                                  * Other side is waiting on us to disengage,
662                                  * even though we already have.
663                                  */
664                                 return;
665                         }
666                 } else {
667                         /* other side doesn't support disengage requests */
668                         xpc_clear_partition_disengage_request(1UL << partid);
669                 }
670
671                 xpc_activate_partition(part);
672                 return;
673         }
674
675         DBUG_ON(part->remote_rp_version == 0);
676         DBUG_ON(part->remote_vars_version == 0);
677
678         if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
679                 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
680                                                         remote_vars_version));
681
682                 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
683                         DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
684                                                                 version));
685                         /* see if the other side rebooted */
686                         if (part->remote_amos_page_pa ==
687                                 remote_vars->amos_page_pa &&
688                                         xpc_hb_allowed(sn_partition_id,
689                                                                 remote_vars)) {
690                                 /* doesn't look that way, so ignore the IPI */
691                                 return;
692                         }
693                 }
694
695                 /*
696                  * Other side rebooted and previous XPC didn't support the
697                  * disengage request, so we don't need to do anything special.
698                  */
699
700                 xpc_update_partition_info(part, remote_rp_version,
701                                                 &remote_rp_stamp, remote_rp_pa,
702                                                 remote_vars_pa, remote_vars);
703                 part->reactivate_nasid = nasid;
704                 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
705                 return;
706         }
707
708         DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
709
710         if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
711                 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
712
713                 /*
714                  * Other side rebooted and previous XPC did support the
715                  * disengage request, but the new one doesn't.
716                  */
717
718                 xpc_clear_partition_engaged(1UL << partid);
719                 xpc_clear_partition_disengage_request(1UL << partid);
720
721                 xpc_update_partition_info(part, remote_rp_version,
722                                                 &remote_rp_stamp, remote_rp_pa,
723                                                 remote_vars_pa, remote_vars);
724                 reactivate = 1;
725
726         } else {
727                 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
728
729                 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
730                                                         &remote_rp_stamp);
731                 if (stamp_diff != 0) {
732                         DBUG_ON(stamp_diff >= 0);
733
734                         /*
735                          * Other side rebooted and the previous XPC did support
736                          * the disengage request, as does the new one.
737                          */
738
739                         DBUG_ON(xpc_partition_engaged(1UL << partid));
740                         DBUG_ON(xpc_partition_disengage_requested(1UL <<
741                                                                 partid));
742
743                         xpc_update_partition_info(part, remote_rp_version,
744                                                 &remote_rp_stamp, remote_rp_pa,
745                                                 remote_vars_pa, remote_vars);
746                         reactivate = 1;
747                 }
748         }
749
750         if (!xpc_partition_disengaged(part)) {
751                 /* still waiting on other side to disengage from us */
752                 return;
753         }
754
755         if (reactivate) {
756                 part->reactivate_nasid = nasid;
757                 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
758
759         } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
760                         xpc_partition_disengage_requested(1UL << partid)) {
761                 XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
762         }
763 }
764
765
766 /*
767  * Loop through the activation AMO variables and process any bits
768  * which are set.  Each bit indicates a nasid sending a partition
769  * activation or deactivation request.
770  *
771  * Return #of IRQs detected.
772  */
773 int
774 xpc_identify_act_IRQ_sender(void)
775 {
776         int word, bit;
777         u64 nasid_mask;
778         u64 nasid;                      /* remote nasid */
779         int n_IRQs_detected = 0;
780         AMO_t *act_amos;
781         struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
782
783
784         act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
785
786
787         /* scan through act AMO variable looking for non-zero entries */
788         for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
789
790                 if (xpc_exiting) {
791                         break;
792                 }
793
794                 nasid_mask = xpc_IPI_receive(&act_amos[word]);
795                 if (nasid_mask == 0) {
796                         /* no IRQs from nasids in this variable */
797                         continue;
798                 }
799
800                 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
801                         nasid_mask);
802
803
804                 /*
805                  * If this nasid has been added to the machine since
806                  * our partition was reset, this will retain the
807                  * remote nasid in our reserved pages machine mask.
808                  * This is used in the event of module reload.
809                  */
810                 rp->mach_nasids[word] |= nasid_mask;
811
812
813                 /* locate the nasid(s) which sent interrupts */
814
815                 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
816                         if (nasid_mask & (1UL << bit)) {
817                                 n_IRQs_detected++;
818                                 nasid = XPC_NASID_FROM_W_B(word, bit);
819                                 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
820                                         nasid);
821                                 xpc_identify_act_IRQ_req(nasid);
822                         }
823                 }
824         }
825         return n_IRQs_detected;
826 }
827
828
829 /*
830  * See if the other side has responded to a partition disengage request
831  * from us.
832  */
833 int
834 xpc_partition_disengaged(struct xpc_partition *part)
835 {
836         partid_t partid = XPC_PARTID(part);
837         int disengaged;
838
839
840         disengaged = (xpc_partition_engaged(1UL << partid) == 0);
841         if (part->disengage_request_timeout) {
842                 if (!disengaged) {
843                         if (jiffies < part->disengage_request_timeout) {
844                                 /* timelimit hasn't been reached yet */
845                                 return 0;
846                         }
847
848                         /*
849                          * Other side hasn't responded to our disengage
850                          * request in a timely fashion, so assume it's dead.
851                          */
852
853                         xpc_clear_partition_engaged(1UL << partid);
854                         disengaged = 1;
855                 }
856                 part->disengage_request_timeout = 0;
857
858                 /* cancel the timer function, provided it's not us */
859                 if (!in_interrupt()) {
860                         del_singleshot_timer_sync(&part->
861                                                       disengage_request_timer);
862                 }
863
864                 DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
865                                         part->act_state != XPC_P_INACTIVE);
866                 if (part->act_state != XPC_P_INACTIVE) {
867                         xpc_wakeup_channel_mgr(part);
868                 }
869
870                 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
871                         xpc_cancel_partition_disengage_request(part);
872                 }
873         }
874         return disengaged;
875 }
876
877
878 /*
879  * Mark specified partition as active.
880  */
881 enum xpc_retval
882 xpc_mark_partition_active(struct xpc_partition *part)
883 {
884         unsigned long irq_flags;
885         enum xpc_retval ret;
886
887
888         dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
889
890         spin_lock_irqsave(&part->act_lock, irq_flags);
891         if (part->act_state == XPC_P_ACTIVATING) {
892                 part->act_state = XPC_P_ACTIVE;
893                 ret = xpcSuccess;
894         } else {
895                 DBUG_ON(part->reason == xpcSuccess);
896                 ret = part->reason;
897         }
898         spin_unlock_irqrestore(&part->act_lock, irq_flags);
899
900         return ret;
901 }
902
903
904 /*
905  * Notify XPC that the partition is down.
906  */
907 void
908 xpc_deactivate_partition(const int line, struct xpc_partition *part,
909                                 enum xpc_retval reason)
910 {
911         unsigned long irq_flags;
912
913
914         spin_lock_irqsave(&part->act_lock, irq_flags);
915
916         if (part->act_state == XPC_P_INACTIVE) {
917                 XPC_SET_REASON(part, reason, line);
918                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
919                 if (reason == xpcReactivating) {
920                         /* we interrupt ourselves to reactivate partition */
921                         xpc_IPI_send_reactivate(part);
922                 }
923                 return;
924         }
925         if (part->act_state == XPC_P_DEACTIVATING) {
926                 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
927                                         reason == xpcReactivating) {
928                         XPC_SET_REASON(part, reason, line);
929                 }
930                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
931                 return;
932         }
933
934         part->act_state = XPC_P_DEACTIVATING;
935         XPC_SET_REASON(part, reason, line);
936
937         spin_unlock_irqrestore(&part->act_lock, irq_flags);
938
939         if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
940                 xpc_request_partition_disengage(part);
941                 xpc_IPI_send_disengage(part);
942
943                 /* set a timelimit on the disengage request */
944                 part->disengage_request_timeout = jiffies +
945                                         (xpc_disengage_request_timelimit * HZ);
946                 part->disengage_request_timer.expires =
947                                         part->disengage_request_timeout;
948                 add_timer(&part->disengage_request_timer);
949         }
950
951         dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
952                 XPC_PARTID(part), reason);
953
954         xpc_partition_going_down(part, reason);
955 }
956
957
958 /*
959  * Mark specified partition as inactive.
960  */
961 void
962 xpc_mark_partition_inactive(struct xpc_partition *part)
963 {
964         unsigned long irq_flags;
965
966
967         dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
968                 XPC_PARTID(part));
969
970         spin_lock_irqsave(&part->act_lock, irq_flags);
971         part->act_state = XPC_P_INACTIVE;
972         spin_unlock_irqrestore(&part->act_lock, irq_flags);
973         part->remote_rp_pa = 0;
974 }
975
976
977 /*
978  * SAL has provided a partition and machine mask.  The partition mask
979  * contains a bit for each even nasid in our partition.  The machine
980  * mask contains a bit for each even nasid in the entire machine.
981  *
982  * Using those two bit arrays, we can determine which nasids are
983  * known in the machine.  Each should also have a reserved page
984  * initialized if they are available for partitioning.
985  */
986 void
987 xpc_discovery(void)
988 {
989         void *remote_rp_base;
990         struct xpc_rsvd_page *remote_rp;
991         struct xpc_vars *remote_vars;
992         u64 remote_rp_pa;
993         u64 remote_vars_pa;
994         int region;
995         int max_regions;
996         int nasid;
997         struct xpc_rsvd_page *rp;
998         partid_t partid;
999         struct xpc_partition *part;
1000         u64 *discovered_nasids;
1001         enum xpc_retval ret;
1002
1003
1004         remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
1005                                                 GFP_KERNEL, &remote_rp_base);
1006         if (remote_rp == NULL) {
1007                 return;
1008         }
1009         remote_vars = (struct xpc_vars *) remote_rp;
1010
1011
1012         discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
1013                                                         GFP_KERNEL);
1014         if (discovered_nasids == NULL) {
1015                 kfree(remote_rp_base);
1016                 return;
1017         }
1018         memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
1019
1020         rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
1021
1022         /*
1023          * The term 'region' in this context refers to the minimum number of
1024          * nodes that can comprise an access protection grouping. The access
1025          * protection is in regards to memory, IOI and IPI.
1026          */
1027 //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
1028 //>>> include/asm-ia64/sn/addrs.h
1029 #define SH1_MAX_REGIONS         64
1030 #define SH2_MAX_REGIONS         256
1031         max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
1032
1033         for (region = 0; region < max_regions; region++) {
1034
1035                 if ((volatile int) xpc_exiting) {
1036                         break;
1037                 }
1038
1039                 dev_dbg(xpc_part, "searching region %d\n", region);
1040
1041                 for (nasid = (region * sn_region_size * 2);
1042                      nasid < ((region + 1) * sn_region_size * 2);
1043                      nasid += 2) {
1044
1045                         if ((volatile int) xpc_exiting) {
1046                                 break;
1047                         }
1048
1049                         dev_dbg(xpc_part, "checking nasid %d\n", nasid);
1050
1051
1052                         if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
1053                                 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
1054                                         "part of the local partition; skipping "
1055                                         "region\n", nasid);
1056                                 break;
1057                         }
1058
1059                         if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
1060                                 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
1061                                         "not on Numa-Link network at reset\n",
1062                                         nasid);
1063                                 continue;
1064                         }
1065
1066                         if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
1067                                 dev_dbg(xpc_part, "Nasid %d is part of a "
1068                                         "partition which was previously "
1069                                         "discovered\n", nasid);
1070                                 continue;
1071                         }
1072
1073
1074                         /* pull over the reserved page structure */
1075
1076                         ret = xpc_get_remote_rp(nasid, discovered_nasids,
1077                                               remote_rp, &remote_rp_pa);
1078                         if (ret != xpcSuccess) {
1079                                 dev_dbg(xpc_part, "unable to get reserved page "
1080                                         "from nasid %d, reason=%d\n", nasid,
1081                                         ret);
1082
1083                                 if (ret == xpcLocalPartid) {
1084                                         break;
1085                                 }
1086                                 continue;
1087                         }
1088
1089                         remote_vars_pa = remote_rp->vars_pa;
1090
1091                         partid = remote_rp->partid;
1092                         part = &xpc_partitions[partid];
1093
1094
1095                         /* pull over the cross partition variables */
1096
1097                         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1098                         if (ret != xpcSuccess) {
1099                                 dev_dbg(xpc_part, "unable to get XPC variables "
1100                                         "from nasid %d, reason=%d\n", nasid,
1101                                         ret);
1102
1103                                 XPC_DEACTIVATE_PARTITION(part, ret);
1104                                 continue;
1105                         }
1106
1107                         if (part->act_state != XPC_P_INACTIVE) {
1108                                 dev_dbg(xpc_part, "partition %d on nasid %d is "
1109                                         "already activating\n", partid, nasid);
1110                                 break;
1111                         }
1112
1113                         /*
1114                          * Register the remote partition's AMOs with SAL so it
1115                          * can handle and cleanup errors within that address
1116                          * range should the remote partition go down. We don't
1117                          * unregister this range because it is difficult to
1118                          * tell when outstanding writes to the remote partition
1119                          * are finished and thus when it is thus safe to
1120                          * unregister. This should not result in wasted space
1121                          * in the SAL xp_addr_region table because we should
1122                          * get the same page for remote_act_amos_pa after
1123                          * module reloads and system reboots.
1124                          */
1125                         if (sn_register_xp_addr_region(
1126                                             remote_vars->amos_page_pa,
1127                                                         PAGE_SIZE, 1) < 0) {
1128                                 dev_dbg(xpc_part, "partition %d failed to "
1129                                         "register xp_addr region 0x%016lx\n",
1130                                         partid, remote_vars->amos_page_pa);
1131
1132                                 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
1133                                                 __LINE__);
1134                                 break;
1135                         }
1136
1137                         /*
1138                          * The remote nasid is valid and available.
1139                          * Send an interrupt to that nasid to notify
1140                          * it that we are ready to begin activation.
1141                          */
1142                         dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
1143                                 "nasid %d, phys_cpuid 0x%x\n",
1144                                 remote_vars->amos_page_pa,
1145                                 remote_vars->act_nasid,
1146                                 remote_vars->act_phys_cpuid);
1147
1148                         if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1149                                                                 version)) {
1150                                 part->remote_amos_page_pa =
1151                                                 remote_vars->amos_page_pa;
1152                                 xpc_mark_partition_disengaged(part);
1153                                 xpc_cancel_partition_disengage_request(part);
1154                         }
1155                         xpc_IPI_send_activate(remote_vars);
1156                 }
1157         }
1158
1159         kfree(discovered_nasids);
1160         kfree(remote_rp_base);
1161 }
1162
1163
1164 /*
1165  * Given a partid, get the nasids owned by that partition from the
1166  * remote partition's reserved page.
1167  */
1168 enum xpc_retval
1169 xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
1170 {
1171         struct xpc_partition *part;
1172         u64 part_nasid_pa;
1173         int bte_res;
1174
1175
1176         part = &xpc_partitions[partid];
1177         if (part->remote_rp_pa == 0) {
1178                 return xpcPartitionDown;
1179         }
1180
1181         part_nasid_pa = part->remote_rp_pa +
1182                 (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
1183
1184         bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
1185                                 L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
1186                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
1187
1188         return xpc_map_bte_errors(bte_res);
1189 }
1190