*/
 static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
-       u64 pstate, ver;
+       u64 pstate, ver, busy_mask;
        int nack_busy_id, is_jbus, need_more;
 
        if (cpus_empty(mask))
                               "i" (ASI_INTR_W));
 
        nack_busy_id = 0;
+       busy_mask = 0;
        {
                int i;
 
                for_each_cpu_mask(i, mask) {
                        u64 target = (i << 14) | 0x70;
 
-                       if (!is_jbus)
+                       if (is_jbus) {
+                               busy_mask |= (0x1UL << (i * 2));
+                       } else {
                                target |= (nack_busy_id << 24);
+                               busy_mask |= (0x1UL <<
+                                             (nack_busy_id * 2));
+                       }
                        __asm__ __volatile__(
                                "stxa   %%g0, [%0] %1\n\t"
                                "membar #Sync\n\t"
 
        /* Now, poll for completion. */
        {
-               u64 dispatch_stat;
+               u64 dispatch_stat, nack_mask;
                long stuck;
 
                stuck = 100000 * nack_busy_id;
+               nack_mask = busy_mask << 1;
                do {
                        __asm__ __volatile__("ldxa      [%%g0] %1, %0"
                                             : "=r" (dispatch_stat)
                                             : "i" (ASI_INTR_DISPATCH_STAT));
-                       if (dispatch_stat == 0UL) {
+                       if (!(dispatch_stat & (busy_mask | nack_mask))) {
                                __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
                                                     : : "r" (pstate));
                                if (unlikely(need_more)) {
                        }
                        if (!--stuck)
                                break;
-               } while (dispatch_stat & 0x5555555555555555UL);
+               } while (dispatch_stat & busy_mask);
 
                __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
                                     : : "r" (pstate));
 
-               if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
+               if (dispatch_stat & busy_mask) {
                        /* Busy bits will not clear, continue instead
                         * of freezing up on this cpu.
                         */