ld8.fill r15=[r3]                       // M0|1 restore r15
        mov b6=r18                              // I0   restore b6
 
-       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+       LOAD_PHYS_STACK_REG_SIZE(r17)
        mov f9=f0                                       // F    clear f9
 (pKStk) br.cond.dpnt.many skip_rbs_switch              // B
 
        shr.u r18=r19,16                // I0|1 get byte size of existing "dirty" partition
        cover                           // B    add current frame into dirty partition & set cr.ifs
        ;;
-(pUStk) ld4 r17=[r17]                  // M0|1 r17 = cpu_data->phys_stacked_size_p8
        mov r19=ar.bsp                  // M2   get new backing store pointer
        mov f10=f0                      // F    clear f10
 
        shr.u r18=r19,16        // get byte size of existing "dirty" partition
        ;;
        mov r16=ar.bsp          // get existing backing store pointer
-       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-       ;;
-       ld4 r17=[r17]           // r17 = cpu_data->phys_stacked_size_p8
+       LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)        br.cond.dpnt skip_rbs_switch
 
        /*
 
        ia64_patch_vtop(START(vtop), END(vtop));
        ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
 }
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+       s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;	/* current patch-list entry */
+       s32 * end = (s32 *) __end___phys_stack_reg_patchlist;	/* one past the last entry */
+       u64 ip, mask, imm;
+
+       /*
+        * Re-encode val into every instruction recorded in the
+        * phys_stack_reg patch list (the entries between
+        * __start___phys_stack_reg_patchlist and
+        * __end___phys_stack_reg_patchlist).
+        *
+        * see instruction format A4: adds r1 = imm13, r3
+        *
+        * Only the low 13 bits of val are encoded: val bits 0-6 are placed
+        * in instruction bits 13-19 and val bits 7-12 in instruction bits
+        * 27-32.  Any higher bits of val are dropped without a check.
+        * NOTE(review): mask covers just these two fields; if the A4
+        * immediate also has a sign bit it is left as assembled -- confirm.
+        */
+       mask = (0x3fUL << 27) | (0x7f << 13);
+       imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+       while (offp < end) {
+               ip = (u64) offp + *offp;	/* entry is a signed 32-bit offset from its own address */
+               ia64_patch(ip, mask, imm);	/* presumably swaps the masked bits at ip for imm -- helper not visible here */
+               ia64_fc(ip);	/* NOTE(review): looks like a cache flush for the patched address -- confirm */
+               ++offp;
+       }
+       ia64_sync_i();	/* NOTE(review): presumably makes the patched text visible to instruction fetch -- confirm */
+       ia64_srlz_i();	/* NOTE(review): presumably an instruction serialization -- confirm */
+}
 
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
 struct screen_info screen_info;
 cpu_init (void)
 {
        extern void __cpuinit ia64_mmu_init (void *);
+       static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
        unsigned long num_phys_stacked;
        pal_vm_info_2_u_t vmi;
        unsigned int max_ctx;
                num_phys_stacked = 96;
        }
        /* size of physical stacked register partition plus 8 bytes: */
-       __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+       if (num_phys_stacked > max_num_phys_stacked) {
+               ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+               max_num_phys_stacked = num_phys_stacked;
+       }
        platform_cpu_init();
        pm_idle = default_idle;
 }
 
          __stop___mca_table = .;
        }
 
+  .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
+       {	/* patch list: one 4-byte entry per LOAD_PHYS_STACK_REG_SIZE() use */
+         __start___phys_stack_reg_patchlist = .;	/* first entry read by ia64_patch_phys_stack_reg() */
+         *(.data.patch.phys_stack_reg)	/* entries emitted via .xdata4 by the macro */
+         __end___phys_stack_reg_patchlist = .;	/* loop bound: one past the last entry */
+       }
+
   /* Global data */
   _data = .;
 
 
 # define FSYS_RETURN   br.ret.sptk.many b6
 #endif
 
+/*
+ * The kernel text is assembled for IA64_NUM_PHYS_STACK_REG physical stacked
+ * registers.  If a CPU reports more than that, cpu_init() dynamically
+ * patches the kernel for the correct size.
+ *
+ * LOAD_PHYS_STACK_REG_SIZE(reg) emits
+ *	adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0
+ * and records a 4-byte entry (1b-.) in ".data.patch.phys_stack_reg" that
+ * gives the offset from the entry to that adds instruction, so that
+ * ia64_patch_phys_stack_reg() can locate it and rewrite the immediate.
+ *
+ * The .section/.previous pair below only declares the section with the
+ * "a" flag and then returns to the section that was active before.
+ */
+       .section ".data.patch.phys_stack_reg", "a"
+       .previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg)                  \
+[1:]   adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0;        \
+       .xdata4 ".data.patch.phys_stack_reg", 1b-.
+
 /*
  * Up until early 2004, use of .align within a function caused bad unwind info.
  * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
 
 
 extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
 extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
 extern void ia64_patch_gate (void);
 
 #endif /* _ASM_IA64_PATCH_H */
 
 #include <asm/ptrace.h>
 #include <asm/ustack.h>
 
+#define IA64_NUM_PHYS_STACK_REG        96
 #define IA64_NUM_DBG_REGS      8
 
 #define DEFAULT_MAP_BASE       __IA64_UL_CONST(0x2000000000000000)
 
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
 extern char __start_gate_section[];
 extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
 extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];