arch/sh/kernel/cpu/sh4/fpu.c

   1 /*
   2  * Save/restore floating point context for signal handlers.
   3  *
   4  * This file is subject to the terms and conditions of the GNU General Public
   5  * License.  See the file "COPYING" in the main directory of this archive
   6  * for more details.
   7  *
   8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
   9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
  10  *
  11  * FIXME! These routines have not been tested for big endian case.
  12  */
  13 #include <linux/sched.h>
  14 #include <linux/signal.h>
  15 #include <linux/io.h>
  16 #include <asm/cpu/fpu.h>
  17 #include <asm/processor.h>
  18 #include <asm/system.h>
  19
  20 /* The PR (precision) bit in the FP Status Register must be clear when
  21  * an frchg instruction is executed, otherwise the instruction is undefined.
  22  * Executing frchg with PR set causes a trap on some SH4 implementations.
  23  */
  24
  25 #define FPSCR_RCHG 0x00000000
  26 extern unsigned long long float64_div(unsigned long long a,
  27                                       unsigned long long b);
  28 extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
  29 extern unsigned long long float64_mul(unsigned long long a,
  30                                       unsigned long long b);
  31 extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
  32 extern unsigned long long float64_add(unsigned long long a,
  33                                       unsigned long long b);
  34 extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
  35 extern unsigned long long float64_sub(unsigned long long a,
  36                                       unsigned long long b);
  37 extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
  38
  39 static unsigned int fpu_exception_flags;
  40
  41 /*
  42  * Save FPU registers onto task structure.
  43  * Assume called with FPU enabled (SR.FD=0).
  44  */
  45 void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
  46 {
  47         unsigned long dummy;
  48
  49         clear_tsk_thread_flag(tsk, TIF_USEDFPU);
  50         enable_fpu();
  51         asm volatile ("sts.l    fpul, @-%0\n\t"
  52                       "sts.l    fpscr, @-%0\n\t"
  53                       "lds      %2, fpscr\n\t"
  54                       "frchg\n\t"
  55                       "fmov.s   fr15, @-%0\n\t"
  56                       "fmov.s   fr14, @-%0\n\t"
  57                       "fmov.s   fr13, @-%0\n\t"
  58                       "fmov.s   fr12, @-%0\n\t"
  59                       "fmov.s   fr11, @-%0\n\t"
  60                       "fmov.s   fr10, @-%0\n\t"
  61                       "fmov.s   fr9, @-%0\n\t"
  62                       "fmov.s   fr8, @-%0\n\t"
  63                       "fmov.s   fr7, @-%0\n\t"
  64                       "fmov.s   fr6, @-%0\n\t"
  65                       "fmov.s   fr5, @-%0\n\t"
  66                       "fmov.s   fr4, @-%0\n\t"
  67                       "fmov.s   fr3, @-%0\n\t"
  68                       "fmov.s   fr2, @-%0\n\t"
  69                       "fmov.s   fr1, @-%0\n\t"
  70                       "fmov.s   fr0, @-%0\n\t"
  71                       "frchg\n\t"
  72                       "fmov.s   fr15, @-%0\n\t"
  73                       "fmov.s   fr14, @-%0\n\t"
  74                       "fmov.s   fr13, @-%0\n\t"
  75                       "fmov.s   fr12, @-%0\n\t"
  76                       "fmov.s   fr11, @-%0\n\t"
  77                       "fmov.s   fr10, @-%0\n\t"
  78                       "fmov.s   fr9, @-%0\n\t"
  79                       "fmov.s   fr8, @-%0\n\t"
  80                       "fmov.s   fr7, @-%0\n\t"
  81                       "fmov.s   fr6, @-%0\n\t"
  82                       "fmov.s   fr5, @-%0\n\t"
  83                       "fmov.s   fr4, @-%0\n\t"
  84                       "fmov.s   fr3, @-%0\n\t"
  85                       "fmov.s   fr2, @-%0\n\t"
  86                       "fmov.s   fr1, @-%0\n\t"
  87                       "fmov.s   fr0, @-%0\n\t"
  88                       "lds      %3, fpscr\n\t":"=r" (dummy)
  89                       :"0"((char *)(&tsk->thread.fpu.hard.status)),
  90                       "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
  91                       :"memory");
  92
  93         disable_fpu();
  94         release_fpu(regs);
  95 }
  96
  97 static void restore_fpu(struct task_struct *tsk)
  98 {
  99         unsigned long dummy;
 100
 101         enable_fpu();
 102         asm volatile ("lds      %2, fpscr\n\t"
 103                       "fmov.s   @%0+, fr0\n\t"
 104                       "fmov.s   @%0+, fr1\n\t"
 105                       "fmov.s   @%0+, fr2\n\t"
 106                       "fmov.s   @%0+, fr3\n\t"
 107                       "fmov.s   @%0+, fr4\n\t"
 108                       "fmov.s   @%0+, fr5\n\t"
 109                       "fmov.s   @%0+, fr6\n\t"
 110                       "fmov.s   @%0+, fr7\n\t"
 111                       "fmov.s   @%0+, fr8\n\t"
 112                       "fmov.s   @%0+, fr9\n\t"
 113                       "fmov.s   @%0+, fr10\n\t"
 114                       "fmov.s   @%0+, fr11\n\t"
 115                       "fmov.s   @%0+, fr12\n\t"
 116                       "fmov.s   @%0+, fr13\n\t"
 117                       "fmov.s   @%0+, fr14\n\t"
 118                       "fmov.s   @%0+, fr15\n\t"
 119                       "frchg\n\t"
 120                       "fmov.s   @%0+, fr0\n\t"
 121                       "fmov.s   @%0+, fr1\n\t"
 122                       "fmov.s   @%0+, fr2\n\t"
 123                       "fmov.s   @%0+, fr3\n\t"
 124                       "fmov.s   @%0+, fr4\n\t"
 125                       "fmov.s   @%0+, fr5\n\t"
 126                       "fmov.s   @%0+, fr6\n\t"
 127                       "fmov.s   @%0+, fr7\n\t"
 128                       "fmov.s   @%0+, fr8\n\t"
 129                       "fmov.s   @%0+, fr9\n\t"
 130                       "fmov.s   @%0+, fr10\n\t"
 131                       "fmov.s   @%0+, fr11\n\t"
 132                       "fmov.s   @%0+, fr12\n\t"
 133                       "fmov.s   @%0+, fr13\n\t"
 134                       "fmov.s   @%0+, fr14\n\t"
 135                       "fmov.s   @%0+, fr15\n\t"
 136                       "frchg\n\t"
 137                       "lds.l    @%0+, fpscr\n\t"
 138                       "lds.l    @%0+, fpul\n\t"
 139                       :"=r" (dummy)
 140                       :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
 141                       :"memory");
 142         disable_fpu();
 143 }
 144
 145 /*
 146  * Load the FPU with signalling NANS.  This bit pattern we're using
 147  * has the property that no matter wether considered as single or as
 148  * double precision represents signaling NANS.
 149  */
 150
 151 static void fpu_init(void)
 152 {
 153         enable_fpu();
 154         asm volatile (  "lds    %0, fpul\n\t"
 155                         "lds    %1, fpscr\n\t"
 156                         "fsts   fpul, fr0\n\t"
 157                         "fsts   fpul, fr1\n\t"
 158                         "fsts   fpul, fr2\n\t"
 159                         "fsts   fpul, fr3\n\t"
 160                         "fsts   fpul, fr4\n\t"
 161                         "fsts   fpul, fr5\n\t"
 162                         "fsts   fpul, fr6\n\t"
 163                         "fsts   fpul, fr7\n\t"
 164                         "fsts   fpul, fr8\n\t"
 165                         "fsts   fpul, fr9\n\t"
 166                         "fsts   fpul, fr10\n\t"
 167                         "fsts   fpul, fr11\n\t"
 168                         "fsts   fpul, fr12\n\t"
 169                         "fsts   fpul, fr13\n\t"
 170                         "fsts   fpul, fr14\n\t"
 171                         "fsts   fpul, fr15\n\t"
 172                         "frchg\n\t"
 173                         "fsts   fpul, fr0\n\t"
 174                         "fsts   fpul, fr1\n\t"
 175                         "fsts   fpul, fr2\n\t"
 176                         "fsts   fpul, fr3\n\t"
 177                         "fsts   fpul, fr4\n\t"
 178                         "fsts   fpul, fr5\n\t"
 179                         "fsts   fpul, fr6\n\t"
 180                         "fsts   fpul, fr7\n\t"
 181                         "fsts   fpul, fr8\n\t"
 182                         "fsts   fpul, fr9\n\t"
 183                         "fsts   fpul, fr10\n\t"
 184                         "fsts   fpul, fr11\n\t"
 185                         "fsts   fpul, fr12\n\t"
 186                         "fsts   fpul, fr13\n\t"
 187                         "fsts   fpul, fr14\n\t"
 188                         "fsts   fpul, fr15\n\t"
 189                         "frchg\n\t"
 190                         "lds    %2, fpscr\n\t"
 191                         :       /* no output */
 192                         :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
 193         disable_fpu();
 194 }
 195
 196 /**
 197  *      denormal_to_double - Given denormalized float number,
 198  *                           store double float
 199  *
 200  *      @fpu: Pointer to sh_fpu_hard structure
 201  *      @n: Index to FP register
 202  */
 203 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
 204 {
 205         unsigned long du, dl;
 206         unsigned long x = fpu->fpul;
 207         int exp = 1023 - 126;
 208
 209         if (x != 0 && (x & 0x7f800000) == 0) {
 210                 du = (x & 0x80000000);
 211                 while ((x & 0x00800000) == 0) {
 212                         x <<= 1;
 213                         exp--;
 214                 }
 215                 x &= 0x007fffff;
 216                 du |= (exp << 20) | (x >> 3);
 217                 dl = x << 29;
 218
 219                 fpu->fp_regs[n] = du;
 220                 fpu->fp_regs[n + 1] = dl;
 221         }
 222 }
 223
 224 /**
 225  *      ieee_fpe_handler - Handle denormalized number exception
 226  *
 227  *      @regs: Pointer to register structure
 228  *
 229  *      Returns 1 when it's handled (should not cause exception).
 230  */
 231 static int ieee_fpe_handler(struct pt_regs *regs)
 232 {
 233         unsigned short insn = *(unsigned short *)regs->pc;
 234         unsigned short finsn;
 235         unsigned long nextpc;
 236         int nib[4] = {
 237                 (insn >> 12) & 0xf,
 238                 (insn >> 8) & 0xf,
 239                 (insn >> 4) & 0xf,
 240                 insn & 0xf
 241         };
 242
 243         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
 244                 regs->pr = regs->pc + 4;  /* bsr & jsr */
 245
 246         if (nib[0] == 0xa || nib[0] == 0xb) {
 247                 /* bra & bsr */
 248                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
 249                 finsn = *(unsigned short *)(regs->pc + 2);
 250         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
 251                 /* bt/s */
 252                 if (regs->sr & 1)
 253                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 254                 else
 255                         nextpc = regs->pc + 4;
 256                 finsn = *(unsigned short *)(regs->pc + 2);
 257         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
 258                 /* bf/s */
 259                 if (regs->sr & 1)
 260                         nextpc = regs->pc + 4;
 261                 else
 262                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 263                 finsn = *(unsigned short *)(regs->pc + 2);
 264         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
 265                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 266                 /* jmp & jsr */
 267                 nextpc = regs->regs[nib[1]];
 268                 finsn = *(unsigned short *)(regs->pc + 2);
 269         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
 270                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 271                 /* braf & bsrf */
 272                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
 273                 finsn = *(unsigned short *)(regs->pc + 2);
 274         } else if (insn == 0x000b) {
 275                 /* rts */
 276                 nextpc = regs->pr;
 277                 finsn = *(unsigned short *)(regs->pc + 2);
 278         } else {
 279                 nextpc = regs->pc + instruction_size(insn);
 280                 finsn = insn;
 281         }
 282
 283         if ((finsn & 0xf1ff) == 0xf0ad) {
 284                 /* fcnvsd */
 285                 struct task_struct *tsk = current;
 286
 287                 save_fpu(tsk, regs);
 288                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
 289                         /* FPU error */
 290                         denormal_to_double(&tsk->thread.fpu.hard,
 291                                            (finsn >> 8) & 0xf);
 292                 else
 293                         return 0;
 294
 295                 regs->pc = nextpc;
 296                 return 1;
 297         } else if ((finsn & 0xf00f) == 0xf002) {
 298                 /* fmul */
 299                 struct task_struct *tsk = current;
 300                 int fpscr;
 301                 int n, m, prec;
 302                 unsigned int hx, hy;
 303
 304                 n = (finsn >> 8) & 0xf;
 305                 m = (finsn >> 4) & 0xf;
 306                 hx = tsk->thread.fpu.hard.fp_regs[n];
 307                 hy = tsk->thread.fpu.hard.fp_regs[m];
 308                 fpscr = tsk->thread.fpu.hard.fpscr;
 309                 prec = fpscr & FPSCR_DBL_PRECISION;
 310
 311                 if ((fpscr & FPSCR_CAUSE_ERROR)
 312                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 313                                  || (hy & 0x7fffffff) < 0x00100000))) {
 314                         long long llx, lly;
 315
 316                         /* FPU error because of denormal (doubles) */
 317                         llx = ((long long)hx << 32)
 318                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 319                         lly = ((long long)hy << 32)
 320                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 321                         llx = float64_mul(llx, lly);
 322                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 323                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 324                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 325                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 326                                          || (hy & 0x7fffffff) < 0x00800000))) {
 327                         /* FPU error because of denormal (floats) */
 328                         hx = float32_mul(hx, hy);
 329                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 330                 } else
 331                         return 0;
 332
 333                 regs->pc = nextpc;
 334                 return 1;
 335         } else if ((finsn & 0xf00e) == 0xf000) {
 336                 /* fadd, fsub */
 337                 struct task_struct *tsk = current;
 338                 int fpscr;
 339                 int n, m, prec;
 340                 unsigned int hx, hy;
 341
 342                 n = (finsn >> 8) & 0xf;
 343                 m = (finsn >> 4) & 0xf;
 344                 hx = tsk->thread.fpu.hard.fp_regs[n];
 345                 hy = tsk->thread.fpu.hard.fp_regs[m];
 346                 fpscr = tsk->thread.fpu.hard.fpscr;
 347                 prec = fpscr & FPSCR_DBL_PRECISION;
 348
 349                 if ((fpscr & FPSCR_CAUSE_ERROR)
 350                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 351                                  || (hy & 0x7fffffff) < 0x00100000))) {
 352                         long long llx, lly;
 353
 354                         /* FPU error because of denormal (doubles) */
 355                         llx = ((long long)hx << 32)
 356                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 357                         lly = ((long long)hy << 32)
 358                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 359                         if ((finsn & 0xf00f) == 0xf000)
 360                                 llx = float64_add(llx, lly);
 361                         else
 362                                 llx = float64_sub(llx, lly);
 363                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 364                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 365                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 366                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 367                                          || (hy & 0x7fffffff) < 0x00800000))) {
 368                         /* FPU error because of denormal (floats) */
 369                         if ((finsn & 0xf00f) == 0xf000)
 370                                 hx = float32_add(hx, hy);
 371                         else
 372                                 hx = float32_sub(hx, hy);
 373                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 374                 } else
 375                         return 0;
 376
 377                 regs->pc = nextpc;
 378                 return 1;
 379         } else if ((finsn & 0xf003) == 0xf003) {
 380                 /* fdiv */
 381                 struct task_struct *tsk = current;
 382                 int fpscr;
 383                 int n, m, prec;
 384                 unsigned int hx, hy;
 385
 386                 n = (finsn >> 8) & 0xf;
 387                 m = (finsn >> 4) & 0xf;
 388                 hx = tsk->thread.fpu.hard.fp_regs[n];
 389                 hy = tsk->thread.fpu.hard.fp_regs[m];
 390                 fpscr = tsk->thread.fpu.hard.fpscr;
 391                 prec = fpscr & FPSCR_DBL_PRECISION;
 392
 393                 if ((fpscr & FPSCR_CAUSE_ERROR)
 394                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 395                                  || (hy & 0x7fffffff) < 0x00100000))) {
 396                         long long llx, lly;
 397
 398                         /* FPU error because of denormal (doubles) */
 399                         llx = ((long long)hx << 32)
 400                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 401                         lly = ((long long)hy << 32)
 402                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 403
 404                         llx = float64_div(llx, lly);
 405
 406                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 407                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 408                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 409                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 410                                          || (hy & 0x7fffffff) < 0x00800000))) {
 411                         /* FPU error because of denormal (floats) */
 412                         hx = float32_div(hx, hy);
 413                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 414                 } else
 415                         return 0;
 416
 417                 regs->pc = nextpc;
 418                 return 1;
 419         }
 420
 421         return 0;
 422 }
 423
 424 void float_raise(unsigned int flags)
 425 {
 426         fpu_exception_flags |= flags;
 427 }
 428
 429 int float_rounding_mode(void)
 430 {
 431         struct task_struct *tsk = current;
 432         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
 433         return roundingMode;
 434 }
 435
 436 BUILD_TRAP_HANDLER(fpu_error)
 437 {
 438         struct task_struct *tsk = current;
 439         TRAP_HANDLER_DECL;
 440
 441         save_fpu(tsk, regs);
 442         fpu_exception_flags = 0;
 443         if (ieee_fpe_handler(regs)) {
 444                 tsk->thread.fpu.hard.fpscr &=
 445                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 446                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
 447                 /* Set the FPSCR flag as well as cause bits - simply
 448                  * replicate the cause */
 449                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
 450                 grab_fpu(regs);
 451                 restore_fpu(tsk);
 452                 set_tsk_thread_flag(tsk, TIF_USEDFPU);
 453                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
 454                      (fpu_exception_flags >> 2)) == 0) {
 455                         return;
 456                 }
 457         }
 458
 459         force_sig(SIGFPE, tsk);
 460 }
 461
 462 BUILD_TRAP_HANDLER(fpu_state_restore)
 463 {
 464         struct task_struct *tsk = current;
 465         TRAP_HANDLER_DECL;
 466
 467         grab_fpu(regs);
 468         if (!user_mode(regs)) {
 469                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
 470                 return;
 471         }
 472
 473         if (used_math()) {
 474                 /* Using the FPU again.  */
 475                 restore_fpu(tsk);
 476         } else {
 477                 /* First time FPU user.  */
 478                 fpu_init();
 479                 set_used_math();
 480         }
 481         set_tsk_thread_flag(tsk, TIF_USEDFPU);
 482 }