]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - arch/sh/kernel/cpu/sh4/fpu.c
sh: Support denormalization on SH-4 FPU.
[linux-2.6-omap-h63xx.git] / arch / sh / kernel / cpu / sh4 / fpu.c
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <asm/cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19
/*
 * frchg is only defined while the PR (precision) bit of the FP Status
 * Register is clear; with PR set the instruction is undefined and traps
 * on some SH4 implementations.  FPSCR_RCHG is the value this file loads
 * into FPSCR before executing frchg: no bits set, hence PR clear.
 */
#define FPSCR_RCHG 0x00000000

/*
 * Software floating-point helpers, defined outside this file.  They are
 * called from the denormal emulation below with raw register images:
 * 32-bit values for single precision, 64-bit values for double precision.
 */
extern unsigned long float32_add(unsigned long a, unsigned long b);
extern unsigned long float32_sub(unsigned long a, unsigned long b);
extern unsigned long float32_mul(unsigned long a, unsigned long b);
extern unsigned long float32_div(unsigned long a, unsigned long b);

extern unsigned long long float64_add(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_sub(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_mul(unsigned long long a,
				      unsigned long long b);
extern unsigned long long float64_div(unsigned long long a,
				      unsigned long long b);

/* Exception flags accumulated through float_raise(). */
static unsigned int fpu_exception_flags;
40
41 /*
42  * Save FPU registers onto task structure.
43  * Assume called with FPU enabled (SR.FD=0).
44  */
45 void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
46 {
47         unsigned long dummy;
48
49         clear_tsk_thread_flag(tsk, TIF_USEDFPU);
50         enable_fpu();
51         asm volatile ("sts.l    fpul, @-%0\n\t"
52                       "sts.l    fpscr, @-%0\n\t"
53                       "lds      %2, fpscr\n\t"
54                       "frchg\n\t"
55                       "fmov.s   fr15, @-%0\n\t"
56                       "fmov.s   fr14, @-%0\n\t"
57                       "fmov.s   fr13, @-%0\n\t"
58                       "fmov.s   fr12, @-%0\n\t"
59                       "fmov.s   fr11, @-%0\n\t"
60                       "fmov.s   fr10, @-%0\n\t"
61                       "fmov.s   fr9, @-%0\n\t"
62                       "fmov.s   fr8, @-%0\n\t"
63                       "fmov.s   fr7, @-%0\n\t"
64                       "fmov.s   fr6, @-%0\n\t"
65                       "fmov.s   fr5, @-%0\n\t"
66                       "fmov.s   fr4, @-%0\n\t"
67                       "fmov.s   fr3, @-%0\n\t"
68                       "fmov.s   fr2, @-%0\n\t"
69                       "fmov.s   fr1, @-%0\n\t"
70                       "fmov.s   fr0, @-%0\n\t"
71                       "frchg\n\t"
72                       "fmov.s   fr15, @-%0\n\t"
73                       "fmov.s   fr14, @-%0\n\t"
74                       "fmov.s   fr13, @-%0\n\t"
75                       "fmov.s   fr12, @-%0\n\t"
76                       "fmov.s   fr11, @-%0\n\t"
77                       "fmov.s   fr10, @-%0\n\t"
78                       "fmov.s   fr9, @-%0\n\t"
79                       "fmov.s   fr8, @-%0\n\t"
80                       "fmov.s   fr7, @-%0\n\t"
81                       "fmov.s   fr6, @-%0\n\t"
82                       "fmov.s   fr5, @-%0\n\t"
83                       "fmov.s   fr4, @-%0\n\t"
84                       "fmov.s   fr3, @-%0\n\t"
85                       "fmov.s   fr2, @-%0\n\t"
86                       "fmov.s   fr1, @-%0\n\t"
87                       "fmov.s   fr0, @-%0\n\t"
88                       "lds      %3, fpscr\n\t":"=r" (dummy)
89                       :"0"((char *)(&tsk->thread.fpu.hard.status)),
90                       "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
91                       :"memory");
92
93         disable_fpu();
94         release_fpu(regs);
95 }
96
97 static void restore_fpu(struct task_struct *tsk)
98 {
99         unsigned long dummy;
100
101         enable_fpu();
102         asm volatile ("lds      %2, fpscr\n\t"
103                       "fmov.s   @%0+, fr0\n\t"
104                       "fmov.s   @%0+, fr1\n\t"
105                       "fmov.s   @%0+, fr2\n\t"
106                       "fmov.s   @%0+, fr3\n\t"
107                       "fmov.s   @%0+, fr4\n\t"
108                       "fmov.s   @%0+, fr5\n\t"
109                       "fmov.s   @%0+, fr6\n\t"
110                       "fmov.s   @%0+, fr7\n\t"
111                       "fmov.s   @%0+, fr8\n\t"
112                       "fmov.s   @%0+, fr9\n\t"
113                       "fmov.s   @%0+, fr10\n\t"
114                       "fmov.s   @%0+, fr11\n\t"
115                       "fmov.s   @%0+, fr12\n\t"
116                       "fmov.s   @%0+, fr13\n\t"
117                       "fmov.s   @%0+, fr14\n\t"
118                       "fmov.s   @%0+, fr15\n\t"
119                       "frchg\n\t"
120                       "fmov.s   @%0+, fr0\n\t"
121                       "fmov.s   @%0+, fr1\n\t"
122                       "fmov.s   @%0+, fr2\n\t"
123                       "fmov.s   @%0+, fr3\n\t"
124                       "fmov.s   @%0+, fr4\n\t"
125                       "fmov.s   @%0+, fr5\n\t"
126                       "fmov.s   @%0+, fr6\n\t"
127                       "fmov.s   @%0+, fr7\n\t"
128                       "fmov.s   @%0+, fr8\n\t"
129                       "fmov.s   @%0+, fr9\n\t"
130                       "fmov.s   @%0+, fr10\n\t"
131                       "fmov.s   @%0+, fr11\n\t"
132                       "fmov.s   @%0+, fr12\n\t"
133                       "fmov.s   @%0+, fr13\n\t"
134                       "fmov.s   @%0+, fr14\n\t"
135                       "fmov.s   @%0+, fr15\n\t"
136                       "frchg\n\t"
137                       "lds.l    @%0+, fpscr\n\t"
138                       "lds.l    @%0+, fpul\n\t"
139                       :"=r" (dummy)
140                       :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
141                       :"memory");
142         disable_fpu();
143 }
144
145 /*
146  * Load the FPU with signalling NANS.  This bit pattern we're using
147  * has the property that no matter wether considered as single or as
148  * double precision represents signaling NANS.
149  */
150
151 static void fpu_init(void)
152 {
153         enable_fpu();
154         asm volatile (  "lds    %0, fpul\n\t"
155                         "lds    %1, fpscr\n\t"
156                         "fsts   fpul, fr0\n\t"
157                         "fsts   fpul, fr1\n\t"
158                         "fsts   fpul, fr2\n\t"
159                         "fsts   fpul, fr3\n\t"
160                         "fsts   fpul, fr4\n\t"
161                         "fsts   fpul, fr5\n\t"
162                         "fsts   fpul, fr6\n\t"
163                         "fsts   fpul, fr7\n\t"
164                         "fsts   fpul, fr8\n\t"
165                         "fsts   fpul, fr9\n\t"
166                         "fsts   fpul, fr10\n\t"
167                         "fsts   fpul, fr11\n\t"
168                         "fsts   fpul, fr12\n\t"
169                         "fsts   fpul, fr13\n\t"
170                         "fsts   fpul, fr14\n\t"
171                         "fsts   fpul, fr15\n\t"
172                         "frchg\n\t"
173                         "fsts   fpul, fr0\n\t"
174                         "fsts   fpul, fr1\n\t"
175                         "fsts   fpul, fr2\n\t"
176                         "fsts   fpul, fr3\n\t"
177                         "fsts   fpul, fr4\n\t"
178                         "fsts   fpul, fr5\n\t"
179                         "fsts   fpul, fr6\n\t"
180                         "fsts   fpul, fr7\n\t"
181                         "fsts   fpul, fr8\n\t"
182                         "fsts   fpul, fr9\n\t"
183                         "fsts   fpul, fr10\n\t"
184                         "fsts   fpul, fr11\n\t"
185                         "fsts   fpul, fr12\n\t"
186                         "fsts   fpul, fr13\n\t"
187                         "fsts   fpul, fr14\n\t"
188                         "fsts   fpul, fr15\n\t"
189                         "frchg\n\t"
190                         "lds    %2, fpscr\n\t"
191                         :       /* no output */
192                         :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
193         disable_fpu();
194 }
195
196 /**
197  *      denormal_to_double - Given denormalized float number,
198  *                           store double float
199  *
200  *      @fpu: Pointer to sh_fpu_hard structure
201  *      @n: Index to FP register
202  */
203 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
204 {
205         unsigned long du, dl;
206         unsigned long x = fpu->fpul;
207         int exp = 1023 - 126;
208
209         if (x != 0 && (x & 0x7f800000) == 0) {
210                 du = (x & 0x80000000);
211                 while ((x & 0x00800000) == 0) {
212                         x <<= 1;
213                         exp--;
214                 }
215                 x &= 0x007fffff;
216                 du |= (exp << 20) | (x >> 3);
217                 dl = x << 29;
218
219                 fpu->fp_regs[n] = du;
220                 fpu->fp_regs[n + 1] = dl;
221         }
222 }
223
224 /**
225  *      ieee_fpe_handler - Handle denormalized number exception
226  *
227  *      @regs: Pointer to register structure
228  *
229  *      Returns 1 when it's handled (should not cause exception).
230  */
231 static int ieee_fpe_handler(struct pt_regs *regs)
232 {
233         unsigned short insn = *(unsigned short *)regs->pc;
234         unsigned short finsn;
235         unsigned long nextpc;
236         int nib[4] = {
237                 (insn >> 12) & 0xf,
238                 (insn >> 8) & 0xf,
239                 (insn >> 4) & 0xf,
240                 insn & 0xf
241         };
242
243         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
244                 regs->pr = regs->pc + 4;  /* bsr & jsr */
245
246         if (nib[0] == 0xa || nib[0] == 0xb) {
247                 /* bra & bsr */
248                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
249                 finsn = *(unsigned short *)(regs->pc + 2);
250         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
251                 /* bt/s */
252                 if (regs->sr & 1)
253                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
254                 else
255                         nextpc = regs->pc + 4;
256                 finsn = *(unsigned short *)(regs->pc + 2);
257         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
258                 /* bf/s */
259                 if (regs->sr & 1)
260                         nextpc = regs->pc + 4;
261                 else
262                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
263                 finsn = *(unsigned short *)(regs->pc + 2);
264         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
265                    (nib[2] == 0x0 || nib[2] == 0x2)) {
266                 /* jmp & jsr */
267                 nextpc = regs->regs[nib[1]];
268                 finsn = *(unsigned short *)(regs->pc + 2);
269         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
270                    (nib[2] == 0x0 || nib[2] == 0x2)) {
271                 /* braf & bsrf */
272                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
273                 finsn = *(unsigned short *)(regs->pc + 2);
274         } else if (insn == 0x000b) {
275                 /* rts */
276                 nextpc = regs->pr;
277                 finsn = *(unsigned short *)(regs->pc + 2);
278         } else {
279                 nextpc = regs->pc + instruction_size(insn);
280                 finsn = insn;
281         }
282
283         if ((finsn & 0xf1ff) == 0xf0ad) {
284                 /* fcnvsd */
285                 struct task_struct *tsk = current;
286
287                 save_fpu(tsk, regs);
288                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
289                         /* FPU error */
290                         denormal_to_double(&tsk->thread.fpu.hard,
291                                            (finsn >> 8) & 0xf);
292                 else
293                         return 0;
294
295                 regs->pc = nextpc;
296                 return 1;
297         } else if ((finsn & 0xf00f) == 0xf002) {
298                 /* fmul */
299                 struct task_struct *tsk = current;
300                 int fpscr;
301                 int n, m, prec;
302                 unsigned int hx, hy;
303
304                 n = (finsn >> 8) & 0xf;
305                 m = (finsn >> 4) & 0xf;
306                 hx = tsk->thread.fpu.hard.fp_regs[n];
307                 hy = tsk->thread.fpu.hard.fp_regs[m];
308                 fpscr = tsk->thread.fpu.hard.fpscr;
309                 prec = fpscr & FPSCR_DBL_PRECISION;
310
311                 if ((fpscr & FPSCR_CAUSE_ERROR)
312                     && (prec && ((hx & 0x7fffffff) < 0x00100000
313                                  || (hy & 0x7fffffff) < 0x00100000))) {
314                         long long llx, lly;
315
316                         /* FPU error because of denormal (doubles) */
317                         llx = ((long long)hx << 32)
318                             | tsk->thread.fpu.hard.fp_regs[n + 1];
319                         lly = ((long long)hy << 32)
320                             | tsk->thread.fpu.hard.fp_regs[m + 1];
321                         llx = float64_mul(llx, lly);
322                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
323                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
324                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
325                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
326                                          || (hy & 0x7fffffff) < 0x00800000))) {
327                         /* FPU error because of denormal (floats) */
328                         hx = float32_mul(hx, hy);
329                         tsk->thread.fpu.hard.fp_regs[n] = hx;
330                 } else
331                         return 0;
332
333                 regs->pc = nextpc;
334                 return 1;
335         } else if ((finsn & 0xf00e) == 0xf000) {
336                 /* fadd, fsub */
337                 struct task_struct *tsk = current;
338                 int fpscr;
339                 int n, m, prec;
340                 unsigned int hx, hy;
341
342                 n = (finsn >> 8) & 0xf;
343                 m = (finsn >> 4) & 0xf;
344                 hx = tsk->thread.fpu.hard.fp_regs[n];
345                 hy = tsk->thread.fpu.hard.fp_regs[m];
346                 fpscr = tsk->thread.fpu.hard.fpscr;
347                 prec = fpscr & FPSCR_DBL_PRECISION;
348
349                 if ((fpscr & FPSCR_CAUSE_ERROR)
350                     && (prec && ((hx & 0x7fffffff) < 0x00100000
351                                  || (hy & 0x7fffffff) < 0x00100000))) {
352                         long long llx, lly;
353
354                         /* FPU error because of denormal (doubles) */
355                         llx = ((long long)hx << 32)
356                             | tsk->thread.fpu.hard.fp_regs[n + 1];
357                         lly = ((long long)hy << 32)
358                             | tsk->thread.fpu.hard.fp_regs[m + 1];
359                         if ((finsn & 0xf00f) == 0xf000)
360                                 llx = float64_add(llx, lly);
361                         else
362                                 llx = float64_sub(llx, lly);
363                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
364                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
365                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
366                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
367                                          || (hy & 0x7fffffff) < 0x00800000))) {
368                         /* FPU error because of denormal (floats) */
369                         if ((finsn & 0xf00f) == 0xf000)
370                                 hx = float32_add(hx, hy);
371                         else
372                                 hx = float32_sub(hx, hy);
373                         tsk->thread.fpu.hard.fp_regs[n] = hx;
374                 } else
375                         return 0;
376
377                 regs->pc = nextpc;
378                 return 1;
379         } else if ((finsn & 0xf003) == 0xf003) {
380                 /* fdiv */
381                 struct task_struct *tsk = current;
382                 int fpscr;
383                 int n, m, prec;
384                 unsigned int hx, hy;
385
386                 n = (finsn >> 8) & 0xf;
387                 m = (finsn >> 4) & 0xf;
388                 hx = tsk->thread.fpu.hard.fp_regs[n];
389                 hy = tsk->thread.fpu.hard.fp_regs[m];
390                 fpscr = tsk->thread.fpu.hard.fpscr;
391                 prec = fpscr & FPSCR_DBL_PRECISION;
392
393                 if ((fpscr & FPSCR_CAUSE_ERROR)
394                     && (prec && ((hx & 0x7fffffff) < 0x00100000
395                                  || (hy & 0x7fffffff) < 0x00100000))) {
396                         long long llx, lly;
397
398                         /* FPU error because of denormal (doubles) */
399                         llx = ((long long)hx << 32)
400                             | tsk->thread.fpu.hard.fp_regs[n + 1];
401                         lly = ((long long)hy << 32)
402                             | tsk->thread.fpu.hard.fp_regs[m + 1];
403
404                         llx = float64_div(llx, lly);
405
406                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
407                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
408                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
409                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
410                                          || (hy & 0x7fffffff) < 0x00800000))) {
411                         /* FPU error because of denormal (floats) */
412                         hx = float32_div(hx, hy);
413                         tsk->thread.fpu.hard.fp_regs[n] = hx;
414                 } else
415                         return 0;
416
417                 regs->pc = nextpc;
418                 return 1;
419         }
420
421         return 0;
422 }
423
424 void float_raise(unsigned int flags)
425 {
426         fpu_exception_flags |= flags;
427 }
428
429 int float_rounding_mode(void)
430 {
431         struct task_struct *tsk = current;
432         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
433         return roundingMode;
434 }
435
436 BUILD_TRAP_HANDLER(fpu_error)
437 {
438         struct task_struct *tsk = current;
439         TRAP_HANDLER_DECL;
440
441         save_fpu(tsk, regs);
442         fpu_exception_flags = 0;
443         if (ieee_fpe_handler(regs)) {
444                 tsk->thread.fpu.hard.fpscr &=
445                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
446                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
447                 /* Set the FPSCR flag as well as cause bits - simply
448                  * replicate the cause */
449                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
450                 grab_fpu(regs);
451                 restore_fpu(tsk);
452                 set_tsk_thread_flag(tsk, TIF_USEDFPU);
453                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
454                      (fpu_exception_flags >> 2)) == 0) {
455                         return;
456                 }
457         }
458
459         force_sig(SIGFPE, tsk);
460 }
461
462 BUILD_TRAP_HANDLER(fpu_state_restore)
463 {
464         struct task_struct *tsk = current;
465         TRAP_HANDLER_DECL;
466
467         grab_fpu(regs);
468         if (!user_mode(regs)) {
469                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
470                 return;
471         }
472
473         if (used_math()) {
474                 /* Using the FPU again.  */
475                 restore_fpu(tsk);
476         } else {
477                 /* First time FPU user.  */
478                 fpu_init();
479                 set_used_math();
480         }
481         set_tsk_thread_flag(tsk, TIF_USEDFPU);
482 }