arch/mips/lib/csum_partial.S

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * Quick'n'dirty IP checksum ...
   7  *
   8  * Copyright (C) 1998, 1999 Ralf Baechle
   9  * Copyright (C) 1999 Silicon Graphics, Inc.
  10  */
  11 #include <asm/asm.h>
  12 #include <asm/regdef.h>
  13
  14 #ifdef CONFIG_64BIT
  15 /*
  16  * This code base is shared with the mips32 tree, which uses the o32 ABI
  17  * register names, so on 64-bit builds the temporary-register names are
  18  * redefined from the n64 ABI register naming to the o32 ABI register naming.
      *
      * Only t0-t3 are #undef'ed before being redefined; t4-t7 are defined
      * without an #undef (presumably the n64 naming in <asm/regdef.h> has no
      * t4-t7 -- confirm against that header).
  19  */
  20 #undef t0
  21 #undef t1
  22 #undef t2
  23 #undef t3
  24 #define t0      $8
  25 #define t1      $9
  26 #define t2      $10
  27 #define t3      $11
  28 #define t4      $12
  29 #define t5      $13
  30 #define t6      $14
  31 #define t7      $15
  32 #endif
  33
  34 #define ADDC(sum,reg)   /* sum += reg with end-around carry; clobbers v1 */ \
  35         addu    sum, reg;       /* add; a carry out of the register is lost here */ \
  36         sltu    v1, sum, reg;   /* v1 = 1 if the add wrapped (sum < reg, unsigned) */ \
  37         addu    sum, v1         /* add the lost carry back in */
  38
  39 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
             /* Add the eight words at offset+0x00 .. offset+0x1c from src  */ \
             /* into sum, four loads followed by four ADDCs, twice.         */ \
             /* src is not advanced.  Overwrites _t0.._t3 and, via ADDC, v1. */ \
  40         lw      _t0, (offset + 0x00)(src);      /* first 16 bytes */ \
  41         lw      _t1, (offset + 0x04)(src);                      \
  42         lw      _t2, (offset + 0x08)(src);                      \
  43         lw      _t3, (offset + 0x0c)(src);                      \
  44         ADDC(sum, _t0);                                         \
  45         ADDC(sum, _t1);                                         \
  46         ADDC(sum, _t2);                                         \
  47         ADDC(sum, _t3);                                         \
  48         lw      _t0, (offset + 0x10)(src);      /* second 16 bytes */ \
  49         lw      _t1, (offset + 0x14)(src);                      \
  50         lw      _t2, (offset + 0x18)(src);                      \
  51         lw      _t3, (offset + 0x1c)(src);                      \
  52         ADDC(sum, _t0);                                         \
  53         ADDC(sum, _t1);                                         \
  54         ADDC(sum, _t2);                                         \
  55         ADDC(sum, _t3); /* the continuation here pulls the empty line below into the macro */ \
  56
  57 /*
      * Registers on entry to csum_partial:
  58  * a0: source address (named src below)
  59  * a1: length of the area to checksum, in bytes
  60  * a2: partial checksum, added to the folded sum just before returning
  61  */
  62
  63 #define src a0          /* read pointer; advanced as data is consumed */
  64 #define sum v0          /* running checksum, accumulated directly in v0 */
  65
  66         .text
             /*
              * noreorder: the assembler leaves branch delay slots alone, so
              * they are filled by hand.  In this file the instruction
              * indented one extra column after a branch or jump is its
              * delay slot and executes whether or not the branch is taken.
              */
  67         .set    noreorder
  68
  69 /*
      * small_csumcpy: unknown src alignment and < 8 bytes to go.
      *
      * Adds the last few bytes at src to the running sum, folds the sum to
      * 16 bits, adds the caller's partial checksum and returns.  It is not
      * called: csum_partial branches or jumps here, so the final "jr ra"
      * returns to csum_partial's caller.
      *
      * In:   src (a0)  next byte to read
      *       t2        bytes left; only its 4, 2 and 1 bits are examined
      *       sum (v0)  running sum
      *       t7        non-zero if the buffer started on an odd address
      *       a2        partial checksum passed in by the caller
      * Out:  v0        folded 16-bit sum (bytes swapped if t7 is set) plus
      *                 a2, added with end-around carry
      * Uses: a0, a1, t0, t1, t2, v1
      */
  70 small_csumcpy:
  71         move    a1, t2                  /* a1 = bytes left */
  72
  73         andi    t0, a1, 4               /* a whole word left? */
  74         beqz    t0, 1f
  75          andi   t0, a1, 2               /* (delay slot) a halfword left after that? */
  76
  77         /* Still a full word to go  */
  78         ulw     t1, (src)               /* unaligned word load */
  79         PTR_ADDIU       src, 4
  80         ADDC(sum, t1)
  81
  82 1:      move    t1, zero                /* t1 collects the trailing halfword and byte */
  83         beqz    t0, 1f
  84          andi   t0, a1, 1               /* (delay slot) a single byte left after that? */
  85
  86         /* Still a halfword to go  */
  87         ulhu    t1, (src)               /* unaligned, zero-extended halfword load */
  88         PTR_ADDIU       src, 2
  89
  90 1:      beqz    t0, 1f
  91          sll    t1, t1, 16              /* (delay slot) move the halfword, or 0, to the upper half */
  92
  93         lbu     t2, (src)               /* last byte, zero-extended */
  94          nop                            /* presumably fills the load delay slot -- confirm */
  95
  96 #ifdef __MIPSEB__
  97         sll     t2, t2, 8               /* big-endian: the lone byte goes to bits 15:8 */
  98 #endif
  99         or      t1, t2                  /* t1 = halfword in the upper half, byte in the lower */
 100
 101 1:      ADDC(sum, t1)
 102
 103         /* fold checksum */
 104         sll     v1, sum, 16             /* v1 = low half moved to the top */
 105         addu    sum, v1                 /* upper half of sum = high half + low half */
 106         sltu    v1, sum, v1             /* v1 = carry out of that add */
 107         srl     sum, sum, 16            /* bring the 16-bit result down */
 108         addu    sum, v1                 /* add the carry back in */
 109
 110         /* odd buffer alignment? */
 111         beqz    t7, 1f
 112          nop
 113         sll     v1, sum, 8              /* t7 set: swap the two bytes of the 16-bit sum */
 114         srl     sum, sum, 8
 115         or      sum, v1
 116         andi    sum, 0xffff             /* drop what the shift pushed above bit 15 */
 117 1:
 118         .set    reorder                 /* let the assembler handle the delay slot of jr */
 119         /* Add the passed partial csum.  */
 120         ADDC(sum, a2)
 121         jr      ra
 122         .set    noreorder
 123
 124 /* ------------------------------------------------------------------------- */
 125
     /*
      * csum_partial
      *
      * In:   a0 (src)  buffer address
      *       a1        length in bytes
      *       a2        partial checksum to add to the result
      * Out:  v0 (sum)  produced by small_csumcpy, where every path ends:
      *                 it adds the last 0..7 bytes, folds the sum to 16
      *                 bits, swaps its bytes if the buffer started on an
      *                 odd address (t7), adds a2 and returns to the caller.
      * Uses: a0, a1, t0-t4, t7, t8, v1
      *
      * Lengths below 8 go straight to small_csumcpy.  Otherwise a leading
      * byte and halfword are consumed as the low address bits require.  With
      * fewer than 56 bytes left the rest is summed word by word; with more,
      * src is further aligned in 4-, 8- and 16-byte steps and the data is
      * summed in 128-, 64- and 32-byte chunks followed by single words.
      *
      * a1 is reduced only by the alignment steps; the chunk loops leave it
      * alone and the remaining sizes are read from its bits.
      */
 126         .align  5
 127 LEAF(csum_partial)
 128         move    sum, zero
 129         move    t7, zero                        /* "odd start" flag off for the short path */
 130
 131         sltiu   t8, a1, 0x8
 132         bnez    t8, small_csumcpy               /* < 8 bytes to checksum */
 133          move   t2, a1                          /* (delay slot) t2 = length for small_csumcpy */
 134
             /* a1 >= 8 from here on, so a zero length never gets this far */
 136         andi    t7, src, 0x1                    /* odd buffer? */
 137
 138 hword_align:
 139         beqz    t7, word_align
 140          andi   t8, src, 0x2                    /* (delay slot) halfword step needed? */
 141
             /*
              * Odd start address: take a single byte first.  small_csumcpy
              * swaps the bytes of the folded sum when t7 is set.
              */
 142         lbu     t0, (src)
 143         LONG_SUBU       a1, a1, 0x1
 144 #ifdef __MIPSEL__
 145         sll     t0, t0, 8                       /* little-endian: byte goes to bits 15:8 */
 146 #endif
 147         ADDC(sum, t0)
 148         PTR_ADDU        src, src, 0x1
 149         andi    t8, src, 0x2                    /* redo the test with the new src */
 150
 151 word_align:
 152         beqz    t8, dword_align
 153          sltiu  t8, a1, 56                      /* (delay slot) t8 = (a1 < 56) */
 154
 155         lhu     t0, (src)
 156         LONG_SUBU       a1, a1, 0x2
 157         ADDC(sum, t0)
 158         sltiu   t8, a1, 56                      /* redo the test with the new a1 */
 159         PTR_ADDU        src, src, 0x2
 160
 161 dword_align:
 162         bnez    t8, do_end_words                /* < 56 bytes: word loop only */
 163          move   t8, a1                          /* (delay slot) byte count; do_end_words divides by 4 */
 164
 165         andi    t8, src, 0x4
 166         beqz    t8, qword_align
 167          andi   t8, src, 0x8                    /* (delay slot) 8-byte step needed? */
 168
 169         lw      t0, 0x00(src)
 170         LONG_SUBU       a1, a1, 0x4
 171         ADDC(sum, t0)
 172         PTR_ADDU        src, src, 0x4
 173         andi    t8, src, 0x8                    /* redo the test with the new src */
 174
 175 qword_align:
 176         beqz    t8, oword_align
 177          andi   t8, src, 0x10                   /* (delay slot) 16-byte step needed? */
 178
 179         lw      t0, 0x00(src)
 180         lw      t1, 0x04(src)
 181         LONG_SUBU       a1, a1, 0x8
 182         ADDC(sum, t0)
 183         ADDC(sum, t1)
 184         PTR_ADDU        src, src, 0x8
 185         andi    t8, src, 0x10                   /* redo the test with the new src */
 186
 187 oword_align:
 188         beqz    t8, begin_movement
 189          LONG_SRL       t8, a1, 0x7             /* (delay slot) t8 = number of 128-byte blocks */
 190
 191         lw      t3, 0x08(src)
 192         lw      t4, 0x0c(src)
 193         lw      t0, 0x00(src)
 194         lw      t1, 0x04(src)
 195         ADDC(sum, t3)
 196         ADDC(sum, t4)
 197         ADDC(sum, t0)
 198         ADDC(sum, t1)
 199         LONG_SUBU       a1, a1, 0x10
 200         PTR_ADDU        src, src, 0x10
 201         LONG_SRL        t8, a1, 0x7             /* block count again, from the new a1 */
 202
 203 begin_movement:
 204         beqz    t8, 1f
 205          andi   t2, a1, 0x40                    /* (delay slot) a 64-byte chunk to do afterwards? */
 206
 207 move_128bytes:
 208         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 209         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 210         CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 211         CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 212         LONG_SUBU       t8, t8, 0x01
 213         bnez    t8, move_128bytes
 214          PTR_ADDU       src, src, 0x80          /* (delay slot) advance on every pass */
 215
 216 1:
 217         beqz    t2, 1f
 218          andi   t2, a1, 0x20                    /* (delay slot) a 32-byte chunk to do afterwards? */
 219
 220 move_64bytes:
 221         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 222         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 223         PTR_ADDU        src, src, 0x40
 224
 225 1:
 226         beqz    t2, do_end_words
 227          andi   t8, a1, 0x1c                    /* (delay slot) bytes held in the remaining whole words */
 228
 229 move_32bytes:
 230         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 231         andi    t8, a1, 0x1c
 232         PTR_ADDU        src, src, 0x20
 233
 234 do_end_words:
 235         beqz    t8, maybe_end_cruft
 236          LONG_SRL       t8, t8, 0x2             /* (delay slot) bytes -> words */
 237
 238 end_words:
 239         lw      t0, (src)
 240         LONG_SUBU       t8, t8, 0x1
 241         ADDC(sum, t0)
 242         bnez    t8, end_words
 243          PTR_ADDU       src, src, 0x4           /* (delay slot) advance on every pass */
 244
 245 maybe_end_cruft:
 246         andi    t2, a1, 0x3                     /* 0..3 bytes left for small_csumcpy */
 247
 248 small_memcpy:
             /* The tail bytes, the fold and the return all happen in small_csumcpy. */
 249         j       small_csumcpy
              move   a1, t2                          /* (delay slot) */
 262         END(csum_partial)