Home | History | Annotate | Download | only in simd
      1 /*
      2  * MIPS DSPr2 optimizations for libjpeg-turbo
      3  *
      4  * Copyright (C) 2013, MIPS Technologies, Inc., California.
      5  * All Rights Reserved.
      6  * Authors:  Teodora Novkovic (teodora.novkovic (at) imgtec.com)
      7  *           Darko Laus       (darko.laus (at) imgtec.com)
      8  * This software is provided 'as-is', without any express or implied
      9  * warranty.  In no event will the authors be held liable for any damages
     10  * arising from the use of this software.
     11  *
     12  * Permission is granted to anyone to use this software for any purpose,
     13  * including commercial applications, and to alter it and redistribute it
     14  * freely, subject to the following restrictions:
     15  *
     16  * 1. The origin of this software must not be misrepresented; you must not
     17  *    claim that you wrote the original software. If you use this software
     18  *    in a product, an acknowledgment in the product documentation would be
     19  *    appreciated but is not required.
     20  * 2. Altered source versions must be plainly marked as such, and must not be
     21  *    misrepresented as being the original software.
     22  * 3. This notice may not be removed or altered from any source distribution.
     23  */
     24 
     25 #define zero $0   /* symbolic names for general-purpose registers $0-$31 */
     26 #define AT   $1
     27 #define v0   $2
     28 #define v1   $3
     29 #define a0   $4   /* a0-a3: function input arguments */
     30 #define a1   $5
     31 #define a2   $6
     32 #define a3   $7
     33 #define t0   $8
     34 #define t1   $9
     35 #define t2   $10
     36 #define t3   $11
     37 #define t4   $12
     38 #define t5   $13
     39 #define t6   $14
     40 #define t7   $15
     41 #define s0   $16
     42 #define s1   $17
     43 #define s2   $18
     44 #define s3   $19
     45 #define s4   $20
     46 #define s5   $21
     47 #define s6   $22
     48 #define s7   $23
     49 #define t8   $24
     50 #define t9   $25
     51 #define k0   $26
     52 #define k1   $27
     53 #define gp   $28
     54 #define sp   $29
     55 #define fp   $30  /* fp and s8 both name register $30 */
     56 #define s8   $30
     57 #define ra   $31
     58 
     59 #define f0   $f0  /* symbolic names f0-f31 for registers $f0-$f31 */
     60 #define f1   $f1
     61 #define f2   $f2
     62 #define f3   $f3
     63 #define f4   $f4
     64 #define f5   $f5
     65 #define f6   $f6
     66 #define f7   $f7
     67 #define f8   $f8
     68 #define f9   $f9
     69 #define f10  $f10
     70 #define f11  $f11
     71 #define f12  $f12
     72 #define f13  $f13
     73 #define f14  $f14
     74 #define f15  $f15
     75 #define f16  $f16
     76 #define f17  $f17
     77 #define f18  $f18
     78 #define f19  $f19
     79 #define f20  $f20
     80 #define f21  $f21
     81 #define f22  $f22
     82 #define f23  $f23
     83 #define f24  $f24
     84 #define f25  $f25
     85 #define f26  $f26
     86 #define f27  $f27
     87 #define f28  $f28
     88 #define f29  $f29
     89 #define f30  $f30
     90 #define f31  $f31
     91 
     92 /* LEAF_MIPS32R2 - declare leaf routine for MIPS32r2.
     93  * Exports symbol, emits its label with a zero-size frame (sp/ra), saves the
     94  * assembler options and then selects arch=mips32r2, noreorder and noat. */
     95 #define LEAF_MIPS32R2(symbol)                           \
     96                 .globl  symbol;                         \
     97                 .align  2;                              \
     98                 .type   symbol, @function;              \
     99                 .ent    symbol, 0;                      \
    100 symbol:         .frame  sp, 0, ra;                      \
    101                 .set    push;                           \
    102                 .set    arch=mips32r2;                  \
    103                 .set    noreorder;                      \
    104                 .set    noat;
    105 
    106 /* LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2.
    107  * Expands LEAF_MIPS32R2(symbol) and then additionally issues ".set dspr2";
    108  * the ".set push" it inherits is undone by the ".set pop" in END. */
    109 #define LEAF_MIPS_DSPR2(symbol)                         \
    110 LEAF_MIPS32R2(symbol)                                   \
    111                 .set    dspr2;
    112 
    113 /* END - mark end of function.
    114  * Pops the assembler options pushed by the LEAF_* macros, closes the ".ent"
    115  * region with ".end" and sets the symbol size to (current location - label). */
    116 #define END(function)                                   \
    117                 .set    pop;                            \
    118                 .end    function;                       \
    119                 .size   function,.-function
    120 
    121 /*
    122  * Checks that the stack offset is big enough for storing/restoring regs_num
    123  * registers to/from the stack. Storing the registers takes regs_num*4 bytes,
    124  * but since the MIPS ABI allows usage of the first 16 bytes of the stack frame
    125  * (reserved for the input arguments of the function, already held in a0-a3),
    126  * the offset only has to be at least regs_num*4 - 16 bytes.
    127  * Assembly stops with an error if the offset is smaller than that.
    128  */
    129 .macro CHECK_STACK_OFFSET regs_num, stack_offset
    130 .if \stack_offset < \regs_num * 4 - 16   /* 16 = reusable argument-area bytes */
    131 .error "Stack offset too small."
    132 .endif
    133 .endm
    134 
    135 /*
    136  * Saves a set of registers on the stack. The maximum number of registers
    137  * that can be saved is limited to 14 (a0-a3, v0-v1 and s0-s7).
    138  * Stack offset is the number of bytes subtracted from the stack pointer (sp)
    139  * before the registers are stored, in order to provide enough space on the
    140  * stack (the offset must be a non-negative multiple of 4 and big enough, as
    141  * described by the CHECK_STACK_OFFSET macro). This macro is intended to be
    142  * used in combination with the RESTORE_REGS_FROM_STACK macro. Example:
    143  *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
    144  *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
    145  */
    146 .macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
    147                           r2  = 0, r3  = 0, r4  = 0, \
    148                           r5  = 0, r6  = 0, r7  = 0, \
    149                           r8  = 0, r9  = 0, r10 = 0, \
    150                           r11 = 0, r12 = 0, r13 = 0, \
    151                           r14 = 0
    152     .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)   /* 2nd term: remainder of offset / 4 */
    153     .error "Stack offset must be pozitive and multiple of 4."
    154     .endif
    155     .if \stack_offset != 0   /* skip the sp adjustment entirely for a zero offset */
    156     addiu           sp, sp, -\stack_offset   /* move sp down by stack_offset bytes */
    157     .endif
    158     sw              \r1, 0(sp)   /* r1 has no default, so it is always stored */
    159     .if \r2 != 0   /* optional registers default to 0, meaning not supplied */
    160     sw              \r2, 4(sp)
    161     .endif
    162     .if \r3 != 0
    163     sw              \r3, 8(sp)
    164     .endif
    165     .if \r4 != 0
    166     sw              \r4, 12(sp)
    167     .endif
    168     .if \r5 != 0   /* from the 5th register on, stack_offset is checked per slot */
    169     CHECK_STACK_OFFSET 5, \stack_offset
    170     sw              \r5, 16(sp)
    171     .endif
    172     .if \r6 != 0
    173     CHECK_STACK_OFFSET 6, \stack_offset
    174     sw              \r6, 20(sp)
    175     .endif
    176     .if \r7 != 0
    177     CHECK_STACK_OFFSET 7, \stack_offset
    178     sw              \r7, 24(sp)
    179     .endif
    180     .if \r8 != 0
    181     CHECK_STACK_OFFSET 8, \stack_offset
    182     sw              \r8, 28(sp)
    183     .endif
    184     .if \r9 != 0
    185     CHECK_STACK_OFFSET 9, \stack_offset
    186     sw              \r9, 32(sp)
    187     .endif
    188     .if \r10 != 0
    189     CHECK_STACK_OFFSET 10, \stack_offset
    190     sw              \r10, 36(sp)
    191     .endif
    192     .if \r11 != 0
    193     CHECK_STACK_OFFSET 11, \stack_offset
    194     sw              \r11, 40(sp)
    195     .endif
    196     .if \r12 != 0
    197     CHECK_STACK_OFFSET 12, \stack_offset
    198     sw              \r12, 44(sp)
    199     .endif
    200     .if \r13 != 0
    201     CHECK_STACK_OFFSET 13, \stack_offset
    202     sw              \r13, 48(sp)
    203     .endif
    204     .if \r14 != 0
    205     CHECK_STACK_OFFSET 14, \stack_offset
    206     sw              \r14, 52(sp)
    207     .endif
    208 .endm
    209 
    210 /*
    211  * Restores a set of registers from the stack. The maximum number of registers
    212  * that can be restored is limited to 14 (a0-a3, v0-v1 and s0-s7).
    213  * Stack offset is the number of bytes that are added to the stack pointer (sp)
    214  * after the registers are restored (the offset must be a non-negative multiple
    215  * of 4 and big enough, as described by the CHECK_STACK_OFFSET macro). This macro
    216  * is intended to be used in combination with the SAVE_REGS_ON_STACK macro.
    217  * Example:
    218  *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
    219  *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
    220  */
    221 .macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
    222                                r2  = 0, r3  = 0, r4  = 0, \
    223                                r5  = 0, r6  = 0, r7  = 0, \
    224                                r8  = 0, r9  = 0, r10 = 0, \
    225                                r11 = 0, r12 = 0, r13 = 0, \
    226                                r14 = 0
    227     .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)   /* 2nd term: remainder of offset / 4 */
    228     .error "Stack offset must be pozitive and multiple of 4."
    229     .endif
    230     lw              \r1, 0(sp)   /* r1 has no default, so it is always reloaded */
    231     .if \r2 != 0   /* optional registers default to 0, meaning not supplied */
    232     lw              \r2, 4(sp)
    233     .endif
    234     .if \r3 != 0
    235     lw              \r3, 8(sp)
    236     .endif
    237     .if \r4 != 0
    238     lw              \r4, 12(sp)
    239     .endif
    240     .if \r5 != 0   /* from the 5th register on, stack_offset is checked per slot */
    241     CHECK_STACK_OFFSET 5, \stack_offset
    242     lw              \r5, 16(sp)
    243     .endif
    244     .if \r6 != 0
    245     CHECK_STACK_OFFSET 6, \stack_offset
    246     lw              \r6, 20(sp)
    247     .endif
    248     .if \r7 != 0
    249     CHECK_STACK_OFFSET 7, \stack_offset
    250     lw              \r7, 24(sp)
    251     .endif
    252     .if \r8 != 0
    253     CHECK_STACK_OFFSET 8, \stack_offset
    254     lw              \r8, 28(sp)
    255     .endif
    256     .if \r9 != 0
    257     CHECK_STACK_OFFSET 9, \stack_offset
    258     lw              \r9, 32(sp)
    259     .endif
    260     .if \r10 != 0
    261     CHECK_STACK_OFFSET 10, \stack_offset
    262     lw              \r10, 36(sp)
    263     .endif
    264     .if \r11 != 0
    265     CHECK_STACK_OFFSET 11, \stack_offset
    266     lw              \r11, 40(sp)
    267     .endif
    268     .if \r12 != 0
    269     CHECK_STACK_OFFSET 12, \stack_offset
    270     lw              \r12, 44(sp)
    271     .endif
    272     .if \r13 != 0
    273     CHECK_STACK_OFFSET 13, \stack_offset
    274     lw              \r13, 48(sp)
    275     .endif
    276     .if \r14 != 0
    277     CHECK_STACK_OFFSET 14, \stack_offset
    278     lw              \r14, 52(sp)
    279     .endif
    280     .if \stack_offset != 0   /* no sp adjustment needed for a zero offset */
    281     addiu           sp, sp, \stack_offset   /* move sp back up by stack_offset bytes */
    282     .endif
    283 .endm
    284