Home | History | Annotate | Download | only in dsp
      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // MIPS version of rescaling functions
     11 //
     12 // Author(s): Djordje Pesut (djordje.pesut (at) imgtec.com)
     13 
     14 #include "./dsp.h"
     15 
     16 #if defined(WEBP_USE_MIPS32)
     17 
     18 #include <assert.h>
     19 #include "../utils/rescaler.h"
     20 
     21 //------------------------------------------------------------------------------
     22 // Row import
     23 
     24 static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
     25   const int x_stride = wrk->num_channels;
     26   const int x_out_max = wrk->dst_width * wrk->num_channels;
     27   const int fx_scale = wrk->fx_scale;
     28   const int x_add = wrk->x_add;
     29   const int x_sub = wrk->x_sub;
     30   const int x_stride1 = x_stride << 2;
     31   int channel;
     32   assert(!wrk->x_expand);
     33   assert(!WebPRescalerInputDone(wrk));
     34 
     35   for (channel = 0; channel < x_stride; ++channel) {
     36     const uint8_t* src1 = src + channel;
     37     rescaler_t* frow = wrk->frow + channel;
     38     int temp1, temp2, temp3;
     39     int base, frac, sum;
     40     int accum, accum1;
     41     int loop_c = x_out_max - channel;
     42 
     43     __asm__ volatile (
     44       "li     %[temp1],   0x8000                    \n\t"
     45       "li     %[temp2],   0x10000                   \n\t"
     46       "li     %[sum],     0                         \n\t"
     47       "li     %[accum],   0                         \n\t"
     48     "1:                                             \n\t"
     49       "addu   %[accum],   %[accum],   %[x_add]      \n\t"
     50       "li     %[base],    0                         \n\t"
     51       "blez   %[accum],   3f                        \n\t"
     52     "2:                                             \n\t"
     53       "lbu    %[base],    0(%[src1])                \n\t"
     54       "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
     55       "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
     56       "addu   %[sum],     %[sum],     %[base]       \n\t"
     57       "bgtz   %[accum],   2b                        \n\t"
     58     "3:                                             \n\t"
     59       "negu   %[accum1],  %[accum]                  \n\t"
     60       "mul    %[frac],    %[base],    %[accum1]     \n\t"
     61       "mul    %[temp3],   %[sum],     %[x_sub]      \n\t"
     62       "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"
     63       "mult   %[temp1],   %[temp2]                  \n\t"
     64       "maddu  %[frac],    %[fx_scale]               \n\t"
     65       "mfhi   %[sum]                                \n\t"
     66       "subu   %[temp3],   %[temp3],   %[frac]       \n\t"
     67       "sw     %[temp3],   0(%[frow])                \n\t"
     68       "addu   %[frow],    %[frow],    %[x_stride1]  \n\t"
     69       "bgtz   %[loop_c],  1b                        \n\t"
     70       : [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3),
     71         [sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac),
     72         [frow]"+r"(frow), [accum1]"=&r"(accum1),
     73         [temp2]"=&r"(temp2), [temp1]"=&r"(temp1)
     74       : [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale),
     75         [x_sub]"r"(x_sub), [x_add]"r"(x_add),
     76         [loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1)
     77       : "memory", "hi", "lo"
     78     );
     79     assert(accum == 0);
     80   }
     81 }
     82 
     83 static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
     84   const int x_stride = wrk->num_channels;
     85   const int x_out_max = wrk->dst_width * wrk->num_channels;
     86   const int x_add = wrk->x_add;
     87   const int x_sub = wrk->x_sub;
     88   const int src_width = wrk->src_width;
     89   const int x_stride1 = x_stride << 2;
     90   int channel;
     91   assert(wrk->x_expand);
     92   assert(!WebPRescalerInputDone(wrk));
     93 
     94   for (channel = 0; channel < x_stride; ++channel) {
     95     const uint8_t* src1 = src + channel;
     96     rescaler_t* frow = wrk->frow + channel;
     97     int temp1, temp2, temp3, temp4;
     98     int frac;
     99     int accum;
    100     int x_out = channel;
    101 
    102     __asm__ volatile (
    103       "addiu  %[temp3],   %[src_width], -1            \n\t"
    104       "lbu    %[temp2],   0(%[src1])                  \n\t"
    105       "addu   %[src1],    %[src1],      %[x_stride]   \n\t"
    106       "bgtz   %[temp3],   0f                          \n\t"
    107       "addiu  %[temp1],   %[temp2],     0             \n\t"
    108       "b      3f                                      \n\t"
    109     "0:                                               \n\t"
    110       "lbu    %[temp1],   0(%[src1])                  \n\t"
    111     "3:                                               \n\t"
    112       "addiu  %[accum],   %[x_add],     0             \n\t"
    113     "1:                                               \n\t"
    114       "subu   %[temp3],   %[temp2],     %[temp1]      \n\t"
    115       "mul    %[temp3],   %[temp3],     %[accum]      \n\t"
    116       "mul    %[temp4],   %[temp1],     %[x_add]      \n\t"
    117       "addu   %[temp3],   %[temp4],     %[temp3]      \n\t"
    118       "sw     %[temp3],   0(%[frow])                  \n\t"
    119       "addu   %[frow],    %[frow],      %[x_stride1]  \n\t"
    120       "addu   %[x_out],   %[x_out],     %[x_stride]   \n\t"
    121       "subu   %[temp3],   %[x_out],     %[x_out_max]  \n\t"
    122       "bgez   %[temp3],   2f                          \n\t"
    123       "subu   %[accum],   %[accum],     %[x_sub]      \n\t"
    124       "bgez   %[accum],   4f                          \n\t"
    125       "addiu  %[temp2],   %[temp1],     0             \n\t"
    126       "addu   %[src1],    %[src1],      %[x_stride]   \n\t"
    127       "lbu    %[temp1],   0(%[src1])                  \n\t"
    128       "addu   %[accum],   %[accum],     %[x_add]      \n\t"
    129     "4:                                               \n\t"
    130       "b      1b                                      \n\t"
    131     "2:                                               \n\t"
    132       : [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1),
    133         [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
    134         [x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow)
    135       : [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub),
    136         [x_stride1]"r"(x_stride1), [src_width]"r"(src_width),
    137         [x_out_max]"r"(x_out_max)
    138       : "memory", "hi", "lo"
    139     );
    140     assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
    141   }
    142 }
    143 
    144 //------------------------------------------------------------------------------
    145 // Row export
    146 
    147 static void ExportRowExpand(WebPRescaler* const wrk) {
    148   uint8_t* dst = wrk->dst;
    149   rescaler_t* irow = wrk->irow;
    150   const int x_out_max = wrk->dst_width * wrk->num_channels;
    151   const rescaler_t* frow = wrk->frow;
    152   int temp0, temp1, temp3, temp4, temp5, loop_end;
    153   const int temp2 = (int)wrk->fy_scale;
    154   const int temp6 = x_out_max << 2;
    155   assert(!WebPRescalerOutputDone(wrk));
    156   assert(wrk->y_accum <= 0);
    157   assert(wrk->y_expand);
    158   assert(wrk->y_sub != 0);
    159   if (wrk->y_accum == 0) {
    160     __asm__ volatile (
    161       "li       %[temp3],    0x10000                    \n\t"
    162       "li       %[temp4],    0x8000                     \n\t"
    163       "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    164     "1:                                                 \n\t"
    165       "lw       %[temp0],    0(%[frow])                 \n\t"
    166       "addiu    %[dst],      %[dst],      1             \n\t"
    167       "addiu    %[frow],     %[frow],     4             \n\t"
    168       "mult     %[temp3],    %[temp4]                   \n\t"
    169       "maddu    %[temp0],    %[temp2]                   \n\t"
    170       "mfhi     %[temp5]                                \n\t"
    171       "sb       %[temp5],    -1(%[dst])                 \n\t"
    172       "bne      %[frow],     %[loop_end], 1b            \n\t"
    173       : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    174         [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    175         [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
    176       : [temp2]"r"(temp2), [temp6]"r"(temp6)
    177       : "memory", "hi", "lo"
    178     );
    179   } else {
    180     const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    181     const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    182     __asm__ volatile (
    183       "li       %[temp3],    0x10000                    \n\t"
    184       "li       %[temp4],    0x8000                     \n\t"
    185       "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    186     "1:                                                 \n\t"
    187       "lw       %[temp0],    0(%[frow])                 \n\t"
    188       "lw       %[temp1],    0(%[irow])                 \n\t"
    189       "addiu    %[dst],      %[dst],      1             \n\t"
    190       "mult     %[temp3],    %[temp4]                   \n\t"
    191       "maddu    %[A],        %[temp0]                   \n\t"
    192       "maddu    %[B],        %[temp1]                   \n\t"
    193       "addiu    %[frow],     %[frow],     4             \n\t"
    194       "addiu    %[irow],     %[irow],     4             \n\t"
    195       "mfhi     %[temp5]                                \n\t"
    196       "mult     %[temp3],    %[temp4]                   \n\t"
    197       "maddu    %[temp5],    %[temp2]                   \n\t"
    198       "mfhi     %[temp5]                                \n\t"
    199       "sb       %[temp5],    -1(%[dst])                 \n\t"
    200       "bne      %[frow],     %[loop_end], 1b            \n\t"
    201       : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    202         [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    203         [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
    204       : [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
    205       : "memory", "hi", "lo"
    206     );
    207   }
    208 }
    209 
    210 static void ExportRowShrink(WebPRescaler* const wrk) {
    211   const int x_out_max = wrk->dst_width * wrk->num_channels;
    212   uint8_t* dst = wrk->dst;
    213   rescaler_t* irow = wrk->irow;
    214   const rescaler_t* frow = wrk->frow;
    215   const int yscale = wrk->fy_scale * (-wrk->y_accum);
    216   int temp0, temp1, temp3, temp4, temp5, loop_end;
    217   const int temp2 = (int)wrk->fxy_scale;
    218   const int temp6 = x_out_max << 2;
    219 
    220   assert(!WebPRescalerOutputDone(wrk));
    221   assert(wrk->y_accum <= 0);
    222   assert(!wrk->y_expand);
    223   assert(wrk->fxy_scale != 0);
    224   if (yscale) {
    225     __asm__ volatile (
    226       "li       %[temp3],    0x10000                    \n\t"
    227       "li       %[temp4],    0x8000                     \n\t"
    228       "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    229     "1:                                                 \n\t"
    230       "lw       %[temp0],    0(%[frow])                 \n\t"
    231       "mult     %[temp3],    %[temp4]                   \n\t"
    232       "addiu    %[frow],     %[frow],     4             \n\t"
    233       "maddu    %[temp0],    %[yscale]                  \n\t"
    234       "mfhi     %[temp1]                                \n\t"
    235       "lw       %[temp0],    0(%[irow])                 \n\t"
    236       "addiu    %[dst],      %[dst],      1             \n\t"
    237       "addiu    %[irow],     %[irow],     4             \n\t"
    238       "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"
    239       "mult     %[temp3],    %[temp4]                   \n\t"
    240       "maddu    %[temp0],    %[temp2]                   \n\t"
    241       "mfhi     %[temp5]                                \n\t"
    242       "sw       %[temp1],    -4(%[irow])                \n\t"
    243       "sb       %[temp5],    -1(%[dst])                 \n\t"
    244       "bne      %[frow],     %[loop_end], 1b            \n\t"
    245       : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    246         [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    247         [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
    248       : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6)
    249       : "memory", "hi", "lo"
    250     );
    251   } else {
    252     __asm__ volatile (
    253       "li       %[temp3],    0x10000                    \n\t"
    254       "li       %[temp4],    0x8000                     \n\t"
    255       "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
    256     "1:                                                 \n\t"
    257       "lw       %[temp0],    0(%[irow])                 \n\t"
    258       "addiu    %[dst],      %[dst],      1             \n\t"
    259       "addiu    %[irow],     %[irow],     4             \n\t"
    260       "mult     %[temp3],    %[temp4]                   \n\t"
    261       "maddu    %[temp0],    %[temp2]                   \n\t"
    262       "mfhi     %[temp5]                                \n\t"
    263       "sw       $zero,       -4(%[irow])                \n\t"
    264       "sb       %[temp5],    -1(%[dst])                 \n\t"
    265       "bne      %[irow],     %[loop_end], 1b            \n\t"
    266       : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    267         [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
    268         [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
    269       : [temp2]"r"(temp2), [temp6]"r"(temp6)
    270       : "memory", "hi", "lo"
    271     );
    272   }
    273 }
    274 
    275 //------------------------------------------------------------------------------
    276 // Entry point
    277 
    278 extern void WebPRescalerDspInitMIPS32(void);
    279 
    280 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) {
    281   WebPRescalerImportRowExpand = ImportRowExpand;
    282   WebPRescalerImportRowShrink = ImportRowShrink;
    283   WebPRescalerExportRowExpand = ExportRowExpand;
    284   WebPRescalerExportRowShrink = ExportRowShrink;
    285 }
    286 
    287 #else  // !WEBP_USE_MIPS32
    288 
     289 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPS32)  // !WEBP_USE_MIPS32 build; macro is defined outside this file -- presumably emits a stub init function, confirm in dsp.h
    290 
    291 #endif  // WEBP_USE_MIPS32
    292