Home | History | Annotate | Download | only in dsp
      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // MIPS version of rescaling functions
     11 //
     12 // Author(s): Djordje Pesut (djordje.pesut (at) imgtec.com)
     13 
     14 #include "src/dsp/dsp.h"
     15 
     16 #if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
     17 
     18 #include <assert.h>
     19 #include "src/utils/rescaler_utils.h"
     20 
     21 #define ROUNDER (WEBP_RESCALER_ONE >> 1)
        // ^ ROUNDER: half of WEBP_RESCALER_ONE. MULT_FIX adds it before shifting.
        //   Presumably WEBP_RESCALER_ONE == 1 << WEBP_RESCALER_RFIX, which would
        //   make MULT_FIX round to nearest -- confirm in rescaler_utils.h.
     22 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
        // ^ MULT_FIX: widens x to 64 bits, multiplies by y, adds ROUNDER, then
        //   shifts right by WEBP_RESCALER_RFIX.
     23 #define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
        // ^ MULT_FIX_FLOOR: same as MULT_FIX but without the ROUNDER term, so the
        //   shifted-out bits are simply discarded.
     24 
     25 //------------------------------------------------------------------------------
     26 // Row export
     27 
     28 #if 0  // disabled for now. TODO(skal): make match the C-code
     29 static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
          // MIPS DSP R2 row export for the vertical-shrink case (asserts
          // !wrk->y_expand). Currently compiled out by the enclosing '#if 0'.
          // For each of the dst_width * num_channels samples it writes one byte
          // to wrk->dst and updates wrk->irow in place:
          //   - yscale != 0: frac = frow[x] * yscale (scaled down), the output is
          //     (irow[x] - frac) * fxy_scale (scaled down), and irow[x] = frac.
          //   - yscale == 0: the output is irow[x] * fxy_scale (scaled down) and
          //     irow[x] = 0.
          // Samples are processed four at a time by the inline assembly; the
          // remaining (x_out_max & 3) samples are handled by the C loops.
          // NOTE(review): the assembly preloads a 0x80000000 rounding term into
          // every accumulator, whereas the C tail loops use MULT_FIX_FLOOR for
          // 'frac' and for the yscale == 0 output, and only the C loops clamp the
          // result to 255. This looks like the mismatch the TODO on the '#if 0'
          // line refers to -- confirm against the C implementation.
     30   int i;
     31   const int x_out_max = wrk->dst_width * wrk->num_channels;
     32   uint8_t* dst = wrk->dst;
     33   rescaler_t* irow = wrk->irow;
     34   const rescaler_t* frow = wrk->frow;
          // NOTE(review): the product is narrowed to a signed int; whether that can
          // truncate depends on the field types, which are not visible here.
     35   const int yscale = wrk->fy_scale * (-wrk->y_accum);
     36   int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
     37   const int temp7 = (int)wrk->fxy_scale;
          // Byte length covered by the 4-sample assembly loop: the sample count
          // rounded down to a multiple of 4, times 4 bytes per loaded word.
     38   const int temp6 = (x_out_max & ~0x3) << 2;
     39   assert(!WebPRescalerOutputDone(wrk));
     40   assert(wrk->y_accum <= 0);
     41   assert(!wrk->y_expand);
     42   assert(wrk->fxy_scale != 0);
     43   if (yscale) {
            // The assembly loop tests its exit condition at the bottom, so it must
            // only be entered when at least one group of four samples exists.
     44     if (x_out_max >= 4) {
     45       int temp8, temp9, temp10, temp11;
     46       __asm__ volatile (
                // temp3 * temp4 = 0x10000 * 0x8000 = 0x80000000: the rounding term
                // that each 'mult' below preloads into an accumulator.
     47         "li       %[temp3],    0x10000                    \n\t"
     48         "li       %[temp4],    0x8000                     \n\t"
                // loop_end = frow + temp6: address just past the last full group.
     49         "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
     50       "1:                                                 \n\t"
                // Load four frow words.
     51         "lw       %[temp0],    0(%[frow])                 \n\t"
     52         "lw       %[temp1],    4(%[frow])                 \n\t"
     53         "lw       %[temp2],    8(%[frow])                 \n\t"
     54         "lw       %[temp5],    12(%[frow])                \n\t"
                // acN = 0x80000000 + frow[k] * yscale, one accumulator per sample.
     55         "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
     56         "maddu    $ac0,        %[temp0],    %[yscale]     \n\t"
     57         "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
     58         "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
     59         "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
     60         "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
     61         "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
     62         "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
     63         "addiu    %[frow],     %[frow],     16            \n\t"
                // frac[k] = upper 32 bits of each accumulator.
     64         "mfhi     %[temp0],    $ac0                       \n\t"
     65         "mfhi     %[temp1],    $ac1                       \n\t"
     66         "mfhi     %[temp2],    $ac2                       \n\t"
     67         "mfhi     %[temp5],    $ac3                       \n\t"
                // Load four irow words, then advance dst and irow past this group
                // (the stores below therefore use negative offsets).
     68         "lw       %[temp8],    0(%[irow])                 \n\t"
     69         "lw       %[temp9],    4(%[irow])                 \n\t"
     70         "lw       %[temp10],   8(%[irow])                 \n\t"
     71         "lw       %[temp11],   12(%[irow])                \n\t"
     72         "addiu    %[dst],      %[dst],      4             \n\t"
     73         "addiu    %[irow],     %[irow],     16            \n\t"
                // irow[k] - frac[k]
     74         "subu     %[temp8],    %[temp8],    %[temp0]      \n\t"
     75         "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
     76         "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
     77         "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
                // acN = 0x80000000 + (irow[k] - frac[k]) * fxy_scale (temp7).
     78         "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
     79         "maddu    $ac0,        %[temp8],    %[temp7]      \n\t"
     80         "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
     81         "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
     82         "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
     83         "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
     84         "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
     85         "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
                // Output values = upper 32 bits of each accumulator.
     86         "mfhi     %[temp8],    $ac0                       \n\t"
     87         "mfhi     %[temp9],    $ac1                       \n\t"
     88         "mfhi     %[temp10],   $ac2                       \n\t"
     89         "mfhi     %[temp11],   $ac3                       \n\t"
                // irow[k] = frac[k]: the new fractional start.
     90         "sw       %[temp0],    -16(%[irow])               \n\t"
     91         "sw       %[temp1],    -12(%[irow])               \n\t"
     92         "sw       %[temp2],    -8(%[irow])                \n\t"
     93         "sw       %[temp5],    -4(%[irow])                \n\t"
                // Store the low byte of each output; no clamping is done here.
     94         "sb       %[temp8],    -4(%[dst])                 \n\t"
     95         "sb       %[temp9],    -3(%[dst])                 \n\t"
     96         "sb       %[temp10],   -2(%[dst])                 \n\t"
     97         "sb       %[temp11],   -1(%[dst])                 \n\t"
     98         "bne      %[frow],     %[loop_end], 1b            \n\t"
                // frow, irow and dst are read-write operands, so the C tail loop
                // below continues from where the assembly stopped.
     99         : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    100           [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    101           [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
    102           [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
    103           [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
    104         : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
                // Clobbers: memory and the hi/lo halves of the accumulators used.
    105         : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    106           "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    107       );
    108     }
            // Remaining (x_out_max & 3) samples, in plain C.
    109     for (i = 0; i < (x_out_max & 0x3); ++i) {
    110       const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
    111       const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
    112       *dst++ = (v > 255) ? 255u : (uint8_t)v;
    113       *irow++ = frac;   // new fractional start
    114     }
    115   } else {
            // yscale == 0: no fractional part is carried over, so irow is scaled
            // straight to the output and then reset to zero.
    116     if (x_out_max >= 4) {
    117       __asm__ volatile (
                // Rounding term 0x10000 * 0x8000 = 0x80000000, as above.
    118         "li       %[temp3],    0x10000                    \n\t"
    119         "li       %[temp4],    0x8000                     \n\t"
                // loop_end = irow + temp6.
    120         "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
    121       "1:                                                 \n\t"
                // Load four irow words, then advance dst and irow.
    122         "lw       %[temp0],    0(%[irow])                 \n\t"
    123         "lw       %[temp1],    4(%[irow])                 \n\t"
    124         "lw       %[temp2],    8(%[irow])                 \n\t"
    125         "lw       %[temp5],    12(%[irow])                \n\t"
    126         "addiu    %[dst],      %[dst],      4             \n\t"
    127         "addiu    %[irow],     %[irow],     16            \n\t"
                // acN = 0x80000000 + irow[k] * fxy_scale (temp7).
    128         "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
    129         "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
    130         "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
    131         "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
    132         "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
    133         "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
    134         "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
    135         "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
                // Output values = upper 32 bits of each accumulator.
    136         "mfhi     %[temp0],    $ac0                       \n\t"
    137         "mfhi     %[temp1],    $ac1                       \n\t"
    138         "mfhi     %[temp2],    $ac2                       \n\t"
    139         "mfhi     %[temp5],    $ac3                       \n\t"
                // Zero the four irow entries that were just consumed.
    140         "sw       $zero,       -16(%[irow])               \n\t"
    141         "sw       $zero,       -12(%[irow])               \n\t"
    142         "sw       $zero,       -8(%[irow])                \n\t"
    143         "sw       $zero,       -4(%[irow])                \n\t"
                // Store the low byte of each output; no clamping is done here.
    144         "sb       %[temp0],    -4(%[dst])                 \n\t"
    145         "sb       %[temp1],    -3(%[dst])                 \n\t"
    146         "sb       %[temp2],    -2(%[dst])                 \n\t"
    147         "sb       %[temp5],    -1(%[dst])                 \n\t"
    148         "bne      %[irow],     %[loop_end], 1b            \n\t"
    149         : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    150           [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
    151           [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
    152         : [temp7]"r"(temp7), [temp6]"r"(temp6)
    153         : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    154           "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    155       );
    156     }
            // Remaining (x_out_max & 3) samples, in plain C (floor, with clamp).
    157     for (i = 0; i < (x_out_max & 0x3); ++i) {
    158       const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
    159       *dst++ = (v > 255) ? 255u : (uint8_t)v;
    160       *irow++ = 0;
    161     }
    162   }
    163 }
    164 #endif  // 0
    165 
    166 static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
          // MIPS DSP R2 row export for the vertical-expand case (asserts
          // wrk->y_expand). Writes dst_width * num_channels bytes to wrk->dst;
          // wrk->frow and wrk->irow are only read.
          //   - y_accum == 0: dst[x] = MULT_FIX(frow[x], fy_scale).
          //   - otherwise:    J = (A * frow[x] + B * irow[x] + ROUNDER)
          //                       >> WEBP_RESCALER_RFIX,
          //     with B = WEBP_RESCALER_FRAC(-y_accum, y_sub) and
          //     A = WEBP_RESCALER_ONE - B, then dst[x] = MULT_FIX(J, fy_scale).
          // Samples are processed four at a time by the inline assembly; the
          // remaining (x_out_max & 3) samples are handled by the C loops.
          // NOTE(review): the assembly hard-codes a 0x80000000 rounding term and
          // takes the upper 32 bits of the accumulator, which matches MULT_FIX only
          // if WEBP_RESCALER_RFIX is 32 -- presumably so, confirm in
          // rescaler_utils.h. Also, the C tail loops clamp the result to 255 while
          // the assembly stores the low byte of the result without clamping.
    167   int i;
    168   uint8_t* dst = wrk->dst;
    169   rescaler_t* irow = wrk->irow;
    170   const int x_out_max = wrk->dst_width * wrk->num_channels;
    171   const rescaler_t* frow = wrk->frow;
    172   int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
          // Byte length covered by the 4-sample assembly loop: the sample count
          // rounded down to a multiple of 4, times 4 bytes per loaded word.
    173   const int temp6 = (x_out_max & ~0x3) << 2;
    174   const int temp7 = (int)wrk->fy_scale;
    175   assert(!WebPRescalerOutputDone(wrk));
    176   assert(wrk->y_accum <= 0);
    177   assert(wrk->y_expand);
    178   assert(wrk->y_sub != 0);
    179   if (wrk->y_accum == 0) {
            // The assembly loop tests its exit condition at the bottom, so it must
            // only be entered when at least one group of four samples exists.
    180     if (x_out_max >= 4) {
    181       __asm__ volatile (
                // temp4 * temp5 = 0x10000 * 0x8000 = 0x80000000: the rounding term
                // that each 'mult' below preloads into an accumulator.
    182         "li       %[temp4],    0x10000                    \n\t"
    183         "li       %[temp5],    0x8000                     \n\t"
                // loop_end = frow + temp6: address just past the last full group.
    184         "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    185       "1:                                                 \n\t"
                // Load four frow words, then advance dst and frow past this group
                // (the byte stores below therefore use negative offsets).
    186         "lw       %[temp0],    0(%[frow])                 \n\t"
    187         "lw       %[temp1],    4(%[frow])                 \n\t"
    188         "lw       %[temp2],    8(%[frow])                 \n\t"
    189         "lw       %[temp3],    12(%[frow])                \n\t"
    190         "addiu    %[dst],      %[dst],      4             \n\t"
    191         "addiu    %[frow],     %[frow],     16            \n\t"
                // acN = 0x80000000 + frow[k] * fy_scale (temp7).
    192         "mult     $ac0,        %[temp4],    %[temp5]      \n\t"
    193         "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
    194         "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
    195         "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
    196         "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
    197         "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
    198         "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
    199         "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
                // Output values = upper 32 bits of each accumulator.
    200         "mfhi     %[temp0],    $ac0                       \n\t"
    201         "mfhi     %[temp1],    $ac1                       \n\t"
    202         "mfhi     %[temp2],    $ac2                       \n\t"
    203         "mfhi     %[temp3],    $ac3                       \n\t"
                // Store the low byte of each output; no clamping is done here.
    204         "sb       %[temp0],    -4(%[dst])                 \n\t"
    205         "sb       %[temp1],    -3(%[dst])                 \n\t"
    206         "sb       %[temp2],    -2(%[dst])                 \n\t"
    207         "sb       %[temp3],    -1(%[dst])                 \n\t"
    208         "bne      %[frow],     %[loop_end], 1b            \n\t"
                // frow and dst are read-write operands, so the C tail loop below
                // continues from where the assembly stopped.
    209         : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    210           [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    211           [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
    212         : [temp7]"r"(temp7), [temp6]"r"(temp6)
                // Clobbers: memory and the hi/lo halves of the accumulators used.
    213         : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    214           "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    215       );
    216     }
            // Remaining (x_out_max & 3) samples, in plain C.
    217     for (i = 0; i < (x_out_max & 0x3); ++i) {
    218       const uint32_t J = *frow++;
    219       const int v = (int)MULT_FIX(J, wrk->fy_scale);
    220       *dst++ = (v > 255) ? 255u : (uint8_t)v;
    221     }
    222   } else {
            // Blend frow and irow with weights A and B (A + B == WEBP_RESCALER_ONE)
            // before applying fy_scale.
    223     const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    224     const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    225     if (x_out_max >= 4) {
    226       int temp8, temp9, temp10, temp11;
    227       __asm__ volatile (
                // temp8 * temp9 = 0x10000 * 0x8000 = 0x80000000: rounding term.
    228         "li       %[temp8],    0x10000                    \n\t"
    229         "li       %[temp9],    0x8000                     \n\t"
                // loop_end = frow + temp6.
    230         "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    231       "1:                                                 \n\t"
                // Load four frow words and the four matching irow words.
    232         "lw       %[temp0],    0(%[frow])                 \n\t"
    233         "lw       %[temp1],    4(%[frow])                 \n\t"
    234         "lw       %[temp2],    8(%[frow])                 \n\t"
    235         "lw       %[temp3],    12(%[frow])                \n\t"
    236         "lw       %[temp4],    0(%[irow])                 \n\t"
    237         "lw       %[temp5],    4(%[irow])                 \n\t"
    238         "lw       %[temp10],   8(%[irow])                 \n\t"
    239         "lw       %[temp11],   12(%[irow])                \n\t"
    240         "addiu    %[dst],      %[dst],      4             \n\t"
                // First stage: acN = 0x80000000 + A * frow[k] + B * irow[k].
    241         "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
    242         "maddu    $ac0,        %[A],        %[temp0]      \n\t"
    243         "maddu    $ac0,        %[B],        %[temp4]      \n\t"
    244         "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
    245         "maddu    $ac1,        %[A],        %[temp1]      \n\t"
    246         "maddu    $ac1,        %[B],        %[temp5]      \n\t"
    247         "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
    248         "maddu    $ac2,        %[A],        %[temp2]      \n\t"
    249         "maddu    $ac2,        %[B],        %[temp10]     \n\t"
    250         "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
    251         "maddu    $ac3,        %[A],        %[temp3]      \n\t"
    252         "maddu    $ac3,        %[B],        %[temp11]     \n\t"
    253         "addiu    %[frow],     %[frow],     16            \n\t"
    254         "addiu    %[irow],     %[irow],     16            \n\t"
                // J[k] = upper 32 bits of each accumulator.
    255         "mfhi     %[temp0],    $ac0                       \n\t"
    256         "mfhi     %[temp1],    $ac1                       \n\t"
    257         "mfhi     %[temp2],    $ac2                       \n\t"
    258         "mfhi     %[temp3],    $ac3                       \n\t"
                // Second stage: acN = 0x80000000 + J[k] * fy_scale (temp7).
    259         "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
    260         "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
    261         "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
    262         "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
    263         "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
    264         "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
    265         "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
    266         "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
                // Output values = upper 32 bits of each accumulator.
    267         "mfhi     %[temp0],    $ac0                       \n\t"
    268         "mfhi     %[temp1],    $ac1                       \n\t"
    269         "mfhi     %[temp2],    $ac2                       \n\t"
    270         "mfhi     %[temp3],    $ac3                       \n\t"
                // Store the low byte of each output; no clamping is done here.
    271         "sb       %[temp0],    -4(%[dst])                 \n\t"
    272         "sb       %[temp1],    -3(%[dst])                 \n\t"
    273         "sb       %[temp2],    -2(%[dst])                 \n\t"
    274         "sb       %[temp3],    -1(%[dst])                 \n\t"
    275         "bne      %[frow],     %[loop_end], 1b            \n\t"
                // frow, irow and dst are read-write operands, so the C tail loop
                // below continues from where the assembly stopped.
    276         : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    277           [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    278           [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
    279           [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
    280           [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
    281         : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
    282         : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    283           "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    284       );
    285     }
            // Remaining (x_out_max & 3) samples, in plain C: the same two-stage
            // computation, followed by a clamp to 255.
    286     for (i = 0; i < (x_out_max & 0x3); ++i) {
    287       const uint64_t I = (uint64_t)A * *frow++
    288                        + (uint64_t)B * *irow++;
    289       const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
    290       const int v = (int)MULT_FIX(J, wrk->fy_scale);
    291       *dst++ = (v > 255) ? 255u : (uint8_t)v;
    292     }
    293   }
    294 }
    295 
    296 #undef MULT_FIX_FLOOR
    297 #undef MULT_FIX
    298 #undef ROUNDER
    299 
    300 //------------------------------------------------------------------------------
    301 // Entry point
    302 
    303 extern void WebPRescalerDspInitMIPSdspR2(void);
    304 
        // Points the WebPRescalerExportRowExpand hook at the MIPS DSP R2
        // implementation defined in this file. No other hook is changed here.
    305 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
    306   WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
          // The shrink variant is compiled out (see the '#if 0' around
          // ExportRowShrink_MIPSdspR2), so its hook is deliberately left alone:
    307 //  WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
    308 }
    309 
    310 #else  // !WEBP_USE_MIPS_DSP_R2 || WEBP_REDUCE_SIZE
    311 
        // Fallback when the MIPS DSP R2 code above is not compiled.
        // NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; it
        // presumably emits a no-op WebPRescalerDspInitMIPSdspR2() -- confirm in
        // src/dsp/dsp.h.
    312 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
    313 
    314 #endif  // WEBP_USE_MIPS_DSP_R2 && !WEBP_REDUCE_SIZE
    315