Home | History | Annotate | Download | only in mmi
      1 /*
      2  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "vpx_mem/vpx_mem.h"
     12 #include "vpx_ports/asmdefs_mmi.h"
     13 #include "vp8/encoder/onyx_int.h"
     14 #include "vp8/encoder/quantize.h"
     15 #include "vp8/common/quant_common.h"
     16 
     17 #define REGULAR_SELECT_EOB(i, rc)                                        \
     18   z = coeff_ptr[rc];                                                     \
     19   sz = (z >> 31);                                                        \
     20   x = (z ^ sz) - sz;                                                     \
     21   zbin = zbin_ptr[rc] + *(zbin_boost_ptr++) + zbin_oq_value;             \
     22   if (x >= zbin) {                                                       \
     23     x += round_ptr[rc];                                                  \
     24     y = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16; \
     25     if (y) {                                                             \
     26       x = (y ^ sz) - sz;                                                 \
     27       qcoeff_ptr[rc] = x;                                                \
     28       dqcoeff_ptr[rc] = x * dequant_ptr[rc];                             \
     29       eob = i;                                                           \
     30       zbin_boost_ptr = b->zrun_zbin_boost;                               \
     31     }                                                                    \
     32   }
     33 
     34 void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
     35   const int16_t *coeff_ptr = b->coeff;
     36   const int16_t *round_ptr = b->round;
     37   const int16_t *quant_ptr = b->quant_fast;
     38   int16_t *qcoeff_ptr = d->qcoeff;
     39   int16_t *dqcoeff_ptr = d->dqcoeff;
     40   const int16_t *dequant_ptr = d->dequant;
     41   const int16_t *inv_zig_zag = vp8_default_inv_zig_zag;
     42 
     43   double ftmp[13];
     44   uint64_t tmp[1];
     45   DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL };
     46   int eob = 0;
     47 
     48   __asm__ volatile(
     49       // loop 0 ~ 7
     50       "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
     51       "gsldlc1    %[ftmp1],   0x07(%[coeff_ptr])              \n\t"
     52       "gsldrc1    %[ftmp1],   0x00(%[coeff_ptr])              \n\t"
     53       "li         %[tmp0],    0x0f                            \n\t"
     54       "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
     55       "gsldlc1    %[ftmp2],   0x0f(%[coeff_ptr])              \n\t"
     56       "gsldrc1    %[ftmp2],   0x08(%[coeff_ptr])              \n\t"
     57 
     58       "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
     59       "xor        %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
     60       "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
     61       "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
     62       "xor        %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
     63       "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"
     64 
     65       "gsldlc1    %[ftmp5],   0x07(%[round_ptr])              \n\t"
     66       "gsldrc1    %[ftmp5],   0x00(%[round_ptr])              \n\t"
     67       "gsldlc1    %[ftmp6],   0x0f(%[round_ptr])              \n\t"
     68       "gsldrc1    %[ftmp6],   0x08(%[round_ptr])              \n\t"
     69       "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
     70       "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
     71       "gsldlc1    %[ftmp7],   0x07(%[quant_ptr])              \n\t"
     72       "gsldrc1    %[ftmp7],   0x00(%[quant_ptr])              \n\t"
     73       "gsldlc1    %[ftmp8],   0x0f(%[quant_ptr])              \n\t"
     74       "gsldrc1    %[ftmp8],   0x08(%[quant_ptr])              \n\t"
     75       "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
     76       "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
     77 
     78       "xor        %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
     79       "xor        %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
     80       "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
     81       "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
     82       "gssdlc1    %[ftmp7],   0x07(%[qcoeff_ptr])             \n\t"
     83       "gssdrc1    %[ftmp7],   0x00(%[qcoeff_ptr])             \n\t"
     84       "gssdlc1    %[ftmp8],   0x0f(%[qcoeff_ptr])             \n\t"
     85       "gssdrc1    %[ftmp8],   0x08(%[qcoeff_ptr])             \n\t"
     86 
     87       "gsldlc1    %[ftmp1],   0x07(%[inv_zig_zag])            \n\t"
     88       "gsldrc1    %[ftmp1],   0x00(%[inv_zig_zag])            \n\t"
     89       "gsldlc1    %[ftmp2],   0x0f(%[inv_zig_zag])            \n\t"
     90       "gsldrc1    %[ftmp2],   0x08(%[inv_zig_zag])            \n\t"
     91       "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
     92       "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
     93       "xor        %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
     94       "xor        %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
     95       "and        %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
     96       "and        %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
     97       "pmaxsh     %[ftmp10],  %[ftmp5],       %[ftmp6]        \n\t"
     98 
     99       "gsldlc1    %[ftmp5],   0x07(%[dequant_ptr])            \n\t"
    100       "gsldrc1    %[ftmp5],   0x00(%[dequant_ptr])            \n\t"
    101       "gsldlc1    %[ftmp6],   0x0f(%[dequant_ptr])            \n\t"
    102       "gsldrc1    %[ftmp6],   0x08(%[dequant_ptr])            \n\t"
    103       "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
    104       "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
    105       "gssdlc1    %[ftmp5],   0x07(%[dqcoeff_ptr])            \n\t"
    106       "gssdrc1    %[ftmp5],   0x00(%[dqcoeff_ptr])            \n\t"
    107       "gssdlc1    %[ftmp6],   0x0f(%[dqcoeff_ptr])            \n\t"
    108       "gssdrc1    %[ftmp6],   0x08(%[dqcoeff_ptr])            \n\t"
    109 
    110       // loop 8 ~ 15
    111       "gsldlc1    %[ftmp1],   0x17(%[coeff_ptr])              \n\t"
    112       "gsldrc1    %[ftmp1],   0x10(%[coeff_ptr])              \n\t"
    113       "gsldlc1    %[ftmp2],   0x1f(%[coeff_ptr])              \n\t"
    114       "gsldrc1    %[ftmp2],   0x18(%[coeff_ptr])              \n\t"
    115 
    116       "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
    117       "xor        %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
    118       "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
    119       "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
    120       "xor        %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
    121       "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"
    122 
    123       "gsldlc1    %[ftmp5],   0x17(%[round_ptr])              \n\t"
    124       "gsldrc1    %[ftmp5],   0x10(%[round_ptr])              \n\t"
    125       "gsldlc1    %[ftmp6],   0x1f(%[round_ptr])              \n\t"
    126       "gsldrc1    %[ftmp6],   0x18(%[round_ptr])              \n\t"
    127       "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
    128       "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
    129       "gsldlc1    %[ftmp7],   0x17(%[quant_ptr])              \n\t"
    130       "gsldrc1    %[ftmp7],   0x10(%[quant_ptr])              \n\t"
    131       "gsldlc1    %[ftmp8],   0x1f(%[quant_ptr])              \n\t"
    132       "gsldrc1    %[ftmp8],   0x18(%[quant_ptr])              \n\t"
    133       "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
    134       "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
    135 
    136       "xor        %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
    137       "xor        %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
    138       "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
    139       "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
    140       "gssdlc1    %[ftmp7],   0x17(%[qcoeff_ptr])             \n\t"
    141       "gssdrc1    %[ftmp7],   0x10(%[qcoeff_ptr])             \n\t"
    142       "gssdlc1    %[ftmp8],   0x1f(%[qcoeff_ptr])             \n\t"
    143       "gssdrc1    %[ftmp8],   0x18(%[qcoeff_ptr])             \n\t"
    144 
    145       "gsldlc1    %[ftmp1],   0x17(%[inv_zig_zag])            \n\t"
    146       "gsldrc1    %[ftmp1],   0x10(%[inv_zig_zag])            \n\t"
    147       "gsldlc1    %[ftmp2],   0x1f(%[inv_zig_zag])            \n\t"
    148       "gsldrc1    %[ftmp2],   0x18(%[inv_zig_zag])            \n\t"
    149       "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
    150       "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
    151       "xor        %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
    152       "xor        %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
    153       "and        %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
    154       "and        %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
    155       "pmaxsh     %[ftmp11],  %[ftmp5],       %[ftmp6]        \n\t"
    156 
    157       "gsldlc1    %[ftmp5],   0x17(%[dequant_ptr])            \n\t"
    158       "gsldrc1    %[ftmp5],   0x10(%[dequant_ptr])            \n\t"
    159       "gsldlc1    %[ftmp6],   0x1f(%[dequant_ptr])            \n\t"
    160       "gsldrc1    %[ftmp6],   0x18(%[dequant_ptr])            \n\t"
    161       "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
    162       "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
    163       "gssdlc1    %[ftmp5],   0x17(%[dqcoeff_ptr])            \n\t"
    164       "gssdrc1    %[ftmp5],   0x10(%[dqcoeff_ptr])            \n\t"
    165       "gssdlc1    %[ftmp6],   0x1f(%[dqcoeff_ptr])            \n\t"
    166       "gssdrc1    %[ftmp6],   0x18(%[dqcoeff_ptr])            \n\t"
    167 
    168       "li         %[tmp0],    0x10                            \n\t"
    169       "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
    170 
    171       "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
    172       "psrlw      %[ftmp11],  %[ftmp10],       %[ftmp9]       \n\t"
    173       "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
    174       "li         %[tmp0],    0xaa                            \n\t"
    175       "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
    176       "pshufh     %[ftmp11],  %[ftmp10],       %[ftmp9]       \n\t"
    177       "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
    178       "li         %[tmp0],    0xffff                          \n\t"
    179       "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
    180       "and        %[ftmp10],  %[ftmp10],       %[ftmp9]       \n\t"
    181       "gssdlc1    %[ftmp10],  0x07(%[eob])                    \n\t"
    182       "gssdrc1    %[ftmp10],  0x00(%[eob])                    \n\t"
    183       : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
    184         [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
    185         [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
    186         [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
    187         [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
    188       : [coeff_ptr] "r"((mips_reg)coeff_ptr),
    189         [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr),
    190         [dequant_ptr] "r"((mips_reg)dequant_ptr),
    191         [round_ptr] "r"((mips_reg)round_ptr),
    192         [quant_ptr] "r"((mips_reg)quant_ptr),
    193         [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr),
    194         [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob),
    195         [ones] "f"(ones)
    196       : "memory");
    197 
    198   *d->eob = eob;
    199 }
    200 
    201 void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
    202   int eob = 0;
    203   int x, y, z, sz, zbin;
    204   const int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
    205   const int16_t *coeff_ptr = b->coeff;
    206   const int16_t *zbin_ptr = b->zbin;
    207   const int16_t *round_ptr = b->round;
    208   const int16_t *quant_ptr = b->quant;
    209   const int16_t *quant_shift_ptr = b->quant_shift;
    210   int16_t *qcoeff_ptr = d->qcoeff;
    211   int16_t *dqcoeff_ptr = d->dqcoeff;
    212   const int16_t *dequant_ptr = d->dequant;
    213   const int16_t zbin_oq_value = b->zbin_extra;
    214   register double ftmp0 asm("$f0");
    215 
    216   //  memset(qcoeff_ptr, 0, 32);
    217   //  memset(dqcoeff_ptr, 0, 32);
    218   /* clang-format off */
    219   __asm__ volatile (
    220     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
    221     "gssdlc1    %[ftmp0],   0x07(%[qcoeff_ptr])             \n\t"
    222     "gssdrc1    %[ftmp0],   0x00(%[qcoeff_ptr])             \n\t"
    223     "gssdlc1    %[ftmp0],   0x0f(%[qcoeff_ptr])             \n\t"
    224     "gssdrc1    %[ftmp0],   0x08(%[qcoeff_ptr])             \n\t"
    225     "gssdlc1    %[ftmp0],   0x17(%[qcoeff_ptr])             \n\t"
    226     "gssdrc1    %[ftmp0],   0x10(%[qcoeff_ptr])             \n\t"
    227     "gssdlc1    %[ftmp0],   0x1f(%[qcoeff_ptr])             \n\t"
    228     "gssdrc1    %[ftmp0],   0x18(%[qcoeff_ptr])             \n\t"
    229 
    230     "gssdlc1    %[ftmp0],   0x07(%[dqcoeff_ptr])            \n\t"
    231     "gssdrc1    %[ftmp0],   0x00(%[dqcoeff_ptr])            \n\t"
    232     "gssdlc1    %[ftmp0],   0x0f(%[dqcoeff_ptr])            \n\t"
    233     "gssdrc1    %[ftmp0],   0x08(%[dqcoeff_ptr])            \n\t"
    234     "gssdlc1    %[ftmp0],   0x17(%[dqcoeff_ptr])            \n\t"
    235     "gssdrc1    %[ftmp0],   0x10(%[dqcoeff_ptr])            \n\t"
    236     "gssdlc1    %[ftmp0],   0x1f(%[dqcoeff_ptr])            \n\t"
    237     "gssdrc1    %[ftmp0],   0x18(%[dqcoeff_ptr])            \n\t"
    238     : [ftmp0]"=&f"(ftmp0)
    239     : [qcoeff_ptr]"r"(qcoeff_ptr), [dqcoeff_ptr]"r"(dqcoeff_ptr)
    240     : "memory"
    241   );
    242   /* clang-format on */
    243 
    244   REGULAR_SELECT_EOB(1, 0);
    245   REGULAR_SELECT_EOB(2, 1);
    246   REGULAR_SELECT_EOB(3, 4);
    247   REGULAR_SELECT_EOB(4, 8);
    248   REGULAR_SELECT_EOB(5, 5);
    249   REGULAR_SELECT_EOB(6, 2);
    250   REGULAR_SELECT_EOB(7, 3);
    251   REGULAR_SELECT_EOB(8, 6);
    252   REGULAR_SELECT_EOB(9, 9);
    253   REGULAR_SELECT_EOB(10, 12);
    254   REGULAR_SELECT_EOB(11, 13);
    255   REGULAR_SELECT_EOB(12, 10);
    256   REGULAR_SELECT_EOB(13, 7);
    257   REGULAR_SELECT_EOB(14, 11);
    258   REGULAR_SELECT_EOB(15, 14);
    259   REGULAR_SELECT_EOB(16, 15);
    260 
    261   *d->eob = (char)eob;
    262 }
    263