1 /* 2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "vpx_mem/vpx_mem.h" 12 #include "vpx_ports/asmdefs_mmi.h" 13 #include "vp8/encoder/onyx_int.h" 14 #include "vp8/encoder/quantize.h" 15 #include "vp8/common/quant_common.h" 16 17 #define REGULAR_SELECT_EOB(i, rc) \ 18 z = coeff_ptr[rc]; \ 19 sz = (z >> 31); \ 20 x = (z ^ sz) - sz; \ 21 zbin = zbin_ptr[rc] + *(zbin_boost_ptr++) + zbin_oq_value; \ 22 if (x >= zbin) { \ 23 x += round_ptr[rc]; \ 24 y = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16; \ 25 if (y) { \ 26 x = (y ^ sz) - sz; \ 27 qcoeff_ptr[rc] = x; \ 28 dqcoeff_ptr[rc] = x * dequant_ptr[rc]; \ 29 eob = i; \ 30 zbin_boost_ptr = b->zrun_zbin_boost; \ 31 } \ 32 } 33 34 void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) { 35 const int16_t *coeff_ptr = b->coeff; 36 const int16_t *round_ptr = b->round; 37 const int16_t *quant_ptr = b->quant_fast; 38 int16_t *qcoeff_ptr = d->qcoeff; 39 int16_t *dqcoeff_ptr = d->dqcoeff; 40 const int16_t *dequant_ptr = d->dequant; 41 const int16_t *inv_zig_zag = vp8_default_inv_zig_zag; 42 43 double ftmp[13]; 44 uint64_t tmp[1]; 45 DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL }; 46 int eob = 0; 47 48 __asm__ volatile( 49 // loop 0 ~ 7 50 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 51 "gsldlc1 %[ftmp1], 0x07(%[coeff_ptr]) \n\t" 52 "gsldrc1 %[ftmp1], 0x00(%[coeff_ptr]) \n\t" 53 "li %[tmp0], 0x0f \n\t" 54 "mtc1 %[tmp0], %[ftmp9] \n\t" 55 "gsldlc1 %[ftmp2], 0x0f(%[coeff_ptr]) \n\t" 56 "gsldrc1 %[ftmp2], 0x08(%[coeff_ptr]) \n\t" 57 58 "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t" 59 "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" 60 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 61 "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t" 62 "xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t" 63 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 64 65 "gsldlc1 %[ftmp5], 0x07(%[round_ptr]) \n\t" 66 "gsldrc1 %[ftmp5], 0x00(%[round_ptr]) \n\t" 67 "gsldlc1 %[ftmp6], 0x0f(%[round_ptr]) \n\t" 68 "gsldrc1 %[ftmp6], 0x08(%[round_ptr]) \n\t" 69 "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 70 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 71 "gsldlc1 %[ftmp7], 0x07(%[quant_ptr]) \n\t" 72 "gsldrc1 %[ftmp7], 0x00(%[quant_ptr]) \n\t" 73 "gsldlc1 %[ftmp8], 0x0f(%[quant_ptr]) \n\t" 74 "gsldrc1 %[ftmp8], 0x08(%[quant_ptr]) \n\t" 75 "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 76 "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 77 78 "xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t" 79 "xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t" 80 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 81 "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" 82 "gssdlc1 %[ftmp7], 0x07(%[qcoeff_ptr]) \n\t" 83 "gssdrc1 %[ftmp7], 0x00(%[qcoeff_ptr]) \n\t" 84 "gssdlc1 %[ftmp8], 0x0f(%[qcoeff_ptr]) \n\t" 85 "gssdrc1 %[ftmp8], 0x08(%[qcoeff_ptr]) \n\t" 86 87 "gsldlc1 %[ftmp1], 0x07(%[inv_zig_zag]) \n\t" 88 "gsldrc1 %[ftmp1], 0x00(%[inv_zig_zag]) \n\t" 89 "gsldlc1 %[ftmp2], 0x0f(%[inv_zig_zag]) \n\t" 90 "gsldrc1 %[ftmp2], 0x08(%[inv_zig_zag]) \n\t" 91 "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 92 "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 93 "xor %[ftmp5], %[ftmp5], %[ones] \n\t" 94 "xor %[ftmp6], %[ftmp6], %[ones] \n\t" 95 "and %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 96 "and %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 97 "pmaxsh %[ftmp10], %[ftmp5], %[ftmp6] \n\t" 98 99 "gsldlc1 %[ftmp5], 0x07(%[dequant_ptr]) \n\t" 100 "gsldrc1 %[ftmp5], 0x00(%[dequant_ptr]) \n\t" 101 "gsldlc1 %[ftmp6], 0x0f(%[dequant_ptr]) \n\t" 102 "gsldrc1 %[ftmp6], 0x08(%[dequant_ptr]) \n\t" 103 "pmullh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 104 "pmullh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 105 "gssdlc1 %[ftmp5], 0x07(%[dqcoeff_ptr]) \n\t" 106 "gssdrc1 %[ftmp5], 0x00(%[dqcoeff_ptr]) \n\t" 107 "gssdlc1 %[ftmp6], 0x0f(%[dqcoeff_ptr]) \n\t" 108 "gssdrc1 %[ftmp6], 0x08(%[dqcoeff_ptr]) \n\t" 109 110 // loop 8 ~ 15 111 "gsldlc1 %[ftmp1], 0x17(%[coeff_ptr]) \n\t" 112 "gsldrc1 %[ftmp1], 0x10(%[coeff_ptr]) \n\t" 113 "gsldlc1 %[ftmp2], 0x1f(%[coeff_ptr]) \n\t" 114 "gsldrc1 %[ftmp2], 0x18(%[coeff_ptr]) \n\t" 115 116 "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t" 117 "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" 118 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 119 "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t" 120 "xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t" 121 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 122 123 "gsldlc1 %[ftmp5], 0x17(%[round_ptr]) \n\t" 124 "gsldrc1 %[ftmp5], 0x10(%[round_ptr]) \n\t" 125 "gsldlc1 %[ftmp6], 0x1f(%[round_ptr]) \n\t" 126 "gsldrc1 %[ftmp6], 0x18(%[round_ptr]) \n\t" 127 "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 128 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 129 "gsldlc1 %[ftmp7], 0x17(%[quant_ptr]) \n\t" 130 "gsldrc1 %[ftmp7], 0x10(%[quant_ptr]) \n\t" 131 "gsldlc1 %[ftmp8], 0x1f(%[quant_ptr]) \n\t" 132 "gsldrc1 %[ftmp8], 0x18(%[quant_ptr]) \n\t" 133 "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 134 "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 135 136 "xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t" 137 "xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t" 138 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 139 "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" 140 "gssdlc1 %[ftmp7], 0x17(%[qcoeff_ptr]) \n\t" 141 "gssdrc1 %[ftmp7], 0x10(%[qcoeff_ptr]) \n\t" 142 "gssdlc1 %[ftmp8], 0x1f(%[qcoeff_ptr]) \n\t" 143 "gssdrc1 %[ftmp8], 0x18(%[qcoeff_ptr]) \n\t" 144 145 "gsldlc1 %[ftmp1], 0x17(%[inv_zig_zag]) \n\t" 146 "gsldrc1 %[ftmp1], 0x10(%[inv_zig_zag]) \n\t" 147 "gsldlc1 %[ftmp2], 0x1f(%[inv_zig_zag]) \n\t" 148 "gsldrc1 %[ftmp2], 0x18(%[inv_zig_zag]) \n\t" 149 "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 150 "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 151 "xor %[ftmp5], %[ftmp5], %[ones] \n\t" 152 "xor %[ftmp6], %[ftmp6], %[ones] \n\t" 153 "and %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 154 "and %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 155 "pmaxsh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" 156 157 "gsldlc1 %[ftmp5], 0x17(%[dequant_ptr]) \n\t" 158 "gsldrc1 %[ftmp5], 0x10(%[dequant_ptr]) \n\t" 159 "gsldlc1 %[ftmp6], 0x1f(%[dequant_ptr]) \n\t" 160 "gsldrc1 %[ftmp6], 0x18(%[dequant_ptr]) \n\t" 161 "pmullh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 162 "pmullh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 163 "gssdlc1 %[ftmp5], 0x17(%[dqcoeff_ptr]) \n\t" 164 "gssdrc1 %[ftmp5], 0x10(%[dqcoeff_ptr]) \n\t" 165 "gssdlc1 %[ftmp6], 0x1f(%[dqcoeff_ptr]) \n\t" 166 "gssdrc1 %[ftmp6], 0x18(%[dqcoeff_ptr]) \n\t" 167 168 "li %[tmp0], 0x10 \n\t" 169 "mtc1 %[tmp0], %[ftmp9] \n\t" 170 171 "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" 172 "psrlw %[ftmp11], %[ftmp10], %[ftmp9] \n\t" 173 "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" 174 "li %[tmp0], 0xaa \n\t" 175 "mtc1 %[tmp0], %[ftmp9] \n\t" 176 "pshufh %[ftmp11], %[ftmp10], %[ftmp9] \n\t" 177 "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" 178 "li %[tmp0], 0xffff \n\t" 179 "mtc1 %[tmp0], %[ftmp9] \n\t" 180 "and %[ftmp10], %[ftmp10], %[ftmp9] \n\t" 181 "gssdlc1 %[ftmp10], 0x07(%[eob]) \n\t" 182 "gssdrc1 %[ftmp10], 0x00(%[eob]) \n\t" 183 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), 184 [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), 185 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), 186 [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), 187 [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) 188 : [coeff_ptr] "r"((mips_reg)coeff_ptr), 189 [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr), 190 [dequant_ptr] "r"((mips_reg)dequant_ptr), 191 [round_ptr] "r"((mips_reg)round_ptr), 192 [quant_ptr] "r"((mips_reg)quant_ptr), 193 [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr), 194 [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob), 195 [ones] "f"(ones) 196 : "memory"); 197 198 *d->eob = eob; 199 } 200 201 void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) { 202 int eob = 0; 203 int x, y, z, sz, zbin; 204 const int16_t *zbin_boost_ptr = b->zrun_zbin_boost; 205 const int16_t *coeff_ptr = b->coeff; 206 const int16_t *zbin_ptr = b->zbin; 207 const int16_t *round_ptr = b->round; 208 const int16_t *quant_ptr = b->quant; 209 const int16_t *quant_shift_ptr = b->quant_shift; 210 int16_t *qcoeff_ptr = d->qcoeff; 211 int16_t *dqcoeff_ptr = d->dqcoeff; 212 const int16_t *dequant_ptr = d->dequant; 213 const int16_t zbin_oq_value = b->zbin_extra; 214 register double ftmp0 asm("$f0"); 215 216 // memset(qcoeff_ptr, 0, 32); 217 // memset(dqcoeff_ptr, 0, 32); 218 /* clang-format off */ 219 __asm__ volatile ( 220 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 221 "gssdlc1 %[ftmp0], 0x07(%[qcoeff_ptr]) \n\t" 222 "gssdrc1 %[ftmp0], 0x00(%[qcoeff_ptr]) \n\t" 223 "gssdlc1 %[ftmp0], 0x0f(%[qcoeff_ptr]) \n\t" 224 "gssdrc1 %[ftmp0], 0x08(%[qcoeff_ptr]) \n\t" 225 "gssdlc1 %[ftmp0], 0x17(%[qcoeff_ptr]) \n\t" 226 "gssdrc1 %[ftmp0], 0x10(%[qcoeff_ptr]) \n\t" 227 "gssdlc1 %[ftmp0], 0x1f(%[qcoeff_ptr]) \n\t" 228 "gssdrc1 %[ftmp0], 0x18(%[qcoeff_ptr]) \n\t" 229 230 "gssdlc1 %[ftmp0], 0x07(%[dqcoeff_ptr]) \n\t" 231 "gssdrc1 %[ftmp0], 0x00(%[dqcoeff_ptr]) \n\t" 232 "gssdlc1 %[ftmp0], 0x0f(%[dqcoeff_ptr]) \n\t" 233 "gssdrc1 %[ftmp0], 0x08(%[dqcoeff_ptr]) \n\t" 234 "gssdlc1 %[ftmp0], 0x17(%[dqcoeff_ptr]) \n\t" 235 "gssdrc1 %[ftmp0], 0x10(%[dqcoeff_ptr]) \n\t" 236 "gssdlc1 %[ftmp0], 0x1f(%[dqcoeff_ptr]) \n\t" 237 "gssdrc1 %[ftmp0], 0x18(%[dqcoeff_ptr]) \n\t" 238 : [ftmp0]"=&f"(ftmp0) 239 : [qcoeff_ptr]"r"(qcoeff_ptr), [dqcoeff_ptr]"r"(dqcoeff_ptr) 240 : "memory" 241 ); 242 /* clang-format on */ 243 244 REGULAR_SELECT_EOB(1, 0); 245 REGULAR_SELECT_EOB(2, 1); 246 REGULAR_SELECT_EOB(3, 4); 247 REGULAR_SELECT_EOB(4, 8); 248 REGULAR_SELECT_EOB(5, 5); 249 REGULAR_SELECT_EOB(6, 2); 250 REGULAR_SELECT_EOB(7, 3); 251 REGULAR_SELECT_EOB(8, 6); 252 REGULAR_SELECT_EOB(9, 9); 253 REGULAR_SELECT_EOB(10, 12); 254 REGULAR_SELECT_EOB(11, 13); 255 REGULAR_SELECT_EOB(12, 10); 256 REGULAR_SELECT_EOB(13, 7); 257 REGULAR_SELECT_EOB(14, 11); 258 REGULAR_SELECT_EOB(15, 14); 259 REGULAR_SELECT_EOB(16, 15); 260 261 *d->eob = (char)eob; 262 } 263