/*
 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_
#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_

#include <stdlib.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"

#if HAVE_DSPR2
/*
 * 4-tap loop filter applied in place.
 *
 * inputs & outputs are quad-byte vectors: each uint32_t packs four 8-bit
 * pixels, processed in parallel with MIPS DSP-ASE paired-halfword (.ph)
 * saturating instructions.  mask and hev are per-byte 0x00/0xFF selector
 * vectors supplied by the caller.  ps1/ps0 (pixels left of the edge) and
 * qs0/qs1 (right of the edge) are updated in place.
 */
static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
                                    uint32_t *ps1, uint32_t *ps0,
                                    uint32_t *qs0, uint32_t *qs1) {
  int32_t vp9_filter_l, vp9_filter_r;
  int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
  int32_t subr_r, subr_l;
  uint32_t t1, t2, HWM, t3;
  uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
  int32_t vps1, vps0, vqs0, vqs1;
  int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
  uint32_t N128;

  N128 = 0x80808080;          /* converts unsigned pixels to signed range */
  t1 = 0x03000300;            /* rounding constant +3 per halfword lane */
  t2 = 0x04000400;            /* rounding constant +4 per halfword lane */
  t3 = 0x01000100;            /* +1 per halfword lane, for (filter+1)>>1 */
  HWM = 0xFF00FF00;           /* keeps the byte in each halfword's top half */

  /* flip the sign bit: treat bytes as signed values centred on zero */
  vps0 = (*ps0) ^ N128;
  vps1 = (*ps1) ^ N128;
  vqs0 = (*qs0) ^ N128;
  vqs1 = (*qs1) ^ N128;

  /* use halfword pairs instead quad-bytes because of accuracy:
     each byte is widened into the high half of a 16-bit lane, split
     into a "left" (even bytes) and "right" (odd bytes) word */
  vps0_l = vps0 & HWM;
  vps0_r = vps0 << 8;
  vps0_r = vps0_r & HWM;

  vps1_l = vps1 & HWM;
  vps1_r = vps1 << 8;
  vps1_r = vps1_r & HWM;

  vqs0_l = vqs0 & HWM;
  vqs0_r = vqs0 << 8;
  vqs0_r = vqs0_r & HWM;

  vqs1_l = vqs1 & HWM;
  vqs1_r = vqs1 << 8;
  vqs1_r = vqs1_r & HWM;

  mask_l = mask & HWM;
  mask_r = mask << 8;
  mask_r = mask_r & HWM;

  hev_l = hev & HWM;
  hev_r = hev << 8;
  hev_r = hev_r & HWM;

  __asm__ __volatile__ (
      /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
      "subq_s.ph    %[vp9_filter_l], %[vps1_l],       %[vqs1_l]       \n\t"
      "subq_s.ph    %[vp9_filter_r], %[vps1_r],       %[vqs1_r]       \n\t"

      /* qs0 - ps0 */
      "subq_s.ph    %[subr_l],       %[vqs0_l],       %[vps0_l]       \n\t"
      "subq_s.ph    %[subr_r],       %[vqs0_r],       %[vps0_r]       \n\t"

      /* vp9_filter &= hev; */
      "and          %[vp9_filter_l], %[vp9_filter_l], %[hev_l]        \n\t"
      "and          %[vp9_filter_r], %[vp9_filter_r], %[hev_r]        \n\t"

      /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0));
         three saturating adds; the two xors computing ~hev are
         interleaved here to fill pipeline slots */
      "addq_s.ph    %[vp9_filter_l], %[vp9_filter_l], %[subr_l]       \n\t"
      "addq_s.ph    %[vp9_filter_r], %[vp9_filter_r], %[subr_r]       \n\t"
      "xor          %[invhev_l],     %[hev_l],        %[HWM]          \n\t"
      "addq_s.ph    %[vp9_filter_l], %[vp9_filter_l], %[subr_l]       \n\t"
      "addq_s.ph    %[vp9_filter_r], %[vp9_filter_r], %[subr_r]       \n\t"
      "xor          %[invhev_r],     %[hev_r],        %[HWM]          \n\t"
      "addq_s.ph    %[vp9_filter_l], %[vp9_filter_l], %[subr_l]       \n\t"
      "addq_s.ph    %[vp9_filter_r], %[vp9_filter_r], %[subr_r]       \n\t"

      /* vp9_filter &= mask; */
      "and          %[vp9_filter_l], %[vp9_filter_l], %[mask_l]       \n\t"
      "and          %[vp9_filter_r], %[vp9_filter_r], %[mask_r]       \n\t"

      : [vp9_filter_l] "=&r" (vp9_filter_l),
        [vp9_filter_r] "=&r" (vp9_filter_r),
        [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
        [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
      : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
        [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r),
        [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r),
        [mask_l] "r" (mask_l), [mask_r] "r" (mask_r),
        [hev_l] "r" (hev_l), [hev_r] "r" (hev_r),
        [HWM] "r" (HWM)
  );

  /* save bottom 3 bits so that we round one side +4 and the other +3 */
  __asm__ __volatile__ (
      /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >> 3;
         NOTE: t2 == 0x04000400, so this adds +4 per lane */
      "addq_s.ph    %[Filter1_l],    %[vp9_filter_l], %[t2]           \n\t"
      "addq_s.ph    %[Filter1_r],    %[vp9_filter_r], %[t2]           \n\t"

      /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >> 3;
         NOTE: t1 == 0x03000300, so this adds +3 per lane */
      "addq_s.ph    %[Filter2_l],    %[vp9_filter_l], %[t1]           \n\t"
      "addq_s.ph    %[Filter2_r],    %[vp9_filter_r], %[t1]           \n\t"
      "shra.ph      %[Filter1_r],    %[Filter1_r],    3               \n\t"
      "shra.ph      %[Filter1_l],    %[Filter1_l],    3               \n\t"

      "shra.ph      %[Filter2_l],    %[Filter2_l],    3               \n\t"
      "shra.ph      %[Filter2_r],    %[Filter2_r],    3               \n\t"

      /* keep only the top byte of each lane (the pixel lives there) */
      "and          %[Filter1_l],    %[Filter1_l],    %[HWM]          \n\t"
      "and          %[Filter1_r],    %[Filter1_r],    %[HWM]          \n\t"

      /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
      "addq_s.ph    %[vps0_l],       %[vps0_l],       %[Filter2_l]    \n\t"
      "addq_s.ph    %[vps0_r],       %[vps0_r],       %[Filter2_r]    \n\t"

      /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
      "subq_s.ph    %[vqs0_l],       %[vqs0_l],       %[Filter1_l]    \n\t"
      "subq_s.ph    %[vqs0_r],       %[vqs0_r],       %[Filter1_r]    \n\t"

      : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r),
        [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r),
        [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
        [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
      : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
        [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
  );

  __asm__ __volatile__ (
      /* (vp9_filter += 1) >>= 1 : average with +1 rounding via addqh */
      "addqh.ph    %[Filter1_l],    %[Filter1_l],     %[t3]           \n\t"
      "addqh.ph    %[Filter1_r],    %[Filter1_r],     %[t3]           \n\t"

      /* vp9_filter &= ~hev; outer pixels only move when hev is clear */
      "and          %[Filter1_l],    %[Filter1_l],    %[invhev_l]     \n\t"
      "and          %[Filter1_r],    %[Filter1_r],    %[invhev_r]     \n\t"

      /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
      "addq_s.ph    %[vps1_l],       %[vps1_l],       %[Filter1_l]    \n\t"
      "addq_s.ph    %[vps1_r],       %[vps1_r],       %[Filter1_r]    \n\t"

      /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
      "subq_s.ph    %[vqs1_l],       %[vqs1_l],       %[Filter1_l]    \n\t"
      "subq_s.ph    %[vqs1_r],       %[vqs1_r],       %[Filter1_r]    \n\t"

      : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r),
        [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r),
        [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r)
      : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r)
  );

  /* Create quad-bytes from halfword pairs */
  vqs0_l = vqs0_l & HWM;
  vqs1_l = vqs1_l & HWM;
  vps0_l = vps0_l & HWM;
  vps1_l = vps1_l & HWM;

  __asm__ __volatile__ (
      /* move the "right" bytes back down to the low half of each lane */
      "shrl.ph      %[vqs0_r],       %[vqs0_r],       8   \n\t"
      "shrl.ph      %[vps0_r],       %[vps0_r],       8   \n\t"
      "shrl.ph      %[vqs1_r],       %[vqs1_r],       8   \n\t"
      "shrl.ph      %[vps1_r],       %[vps1_r],       8   \n\t"

      : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r),
        [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r)
      :
  );

  vqs0 = vqs0_l | vqs0_r;
  vqs1 = vqs1_l | vqs1_r;
  vps0 = vps0_l | vps0_r;
  vps1 = vps1_l | vps1_r;

  /* flip back to unsigned pixel range and store */
  *ps0 = vps0 ^ N128;
  *ps1 = vps1 ^ N128;
  *qs0 = vqs0 ^ N128;
  *qs1 = vqs1 ^ N128;
}

/*
 * Same 4-tap filter as vp9_filter_dspr2(), but the input pixels are
 * passed by value and the filtered results are written to the separate
 * *_f0 outputs instead of in place (used when a flatness decision is
 * made per pixel after both candidate filters have run).
 */
static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
                                     uint32_t ps1, uint32_t ps0,
                                     uint32_t qs0, uint32_t qs1,
                                     uint32_t *p1_f0, uint32_t *p0_f0,
                                     uint32_t *q0_f0, uint32_t *q1_f0) {
  int32_t vp9_filter_l, vp9_filter_r;
  int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
  int32_t subr_r, subr_l;
  uint32_t t1, t2, HWM, t3;
  uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
  int32_t vps1, vps0, vqs0, vqs1;
  int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
  uint32_t N128;

  N128 = 0x80808080;          /* converts unsigned pixels to signed range */
  t1 = 0x03000300;            /* +3 per halfword lane */
  t2 = 0x04000400;            /* +4 per halfword lane */
  t3 = 0x01000100;            /* +1 per halfword lane */
  HWM = 0xFF00FF00;           /* keeps the byte in each halfword's top half */

  vps0 = (ps0) ^ N128;
  vps1 = (ps1) ^ N128;
  vqs0 = (qs0) ^ N128;
  vqs1 = (qs1) ^ N128;

  /* use halfword pairs instead quad-bytes because of accuracy */
  vps0_l = vps0 & HWM;
  vps0_r = vps0 << 8;
  vps0_r = vps0_r & HWM;

  vps1_l = vps1 & HWM;
  vps1_r = vps1 << 8;
  vps1_r = vps1_r & HWM;

  vqs0_l = vqs0 & HWM;
  vqs0_r = vqs0 << 8;
  vqs0_r = vqs0_r & HWM;

  vqs1_l = vqs1 & HWM;
  vqs1_r = vqs1 << 8;
  vqs1_r = vqs1_r & HWM;

  mask_l = mask & HWM;
  mask_r = mask << 8;
  mask_r = mask_r & HWM;

  hev_l = hev & HWM;
  hev_r = hev << 8;
  hev_r = hev_r & HWM;

  __asm__ __volatile__ (
      /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
      "subq_s.ph    %[vp9_filter_l], %[vps1_l],       %[vqs1_l]       \n\t"
      "subq_s.ph    %[vp9_filter_r], %[vps1_r],       %[vqs1_r]       \n\t"

      /* qs0 - ps0 */
      "subq_s.ph    %[subr_l],       %[vqs0_l],       %[vps0_l]       \n\t"
      "subq_s.ph    %[subr_r],       %[vqs0_r],       %[vps0_r]       \n\t"

      /* vp9_filter &= hev; */
      "and          %[vp9_filter_l], %[vp9_filter_l], %[hev_l]        \n\t"
      "and          %[vp9_filter_r], %[vp9_filter_r], %[hev_r]        \n\t"

      /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
      "addq_s.ph    %[vp9_filter_l], %[vp9_filter_l], %[subr_l]       \n\t"
      "addq_s.ph    %[vp9_filter_r], %[vp9_filter_r], %[subr_r]       \n\t"
      "xor          %[invhev_l],     %[hev_l],        %[HWM]          \n\t"
      "addq_s.ph    %[vp9_filter_l], %[vp9_filter_l], %[subr_l]       \n\t"
      "addq_s.ph    %[vp9_filter_r], %[vp9_filter_r], %[subr_r]       \n\t"
      "xor          %[invhev_r],     %[hev_r],        %[HWM]          \n\t"
      "addq_s.ph    %[vp9_filter_l], %[vp9_filter_l], %[subr_l]       \n\t"
      "addq_s.ph    %[vp9_filter_r], %[vp9_filter_r], %[subr_r]       \n\t"

      /* vp9_filter &= mask; */
      "and          %[vp9_filter_l], %[vp9_filter_l], %[mask_l]       \n\t"
      "and          %[vp9_filter_r], %[vp9_filter_r], %[mask_r]       \n\t"

      : [vp9_filter_l] "=&r" (vp9_filter_l),
        [vp9_filter_r] "=&r" (vp9_filter_r),
        [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
        [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
      : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
        [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r),
        [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r),
        [mask_l] "r" (mask_l), [mask_r] "r" (mask_r),
        [hev_l] "r" (hev_l), [hev_r] "r" (hev_r), [HWM] "r" (HWM)
  );

  /* save bottom 3 bits so that we round one side +4 and the other +3 */
  __asm__ __volatile__ (
      /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >> 3;
         NOTE: t2 == 0x04000400, so this adds +4 per lane */
      "addq_s.ph    %[Filter1_l],    %[vp9_filter_l], %[t2]           \n\t"
      "addq_s.ph    %[Filter1_r],    %[vp9_filter_r], %[t2]           \n\t"

      /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >> 3;
         NOTE: t1 == 0x03000300, so this adds +3 per lane */
      "addq_s.ph    %[Filter2_l],    %[vp9_filter_l], %[t1]           \n\t"
      "addq_s.ph    %[Filter2_r],    %[vp9_filter_r], %[t1]           \n\t"
      "shra.ph      %[Filter1_r],    %[Filter1_r],    3               \n\t"
      "shra.ph      %[Filter1_l],    %[Filter1_l],    3               \n\t"

      "shra.ph      %[Filter2_l],    %[Filter2_l],    3               \n\t"
      "shra.ph      %[Filter2_r],    %[Filter2_r],    3               \n\t"

      "and          %[Filter1_l],    %[Filter1_l],    %[HWM]          \n\t"
      "and          %[Filter1_r],    %[Filter1_r],    %[HWM]          \n\t"

      /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
      "addq_s.ph    %[vps0_l],       %[vps0_l],       %[Filter2_l]    \n\t"
      "addq_s.ph    %[vps0_r],       %[vps0_r],       %[Filter2_r]    \n\t"

      /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
      "subq_s.ph    %[vqs0_l],       %[vqs0_l],       %[Filter1_l]    \n\t"
      "subq_s.ph    %[vqs0_r],       %[vqs0_r],       %[Filter1_r]    \n\t"

      : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r),
        [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r),
        [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
        [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
      : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
        [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
  );

  __asm__ __volatile__ (
      /* (vp9_filter += 1) >>= 1 */
      "addqh.ph    %[Filter1_l],    %[Filter1_l],     %[t3]           \n\t"
      "addqh.ph    %[Filter1_r],    %[Filter1_r],     %[t3]           \n\t"

      /* vp9_filter &= ~hev; */
      "and          %[Filter1_l],    %[Filter1_l],    %[invhev_l]     \n\t"
      "and          %[Filter1_r],    %[Filter1_r],    %[invhev_r]     \n\t"

      /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
      "addq_s.ph    %[vps1_l],       %[vps1_l],       %[Filter1_l]    \n\t"
      "addq_s.ph    %[vps1_r],       %[vps1_r],       %[Filter1_r]    \n\t"

      /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
      "subq_s.ph    %[vqs1_l],       %[vqs1_l],       %[Filter1_l]    \n\t"
      "subq_s.ph    %[vqs1_r],       %[vqs1_r],       %[Filter1_r]    \n\t"

      : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r),
        [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r),
        [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r)
      : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r)
  );

  /* Create quad-bytes from halfword pairs */
  vqs0_l = vqs0_l & HWM;
  vqs1_l = vqs1_l & HWM;
  vps0_l = vps0_l & HWM;
  vps1_l = vps1_l & HWM;

  __asm__ __volatile__ (
      "shrl.ph      %[vqs0_r],       %[vqs0_r],       8   \n\t"
      "shrl.ph      %[vps0_r],       %[vps0_r],       8   \n\t"
      "shrl.ph      %[vqs1_r],       %[vqs1_r],       8   \n\t"
      "shrl.ph      %[vps1_r],       %[vps1_r],       8   \n\t"

      : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r),
        [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r)
      :
  );

  vqs0 = vqs0_l | vqs0_r;
  vqs1 = vqs1_l | vqs1_r;
  vps0 = vps0_l | vps0_r;
  vps1 = vps1_l | vps1_r;

  *p0_f0 = vps0 ^ N128;
  *p1_f0 = vps1 ^ N128;
  *q0_f0 = vqs0 ^ N128;
  *q1_f0 = vqs1 ^ N128;
}

/*
 * 7-tap "flat segment" filter, applied in place to four pixels packed
 * per word.  Each output is a rounded 8-sample average; the common sum
 * p2+p1+p0+q0+q1+q2 (+rounding 4) is computed once and the per-output
 * differences are added/subtracted around it.
 */
static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
                                      uint32_t *op1, uint32_t *op0,
                                      uint32_t *oq0, uint32_t *oq1,
                                      uint32_t *oq2, uint32_t *oq3) {
  /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
  const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
  const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
  uint32_t res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2;
  uint32_t tmp;
  uint32_t add_p210_q012;
  uint32_t u32Four = 0x00040004;  /* rounding term, per halfword lane */

  /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3)  1 */
  /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3)  2 */
  /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3)  3 */
  /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3)  4 */
  /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3)  5 */
  /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3)  6 */

  __asm__ __volatile__ (
      /* shared partial sum: p2+p1+p0+q0+q1+q2 + 4 */
      "addu.ph    %[add_p210_q012],  %[p2],             %[p1]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[p0]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[q0]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[q1]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[q2]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[u32Four]       \n\t"

      /* op2/op1/op0 and oq0/oq1/oq2 interleaved for dual-issue */
      "shll.ph    %[tmp],            %[p3],             1                \n\t"
      "addu.ph    %[res_op2],        %[tmp],            %[p3]            \n\t"
      "addu.ph    %[res_op1],        %[p3],             %[p3]            \n\t"
      "addu.ph    %[res_op2],        %[res_op2],        %[p2]            \n\t"
      "addu.ph    %[res_op1],        %[res_op1],        %[p1]            \n\t"
      "addu.ph    %[res_op2],        %[res_op2],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_op1],        %[res_op1],        %[add_p210_q012] \n\t"
      "subu.ph    %[res_op2],        %[res_op2],        %[q1]            \n\t"
      "subu.ph    %[res_op1],        %[res_op1],        %[q2]            \n\t"
      "subu.ph    %[res_op2],        %[res_op2],        %[q2]            \n\t"
      "shrl.ph    %[res_op1],        %[res_op1],        3                \n\t"
      "shrl.ph    %[res_op2],        %[res_op2],        3                \n\t"
      "addu.ph    %[res_op0],        %[p3],             %[p0]            \n\t"
      "addu.ph    %[res_oq0],        %[q0],             %[q3]            \n\t"
      "addu.ph    %[res_op0],        %[res_op0],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq0],        %[res_oq0],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq1],        %[q3],             %[q3]            \n\t"
      "shll.ph    %[tmp],            %[q3],             1                \n\t"
      "addu.ph    %[res_oq1],        %[res_oq1],        %[q1]            \n\t"
      "addu.ph    %[res_oq2],        %[tmp],            %[q3]            \n\t"
      "addu.ph    %[res_oq1],        %[res_oq1],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq2],        %[res_oq2],        %[add_p210_q012] \n\t"
      "subu.ph    %[res_oq1],        %[res_oq1],        %[p2]            \n\t"
      "addu.ph    %[res_oq2],        %[res_oq2],        %[q2]            \n\t"
      "shrl.ph    %[res_oq1],        %[res_oq1],        3                \n\t"
      "subu.ph    %[res_oq2],        %[res_oq2],        %[p2]            \n\t"
      "shrl.ph    %[res_oq0],        %[res_oq0],        3                \n\t"
      "subu.ph    %[res_oq2],        %[res_oq2],        %[p1]            \n\t"
      "shrl.ph    %[res_op0],        %[res_op0],        3                \n\t"
      "shrl.ph    %[res_oq2],        %[res_oq2],        3                \n\t"

      : [add_p210_q012] "=&r" (add_p210_q012),
        [tmp] "=&r" (tmp), [res_op2] "=&r" (res_op2),
        [res_op1] "=&r" (res_op1), [res_op0] "=&r" (res_op0),
        [res_oq0] "=&r" (res_oq0), [res_oq1] "=&r" (res_oq1),
        [res_oq2] "=&r" (res_oq2)
      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [q1] "r" (q1),
        [p2] "r" (p2), [q2] "r" (q2), [p3] "r" (p3), [q3] "r" (q3),
        [u32Four] "r" (u32Four)
  );

  *op2 = res_op2;
  *op1 = res_op1;
  *op0 = res_op0;
  *oq0 = res_oq0;
  *oq1 = res_oq1;
  *oq2 = res_oq2;
}

/*
 * Same 7-tap flat filter as vp9_mbfilter_dspr2(), but inputs are
 * passed by value and results go to the separate *_f1 outputs (second
 * candidate of a per-pixel filter selection).
 */
static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2,
                                       uint32_t p1, uint32_t p0,
                                       uint32_t q0, uint32_t q1,
                                       uint32_t q2, uint32_t q3,
                                       uint32_t *op2_f1,
                                       uint32_t *op1_f1, uint32_t *op0_f1,
                                       uint32_t *oq0_f1, uint32_t *oq1_f1,
                                       uint32_t *oq2_f1) {
  /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
  uint32_t res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2;
  uint32_t tmp;
  uint32_t add_p210_q012;
  uint32_t u32Four = 0x00040004;  /* rounding term, per halfword lane */

  /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3)  1 */
  /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3)  2 */
  /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3)  3 */
  /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3)  4 */
  /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3)  5 */
  /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3)  6 */

  __asm__ __volatile__ (
      "addu.ph    %[add_p210_q012],  %[p2],             %[p1]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[p0]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[q0]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[q1]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[q2]            \n\t"
      "addu.ph    %[add_p210_q012],  %[add_p210_q012],  %[u32Four]       \n\t"

      "shll.ph    %[tmp],            %[p3],             1                \n\t"
      "addu.ph    %[res_op2],        %[tmp],            %[p3]            \n\t"
      "addu.ph    %[res_op1],        %[p3],             %[p3]            \n\t"
      "addu.ph    %[res_op2],        %[res_op2],        %[p2]            \n\t"
      "addu.ph    %[res_op1],        %[res_op1],        %[p1]            \n\t"
      "addu.ph    %[res_op2],        %[res_op2],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_op1],        %[res_op1],        %[add_p210_q012] \n\t"
      "subu.ph    %[res_op2],        %[res_op2],        %[q1]            \n\t"
      "subu.ph    %[res_op1],        %[res_op1],        %[q2]            \n\t"
      "subu.ph    %[res_op2],        %[res_op2],        %[q2]            \n\t"
      "shrl.ph    %[res_op1],        %[res_op1],        3                \n\t"
      "shrl.ph    %[res_op2],        %[res_op2],        3                \n\t"
      "addu.ph    %[res_op0],        %[p3],             %[p0]            \n\t"
      "addu.ph    %[res_oq0],        %[q0],             %[q3]            \n\t"
      "addu.ph    %[res_op0],        %[res_op0],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq0],        %[res_oq0],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq1],        %[q3],             %[q3]            \n\t"
      "shll.ph    %[tmp],            %[q3],             1                \n\t"
      "addu.ph    %[res_oq1],        %[res_oq1],        %[q1]            \n\t"
      "addu.ph    %[res_oq2],        %[tmp],            %[q3]            \n\t"
      "addu.ph    %[res_oq1],        %[res_oq1],        %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq2],        %[res_oq2],        %[add_p210_q012] \n\t"
      "subu.ph    %[res_oq1],        %[res_oq1],        %[p2]            \n\t"
      "addu.ph    %[res_oq2],        %[res_oq2],        %[q2]            \n\t"
      "shrl.ph    %[res_oq1],        %[res_oq1],        3                \n\t"
      "subu.ph    %[res_oq2],        %[res_oq2],        %[p2]            \n\t"
      "shrl.ph    %[res_oq0],        %[res_oq0],        3                \n\t"
      "subu.ph    %[res_oq2],        %[res_oq2],        %[p1]            \n\t"
      "shrl.ph    %[res_op0],        %[res_op0],        3                \n\t"
      "shrl.ph    %[res_oq2],        %[res_oq2],        3                \n\t"

      : [add_p210_q012] "=&r" (add_p210_q012), [tmp] "=&r" (tmp),
        [res_op2] "=&r" (res_op2), [res_op1] "=&r" (res_op1),
        [res_op0] "=&r" (res_op0), [res_oq0] "=&r" (res_oq0),
        [res_oq1] "=&r" (res_oq1), [res_oq2] "=&r" (res_oq2)
      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [q1] "r" (q1),
        [p2] "r" (p2), [q2] "r" (q2), [p3] "r" (p3), [q3] "r" (q3),
        [u32Four] "r" (u32Four)
  );

  *op2_f1 = res_op2;
  *op1_f1 = res_op1;
  *op0_f1 = res_op0;
  *oq0_f1 = res_oq0;
  *oq1_f1 = res_oq1;
  *oq2_f1 = res_oq2;
}

/*
 * 15-tap wide filter for flat16 segments, applied in place.  Each of
 * the 14 filtered outputs is a rounded 16-sample average built from a
 * shared sum of p6..q6 (+rounding 8): the edge-weighted terms (p7*k or
 * q7*k plus a doubled centre tap) are added and the samples outside
 * each output's window are subtracted back out.
 */
static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
                                           uint32_t *op5, uint32_t *op4,
                                           uint32_t *op3, uint32_t *op2,
                                           uint32_t *op1, uint32_t *op0,
                                           uint32_t *oq0, uint32_t *oq1,
                                           uint32_t *oq2, uint32_t *oq3,
                                           uint32_t *oq4, uint32_t *oq5,
                                           uint32_t *oq6, uint32_t *oq7) {
  const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
  const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
  const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
  const uint32_t q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
  uint32_t res_op6, res_op5, res_op4, res_op3, res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2, res_oq3, res_oq4, res_oq5, res_oq6;
  uint32_t tmp;
  uint32_t add_p6toq6;
  uint32_t u32Eight = 0x00080008;  /* rounding term, per halfword lane */

  __asm__ __volatile__ (
      /* addition of p6,p5,p4,p3,p2,p1,p0,q0,q1,q2,q3,q4,q5,q6
         which is used most of the time */
      "addu.ph    %[add_p6toq6],     %[p6],             %[p5]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[p4]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[p3]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[p2]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[p1]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[p0]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q0]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q1]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q2]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q3]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q4]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q5]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[q6]         \n\t"
      "addu.ph    %[add_p6toq6],     %[add_p6toq6],     %[u32Eight]   \n\t"

      : [add_p6toq6] "=&r" (add_p6toq6)
      : [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4),
        [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
        [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2), [q3] "r" (q3),
        [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6),
        [u32Eight] "r" (u32Eight)
  );

  __asm__ __volatile__ (
      /* *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 +
                                   p3 + p2 + p1 + p0 + q0, 4)
         p7*7 via (p7 << 3) - p7; q1..q6 are subtracted out of the sum */
      "shll.ph       %[tmp],        %[p7],        3           \n\t"
      "subu.ph       %[res_op6],    %[tmp],       %[p7]       \n\t"
      "addu.ph       %[res_op6],    %[res_op6],   %[p6]       \n\t"
      "addu.ph       %[res_op6],    %[res_op6],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_op6],    %[res_op6],   %[q1]       \n\t"
      "subu.ph       %[res_op6],    %[res_op6],   %[q2]       \n\t"
      "subu.ph       %[res_op6],    %[res_op6],   %[q3]       \n\t"
      "subu.ph       %[res_op6],    %[res_op6],   %[q4]       \n\t"
      "subu.ph       %[res_op6],    %[res_op6],   %[q5]       \n\t"
      "subu.ph       %[res_op6],    %[res_op6],   %[q6]       \n\t"
      "shrl.ph       %[res_op6],    %[res_op6],   4           \n\t"

      /* *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 +
                                   p2 + p1 + p0 + q0 + q1, 4) */
      "shll.ph       %[tmp],        %[p7],        2           \n\t"
      "addu.ph       %[res_op5],    %[tmp],       %[p7]       \n\t"
      "addu.ph       %[res_op5],    %[res_op5],   %[p7]       \n\t"
      "addu.ph       %[res_op5],    %[res_op5],   %[p5]       \n\t"
      "addu.ph       %[res_op5],    %[res_op5],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_op5],    %[res_op5],   %[q2]       \n\t"
      "subu.ph       %[res_op5],    %[res_op5],   %[q3]       \n\t"
      "subu.ph       %[res_op5],    %[res_op5],   %[q4]       \n\t"
      "subu.ph       %[res_op5],    %[res_op5],   %[q5]       \n\t"
      "subu.ph       %[res_op5],    %[res_op5],   %[q6]       \n\t"
      "shrl.ph       %[res_op5],    %[res_op5],   4           \n\t"

      /* *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 +
                                   p1 + p0 + q0 + q1 + q2, 4) */
      "shll.ph       %[tmp],        %[p7],        2           \n\t"
      "addu.ph       %[res_op4],    %[tmp],       %[p7]       \n\t"
      "addu.ph       %[res_op4],    %[res_op4],   %[p4]       \n\t"
      "addu.ph       %[res_op4],    %[res_op4],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_op4],    %[res_op4],   %[q3]       \n\t"
      "subu.ph       %[res_op4],    %[res_op4],   %[q4]       \n\t"
      "subu.ph       %[res_op4],    %[res_op4],   %[q5]       \n\t"
      "subu.ph       %[res_op4],    %[res_op4],   %[q6]       \n\t"
      "shrl.ph       %[res_op4],    %[res_op4],   4           \n\t"

      /* *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 +
                                   p1 + p0 + q0 + q1 + q2 + q3, 4) */
      "shll.ph       %[tmp],        %[p7],        2           \n\t"
      "addu.ph       %[res_op3],    %[tmp],       %[p3]       \n\t"
      "addu.ph       %[res_op3],    %[res_op3],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_op3],    %[res_op3],   %[q4]       \n\t"
      "subu.ph       %[res_op3],    %[res_op3],   %[q5]       \n\t"
      "subu.ph       %[res_op3],    %[res_op3],   %[q6]       \n\t"
      "shrl.ph       %[res_op3],    %[res_op3],   4           \n\t"

      /* *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 +
                                   p0 + q0 + q1 + q2 + q3 + q4, 4) */
      "shll.ph       %[tmp],        %[p7],        1           \n\t"
      "addu.ph       %[res_op2],    %[tmp],       %[p7]       \n\t"
      "addu.ph       %[res_op2],    %[res_op2],   %[p2]       \n\t"
      "addu.ph       %[res_op2],    %[res_op2],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_op2],    %[res_op2],   %[q5]       \n\t"
      "subu.ph       %[res_op2],    %[res_op2],   %[q6]       \n\t"
      "shrl.ph       %[res_op2],    %[res_op2],   4           \n\t"

      /* *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
                                   p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); */
      "shll.ph       %[tmp],        %[p7],        1           \n\t"
      "addu.ph       %[res_op1],    %[tmp],       %[p1]       \n\t"
      "addu.ph       %[res_op1],    %[res_op1],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_op1],    %[res_op1],   %[q6]       \n\t"
      "shrl.ph       %[res_op1],    %[res_op1],   4           \n\t"

      /* *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
                                   q0 + q1 + q2 + q3 + q4 + q5 + q6, 4) */
      "addu.ph       %[res_op0],    %[p7],        %[p0]       \n\t"
      "addu.ph       %[res_op0],    %[res_op0],   %[add_p6toq6] \n\t"
      "shrl.ph       %[res_op0],    %[res_op0],   4           \n\t"

      : [res_op6] "=&r" (res_op6), [res_op5] "=&r" (res_op5),
        [res_op4] "=&r" (res_op4), [res_op3] "=&r" (res_op3),
        [res_op2] "=&r" (res_op2), [res_op1] "=&r" (res_op1),
        [res_op0] "=&r" (res_op0), [tmp] "=&r" (tmp)
      : [p7] "r" (p7), [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4),
        [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
        [q2] "r" (q2), [q1] "r" (q1),
        [q3] "r" (q3), [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6),
        [add_p6toq6] "r" (add_p6toq6)
  );

  *op6 = res_op6;
  *op5 = res_op5;
  *op4 = res_op4;
  *op3 = res_op3;
  *op2 = res_op2;
  *op1 = res_op1;
  *op0 = res_op0;

  __asm__ __volatile__ (
      /* *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
                                   q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); */
      "addu.ph       %[res_oq0],    %[q7],        %[q0]       \n\t"
      "addu.ph       %[res_oq0],    %[res_oq0],   %[add_p6toq6] \n\t"
      "shrl.ph       %[res_oq0],    %[res_oq0],   4           \n\t"

      /* *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
                                   q2 + q3 + q4 + q5 + q6 + q7 * 2, 4) */
      "shll.ph       %[tmp],        %[q7],        1           \n\t"
      "addu.ph       %[res_oq1],    %[tmp],       %[q1]       \n\t"
      "addu.ph       %[res_oq1],    %[res_oq1],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_oq1],    %[res_oq1],   %[p6]       \n\t"
      "shrl.ph       %[res_oq1],    %[res_oq1],   4           \n\t"

      /* *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
                                   q3 + q4 + q5 + q6 + q7 * 3, 4) */
      "shll.ph       %[tmp],        %[q7],        1           \n\t"
      "addu.ph       %[res_oq2],    %[tmp],       %[q7]       \n\t"
      "addu.ph       %[res_oq2],    %[res_oq2],   %[q2]       \n\t"
      "addu.ph       %[res_oq2],    %[res_oq2],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_oq2],    %[res_oq2],   %[p5]       \n\t"
      "subu.ph       %[res_oq2],    %[res_oq2],   %[p6]       \n\t"
      "shrl.ph       %[res_oq2],    %[res_oq2],   4           \n\t"

      /* *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + q0 + q1 + q2 +
                                   q3 * 2 + q4 + q5 + q6 + q7 * 4, 4) */
      "shll.ph       %[tmp],        %[q7],        2           \n\t"
      "addu.ph       %[res_oq3],    %[tmp],       %[q3]       \n\t"
      "addu.ph       %[res_oq3],    %[res_oq3],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_oq3],    %[res_oq3],   %[p4]       \n\t"
      "subu.ph       %[res_oq3],    %[res_oq3],   %[p5]       \n\t"
      "subu.ph       %[res_oq3],    %[res_oq3],   %[p6]       \n\t"
      "shrl.ph       %[res_oq3],    %[res_oq3],   4           \n\t"

      /* *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q1 + q2 + q3 +
                                   q4 * 2 + q5 + q6 + q7 * 5, 4) */
      "shll.ph       %[tmp],        %[q7],        2           \n\t"
      "addu.ph       %[res_oq4],    %[tmp],       %[q7]       \n\t"
      "addu.ph       %[res_oq4],    %[res_oq4],   %[q4]       \n\t"
      "addu.ph       %[res_oq4],    %[res_oq4],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_oq4],    %[res_oq4],   %[p3]       \n\t"
      "subu.ph       %[res_oq4],    %[res_oq4],   %[p4]       \n\t"
      "subu.ph       %[res_oq4],    %[res_oq4],   %[p5]       \n\t"
      "subu.ph       %[res_oq4],    %[res_oq4],   %[p6]       \n\t"
      "shrl.ph       %[res_oq4],    %[res_oq4],   4           \n\t"

      /* *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q2 + q3 + q4 +
                                   q5 * 2 + q6 + q7 * 6, 4) */
      "shll.ph       %[tmp],        %[q7],        2           \n\t"
      "addu.ph       %[res_oq5],    %[tmp],       %[q7]       \n\t"
      "addu.ph       %[res_oq5],    %[res_oq5],   %[q7]       \n\t"
      "addu.ph       %[res_oq5],    %[res_oq5],   %[q5]       \n\t"
      "addu.ph       %[res_oq5],    %[res_oq5],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_oq5],    %[res_oq5],   %[p2]       \n\t"
      "subu.ph       %[res_oq5],    %[res_oq5],   %[p3]       \n\t"
      "subu.ph       %[res_oq5],    %[res_oq5],   %[p4]       \n\t"
      "subu.ph       %[res_oq5],    %[res_oq5],   %[p5]       \n\t"
      "subu.ph       %[res_oq5],    %[res_oq5],   %[p6]       \n\t"
      "shrl.ph       %[res_oq5],    %[res_oq5],   4           \n\t"

      /* *oq6 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 +
                                   q4 + q5 + q6 * 2 + q7 * 7, 4)
         q7*7 via (q7 << 3) - q7 */
      "shll.ph       %[tmp],        %[q7],        3           \n\t"
      "subu.ph       %[res_oq6],    %[tmp],       %[q7]       \n\t"
      "addu.ph       %[res_oq6],    %[res_oq6],   %[q6]       \n\t"
      "addu.ph       %[res_oq6],    %[res_oq6],   %[add_p6toq6] \n\t"
      "subu.ph       %[res_oq6],    %[res_oq6],   %[p1]       \n\t"
      "subu.ph       %[res_oq6],    %[res_oq6],   %[p2]       \n\t"
      "subu.ph       %[res_oq6],    %[res_oq6],   %[p3]       \n\t"
      "subu.ph       %[res_oq6],    %[res_oq6],   %[p4]       \n\t"
      "subu.ph       %[res_oq6],    %[res_oq6],   %[p5]       \n\t"
      "subu.ph       %[res_oq6],    %[res_oq6],   %[p6]       \n\t"
      "shrl.ph       %[res_oq6],    %[res_oq6],   4           \n\t"

      : [res_oq6] "=&r" (res_oq6), [res_oq5] "=&r" (res_oq5),
        [res_oq4] "=&r" (res_oq4), [res_oq3] "=&r" (res_oq3),
        [res_oq2] "=&r" (res_oq2), [res_oq1] "=&r" (res_oq1),
        [res_oq0] "=&r" (res_oq0), [tmp] "=&r" (tmp)
      : [q7] "r" (q7), [q6] "r" (q6), [q5] "r" (q5), [q4] "r" (q4),
        [q3] "r" (q3), [q2] "r" (q2), [q1] "r" (q1), [q0] "r" (q0),
        [p1] "r" (p1), [p2] "r" (p2),
        [p3] "r" (p3), [p4] "r" (p4), [p5] "r" (p5), [p6] "r" (p6),
        [add_p6toq6] "r" (add_p6toq6)
  );

  *oq0 = res_oq0;
  *oq1 = res_oq1;
  *oq2 = res_oq2;
  *oq3 = res_oq3;
  *oq4 = res_oq4;
  *oq5 = res_oq5;
  *oq6 = res_oq6;
}
#endif  // #if HAVE_DSPR2
#endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_