1 /* 2 * Copyright 2014 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkBlitRow.h" 9 #include "SkBlitMask.h" 10 #include "SkColorPriv.h" 11 #include "SkDither.h" 12 #include "SkMathPriv.h" 13 14 static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst, 15 const SkPMColor* SK_RESTRICT src, int count, 16 U8CPU alpha, int /*x*/, int /*y*/) { 17 register uint32_t t0, t1, t2, t3, t4, t5, t6; 18 register uint32_t s0, s1, s2, s4, s5, s6; 19 20 alpha += 1; 21 if (count >= 2) { 22 __asm__ volatile ( 23 ".set push \n\t" 24 ".set noreorder \n\t" 25 "sll %[s4], %[alpha], 8 \n\t" 26 "or %[s4], %[s4], %[alpha] \n\t" 27 "repl.ph %[s5], 0x1f \n\t" 28 "repl.ph %[s6], 0x3f \n\t" 29 "1: \n\t" 30 "lw %[s2], 0(%[src]) \n\t" 31 "lw %[s1], 4(%[src]) \n\t" 32 "lwr %[s0], 0(%[dst]) \n\t" 33 "lwl %[s0], 3(%[dst]) \n\t" 34 "and %[t1], %[s0], %[s5] \n\t" 35 "shra.ph %[t0], %[s0], 5 \n\t" 36 "and %[t2], %[t0], %[s6] \n\t" 37 #ifdef __MIPS_HAVE_DSPR2 38 "shrl.ph %[t3], %[s0], 11 \n\t" 39 #else 40 "shra.ph %[t0], %[s0], 11 \n\t" 41 "and %[t3], %[t0], %[s5] \n\t" 42 #endif 43 "precrq.ph.w %[t0], %[s1], %[s2] \n\t" 44 "shrl.qb %[t5], %[t0], 3 \n\t" 45 "and %[t4], %[t5], %[s5] \n\t" 46 "ins %[s2], %[s1], 16, 16 \n\t" 47 "preceu.ph.qbra %[t0], %[s2] \n\t" 48 "shrl.qb %[t6], %[t0], 3 \n\t" 49 #ifdef __MIPS_HAVE_DSPR2 50 "shrl.ph %[t5], %[s2], 10 \n\t" 51 #else 52 "shra.ph %[t0], %[s2], 10 \n\t" 53 "and %[t5], %[t0], %[s6] \n\t" 54 #endif 55 "subu.qb %[t4], %[t4], %[t1] \n\t" 56 "subu.qb %[t5], %[t5], %[t2] \n\t" 57 "subu.qb %[t6], %[t6], %[t3] \n\t" 58 "muleu_s.ph.qbr %[t4], %[s4], %[t4] \n\t" 59 "muleu_s.ph.qbr %[t5], %[s4], %[t5] \n\t" 60 "muleu_s.ph.qbr %[t6], %[s4], %[t6] \n\t" 61 "addiu %[count], %[count], -2 \n\t" 62 "addiu %[src], %[src], 8 \n\t" 63 "shra.ph %[t4], %[t4], 8 \n\t" 64 "shra.ph %[t5], %[t5], 8 \n\t" 65 "shra.ph %[t6], %[t6], 8 \n\t" 66 "addu.qb %[t4], %[t4], %[t1] \n\t" 67 "addu.qb %[t5], %[t5], %[t2] \n\t" 68 "addu.qb %[t6], %[t6], %[t3] \n\t" 69 "andi %[s0], %[t4], 0xffff \n\t" 70 "andi %[t0], %[t5], 0xffff \n\t" 71 "sll %[t0], %[t0], 0x5 \n\t" 72 "or %[s0], %[s0], %[t0] \n\t" 73 "sll %[t0], %[t6], 0xb \n\t" 74 "or %[t0], %[t0], %[s0] \n\t" 75 "sh %[t0], 0(%[dst]) \n\t" 76 "srl %[s1], %[t4], 16 \n\t" 77 "srl %[t0], %[t5], 16 \n\t" 78 "sll %[t5], %[t0], 5 \n\t" 79 "or %[t0], %[t5], %[s1] \n\t" 80 "srl %[s0], %[t6], 16 \n\t" 81 "sll %[s2], %[s0], 0xb \n\t" 82 "or %[s1], %[s2], %[t0] \n\t" 83 "sh %[s1], 2(%[dst]) \n\t" 84 "bge %[count], 2, 1b \n\t" 85 " addiu %[dst], %[dst], 4 \n\t" 86 ".set pop \n\t" 87 : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 88 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0), 89 [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5), 90 [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst), 91 [src]"+r"(src) 92 : [alpha]"r"(alpha) 93 : "memory", "hi", "lo" 94 ); 95 } 96 97 if (count == 1) { 98 SkPMColor c = *src++; 99 SkPMColorAssert(c); 100 SkASSERT(SkGetPackedA32(c) == 255); 101 uint16_t d = *dst; 102 *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha), 103 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha), 104 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha)); 105 } 106 } 107 108 static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst, 109 const SkPMColor* __restrict__ src, 110 int count, U8CPU alpha, int x, int y) { 111 __asm__ volatile ( 112 "pref 0, 0(%[src]) \n\t" 113 "pref 1, 0(%[dst]) \n\t" 114 "pref 0, 32(%[src]) \n\t" 115 "pref 1, 32(%[dst]) \n\t" 116 : 117 : [src]"r"(src), [dst]"r"(dst) 118 : "memory" 119 ); 120 121 register int32_t t0, t1, t2, t3, t4, t5, t6; 122 register int32_t t7, t8, t9, s0, s1, s2, s3; 123 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3]; 124 125 if (count >= 2) { 126 __asm__ volatile ( 127 ".set push \n\t" 128 ".set noreorder \n\t" 129 "li %[s1], 0x01010101 \n\t" 130 "li %[s2], -2017 \n\t" 131 "1: \n\t" 132 "bnez %[s3], 4f \n\t" 133 " li %[s3], 2 \n\t" 134 "pref 0, 64(%[src]) \n\t" 135 "pref 1, 64(%[dst]) \n\t" 136 "4: \n\t" 137 "addiu %[s3], %[s3], -1 \n\t" 138 "lw %[t1], 0(%[src]) \n\t" 139 "andi %[t3], %[x], 0x3 \n\t" 140 "addiu %[x], %[x], 1 \n\t" 141 "sll %[t4], %[t3], 2 \n\t" 142 "srav %[t5], %[dither_scan], %[t4] \n\t" 143 "andi %[t3], %[t5], 0xf \n\t" 144 "lw %[t2], 4(%[src]) \n\t" 145 "andi %[t4], %[x], 0x3 \n\t" 146 "sll %[t5], %[t4], 2 \n\t" 147 "srav %[t6], %[dither_scan], %[t5] \n\t" 148 "addiu %[x], %[x], 1 \n\t" 149 "ins %[t3], %[t6], 8, 4 \n\t" 150 "srl %[t4], %[t1], 24 \n\t" 151 "addiu %[t0], %[t4], 1 \n\t" 152 "srl %[t4], %[t2], 24 \n\t" 153 "addiu %[t5], %[t4], 1 \n\t" 154 "ins %[t0], %[t5], 16, 16 \n\t" 155 "muleu_s.ph.qbr %[t4], %[t3], %[t0] \n\t" 156 "preceu.ph.qbla %[t3], %[t4] \n\t" 157 "andi %[t4], %[t1], 0xff \n\t" 158 "ins %[t4], %[t2], 16, 8 \n\t" 159 "shrl.qb %[t5], %[t4], 5 \n\t" 160 "subu.qb %[t6], %[t3], %[t5] \n\t" 161 "addq.ph %[t5], %[t6], %[t4] \n\t" 162 "ext %[t4], %[t1], 8, 8 \n\t" 163 "srl %[t6], %[t2], 8 \n\t" 164 "ins %[t4], %[t6], 16, 8 \n\t" 165 "shrl.qb %[t6], %[t4], 6 \n\t" 166 "shrl.qb %[t7], %[t3], 1 \n\t" 167 "subu.qb %[t8], %[t7], %[t6] \n\t" 168 "addq.ph %[t6], %[t8], %[t4] \n\t" 169 "ext %[t4], %[t1], 16, 8 \n\t" 170 "srl %[t7], %[t2], 16 \n\t" 171 "ins %[t4], %[t7], 16, 8 \n\t" 172 "shrl.qb %[t7], %[t4], 5 \n\t" 173 "subu.qb %[t8], %[t3], %[t7] \n\t" 174 "addq.ph %[t7], %[t8], %[t4] \n\t" 175 "shll.ph %[t4], %[t7], 2 \n\t" 176 "andi %[t9], %[t4], 0xffff \n\t" 177 "srl %[s0], %[t4], 16 \n\t" 178 "andi %[t3], %[t6], 0xffff \n\t" 179 "srl %[t4], %[t6], 16 \n\t" 180 "andi %[t6], %[t5], 0xffff \n\t" 181 "srl %[t7], %[t5], 16 \n\t" 182 "subq.ph %[t5], %[s1], %[t0] \n\t" 183 "srl %[t0], %[t5], 3 \n\t" 184 "beqz %[t1], 3f \n\t" 185 " lhu %[t5], 0(%[dst]) \n\t" 186 "sll %[t1], %[t6], 13 \n\t" 187 "or %[t8], %[t9], %[t1] \n\t" 188 "sll %[t1], %[t3], 24 \n\t" 189 "or %[t9], %[t1], %[t8] \n\t" 190 "andi %[t3], %[t5], 0x7e0 \n\t" 191 "sll %[t6], %[t3], 0x10 \n\t" 192 "and %[t8], %[s2], %[t5] \n\t" 193 "or %[t5], %[t6], %[t8] \n\t" 194 "andi %[t6], %[t0], 0xff \n\t" 195 "mul %[t1], %[t6], %[t5] \n\t" 196 "addu %[t5], %[t1], %[t9] \n\t" 197 "srl %[t6], %[t5], 5 \n\t" 198 "and %[t5], %[s2], %[t6] \n\t" 199 "srl %[t8], %[t6], 16 \n\t" 200 "andi %[t6], %[t8], 0x7e0 \n\t" 201 "or %[t1], %[t5], %[t6] \n\t" 202 "sh %[t1], 0(%[dst]) \n\t" 203 "3: \n\t" 204 "beqz %[t2], 2f \n\t" 205 " lhu %[t5], 2(%[dst]) \n\t" 206 "sll %[t1], %[t7], 13 \n\t" 207 "or %[t8], %[s0], %[t1] \n\t" 208 "sll %[t1], %[t4], 24 \n\t" 209 "or %[t9], %[t1], %[t8] \n\t" 210 "andi %[t3], %[t5], 0x7e0 \n\t" 211 "sll %[t6], %[t3], 0x10 \n\t" 212 "and %[t8], %[s2], %[t5] \n\t" 213 "or %[t5], %[t6], %[t8] \n\t" 214 "srl %[t6], %[t0], 16 \n\t" 215 "mul %[t1], %[t6], %[t5] \n\t" 216 "addu %[t5], %[t1], %[t9] \n\t" 217 "srl %[t6], %[t5], 5 \n\t" 218 "and %[t5], %[s2], %[t6] \n\t" 219 "srl %[t8], %[t6], 16 \n\t" 220 "andi %[t6], %[t8], 0x7e0 \n\t" 221 "or %[t1], %[t5], %[t6] \n\t" 222 "sh %[t1], 2(%[dst]) \n\t" 223 "2: \n\t" 224 "addiu %[count], %[count], -2 \n\t" 225 "addiu %[src], %[src], 8 \n\t" 226 "addiu %[t1], %[count], -1 \n\t" 227 "bgtz %[t1], 1b \n\t" 228 " addiu %[dst], %[dst], 4 \n\t" 229 ".set pop \n\t" 230 : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x), 231 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 232 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7), 233 [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1), 234 [s2]"=&r"(s2), [s3]"=&r"(s3) 235 : [dither_scan]"r"(dither_scan) 236 : "memory", "hi", "lo" 237 ); 238 } 239 240 if (count == 1) { 241 SkPMColor c = *src++; 242 SkPMColorAssert(c); 243 if (c) { 244 unsigned a = SkGetPackedA32(c); 245 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a)); 246 247 unsigned sr = SkGetPackedR32(c); 248 unsigned sg = SkGetPackedG32(c); 249 unsigned sb = SkGetPackedB32(c); 250 sr = SkDITHER_R32_FOR_565(sr, d); 251 sg = SkDITHER_G32_FOR_565(sg, d); 252 sb = SkDITHER_B32_FOR_565(sb, d); 253 254 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 255 uint32_t dst_expanded = SkExpand_rgb_16(*dst); 256 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 257 // now src and dst expanded are in g:11 r:10 x:1 b:10 258 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 259 } 260 dst += 1; 261 DITHER_INC_X(x); 262 } 263 } 264 265 static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst, 266 const SkPMColor* __restrict__ src, 267 int count, U8CPU alpha, int x, int y) { 268 uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3]; 269 register uint32_t t0, t1, t2, t3, t4, t5; 270 register uint32_t t6, t7, t8, t9, s0; 271 int dither[4]; 272 int i; 273 274 for (i = 0; i < 4; i++, x++) { 275 dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF; 276 } 277 278 __asm__ volatile ( 279 ".set push \n\t" 280 ".set noreorder \n\t" 281 "li %[s0], 1 \n\t" 282 "2: \n\t" 283 "beqz %[count], 1f \n\t" 284 " nop \n\t" 285 "addiu %[t0], %[count], -1 \n\t" 286 "beqz %[t0], 1f \n\t" 287 " nop \n\t" 288 "beqz %[s0], 3f \n\t" 289 " nop \n\t" 290 "lw %[t0], 0(%[dither]) \n\t" 291 "lw %[t1], 4(%[dither]) \n\t" 292 "li %[s0], 0 \n\t" 293 "b 4f \n\t" 294 " nop \n\t" 295 "3: \n\t" 296 "lw %[t0], 8(%[dither]) \n\t" 297 "lw %[t1], 12(%[dither]) \n\t" 298 "li %[s0], 1 \n\t" 299 "4: \n\t" 300 "sll %[t2], %[t0], 16 \n\t" 301 "or %[t1], %[t2], %[t1] \n\t" 302 "lw %[t0], 0(%[src]) \n\t" 303 "lw %[t2], 4(%[src]) \n\t" 304 "precrq.ph.w %[t3], %[t0], %[t2] \n\t" 305 "preceu.ph.qbra %[t9], %[t3] \n\t" 306 #ifdef __MIPS_HAVE_DSPR2 307 "append %[t0], %[t2], 16 \n\t" 308 "preceu.ph.qbra %[t4], %[t0] \n\t" 309 "preceu.ph.qbla %[t5], %[t0] \n\t" 310 #else 311 "sll %[t6], %[t0], 16 \n\t" 312 "sll %[t7], %[t2], 16 \n\t" 313 "precrq.ph.w %[t8], %[t6], %[t7] \n\t" 314 "preceu.ph.qbra %[t4], %[t8] \n\t" 315 "preceu.ph.qbla %[t5], %[t8] \n\t" 316 #endif 317 "addu.qb %[t0], %[t4], %[t1] \n\t" 318 "shra.ph %[t2], %[t4], 5 \n\t" 319 "subu.qb %[t3], %[t0], %[t2] \n\t" 320 "shra.ph %[t6], %[t3], 3 \n\t" 321 "addu.qb %[t0], %[t9], %[t1] \n\t" 322 "shra.ph %[t2], %[t9], 5 \n\t" 323 "subu.qb %[t3], %[t0], %[t2] \n\t" 324 "shra.ph %[t7], %[t3], 3 \n\t" 325 "shra.ph %[t0], %[t1], 1 \n\t" 326 "shra.ph %[t2], %[t5], 6 \n\t" 327 "addu.qb %[t3], %[t5], %[t0] \n\t" 328 "subu.qb %[t4], %[t3], %[t2] \n\t" 329 "shra.ph %[t8], %[t4], 2 \n\t" 330 "precrq.ph.w %[t0], %[t6], %[t7] \n\t" 331 #ifdef __MIPS_HAVE_DSPR2 332 "append %[t6], %[t7], 16 \n\t" 333 #else 334 "sll %[t6], %[t6], 16 \n\t" 335 "sll %[t2], %[t7], 16 \n\t" 336 "precrq.ph.w %[t6], %[t6], %[t2] \n\t" 337 #endif 338 "sra %[t4], %[t8], 16 \n\t" 339 "andi %[t5], %[t8], 0xFF \n\t" 340 "sll %[t7], %[t4], 5 \n\t" 341 "sra %[t8], %[t0], 5 \n\t" 342 "or %[t9], %[t7], %[t8] \n\t" 343 "or %[t3], %[t9], %[t0] \n\t" 344 "andi %[t4], %[t3], 0xFFFF \n\t" 345 "sll %[t7], %[t5], 5 \n\t" 346 "sra %[t8], %[t6], 5 \n\t" 347 "or %[t9], %[t7], %[t8] \n\t" 348 "or %[t3], %[t9], %[t6] \n\t" 349 "and %[t7], %[t3], 0xFFFF \n\t" 350 "sh %[t4], 0(%[dst]) \n\t" 351 "sh %[t7], 2(%[dst]) \n\t" 352 "addiu %[count], %[count], -2 \n\t" 353 "addiu %[src], %[src], 8 \n\t" 354 "b 2b \n\t" 355 " addiu %[dst], %[dst], 4 \n\t" 356 "1: \n\t" 357 ".set pop \n\t" 358 : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count), 359 [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), 360 [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), 361 [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0) 362 : [dither] "r" (dither) 363 : "memory" 364 ); 365 366 if (count == 1) { 367 SkPMColor c = *src++; 368 SkPMColorAssert(c); // only if DEBUG is turned on 369 SkASSERT(SkGetPackedA32(c) == 255); 370 unsigned dither = DITHER_VALUE(x); 371 *dst++ = SkDitherRGB32To565(c, dither); 372 } 373 } 374 375 static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst, 376 const SkPMColor* src, 377 int count, U8CPU alpha, int x, int y) { 378 register int32_t t0, t1, t2, t3, t4, t5, t6; 379 register int32_t s0, s1, s2, s3; 380 register int x1 = 0; 381 register uint32_t sc_mul; 382 register uint32_t sc_add; 383 #ifdef ENABLE_DITHER_MATRIX_4X4 384 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3]; 385 #else // ENABLE_DITHER_MATRIX_4X4 386 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3]; 387 #endif // ENABLE_DITHER_MATRIX_4X4 388 int dither[4]; 389 390 for (int i = 0; i < 4; i++) { 391 dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF; 392 x += 1; 393 } 394 alpha += 1; 395 __asm__ volatile ( 396 ".set push \n\t" 397 ".set noreorder \n\t" 398 "li %[t0], 0x100 \n\t" 399 "subu %[t0], %[t0], %[alpha] \n\t" 400 "replv.ph %[sc_mul], %[alpha] \n\t" 401 "beqz %[alpha], 1f \n\t" 402 " nop \n\t" 403 "replv.qb %[sc_add], %[t0] \n\t" 404 "b 2f \n\t" 405 " nop \n\t" 406 "1: \n\t" 407 "replv.qb %[sc_add], %[alpha] \n\t" 408 "2: \n\t" 409 "addiu %[t2], %[count], -1 \n\t" 410 "blez %[t2], 3f \n\t" 411 " nop \n\t" 412 "lw %[s0], 0(%[src]) \n\t" 413 "lw %[s1], 4(%[src]) \n\t" 414 "bnez %[x1], 4f \n\t" 415 " nop \n\t" 416 "lw %[t0], 0(%[dither]) \n\t" 417 "lw %[t1], 4(%[dither]) \n\t" 418 "li %[x1], 1 \n\t" 419 "b 5f \n\t" 420 " nop \n\t" 421 "4: \n\t" 422 "lw %[t0], 8(%[dither]) \n\t" 423 "lw %[t1], 12(%[dither]) \n\t" 424 "li %[x1], 0 \n\t" 425 "5: \n\t" 426 "sll %[t3], %[t0], 7 \n\t" 427 "sll %[t4], %[t1], 7 \n\t" 428 #ifdef __MIPS_HAVE_DSPR2 429 "append %[t0], %[t1], 16 \n\t" 430 #else 431 "sll %[t0], %[t0], 8 \n\t" 432 "sll %[t2], %[t1], 8 \n\t" 433 "precrq.qb.ph %[t0], %[t0], %[t2] \n\t" 434 #endif 435 "precrq.qb.ph %[t1], %[t3], %[t4] \n\t" 436 "sll %[t5], %[s0], 8 \n\t" 437 "sll %[t6], %[s1], 8 \n\t" 438 "precrq.qb.ph %[t4], %[t5], %[t6] \n\t" 439 "precrq.qb.ph %[t6], %[s0], %[s1] \n\t" 440 "preceu.ph.qbla %[t5], %[t4] \n\t" 441 "preceu.ph.qbra %[t4], %[t4] \n\t" 442 "preceu.ph.qbra %[t6], %[t6] \n\t" 443 "lh %[t2], 0(%[dst]) \n\t" 444 "lh %[s1], 2(%[dst]) \n\t" 445 #ifdef __MIPS_HAVE_DSPR2 446 "append %[t2], %[s1], 16 \n\t" 447 #else 448 "sll %[s1], %[s1], 16 \n\t" 449 "packrl.ph %[t2], %[t2], %[s1] \n\t" 450 #endif 451 "shra.ph %[s1], %[t2], 11 \n\t" 452 "and %[s1], %[s1], 0x1F001F \n\t" 453 "shra.ph %[s2], %[t2], 5 \n\t" 454 "and %[s2], %[s2], 0x3F003F \n\t" 455 "and %[s3], %[t2], 0x1F001F \n\t" 456 "shrl.qb %[t3], %[t4], 5 \n\t" 457 "addu.qb %[t4], %[t4], %[t0] \n\t" 458 "subu.qb %[t4], %[t4], %[t3] \n\t" 459 "shrl.qb %[t4], %[t4], 3 \n\t" 460 "shrl.qb %[t3], %[t5], 5 \n\t" 461 "addu.qb %[t5], %[t5], %[t0] \n\t" 462 "subu.qb %[t5], %[t5], %[t3] \n\t" 463 "shrl.qb %[t5], %[t5], 3 \n\t" 464 "shrl.qb %[t3], %[t6], 6 \n\t" 465 "addu.qb %[t6], %[t6], %[t1] \n\t" 466 "subu.qb %[t6], %[t6], %[t3] \n\t" 467 "shrl.qb %[t6], %[t6], 2 \n\t" 468 "cmpu.lt.qb %[t4], %[s1] \n\t" 469 "pick.qb %[s0], %[sc_add], $0 \n\t" 470 "addu.qb %[s0], %[s0], %[s1] \n\t" 471 "subu.qb %[t4], %[t4], %[s1] \n\t" 472 "muleu_s.ph.qbl %[t0], %[t4], %[sc_mul] \n\t" 473 "muleu_s.ph.qbr %[t1], %[t4], %[sc_mul] \n\t" 474 "precrq.qb.ph %[t4], %[t0], %[t1] \n\t" 475 "addu.qb %[t4], %[t4], %[s0] \n\t" 476 "cmpu.lt.qb %[t5], %[s3] \n\t" 477 "pick.qb %[s0], %[sc_add], $0 \n\t" 478 "addu.qb %[s0], %[s0], %[s3] \n\t" 479 "subu.qb %[t5], %[t5], %[s3] \n\t" 480 "muleu_s.ph.qbl %[t0], %[t5], %[sc_mul] \n\t" 481 "muleu_s.ph.qbr %[t1], %[t5], %[sc_mul] \n\t" 482 "precrq.qb.ph %[t5], %[t0], %[t1] \n\t" 483 "addu.qb %[t5], %[t5], %[s0] \n\t" 484 "cmpu.lt.qb %[t6], %[s2] \n\t" 485 "pick.qb %[s0], %[sc_add], $0 \n\t" 486 "addu.qb %[s0], %[s0], %[s2] \n\t" 487 "subu.qb %[t6], %[t6], %[s2] \n\t" 488 "muleu_s.ph.qbl %[t0], %[t6], %[sc_mul] \n\t" 489 "muleu_s.ph.qbr %[t1], %[t6], %[sc_mul] \n\t" 490 "precrq.qb.ph %[t6], %[t0], %[t1] \n\t" 491 "addu.qb %[t6], %[t6], %[s0] \n\t" 492 "shll.ph %[s1], %[t4], 11 \n\t" 493 "shll.ph %[t0], %[t6], 5 \n\t" 494 "or %[s0], %[s1], %[t0] \n\t" 495 "or %[s1], %[s0], %[t5] \n\t" 496 "srl %[t2], %[s1], 16 \n\t" 497 "and %[t3], %[s1], 0xFFFF \n\t" 498 "sh %[t2], 0(%[dst]) \n\t" 499 "sh %[t3], 2(%[dst]) \n\t" 500 "addiu %[src], %[src], 8 \n\t" 501 "addi %[count], %[count], -2 \n\t" 502 "b 2b \n\t" 503 " addu %[dst], %[dst], 4 \n\t" 504 "3: \n\t" 505 ".set pop \n\t" 506 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count), 507 [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add), 508 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 509 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0), 510 [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3) 511 : [dither]"r"(dither), [alpha]"r"(alpha) 512 : "memory", "hi", "lo" 513 ); 514 515 if(count == 1) { 516 SkPMColor c = *src++; 517 SkPMColorAssert(c); 518 SkASSERT(SkGetPackedA32(c) == 255); 519 DITHER_565_SCAN(y); 520 int dither = DITHER_VALUE(x); 521 int sr = SkGetPackedR32(c); 522 int sg = SkGetPackedG32(c); 523 int sb = SkGetPackedB32(c); 524 sr = SkDITHER_R32To565(sr, dither); 525 sg = SkDITHER_G32To565(sg, dither); 526 sb = SkDITHER_B32To565(sb, dither); 527 528 uint16_t d = *dst; 529 *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha), 530 SkAlphaBlend(sg, SkGetPackedG16(d), alpha), 531 SkAlphaBlend(sb, SkGetPackedB16(d), alpha)); 532 DITHER_INC_X(x); 533 } 534 } 535 536 static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst, 537 const SkPMColor* __restrict__ src, 538 int count, U8CPU alpha, int x, int y) { 539 540 __asm__ volatile ( 541 "pref 0, 0(%[src]) \n\t" 542 "pref 1, 0(%[dst]) \n\t" 543 "pref 0, 32(%[src]) \n\t" 544 "pref 1, 32(%[dst]) \n\t" 545 : 546 : [src]"r"(src), [dst]"r"(dst) 547 : "memory" 548 ); 549 550 register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8; 551 register uint32_t t16; 552 register uint32_t add_x10 = 0x100010; 553 register uint32_t add_x20 = 0x200020; 554 register uint32_t sa = 0xff00ff; 555 556 __asm__ volatile ( 557 ".set push \n\t" 558 ".set noreorder \n\t" 559 "blez %[count], 1f \n\t" 560 " nop \n\t" 561 "2: \n\t" 562 "beqz %[count], 1f \n\t" 563 " nop \n\t" 564 "addiu %[t0], %[count], -1 \n\t" 565 "beqz %[t0], 1f \n\t" 566 " nop \n\t" 567 "bnez %[t16], 3f \n\t" 568 " nop \n\t" 569 "li %[t16], 2 \n\t" 570 "pref 0, 64(%[src]) \n\t" 571 "pref 1, 64(%[dst]) \n\t" 572 "3: \n\t" 573 "addiu %[t16], %[t16], -1 \n\t" 574 "lw %[t0], 0(%[src]) \n\t" 575 "lw %[t1], 4(%[src]) \n\t" 576 "precrq.ph.w %[t2], %[t0], %[t1] \n\t" 577 "preceu.ph.qbra %[t8], %[t2] \n\t" 578 #ifdef __MIPS_HAVE_DSPR2 579 "append %[t0], %[t1], 16 \n\t" 580 #else 581 "sll %[t0], %[t0], 16 \n\t" 582 "sll %[t6], %[t1], 16 \n\t" 583 "precrq.ph.w %[t0], %[t0], %[t6] \n\t" 584 #endif 585 "preceu.ph.qbra %[t3], %[t0] \n\t" 586 "preceu.ph.qbla %[t4], %[t0] \n\t" 587 "preceu.ph.qbla %[t0], %[t2] \n\t" 588 "subq.ph %[t1], %[sa], %[t0] \n\t" 589 "sra %[t2], %[t1], 8 \n\t" 590 "or %[t5], %[t2], %[t1] \n\t" 591 "replv.ph %[t2], %[t5] \n\t" 592 "lh %[t0], 0(%[dst]) \n\t" 593 "lh %[t1], 2(%[dst]) \n\t" 594 "and %[t1], %[t1], 0xffff \n\t" 595 #ifdef __MIPS_HAVE_DSPR2 596 "append %[t0], %[t1], 16 \n\t" 597 #else 598 "sll %[t5], %[t0], 16 \n\t" 599 "or %[t0], %[t5], %[t1] \n\t" 600 #endif 601 "and %[t1], %[t0], 0x1f001f \n\t" 602 "shra.ph %[t6], %[t0], 11 \n\t" 603 "and %[t6], %[t6], 0x1f001f \n\t" 604 "and %[t7], %[t0], 0x7e007e0 \n\t" 605 "shra.ph %[t5], %[t7], 5 \n\t" 606 "muleu_s.ph.qbl %[t0], %[t2], %[t6] \n\t" 607 "addq.ph %[t7], %[t0], %[add_x10] \n\t" 608 "shra.ph %[t6], %[t7], 5 \n\t" 609 "addq.ph %[t6], %[t7], %[t6] \n\t" 610 "shra.ph %[t0], %[t6], 5 \n\t" 611 "addq.ph %[t7], %[t0], %[t3] \n\t" 612 "shra.ph %[t6], %[t7], 3 \n\t" 613 "muleu_s.ph.qbl %[t0], %[t2], %[t1] \n\t" 614 "addq.ph %[t7], %[t0], %[add_x10] \n\t" 615 "shra.ph %[t0], %[t7], 5 \n\t" 616 "addq.ph %[t7], %[t7], %[t0] \n\t" 617 "shra.ph %[t0], %[t7], 5 \n\t" 618 "addq.ph %[t7], %[t0], %[t8] \n\t" 619 "shra.ph %[t3], %[t7], 3 \n\t" 620 "muleu_s.ph.qbl %[t0], %[t2], %[t5] \n\t" 621 "addq.ph %[t7], %[t0], %[add_x20] \n\t" 622 "shra.ph %[t0], %[t7], 6 \n\t" 623 "addq.ph %[t8], %[t7], %[t0] \n\t" 624 "shra.ph %[t0], %[t8], 6 \n\t" 625 "addq.ph %[t7], %[t0], %[t4] \n\t" 626 "shra.ph %[t8], %[t7], 2 \n\t" 627 "shll.ph %[t0], %[t8], 5 \n\t" 628 "shll.ph %[t1], %[t6], 11 \n\t" 629 "or %[t2], %[t0], %[t1] \n\t" 630 "or %[t3], %[t2], %[t3] \n\t" 631 "sra %[t4], %[t3], 16 \n\t" 632 "sh %[t4], 0(%[dst]) \n\t" 633 "sh %[t3], 2(%[dst]) \n\t" 634 "addiu %[count], %[count], -2 \n\t" 635 "addiu %[src], %[src], 8 \n\t" 636 "b 2b \n\t" 637 " addiu %[dst], %[dst], 4 \n\t" 638 "1: \n\t" 639 ".set pop \n\t" 640 : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count), 641 [t16]"=&r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), 642 [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), 643 [t7]"=&r"(t7), [t8]"=&r"(t8) 644 : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa) 645 : "memory", "hi", "lo" 646 ); 647 648 if (count == 1) { 649 SkPMColor c = *src++; 650 SkPMColorAssert(c); 651 if (c) { 652 *dst = SkSrcOver32To16(c, *dst); 653 } 654 dst += 1; 655 } 656 } 657 658 static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst, 659 const SkPMColor* SK_RESTRICT src, int count, 660 U8CPU alpha, int /*x*/, int /*y*/) { 661 register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; 662 register uint32_t s0, s1, s2, s3; 663 register unsigned dst_scale = 0; 664 665 __asm__ volatile ( 666 ".set push \n\t" 667 ".set noreorder \n\t" 668 "replv.qb %[t0], %[alpha] \n\t" 669 "repl.ph %[t6], 0x80 \n\t" 670 "repl.ph %[t7], 0xFF \n\t" 671 "1: \n\t" 672 "addiu %[t8], %[count], -1 \n\t" 673 "blez %[t8], 2f \n\t" 674 " nop \n\t" 675 "lw %[t8], 0(%[src]) \n\t" 676 "lw %[t9], 4(%[src]) \n\t" 677 "lh %[t4], 0(%[dst]) \n\t" 678 "lh %[t5], 2(%[dst]) \n\t" 679 "sll %[t5], %[t5], 16 \n\t" 680 "sll %[t2], %[t8], 8 \n\t" 681 "sll %[t3], %[t9], 8 \n\t" 682 "precrq.qb.ph %[t1], %[t2], %[t3] \n\t" 683 "precrq.qb.ph %[t3], %[t8], %[t9] \n\t" 684 "preceu.ph.qbla %[t8], %[t3] \n\t" 685 "muleu_s.ph.qbr %[s3], %[t0], %[t8] \n\t" 686 "preceu.ph.qbla %[t2], %[t1] \n\t" 687 "preceu.ph.qbra %[t1], %[t1] \n\t" 688 "preceu.ph.qbra %[t3], %[t3] \n\t" 689 "packrl.ph %[t9], %[t4], %[t5] \n\t" 690 "shra.ph %[s0], %[t9], 11 \n\t" 691 "and %[s0], %[s0], 0x1F001F \n\t" 692 "shra.ph %[s1], %[t9], 5 \n\t" 693 "and %[s1], %[s1], 0x3F003F \n\t" 694 "and %[s2], %[t9], 0x1F001F \n\t" 695 "addq.ph %[s3], %[s3], %[t6] \n\t" 696 "shra.ph %[t5], %[s3], 8 \n\t" 697 "and %[t5], %[t5], 0xFF00FF \n\t" 698 "addq.ph %[dst_scale], %[s3], %[t5] \n\t" 699 "shra.ph %[dst_scale], %[dst_scale], 8 \n\t" 700 "subq_s.ph %[dst_scale], %[t7], %[dst_scale] \n\t" 701 "sll %[dst_scale], %[dst_scale], 8 \n\t" 702 "precrq.qb.ph %[dst_scale], %[dst_scale], %[dst_scale] \n\t" 703 "shrl.qb %[t1], %[t1], 3 \n\t" 704 "shrl.qb %[t2], %[t2], 3 \n\t" 705 "shrl.qb %[t3], %[t3], 2 \n\t" 706 "muleu_s.ph.qbl %[t1], %[t0], %[t1] \n\t" 707 "muleu_s.ph.qbl %[t2], %[t0], %[t2] \n\t" 708 "muleu_s.ph.qbl %[t3], %[t0], %[t3] \n\t" 709 "muleu_s.ph.qbl %[t8], %[dst_scale], %[s0] \n\t" 710 "muleu_s.ph.qbl %[t9], %[dst_scale], %[s2] \n\t" 711 "muleu_s.ph.qbl %[t4], %[dst_scale], %[s1] \n\t" 712 "addq.ph %[t1], %[t1], %[t8] \n\t" 713 "addq.ph %[t2], %[t2], %[t9] \n\t" 714 "addq.ph %[t3], %[t3], %[t4] \n\t" 715 "addq.ph %[t8], %[t1], %[t6] \n\t" 716 "addq.ph %[t9], %[t2], %[t6] \n\t" 717 "addq.ph %[t4], %[t3], %[t6] \n\t" 718 "shra.ph %[t1], %[t8], 8 \n\t" 719 "addq.ph %[t1], %[t1], %[t8] \n\t" 720 "preceu.ph.qbla %[t1], %[t1] \n\t" 721 "shra.ph %[t2], %[t9], 8 \n\t" 722 "addq.ph %[t2], %[t2], %[t9] \n\t" 723 "preceu.ph.qbla %[t2], %[t2] \n\t" 724 "shra.ph %[t3], %[t4], 8 \n\t" 725 "addq.ph %[t3], %[t3], %[t4] \n\t" 726 "preceu.ph.qbla %[t3], %[t3] \n\t" 727 "shll.ph %[t8], %[t1], 11 \n\t" 728 "shll.ph %[t9], %[t3], 5 \n\t" 729 "or %[t8], %[t8], %[t9] \n\t" 730 "or %[s0], %[t8], %[t2] \n\t" 731 "srl %[t8], %[s0], 16 \n\t" 732 "and %[t9], %[s0], 0xFFFF \n\t" 733 "sh %[t8], 0(%[dst]) \n\t" 734 "sh %[t9], 2(%[dst]) \n\t" 735 "addiu %[src], %[src], 8 \n\t" 736 "addiu %[count], %[count], -2 \n\t" 737 "b 1b \n\t" 738 " addiu %[dst], %[dst], 4 \n\t" 739 "2: \n\t" 740 ".set pop \n\t" 741 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count), 742 [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1), 743 [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1), 744 [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), 745 [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9) 746 : [alpha]"r"(alpha) 747 : "memory", "hi", "lo" 748 ); 749 750 if (count == 1) { 751 SkPMColor sc = *src++; 752 SkPMColorAssert(sc); 753 if (sc) { 754 uint16_t dc = *dst; 755 unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); 756 unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) + 757 SkMulS16(SkGetPackedR16(dc), dst_scale); 758 unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) + 759 SkMulS16(SkGetPackedG16(dc), dst_scale); 760 unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) + 761 SkMulS16(SkGetPackedB16(dc), dst_scale); 762 *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); 763 } 764 dst += 1; 765 } 766 } 767 768 static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst, 769 const SkPMColor* SK_RESTRICT src, 770 int count, U8CPU alpha) { 771 register int32_t t0, t1, t2, t3, t4, t5, t6, t7; 772 773 __asm__ volatile ( 774 ".set push \n\t" 775 ".set noreorder \n\t" 776 "li %[t2], 0x100 \n\t" 777 "addiu %[t0], %[alpha], 1 \n\t" 778 "subu %[t1], %[t2], %[t0] \n\t" 779 "replv.qb %[t7], %[t0] \n\t" 780 "replv.qb %[t6], %[t1] \n\t" 781 "1: \n\t" 782 "blez %[count], 2f \n\t" 783 "lw %[t0], 0(%[src]) \n\t" 784 "lw %[t1], 0(%[dst]) \n\t" 785 "preceu.ph.qbr %[t2], %[t0] \n\t" 786 "preceu.ph.qbl %[t3], %[t0] \n\t" 787 "preceu.ph.qbr %[t4], %[t1] \n\t" 788 "preceu.ph.qbl %[t5], %[t1] \n\t" 789 "muleu_s.ph.qbr %[t2], %[t7], %[t2] \n\t" 790 "muleu_s.ph.qbr %[t3], %[t7], %[t3] \n\t" 791 "muleu_s.ph.qbr %[t4], %[t6], %[t4] \n\t" 792 "muleu_s.ph.qbr %[t5], %[t6], %[t5] \n\t" 793 "addiu %[src], %[src], 4 \n\t" 794 "addiu %[count], %[count], -1 \n\t" 795 "precrq.qb.ph %[t0], %[t3], %[t2] \n\t" 796 "precrq.qb.ph %[t2], %[t5], %[t4] \n\t" 797 "addu %[t1], %[t0], %[t2] \n\t" 798 "sw %[t1], 0(%[dst]) \n\t" 799 "b 1b \n\t" 800 " addi %[dst], %[dst], 4 \n\t" 801 "2: \n\t" 802 ".set pop \n\t" 803 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count), 804 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 805 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 806 : [alpha]"r"(alpha) 807 : "memory", "hi", "lo" 808 ); 809 } 810 811 /////////////////////////////////////////////////////////////////////////////////////////////////// 812 813 const SkBlitRow::Proc platform_565_procs_mips_dsp[] = { 814 // no dither 815 NULL, 816 S32_D565_Blend_mips_dsp, 817 S32A_D565_Opaque_mips_dsp, 818 S32A_D565_Blend_mips_dsp, 819 820 // dither 821 S32_D565_Opaque_Dither_mips_dsp, 822 S32_D565_Blend_Dither_mips_dsp, 823 S32A_D565_Opaque_Dither_mips_dsp, 824 NULL, 825 }; 826 827 static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = { 828 NULL, // S32_Opaque, 829 S32_Blend_BlitRow32_mips_dsp, // S32_Blend, 830 NULL, // S32A_Opaque, 831 NULL, // S32A_Blend, 832 }; 833 834 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 835 return platform_565_procs_mips_dsp[flags]; 836 } 837 838 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 839 return platform_32_procs_mips_dsp[flags]; 840 } 841 842 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { 843 return NULL; 844 } 845 846 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 847 return NULL; 848 } 849