1 /* 2 * Copyright 2012 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkBlitRow_opts_arm.h" 9 10 #include "SkBlitMask.h" 11 #include "SkBlitRow.h" 12 #include "SkColorPriv.h" 13 #include "SkDither.h" 14 #include "SkMathPriv.h" 15 #include "SkUtils.h" 16 17 #include "SkCachePreload_arm.h" 18 19 #if USE_ARM_CODE 20 21 static void S32A_D565_Opaque(uint16_t* SK_RESTRICT dst, 22 const SkPMColor* SK_RESTRICT src, int count, 23 U8CPU alpha, int /*x*/, int /*y*/) { 24 SkASSERT(255 == alpha); 25 26 asm volatile ( 27 "1: \n\t" 28 "ldr r3, [%[src]], #4 \n\t" 29 "cmp r3, #0xff000000 \n\t" 30 "blo 2f \n\t" 31 "and r4, r3, #0x0000f8 \n\t" 32 "and r5, r3, #0x00fc00 \n\t" 33 "and r6, r3, #0xf80000 \n\t" 34 "pld [r1, #32] \n\t" 35 "lsl r3, r4, #8 \n\t" 36 "orr r3, r3, r5, lsr #5 \n\t" 37 "orr r3, r3, r6, lsr #19 \n\t" 38 "subs %[count], %[count], #1 \n\t" 39 "strh r3, [%[dst]], #2 \n\t" 40 "bne 1b \n\t" 41 "b 4f \n\t" 42 "2: \n\t" 43 "lsrs r7, r3, #24 \n\t" 44 "beq 3f \n\t" 45 "ldrh r4, [%[dst]] \n\t" 46 "rsb r7, r7, #255 \n\t" 47 "and r6, r4, #0x001f \n\t" 48 #if SK_ARM_ARCH == 6 49 "lsl r5, r4, #21 \n\t" 50 "lsr r5, r5, #26 \n\t" 51 #else 52 "ubfx r5, r4, #5, #6 \n\t" 53 #endif 54 "pld [r0, #16] \n\t" 55 "lsr r4, r4, #11 \n\t" 56 #ifdef SK_ARM_HAS_EDSP 57 "smulbb r6, r6, r7 \n\t" 58 "smulbb r5, r5, r7 \n\t" 59 "smulbb r4, r4, r7 \n\t" 60 #else 61 "mul r6, r6, r7 \n\t" 62 "mul r5, r5, r7 \n\t" 63 "mul r4, r4, r7 \n\t" 64 #endif 65 "uxtb r7, r3, ROR #16 \n\t" 66 "uxtb ip, r3, ROR #8 \n\t" 67 "and r3, r3, #0xff \n\t" 68 "add r6, r6, #16 \n\t" 69 "add r5, r5, #32 \n\t" 70 "add r4, r4, #16 \n\t" 71 "add r6, r6, r6, lsr #5 \n\t" 72 "add r5, r5, r5, lsr #6 \n\t" 73 "add r4, r4, r4, lsr #5 \n\t" 74 "add r6, r7, r6, lsr #5 \n\t" 75 "add r5, ip, r5, lsr #6 \n\t" 76 "add r4, r3, r4, lsr #5 \n\t" 77 "lsr r6, r6, #3 \n\t" 78 "and r5, r5, #0xfc \n\t" 79 "and r4, r4, #0xf8 \n\t" 80 
"orr r6, r6, r5, lsl #3 \n\t" 81 "orr r4, r6, r4, lsl #8 \n\t" 82 "strh r4, [%[dst]], #2 \n\t" 83 "pld [r1, #32] \n\t" 84 "subs %[count], %[count], #1 \n\t" 85 "bne 1b \n\t" 86 "b 4f \n\t" 87 "3: \n\t" 88 "subs %[count], %[count], #1 \n\t" 89 "add %[dst], %[dst], #2 \n\t" 90 "bne 1b \n\t" 91 "4: \n\t" 92 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count) 93 : 94 : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "ip" 95 ); 96 } 97 98 static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst, 99 const SkPMColor* SK_RESTRICT src, 100 int count, U8CPU alpha) { 101 102 SkASSERT(255 == alpha); 103 104 asm volatile ( 105 "cmp %[count], #0 \n\t" /* comparing count with 0 */ 106 "beq 3f \n\t" /* if zero exit */ 107 108 "mov ip, #0xff \n\t" /* load the 0xff mask in ip */ 109 "orr ip, ip, ip, lsl #16 \n\t" /* convert it to 0xff00ff in ip */ 110 111 "cmp %[count], #2 \n\t" /* compare count with 2 */ 112 "blt 2f \n\t" /* if less than 2 -> single loop */ 113 114 /* Double Loop */ 115 "1: \n\t" /* <double loop> */ 116 "ldm %[src]!, {r5,r6} \n\t" /* load the src(s) at r5-r6 */ 117 "ldm %[dst], {r7,r8} \n\t" /* loading dst(s) into r7-r8 */ 118 "lsr r4, r5, #24 \n\t" /* extracting the alpha from source and storing it to r4 */ 119 120 /* ----------- */ 121 "and r9, ip, r7 \n\t" /* r9 = br masked by ip */ 122 "rsb r4, r4, #256 \n\t" /* subtracting the alpha from 256 -> r4=scale */ 123 "and r10, ip, r7, lsr #8 \n\t" /* r10 = ag masked by ip */ 124 125 "mul r9, r9, r4 \n\t" /* br = br * scale */ 126 "mul r10, r10, r4 \n\t" /* ag = ag * scale */ 127 "and r9, ip, r9, lsr #8 \n\t" /* lsr br by 8 and mask it */ 128 129 "and r10, r10, ip, lsl #8 \n\t" /* mask ag with reverse mask */ 130 "lsr r4, r6, #24 \n\t" /* extracting the alpha from source and storing it to r4 */ 131 "orr r7, r9, r10 \n\t" /* br | ag*/ 132 133 "add r7, r5, r7 \n\t" /* dst = src + calc dest(r7) */ 134 "rsb r4, r4, #256 \n\t" /* subtracting the alpha from 255 -> r4=scale */ 135 136 /* ----------- */ 137 "and 
r9, ip, r8 \n\t" /* r9 = br masked by ip */ 138 139 "and r10, ip, r8, lsr #8 \n\t" /* r10 = ag masked by ip */ 140 "mul r9, r9, r4 \n\t" /* br = br * scale */ 141 "sub %[count], %[count], #2 \n\t" 142 "mul r10, r10, r4 \n\t" /* ag = ag * scale */ 143 144 "and r9, ip, r9, lsr #8 \n\t" /* lsr br by 8 and mask it */ 145 "and r10, r10, ip, lsl #8 \n\t" /* mask ag with reverse mask */ 146 "cmp %[count], #1 \n\t" /* comparing count with 1 */ 147 "orr r8, r9, r10 \n\t" /* br | ag */ 148 149 "add r8, r6, r8 \n\t" /* dst = src + calc dest(r8) */ 150 151 /* ----------------- */ 152 "stm %[dst]!, {r7,r8} \n\t" /* *dst = r7, increment dst by two (each times 4) */ 153 /* ----------------- */ 154 155 "bgt 1b \n\t" /* if greater than 1 -> reloop */ 156 "blt 3f \n\t" /* if less than 1 -> exit */ 157 158 /* Single Loop */ 159 "2: \n\t" /* <single loop> */ 160 "ldr r5, [%[src]], #4 \n\t" /* load the src pointer into r5 r5=src */ 161 "ldr r7, [%[dst]] \n\t" /* loading dst into r7 */ 162 "lsr r4, r5, #24 \n\t" /* extracting the alpha from source and storing it to r4 */ 163 164 /* ----------- */ 165 "and r9, ip, r7 \n\t" /* r9 = br masked by ip */ 166 "rsb r4, r4, #256 \n\t" /* subtracting the alpha from 256 -> r4=scale */ 167 168 "and r10, ip, r7, lsr #8 \n\t" /* r10 = ag masked by ip */ 169 "mul r9, r9, r4 \n\t" /* br = br * scale */ 170 "mul r10, r10, r4 \n\t" /* ag = ag * scale */ 171 "and r9, ip, r9, lsr #8 \n\t" /* lsr br by 8 and mask it */ 172 173 "and r10, r10, ip, lsl #8 \n\t" /* mask ag */ 174 "orr r7, r9, r10 \n\t" /* br | ag */ 175 176 "add r7, r5, r7 \n\t" /* *dst = src + calc dest(r7) */ 177 178 /* ----------------- */ 179 "str r7, [%[dst]], #4 \n\t" /* *dst = r7, increment dst by one (times 4) */ 180 /* ----------------- */ 181 182 "3: \n\t" /* <exit> */ 183 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count) 184 : 185 : "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "ip", "memory" 186 ); 187 } 188 #endif // USE_ARM_CODE 189 190 /* 191 * ARM asm version of 
S32A_Blend_BlitRow32
 */
// This version is also used by the NEON procs table, so always compile it
//
// Blends `count` 32-bit src pixels onto dst using both the per-pixel alpha
// (src >> 24) and the global `alpha` argument. Per pixel:
//     src_scale = alpha + 1
//     dst_scale = 256 - (((src >> 24) * src_scale) >> 8)
//     *dst      = scaled(src, src_scale) + scaled(dst, dst_scale)
// where scaled(c, s) multiplies the two 0x00ff00ff-masked halves of c
// (c and c >> 8) by s, shifts the low half back down by 8, and recombines.
// Pixels are processed two at a time while at least two remain; a final odd
// pixel is handled by the tail section at label 2. A count of 0 returns
// immediately. The local copy of `alpha` is incremented inside the asm, which
// is why it is listed as a read-write ("+r") operand.
void S32A_Blend_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
                              const SkPMColor* SK_RESTRICT src,
                              int count, U8CPU alpha) {
    asm volatile (
        "cmp %[count], #0 \n\t"                 /* comparing count with 0 */
        "beq 3f \n\t"                           /* if zero exit */

        "mov r12, #0xff \n\t"                   /* load the 0xff mask in r12 */
        "orr r12, r12, r12, lsl #16 \n\t"       /* convert it to 0xff00ff in r12 */

        /* src1,2_scale */
        "add %[alpha], %[alpha], #1 \n\t"       /* %[alpha] = src_scale = alpha + 1 */

        "cmp %[count], #2 \n\t"                 /* comparing count with 2 */
        "blt 2f \n\t"                           /* if less than 2 -> single pixel tail */

        /* Double Loop */
        "1: \n\t"                               /* <double loop> */
        "ldm %[src]!, {r5, r6} \n\t"            /* load two src pixels into r5 and r6, src += 2 pixels */
        "ldm %[dst], {r7, r8} \n\t"             /* load two dst pixels into r7 and r8 (dst not advanced yet) */

        /* dst1_scale and dst2_scale*/
        "lsr r9, r5, #24 \n\t"                  /* src1 >> 24 */
        "lsr r10, r6, #24 \n\t"                 /* src2 >> 24 */
#ifdef SK_ARM_HAS_EDSP
        "smulbb r9, r9, %[alpha] \n\t"          /* r9 = SkMulS16 r9 with src_scale */
        "smulbb r10, r10, %[alpha] \n\t"        /* r10 = SkMulS16 r10 with src_scale */
#else
        "mul r9, r9, %[alpha] \n\t"             /* r9 = SkMulS16 r9 with src_scale */
        "mul r10, r10, %[alpha] \n\t"           /* r10 = SkMulS16 r10 with src_scale */
#endif
        "lsr r9, r9, #8 \n\t"                   /* r9 >> 8 */
        "lsr r10, r10, #8 \n\t"                 /* r10 >> 8 */
        "rsb r9, r9, #256 \n\t"                 /* dst1_scale = r9 = 255 - r9 + 1 */
        "rsb r10, r10, #256 \n\t"               /* dst2_scale = r10 = 255 - r10 + 1 */

        /* ---------------------- */

        /* src1, src1_scale */
        "and r11, r12, r5, lsr #8 \n\t"         /* ag = r11 = r5 masked by r12 lsr by #8 */
        "and r4, r12, r5 \n\t"                  /* rb = r4 = r5 masked by r12 */
        "mul r11, r11, %[alpha] \n\t"           /* ag = r11 times src_scale */
        "mul r4, r4, %[alpha] \n\t"             /* rb = r4 times src_scale */
        "and r11, r11, r12, lsl #8 \n\t"        /* ag masked by reverse mask (r12 << 8) */
        "and r4, r12, r4, lsr #8 \n\t"          /* rb >> 8 masked by mask (r12) */
        "orr r5, r11, r4 \n\t"                  /* r5 = (src1, src_scale) */

        /* dst1, dst1_scale */
        "and r11, r12, r7, lsr #8 \n\t"         /* ag = r11 = r7 masked by r12 lsr by #8 */
        "and r4, r12, r7 \n\t"                  /* rb = r4 = r7 masked by r12 */
        "mul r11, r11, r9 \n\t"                 /* ag = r11 times dst_scale (r9) */
        "mul r4, r4, r9 \n\t"                   /* rb = r4 times dst_scale (r9) */
        "and r11, r11, r12, lsl #8 \n\t"        /* ag masked by reverse mask (r12 << 8) */
        "and r4, r12, r4, lsr #8 \n\t"          /* rb >> 8 masked by mask (r12) */
        "orr r9, r11, r4 \n\t"                  /* r9 = (dst1, dst_scale) */

        /* ---------------------- */
        "add r9, r5, r9 \n\t"                   /* first result = src plus dst both scaled */
        /* ---------------------- */

        /* ====================== */

        /* src2, src2_scale */
        "and r11, r12, r6, lsr #8 \n\t"         /* ag = r11 = r6 masked by r12 lsr by #8 */
        "and r4, r12, r6 \n\t"                  /* rb = r4 = r6 masked by r12 */
        "mul r11, r11, %[alpha] \n\t"           /* ag = r11 times src_scale */
        "mul r4, r4, %[alpha] \n\t"             /* rb = r4 times src_scale */
        "and r11, r11, r12, lsl #8 \n\t"        /* ag masked by reverse mask (r12 << 8) */
        "and r4, r12, r4, lsr #8 \n\t"          /* rb >> 8 masked by mask (r12) */
        "orr r6, r11, r4 \n\t"                  /* r6 = (src2, src_scale) */

        /* dst2, dst2_scale */
        "and r11, r12, r8, lsr #8 \n\t"         /* ag = r11 = r8 masked by r12 lsr by #8 */
        "and r4, r12, r8 \n\t"                  /* rb = r4 = r8 masked by r12 */
        "mul r11, r11, r10 \n\t"                /* ag = r11 times dst_scale (r10) */
        "mul r4, r4, r10 \n\t"                  /* rb = r4 times dst_scale (r10) */
        "and r11, r11, r12, lsl #8 \n\t"        /* ag masked by reverse mask (r12 << 8) */
        "and r4, r12, r4, lsr #8 \n\t"          /* rb >> 8 masked by mask (r12) */
        "orr r10, r11, r4 \n\t"                 /* r10 = (dst2, dst_scale) */

        "sub %[count], %[count], #2 \n\t"       /* decrease count by 2 */
        /* ---------------------- */
        "add r10, r6, r10 \n\t"                 /* second result = src plus dst both scaled */
        /* ---------------------- */
        "cmp %[count], #1 \n\t"                 /* compare count with 1 (flags used by bgt/blt below) */
        /* ----------------- */
        "stm %[dst]!, {r9, r10} \n\t"           /* store r9 and r10 to dst, dst += 2 pixels */
        /* ----------------- */

        "bgt 1b \n\t"                           /* if %[count] greater than 1 reloop */
        "blt 3f \n\t"                           /* if %[count] less than 1 exit */
                                                /* else handle the one remaining pixel */
        /* Single pixel tail (executed at most once; there is no back-branch) */
        "2: \n\t"                               /* <single loop> */
        "ldr r5, [%[src]], #4 \n\t"             /* r5 = *src++ */
        "ldr r7, [%[dst]] \n\t"                 /* r7 = *dst (dst not advanced yet) */

        "lsr r6, r5, #24 \n\t"                  /* src >> 24 */
        "and r8, r12, r5, lsr #8 \n\t"          /* ag = r8 = r5 masked by r12 lsr by #8 */
#ifdef SK_ARM_HAS_EDSP
        "smulbb r6, r6, %[alpha] \n\t"          /* r6 = SkMulS16 with src_scale */
#else
        "mul r6, r6, %[alpha] \n\t"             /* r6 = SkMulS16 with src_scale */
#endif
        "and r9, r12, r5 \n\t"                  /* rb = r9 = r5 masked by r12 */
        "lsr r6, r6, #8 \n\t"                   /* r6 >> 8 */
        "mul r8, r8, %[alpha] \n\t"             /* ag = r8 times src_scale */
        "rsb r6, r6, #256 \n\t"                 /* dst_scale = r6 = 255 - r6 + 1 */

        /* src, src_scale */
        "mul r9, r9, %[alpha] \n\t"             /* rb = r9 times src_scale */
        "and r8, r8, r12, lsl #8 \n\t"          /* ag masked by reverse mask (r12 << 8) */
        "and r9, r12, r9, lsr #8 \n\t"          /* rb >> 8 masked by mask (r12) */
        "orr r10, r8, r9 \n\t"                  /* r10 = (src, src_scale) */

        /* dst, dst_scale */
        "and r8, r12, r7, lsr #8 \n\t"          /* ag = r8 = r7 masked by r12 lsr by #8 */
        "and r9, r12, r7 \n\t"                  /* rb = r9 = r7 masked by r12 */
        "mul r8, r8, r6 \n\t"                   /* ag = r8 times dst_scale (r6) */
        "mul r9, r9, r6 \n\t"                   /* rb = r9 times dst_scale (r6) */
        "and r8, r8, r12, lsl #8 \n\t"          /* ag masked by reverse mask (r12 << 8) */
        "and r9, r12, r9, lsr #8 \n\t"          /* rb >> 8 masked by mask (r12) */
        "orr r7, r8, r9 \n\t"                   /* r7 = (dst, dst_scale) */

        "add r10, r7, r10 \n\t"                 /* result = src plus dst both scaled */

        /* ----------------- */
        "str r10, [%[dst]], #4 \n\t"            /* *dst = r10, postincrement dst by one (times 4) */
        /* ----------------- */

        "3: \n\t"                               /* <exit> */
        : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count), [alpha] "+r" (alpha)
        :
        : "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "memory"
    );

}

///////////////////////////////////////////////////////////////////////////////

#if USE_ARM_CODE
// Table of 16-bit (565) row procs; a NULL entry means this file provides no
// ARM routine for that slot. The trailing comment on each entry names the slot.
// NOTE(review): slot order presumably mirrors the SkBlitRow flag bits used as
// the index in PlatformProcs565() - confirm against SkBlitRow.h.
const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm[] = {
    // no dither
    // NOTE: For the functions below, we don't have a special version
    //       that assumes that each source pixel is opaque. But our S32A is
    //       still faster than the default, so use it.
    S32A_D565_Opaque,   // S32_D565_Opaque
    NULL,               // S32_D565_Blend
    S32A_D565_Opaque,   // S32A_D565_Opaque
    NULL,               // S32A_D565_Blend

    // dither
    NULL,   // S32_D565_Opaque_Dither
    NULL,   // S32_D565_Blend_Dither
    NULL,   // S32A_D565_Opaque_Dither
    NULL,   // S32A_D565_Blend_Dither
};

// Table of 4444 row procs; every slot is NULL (no ARM routines in this file).
const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm[] = {
    // no dither
    NULL,   // S32_D4444_Opaque,
    NULL,   // S32_D4444_Blend,
    NULL,   // S32A_D4444_Opaque,
    NULL,   // S32A_D4444_Blend,

    // dither
    NULL,   // S32_D4444_Opaque_Dither,
    NULL,   // S32_D4444_Blend_Dither,
    NULL,   // S32A_D4444_Opaque_Dither,
    NULL,   // S32A_D4444_Blend_Dither
};

// Table of 32-bit row procs; only the two S32A slots have ARM routines.
const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm[] = {
    NULL,   // S32_Opaque,
    NULL,   // S32_Blend,
    S32A_Opaque_BlitRow32_arm,   // S32A_Opaque,
    S32A_Blend_BlitRow32_arm     // S32A_Blend
};
#endif

// The three accessors below return entry `flags` of the table selected by
// SK_ARM_NEON_WRAP. No bounds check is performed on `flags` here.
// NOTE(review): SK_ARM_NEON_WRAP is defined outside this file; presumably it
// substitutes or dispatches to a NEON table when NEON is available - confirm.
SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
    return SK_ARM_NEON_WRAP(sk_blitrow_platform_4444_procs_arm)[flags];
}

SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    return SK_ARM_NEON_WRAP(sk_blitrow_platform_565_procs_arm)[flags];
}

SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    return SK_ARM_NEON_WRAP(sk_blitrow_platform_32_procs_arm)[flags];
}

///////////////////////////////////////////////////////////////////////////////
// There is no ARM (non-NEON) Color32 routine in this file, so the _arm name is
// defined as NULL before being passed through SK_ARM_NEON_WRAP.
#define Color32_arm NULL
SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    return SK_ARM_NEON_WRAP(Color32_arm);
}

// The remaining SkBlitMask hooks always return NULL and ignore their arguments.
// NOTE(review): presumably NULL tells the caller to use its portable
// implementation - confirm against SkBlitMask's factory code.
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
                                                     SkMask::Format maskFormat,
                                                     SkColor color) {
    return NULL;
}

SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    return NULL;
}

SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
                                                 SkMask::Format maskFormat,
                                                 RowFlags flags) {
    return NULL;
}