Lines Matching refs:dst
31 static void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
42 "vld1.16 {q12}, [%[dst]] \n\t"
45 "mov %[keep_dst], %[dst] \n\t"
48 "add %[dst], %[dst], ip, LSL#1 \n\t"
54 "vld1.16 {q12}, [%[dst]]! \n\t"
57 "sub %[keep_dst], %[dst], #8*2 \n\t"
60 "pld [%[dst],#32] \n\t"
105 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src)
117 "mov %[keep_dst], %[dst] \n\t"
121 "vld1.16 {d25}, [%[dst]]! \n\t"
127 "vld1.32 {d24[1]}, [%[dst]]! \n\t"
133 "vld1.16 {d24[1]}, [%[dst]]! \n\t"
197 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src)
205 static void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst,
238 "vld1.u16 {d0, d1}, [%[dst]] \n\t" // load eight dst RGB565 pixels
288 "vst1.16 {d26, d27}, [%[dst]]! \n\t" // write pixel back to dst, update ptr
293 : [src] "+r" (src), [dst] "+r" (dst), [count] "+r" (count), [alpha] "+r" (alpha_for_asm)
303 uint16_t dc = *dst;
308 *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db));
310 dst += 1;
328 static void S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src,
357 // load 8 pixels from dst, extract rgb
358 "vld1.16 {d0, d1}, [%[dst]] \n\t" // load 8 pixels
366 // dst = {d16 (r), d17 (g), d18 (b)}
367 // subtract dst from src and widen
368 "vsubl.s8 q0, d22, d16 \n\t" // subtract red dst from src
369 "vsubl.s8 q1, d23, d17 \n\t" // subtract green dst from src
370 "vsubl.s8 q2, d24, d18 \n\t" // subtract blue dst from src
379 // add dst to result
380 "vaddl.s8 q0, d0, d16 \n\t" // add dst to red
381 "vaddl.s8 q1, d2, d17 \n\t" // add dst to green
382 "vaddl.s8 q2, d4, d18 \n\t" // add dst to blue
386 "vst1.16 {d4, d5}, [%[dst]]! \n\t" // store result
388 : [src] "+r" (src), [dst] "+r" (dst), [count] "+r" (count)
407 uint16_t d = *dst;
408 *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale),
435 static void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
460 /* get and hold the dst too */
461 dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
463 dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
525 vst1_u32(dst, vreinterpret_u32_u8(dst_final));
527 vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));
531 dst += UNROLL;
544 result = SkPMSrcOver(sc, *dst);
546 *dst = result;
549 *dst = SkPMSrcOver(*src, *dst);
552 dst += 1;
565 static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
585 "ldm %[dst], {r7,r8} \n\t" /* loading dst(s) into r7-r8 */
601 "add r7, r5, r7 \n\t" /* dst = src + calc dest(r7) */
617 "add r8, r6, r8 \n\t" /* dst = src + calc dest(r8) */
620 "stm %[dst]!, {r7,r8} \n\t" /* *dst = r7, increment dst by two (each times 4) */
629 "ldr r7, [%[dst]] \n\t" /* loading dst into r7 */
644 "add r7, r5, r7 \n\t" /* *dst = src + calc dest(r7) */
647 "str r7, [%[dst]], #4 \n\t" /* *dst = r7, increment dst by one (times 4) */
651 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
662 static void S32A_Blend_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
681 "ldm %[dst], {r7, r8} \n\t" /* loading two dst pixels into r7 and r8 */
714 "add r9, r5, r9 \n\t" /* *dst = src plus dst both scaled */
739 "add r10, r6, r10 \n\t" /* *dst = src plus dst both scaled */
743 "stm %[dst]!, {r9, r10} \n\t" /* *dst = r9, r10; increment dst by two (each times 4) */
752 "ldr r7, [%[dst]] \n\t" /* loading dst pixel into r7: r7=*dst */
768 /* dst, dst_scale */
775 "orr r7, r8, r9 \n\t" /* r7 = (dst, dst_scale) */
777 "add r10, r7, r10 \n\t" /* *dst = src plus dst both scaled */
780 "str r10, [%[dst]], #4 \n\t" /* *dst = r10, postincrement dst by one (times 4) */
784 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count), [alpha] "+r" (alpha)
796 static void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
806 * so we can work under that guise. We *do* know that the src&dst
814 * (as a pair of 64s) from each of src and dst, processing them
830 /* ditto with dst */
831 dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
834 /* combine add with dst multiply into mul-accumulate */
838 vst1_u32(dst, vreinterpret_u32_u8(dst_final));
841 dst += UNROLL;
844 /* RBE: well, i don't like how gcc manages src/dst across the above
845 * loop it's constantly calculating src+bias, dst+bias and it only
848 * the adjustments to src/dst/count, but it does...
854 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale);
859 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale);
861 dst += 1;
911 static void S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst,
946 uint16_t* my_dst = dst;
972 // now src and dst expanded are in g:11 r:10 x:1 b:10
1027 /* need to pick up 8 dst's -- at 16 bits each, 128 bits */
1028 dst8 = vld1q_u16(dst);
1068 vst1q_u16(dst, dst8);
1077 if (tmpbuf[i] != dst[i]) bad=1;
1084 i, ((tmpbuf[i] != dst[i])?"BAD":"got"),
1085 dst[i], tmpbuf[i], in_dst[i], src[i], td[i], tdv[i], tap[i], ta[i]);
1106 dst += UNROLL;
1137 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1139 // now src and dst expanded are in g:11 r:10 x:1 b:10
1140 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1142 dst += 1;
1165 static void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
1219 vst1q_u16(dst, dst8);
1230 if (val != dst[i]) {
1232 c, dither, val, dst[i], dstart[i]);
1239 dst += UNROLL;
1256 *dst++ = SkDitherRGB32To565(c, dither);