Home | History | Annotate | Download | only in source

Lines Matching defs:xmm0

136     movq       xmm0, qword ptr [eax]
138 punpcklbw xmm0, xmm0
139 movdqa xmm1, xmm0
140 punpcklwd xmm0, xmm0
142 por xmm0, xmm5
144 movdqa [edx], xmm0
164 movdqa xmm0, [eax]
165 pshufb xmm0, xmm5
167 movdqa [eax + edx], xmm0
185 movdqa xmm0, [eax]
186 pshufb xmm0, xmm5
188 movdqa [eax + edx], xmm0
206 movdqa xmm0, [eax]
207 pshufb xmm0, xmm5
209 movdqa [eax + edx], xmm0
227 movdqa xmm0, [eax]
228 pshufb xmm0, xmm5
230 movdqa [eax + edx], xmm0
249 movdqu xmm0, [eax]
257 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
258 pshufb xmm0, xmm4
260 por xmm0, xmm5
262 movdqa [edx], xmm0
289 movdqu xmm0, [eax]
297 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
298 pshufb xmm0, xmm4
300 por xmm0, xmm5
302 movdqa [edx], xmm0
349 movdqu xmm0, [eax] // fetch 8 pixels of bgr565
350 movdqa xmm1, xmm0
351 movdqa xmm2, xmm0
358 pand xmm0, xmm4 // G in middle 6 bits
359 pmulhuw xmm0, xmm6 // << 5 * (256 + 4)
360 por xmm0, xmm7 // AG
362 punpcklbw xmm1, xmm0
363 punpckhbw xmm2, xmm0
399 movdqu xmm0, [eax] // fetch 8 pixels of 1555
400 movdqa xmm1, xmm0
401 movdqa xmm2, xmm0
409 movdqa xmm2, xmm0
410 pand xmm0, xmm4 // G in middle 5 bits
412 pmulhuw xmm0, xmm6 // << 6 * (256 + 8)
414 por xmm0, xmm2 // AG
416 punpcklbw xmm1, xmm0
417 punpckhbw xmm2, xmm0
445 movdqu xmm0, [eax] // fetch 8 pixels of bgra4444
446 movdqa xmm2, xmm0
447 pand xmm0, xmm4 // mask low nibbles
449 movdqa xmm1, xmm0
453 por xmm0, xmm1
455 movdqa xmm1, xmm0
456 punpcklbw xmm0, xmm2
458 movdqa [eax * 2 + edx], xmm0 // store 4 pixels of ARGB
477 movdqa xmm0, [eax] // fetch 16 pixels of argb
482 pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
490 por xmm0, xmm4 // 4 bytes from 1 for 0
492 movdqa [edx], xmm0 // store 0
516 movdqa xmm0, [eax] // fetch 16 pixels of argb
521 pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
529 por xmm0, xmm4 // 4 bytes from 1 for 0
531 movdqa [edx], xmm0 // store 0
561 movdqa xmm0, [eax] // fetch 4 pixels of argb
562 movdqa xmm1, xmm0 // B
563 movdqa xmm2, xmm0 // G
564 pslld xmm0, 8 // R
567 psrad xmm0, 16 // R
570 pand xmm0, xmm5 // R
572 por xmm0, xmm1 // BGR
573 packssdw xmm0, xmm0
575 movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
601 movdqa xmm0, [eax] // fetch 4 pixels of argb
602 movdqa xmm1, xmm0 // B
603 movdqa xmm2, xmm0 // G
604 movdqa xmm3, xmm0 // R
605 psrad xmm0, 16 // A
609 pand xmm0, xmm7 // A
613 por xmm0, xmm1 // BA
615 por xmm0, xmm2 // BGRA
616 packssdw xmm0, xmm0
618 movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
639 movdqa xmm0, [eax] // fetch 4 pixels of argb
640 movdqa xmm1, xmm0
641 pand xmm0, xmm3 // low nibble
643 psrl xmm0, 4
645 por xmm0, xmm1
646 packuswb xmm0, xmm0
648 movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444
668 movdqa xmm0, [eax]
672 pmaddubsw xmm0, xmm4
677 phaddw xmm0, xmm1
679 psrlw xmm0, 7
681 packuswb xmm0, xmm2
682 paddb xmm0, xmm5
684 movdqa [edx], xmm0
702 movdqu xmm0, [eax]
706 pmaddubsw xmm0, xmm4
711 phaddw xmm0, xmm1
713 psrlw xmm0, 7
715 packuswb xmm0, xmm2
716 paddb xmm0, xmm5
718 movdqu [edx], xmm0
736 movdqa xmm0, [eax]
740 pmaddubsw xmm0, xmm4
745 phaddw xmm0, xmm1
747 psrlw xmm0, 7
749 packuswb xmm0, xmm2
750 paddb xmm0, xmm5
752 movdqa [edx], xmm0
770 movdqu xmm0, [eax]
774 pmaddubsw xmm0, xmm4
779 phaddw xmm0, xmm1
781 psrlw xmm0, 7
783 packuswb xmm0, xmm2
784 paddb xmm0, xmm5
786 movdqu [edx], xmm0
804 movdqa xmm0, [eax]
808 pmaddubsw xmm0, xmm4
813 phaddw xmm0, xmm1
815 psrlw xmm0, 7
817 packuswb xmm0, xmm2
818 paddb xmm0, xmm5
820 movdqa [edx], xmm0
838 movdqu xmm0, [eax]
842 pmaddubsw xmm0, xmm4
847 phaddw xmm0, xmm1
849 psrlw xmm0, 7
851 packuswb xmm0, xmm2
852 paddb xmm0, xmm5
854 movdqu [edx], xmm0
872 movdqa xmm0, [eax]
876 pmaddubsw xmm0, xmm4
881 phaddw xmm0, xmm1
883 psrlw xmm0, 7
885 packuswb xmm0, xmm2
886 paddb xmm0, xmm5
888 movdqa [edx], xmm0
906 movdqu xmm0, [eax]
910 pmaddubsw xmm0, xmm4
915 phaddw xmm0, xmm1
917 psrlw xmm0, 7
919 packuswb xmm0, xmm2
920 paddb xmm0, xmm5
922 movdqu [edx], xmm0
948 movdqa xmm0, [eax]
952 pavgb xmm0, [eax + esi]
957 movdqa xmm4, xmm0
958 shufps xmm0, xmm1, 0x88
960 pavgb xmm0, xmm4
969 movdqa xmm1, xmm0
971 pmaddubsw xmm0, xmm7 // U
975 phaddw xmm0, xmm2
977 psraw xmm0, 8
979 packsswb xmm0, xmm1
980 paddb xmm0, xmm5 // -> unsigned
984 movlps qword ptr [edx], xmm0 // U
985 movhps qword ptr [edx + edi], xmm0 // V
1014 movdqu xmm0, [eax]
1019 pavgb xmm0, xmm4
1027 movdqa xmm4, xmm0
1028 shufps xmm0, xmm1, 0x88
1030 pavgb xmm0, xmm4
1039 movdqa xmm1, xmm0
1041 pmaddubsw xmm0, xmm7 // U
1045 phaddw xmm0, xmm2
1047 psraw xmm0, 8
1049 packsswb xmm0, xmm1
1050 paddb xmm0, xmm5 // -> unsigned
1054 movlps qword ptr [edx], xmm0 // U
1055 movhps qword ptr [edx + edi], xmm0 // V
1084 movdqa xmm0, [eax]
1088 pavgb xmm0, [eax + esi]
1093 movdqa xmm4, xmm0
1094 shufps xmm0, xmm1, 0x88
1096 pavgb xmm0, xmm4
1105 movdqa xmm1, xmm0
1107 pmaddubsw xmm0, xmm7 // U
1111 phaddw xmm0, xmm2
1113 psraw xmm0, 8
1115 packsswb xmm0, xmm1
1116 paddb xmm0, xmm5 // -> unsigned
1120 movlps qword ptr [edx], xmm0 // U
1121 movhps qword ptr [edx + edi], xmm0 // V
1150 movdqu xmm0, [eax]
1155 pavgb xmm0, xmm4
1163 movdqa xmm4, xmm0
1164 shufps xmm0, xmm1, 0x88
1166 pavgb xmm0, xmm4
1175 movdqa xmm1, xmm0
1177 pmaddubsw xmm0, xmm7 // U
1181 phaddw xmm0, xmm2
1183 psraw xmm0, 8
1185 packsswb xmm0, xmm1
1186 paddb xmm0, xmm5 // -> unsigned
1190 movlps qword ptr [edx], xmm0 // U
1191 movhps qword ptr [edx + edi], xmm0 // V
1220 movdqa xmm0, [eax]
1224 pavgb xmm0, [eax + esi]
1229 movdqa xmm4, xmm0
1230 shufps xmm0, xmm1, 0x88
1232 pavgb xmm0, xmm4
1241 movdqa xmm1, xmm0
1243 pmaddubsw xmm0, xmm7 // U
1247 phaddw xmm0, xmm2
1249 psraw xmm0, 8
1251 packsswb xmm0, xmm1
1252 paddb xmm0, xmm5 // -> unsigned
1256 movlps qword ptr [edx], xmm0 // U
1257 movhps qword ptr [edx + edi], xmm0 // V
1286 movdqu xmm0, [eax]
1291 pavgb xmm0, xmm4
1299 movdqa xmm4, xmm0
1300 shufps xmm0, xmm1, 0x88
1302 pavgb xmm0, xmm4
1311 movdqa xmm1, xmm0
1313 pmaddubsw xmm0, xmm7 // U
1317 phaddw xmm0, xmm2
1319 psraw xmm0, 8
1321 packsswb xmm0, xmm1
1322 paddb xmm0, xmm5 // -> unsigned
1326 movlps qword ptr [edx], xmm0 // U
1327 movhps qword ptr [edx + edi], xmm0 // V
1356 movdqa xmm0, [eax]
1360 pavgb xmm0, [eax + esi]
1365 movdqa xmm4, xmm0
1366 shufps xmm0, xmm1, 0x88
1368 pavgb xmm0, xmm4
1377 movdqa xmm1, xmm0
1379 pmaddubsw xmm0, xmm7 // U
1383 phaddw xmm0, xmm2
1385 psraw xmm0, 8
1387 packsswb xmm0, xmm1
1388 paddb xmm0, xmm5 // -> unsigned
1392 movlps qword ptr [edx], xmm0 // U
1393 movhps qword ptr [edx + edi], xmm0 // V
1422 movdqu xmm0, [eax]
1427 pavgb xmm0, xmm4
1435 movdqa xmm4, xmm0
1436 shufps xmm0, xmm1, 0x88
1438 pavgb xmm0, xmm4
1447 movdqa xmm1, xmm0
1449 pmaddubsw xmm0, xmm7 // U
1453 phaddw xmm0, xmm2
1455 psraw xmm0, 8
1457 packsswb xmm0, xmm1
1458 paddb xmm0, xmm5 // -> unsigned
1462 movlps qword ptr [edx], xmm0 // U
1463 movhps qword ptr [edx + edi], xmm0 // V
1526 __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
1529 __asm punpcklbw xmm0, xmm1 /* UV */ \
1534 __asm movd xmm0, [esi] /* U */ \
1537 __asm punpcklbw xmm0, xmm1 /* UV */ \
1538 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
1543 __asm movd xmm0, [esi] /* U */ \
1546 __asm punpcklbw xmm0, xmm1 /* UV */ \
1547 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
1548 __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \
1553 __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \
1555 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
1561 __asm movdqa xmm1, xmm0 \
1562 __asm movdqa xmm2, xmm0 \
1563 __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \
1566 __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
1575 __asm paddsw xmm0, xmm3 /* B += Y */ \
1578 __asm psraw xmm0, 6 \
1581 __asm packuswb xmm0, xmm0 /* B */ \
1589 __asm movdqa xmm1, xmm0 \
1590 __asm movdqa xmm2, xmm0 \
1591 __asm pmaddubsw xmm0, kVUToB /* scale B UV */ \
1594 __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
1603 __asm paddsw xmm0, xmm3 /* B += Y */ \
1606 __asm psraw xmm0, 6 \
1609 __asm packuswb xmm0, xmm0 /* B */ \
1640 punpcklbw xmm0, xmm1 // BG
1642 movdqa xmm1, xmm0
1643 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1645 movdqa [edx], xmm0
1683 punpcklbw xmm0, xmm1 // BG
1685 movdqa xmm1, xmm0
1686 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1688 movdqa [edx], xmm0
1727 punpcklbw xmm0, xmm1 // BG
1729 movdqa xmm1, xmm0
1730 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1732 movdqa [edx], xmm0
1766 punpcklbw xmm0, xmm1 // BG
1768 movdqa xmm1, xmm0
1769 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1771 movdqa [edx], xmm0
1804 punpcklbw xmm0, xmm1 // BG
1806 movdqa xmm1, xmm0
1807 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1809 movdqa [edx], xmm0
1846 punpcklbw xmm0, xmm1 // BG
1848 movdqa xmm1, xmm0
1849 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1851 movdqu [edx], xmm0
1889 punpcklbw xmm0, xmm1 // BG
1891 movdqa xmm1, xmm0
1892 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1894 movdqu [edx], xmm0
1933 punpcklbw xmm0, xmm1 // BG
1935 movdqa xmm1, xmm0
1936 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1938 movdqu [edx], xmm0
1973 punpcklbw xmm0, xmm1 // BG
1975 movdqa xmm1, xmm0
1976 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
1978 movdqu [edx], xmm0
2011 punpcklbw xmm0, xmm1 // BG
2013 movdqa xmm1, xmm0
2014 punpcklwd xmm0, xmm2 // BGRA first 4 pixels
2016 movdqu [edx], xmm0
2051 punpcklbw xmm1, xmm0 // GB
2053 movdqa xmm0, xmm5
2055 punpckhwd xmm0, xmm1 // BGRA next 4 pixels
2057 movdqa [edx + 16], xmm0
2092 punpcklbw xmm1, xmm0 // GB
2094 movdqa xmm0, xmm5
2096 punpckhwd xmm0, xmm1 // BGRA next 4 pixels
2098 movdqu [edx + 16], xmm0
2134 punpcklbw xmm0, xmm5 // BA
2136 punpcklwd xmm2, xmm0 // RGBA first 4 pixels
2137 punpckhwd xmm1, xmm0 // RGBA next 4 pixels
2175 punpcklbw xmm0, xmm5 // BA
2177 punpcklwd xmm2, xmm0 // RGBA first 4 pixels
2178 punpckhwd xmm1, xmm0 // RGBA next 4 pixels
2216 punpcklbw xmm5, xmm0 // AB
2217 movdqa xmm0, xmm5
2219 punpckhwd xmm0, xmm1 // RGBA next 4 pixels
2221 movdqa [edx + 16], xmm0
2257 punpcklbw xmm5, xmm0 // AB
2258 movdqa xmm0, xmm5
2260 punpckhwd xmm0, xmm1 // RGBA next 4 pixels
2262 movdqu [edx + 16], xmm0
2296 movq xmm0, qword ptr [eax]
2298 punpcklbw xmm0, xmm0 // Y.Y
2299 psubusw xmm0, xmm3
2300 pmulhuw xmm0, xmm2
2301 packuswb xmm0, xmm0 // G
2304 punpcklbw xmm0, xmm0 // GG
2305 movdqa xmm1, xmm0
2306 punpcklwd xmm0, xmm0 // BGRA first 4 pixels
2308 por xmm0, xmm4
2310 movdqa [edx], xmm0
2339 movdqa xmm0, [eax + ecx]
2340 pshufb xmm0, xmm5
2342 movdqa [edx], xmm0
2363 movdqu xmm0, [eax + ecx]
2364 movdqa xmm1, xmm0 // swap bytes
2365 psllw xmm0, 8
2367 por xmm0, xmm1
2368 pshuflw xmm0, xmm0, 0x1b // swap words
2369 pshufhw xmm0, xmm0, 0x1b
2370 pshufd xmm0, xmm0, 0x4e // swap qwords
2372 movdqu [edx], xmm0
2401 movdqa xmm0, [eax]
2403 pshufb xmm0, xmm1
2405 movlpd qword ptr [edx], xmm0
2406 movhpd qword ptr [edx + edi], xmm0
2434 movdqa xmm0, [eax + ecx * 4]
2435 pshufb xmm0, xmm5
2437 movdqa [edx], xmm0
2460 movdqa xmm0, [eax]
2463 movdqa xmm2, xmm0
2465 pand xmm0, xmm5 // even bytes
2467 packuswb xmm0, xmm1
2471 movdqa [edx], xmm0
2495 movdqa xmm0, [eax]
2497 movdqa [eax + edx], xmm0
2586 movdqa xmm0, [eax]
2589 pand xmm0, xmm5 // even bytes are Y
2591 packuswb xmm0, xmm1
2593 movdqa [edx], xmm0
2617 movdqa xmm0, [eax]
2622 pavgb xmm0, xmm2
2624 psrlw xmm0, 8 // YUYV -> UVUV
2626 packuswb xmm0, xmm1
2627 movdqa xmm1, xmm0
2628 pand xmm0, xmm5 // U
2629 packuswb xmm0, xmm0
2632 movq qword ptr [edx], xmm0
2659 movdqa xmm0, [eax]
2662 psrlw xmm0, 8 // YUYV -> UVUV
2664 packuswb xmm0, xmm1
2665 movdqa xmm1, xmm0
2666 pand xmm0, xmm5 // U
2667 packuswb xmm0, xmm0
2670 movq qword ptr [edx], xmm0
2693 movdqu xmm0, [eax]
2696 pand xmm0, xmm5 // even bytes are Y
2698 packuswb xmm0, xmm1
2700 movdqu [edx], xmm0
2724 movdqu xmm0, [eax]
2729 pavgb xmm0, xmm2
2731 psrlw xmm0, 8 // YUYV -> UVUV
2733 packuswb xmm0, xmm1
2734 movdqa xmm1, xmm0
2735 pand xmm0, xmm5 // U
2736 packuswb xmm0, xmm0
2739 movq qword ptr [edx], xmm0
2766 movdqu xmm0, [eax]
2769 psrlw xmm0, 8 // YUYV -> UVUV
2771 packuswb xmm0, xmm1
2772 movdqa xmm1, xmm0
2773 pand xmm0, xmm5 // U
2774 packuswb xmm0, xmm0
2777 movq qword ptr [edx], xmm0
2798 movdqa xmm0, [eax]
2801 psrlw xmm0, 8 // odd bytes are Y
2803 packuswb xmm0, xmm1
2805 movdqa [edx], xmm0
2829 movdqa xmm0, [eax]
2834 pavgb xmm0, xmm2
2836 pand xmm0, xmm5 // UYVY -> UVUV
2838 packuswb xmm0, xmm1
2839 movdqa xmm1, xmm0
2840 pand xmm0, xmm5 // U
2841 packuswb xmm0, xmm0
2844 movq qword ptr [edx], xmm0
2871 movdqa xmm0, [eax]
2874 pand xmm0, xmm5 // UYVY -> UVUV
2876 packuswb xmm0, xmm1
2877 movdqa xmm1, xmm0
2878 pand xmm0, xmm5 // U
2879 packuswb xmm0, xmm0
2882 movq qword ptr [edx], xmm0
2903 movdqu xmm0, [eax]
2906 psrlw xmm0, 8 // odd bytes are Y
2908 packuswb xmm0, xmm1
2910 movdqu [edx], xmm0
2934 movdqu xmm0, [eax]
2939 pavgb xmm0, xmm2
2941 pand xmm0, xmm5 // UYVY -> UVUV
2943 packuswb xmm0, xmm1
2944 movdqa xmm1, xmm0
2945 pand xmm0, xmm5 // U
2946 packuswb xmm0, xmm0
2949 movq qword ptr [edx], xmm0
2976 movdqu xmm0, [eax]
2979 pand xmm0, xmm5 // UYVY -> UVUV
2981 packuswb xmm0, xmm1
2982 movdqa xmm1, xmm0
2983 pand xmm0, xmm5 // U
2984 packuswb xmm0, xmm0
2987 movq qword ptr [edx], xmm0
3029 movdqa xmm0, xmm3 // src argb
3041 por xmm0, xmm4 // set alpha to 255
3044 paddusb xmm0, xmm2 // + src argb
3046 paddusb xmm0, xmm1 // + src argb
3048 movd [edx], xmm0
3060 movdqa xmm0, xmm3 // src argb
3072 por xmm0, xmm4 // set alpha to 255
3075 paddusb xmm0, xmm2 // + src argb
3077 paddusb xmm0, xmm1 // + src argb
3079 movdqa [edx], xmm0
3091 movdqa xmm0, xmm3 // src argb
3103 por xmm0, xmm4 // set alpha to 255
3106 paddusb xmm0, xmm2 // + src argb
3108 paddusb xmm0, xmm1 // + src argb
3110 movd [edx], xmm0
3163 movdqa xmm0, xmm3 // src argb
3173 por xmm0, xmm4 // set alpha to 255
3176 paddusb xmm0, xmm2 // + src argb
3178 paddusb xmm0, xmm1 // + src argb
3180 movd [edx], xmm0
3197 movdqa xmm0, xmm3 // src argb
3207 por xmm0, xmm4 // set alpha to 255
3210 paddusb xmm0, xmm2 // + src argb
3212 paddusb xmm0, xmm1 // + src argb
3214 movdqa [edx], xmm0
3223 movdqa xmm0, xmm3 // src argb
3233 por xmm0, xmm4 // set alpha to 255
3236 paddusb xmm0, xmm2 // + src argb
3238 paddusb xmm0, xmm1 // + src argb
3240 movdqa [edx], xmm0
3252 movdqa xmm0, xmm3 // src argb
3262 por xmm0, xmm4 // set alpha to 255
3265 paddusb xmm0, xmm2 // + src argb
3267 paddusb xmm0, xmm1 // + src argb
3269 movd [edx], xmm0
3297 movdqa xmm0, [eax] // read 4 pixels
3298 punpcklbw xmm0, xmm0 // first 2
3299 pshufhw xmm2, xmm0,0FFh // 8 alpha words
3301 pmulhuw xmm0, xmm2 // rgb * a
3308 psrlw xmm0, 8
3311 packuswb xmm0, xmm1
3312 pand xmm0, xmm5 // keep original alphas
3313 por xmm0, xmm2
3315 movdqa [eax + edx], xmm0
3347 movdqa xmm0, [eax] // read 4 pixels
3348 pshufb xmm0, xmm4 // isolate first 2 alphas
3351 pmulhuw xmm0, xmm1 // rgb * a
3359 psrlw xmm0, 8
3361 packuswb xmm0, xmm1
3362 por xmm0, xmm2 // copy original alpha
3364 movdqa [eax + edx], xmm0
3391 movdqa xmm0, [eax] // read 4 pixels
3394 punpcklbw xmm0, xmm0 // first 2
3400 pmulhuw xmm0, xmm2 // rgb * a
3415 packuswb xmm0, xmm1
3416 por xmm0, xmm2
3418 movdqa [eax + edx], xmm0
3446 movdqa xmm0, [eax] // G
3448 pmaddubsw xmm0, xmm4
3450 phaddw xmm0, xmm1
3451 psrlw xmm0, 7
3452 packuswb xmm0, xmm0 // 8 G bytes
3459 movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA
3460 punpcklbw xmm0, xmm0 // 8 GG words
3462 movdqa xmm1, xmm0
3463 punpcklwd xmm0, xmm3 // GGGA first 4
3466 movdqa [eax + edx], xmm0
3504 movdqa xmm0, [eax] // B
3506 pmaddubsw xmm0, xmm2
3508 phaddw xmm0, xmm6
3509 psrlw xmm0, 7
3510 packuswb xmm0, xmm0 // 8 B values
3518 punpcklbw xmm0, xmm5 // 8 BG values
3533 movdqa xmm1, xmm0 // Weave BG, RA together
3534 punpcklwd xmm0, xmm5 // BGRA first 4
3537 movdqa [eax], xmm0
3567 movdqa xmm0, [eax] // B
3569 pmaddubsw xmm0, xmm2
3575 phaddsw xmm0, xmm6 // B
3577 psraw xmm0, 7 // B
3579 packuswb xmm0, xmm0 // 8 B values
3581 punpcklbw xmm0, xmm5 // 8 BG values
3595 movdqa xmm1, xmm0 // Weave BG, RA together
3597 punpcklwd xmm0, xmm5 // BGRA first 4
3600 movdqa [eax], xmm0
3681 movdqa xmm0, [eax] // read 4 pixels
3682 punpcklbw xmm0, xmm5 // first 2 pixels
3683 pmulhuw xmm0, xmm2 // pixel * scale >> 16
3687 pmullw xmm0, xmm3 // * interval_size
3691 paddw xmm0, xmm4 // + interval_size / 2
3693 packuswb xmm0, xmm1
3694 por xmm0, xmm7
3696 movdqa [eax], xmm0
3737 movdqa xmm0, [eax]
3743 psubd xmm0, [eax + edx * 4]
3750 psubd xmm0, [esi]
3756 paddd xmm0, [esi + edx * 4]
3762 cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area
3764 mulps xmm0, xmm4
3770 cvtps2dq xmm0, xmm0
3774 packssdw xmm0, xmm1
3776 packuswb xmm0, xmm2
3777 movdqu [edi], xmm0
3789 movdqa xmm0, [eax]
3790 psubd xmm0, [eax + edx * 4]
3792 psubd xmm0, [esi]
3793 paddd xmm0, [esi + edx * 4]
3795 cvtdq2ps xmm0, xmm0
3796 mulps xmm0, xmm4
3797 cvtps2dq xmm0, xmm0
3798 packssdw xmm0, xmm0
3799 packuswb xmm0, xmm0
3800 movd dword ptr [edi], xmm0
3820 pxor xmm0, xmm0
3845 paddd xmm0, xmm2
3847 paddd xmm2, xmm0
3849 paddd xmm0, xmm3
3851 paddd xmm3, xmm0
3853 paddd xmm0, xmm4
3855 paddd xmm4, xmm0
3857 paddd xmm0, xmm5
3859 paddd xmm5, xmm0
3881 paddd xmm0, xmm2
3883 paddd xmm2, xmm0
3911 movdqa xmm0, [eax] // read 4 pixels
3912 movdqa xmm1, xmm0
3913 punpcklbw xmm0, xmm0 // first 2
3915 pmulhuw xmm0, xmm2 // argb * value
3917 psrlw xmm0, 8
3919 packuswb xmm0, xmm1
3921 movdqa [eax + edx], xmm0
3955 movdqa xmm0, xmm2 // x0, y0, x1, y1
3956 addps xmm0, xmm7
3957 movlhps xmm2, xmm0
3967 cvttps2dq xmm0, xmm2 // x, y float to int first 2
3969 packssdw xmm0, xmm1 // x, y as 8 shorts
3970 pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride.
3971 movd esi, xmm0
3972 pshufd xmm0, xmm0, 0x39 // shift right
3973 movd edi, xmm0
3974 pshufd xmm0, xmm0, 0x39 // shift right
3980 movd esi, xmm0
3981 pshufd xmm0, xmm0, 0x39 // shift right
3982 movd edi, xmm0
3984 movd xmm0, [eax + edi] // read pixel 3
3985 punpckldq xmm6, xmm0 // combine pixel 2 and 3
3999 cvttps2dq xmm0, xmm2 // x, y float to int
4000 packssdw xmm0, xmm0 // x, y as shorts
4001 pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride
4003 movd esi, xmm0
4004 movd xmm0, [eax + esi] // copy a pixel
4006 movd [edx], xmm0
4036 movd xmm0, eax // high fraction 0..127
4040 punpcklbw xmm5, xmm0
4046 movdqa xmm0, [esi]
4048 movdqa xmm1, xmm0
4049 punpcklbw xmm0, xmm2
4051 pmaddubsw xmm0, xmm5
4053 psrlw xmm0, 7
4055 packuswb xmm0, xmm1
4057 movdqa [esi + edi], xmm0
4067 movdqa xmm0, [esi]
4069 movdqa [esi + edi], xmm0
4079 movdqa xmm0, [esi]
4080 pavgb xmm0, [esi + edx]
4082 movdqa [esi + edi], xmm0