row_win.cc | 82 __m128i xmm0, xmm1, xmm2, xmm3; local 93 xmm2 = _mm_load_si128(&xmm0); 96 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR); 99 xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR); 106 xmm2 = _mm_adds_epi16(xmm2, xmm3); 109 xmm2 = _mm_srai_epi16(xmm2, 6) 136 __m128i xmm0, xmm1, xmm2, xmm3; local 384 movdqa xmm2, xmm3 local 385 palignr xmm2, xmm1, 8 \/\/ xmm2 = { xmm3[0:3] xmm1[8:15]} local 386 pshufb xmm2, xmm4 local 387 por xmm2, xmm5 local 424 movdqa xmm2, xmm3 local 425 palignr xmm2, xmm1, 8 \/\/ xmm2 = { xmm3[0:3] xmm1[8:15]} local 426 pshufb xmm2, xmm4 local 427 por xmm2, xmm5 local 482 movdqa xmm2, xmm0 local 484 psllw xmm2, 11 \/\/ B in upper 5 bits local 486 pmulhuw xmm2, xmm5 \/\/ * (256 + 8) local 492 movdqa xmm2, xmm1 local 494 punpckhbw xmm2, xmm0 local 532 movdqa xmm2, xmm0 local 534 psllw xmm2, 11 \/\/ B in upper 5 bits local 536 pmulhuw xmm2, xmm5 \/\/ * (256 + 8) local 540 movdqa xmm2, xmm0 local 542 psraw xmm2, 8 \/\/ A local 544 pand xmm2, xmm7 local 546 movdqa xmm2, xmm1 local 548 punpckhbw xmm2, xmm0 local 577 movdqa xmm2, xmm0 local 579 pand xmm2, xmm5 \/\/ mask high nibbles local 585 por xmm2, xmm3 local 610 movdqu xmm2, [eax + 32] local 615 pshufb xmm2, xmm6 local 625 psrldq xmm2, 8 \/\/ 4 bytes from 2 local 627 por xmm2, xmm3 \/\/ 12 bytes from 3 for 2 local 649 movdqu xmm2, [eax + 32] local 654 pshufb xmm2, xmm6 local 664 psrldq xmm2, 8 \/\/ 4 bytes from 2 local 666 por xmm2, xmm3 \/\/ 12 bytes from 3 for 2 local 694 movdqa xmm2, xmm0 \/\/ G local 697 psrld xmm2, 5 \/\/ G local 700 pand xmm2, xmm4 \/\/ G local 734 movdqa xmm2, xmm0 \/\/ G local 738 psrld xmm2, 6 \/\/ G local 742 pand xmm2, xmm5 \/\/ G local 745 por xmm2, xmm3 \/\/ GR local 801 movdqa xmm2, [eax + 32] local 805 pmaddubsw xmm2, xmm4 local 809 phaddw xmm2, xmm3 local 811 psrlw xmm2, 7 local 836 movdqa xmm2, [eax + 32] local 840 pmaddubsw xmm2, xmm4 local 844 phaddw xmm2, xmm3 local 846 paddw xmm2, xmm5 local 848 psrlw xmm2, 7 local 953 movdqu xmm2, [eax + 32] local 957 pmaddubsw xmm2, xmm4 local 961 phaddw xmm2, xmm3 local 963 psrlw xmm2, 7 local 987 movdqu xmm2, [eax + 32] local 991 pmaddubsw xmm2, xmm4 local 995 phaddw xmm2, xmm3 local 997 paddw xmm2, xmm5 local 999 psrlw xmm2, 7 local 1022 movdqa xmm2, [eax + 32] local 1026 pmaddubsw xmm2, xmm4 local 1030 phaddw xmm2, xmm3 local 1032 psrlw xmm2, 7 local 1056 movdqu xmm2, [eax + 32] local 1060 pmaddubsw xmm2, xmm4 local 1064 phaddw xmm2, xmm3 local 1066 psrlw xmm2, 7 local 1090 movdqa xmm2, [eax + 32] local 1094 pmaddubsw xmm2, xmm4 local 1098 phaddw xmm2, xmm3 local 1100 psrlw xmm2, 7 local 1124 movdqu xmm2, [eax + 32] local 1128 pmaddubsw xmm2, xmm4 local 1132 phaddw xmm2, xmm3 local 1134 psrlw xmm2, 7 local 1158 movdqa xmm2, [eax + 32] local 1162 pmaddubsw xmm2, xmm4 local 1166 phaddw xmm2, xmm3 local 1168 psrlw xmm2, 7 local 1192 movdqu xmm2, [eax + 32] local 1196 pmaddubsw xmm2, xmm4 local 1200 phaddw xmm2, xmm3 local 1202 psrlw xmm2, 7 local 1234 movdqa xmm2, [eax + 32] local 1238 pavgb xmm2, [eax + esi + 32] local 1246 shufps xmm2, xmm3, 0x88 local 1248 pavgb xmm2, xmm4 local 1256 pmaddubsw xmm2, xmm7 local 1300 movdqa xmm2, [eax + 32] local 1304 pavgb xmm2, [eax + esi + 32] local 1312 shufps xmm2, xmm3, 0x88 local 1314 pavgb xmm2, xmm4 local 1322 pmaddubsw xmm2, xmm7 local 1434 movdqu xmm2, [eax + 32] local 1441 pavgb xmm2, xmm4 local 1450 shufps xmm2, xmm3, 0x88 local 1452 pavgb xmm2, xmm4 local 1460 pmaddubsw xmm2, xmm7 local 1504 movdqu xmm2, [eax + 32] local 1511 pavgb xmm2, xmm4 local 1520 shufps xmm2, xmm3, 0x88 local 1522 pavgb xmm2, xmm4 local 1530 pmaddubsw xmm2, xmm7 local 1573 movdqa xmm2, [eax + 32] local 1577 pmaddubsw xmm2, xmm7 local 1580 phaddw xmm2, xmm3 local 1582 psraw xmm2, 8 local 1590 movdqa xmm2, [eax + 32] local 1594 pmaddubsw xmm2, xmm6 local 1597 phaddw xmm2, xmm3 local 1599 psraw xmm2, 8 local 1631 movdqu xmm2, [eax + 32] local 1635 pmaddubsw xmm2, xmm7 local 1638 phaddw xmm2, xmm3 local 1640 psraw xmm2, 8 local 1648 movdqu xmm2, [eax + 32] local 1652 pmaddubsw xmm2, xmm6 local 1655 phaddw xmm2, xmm3 local 1657 psraw xmm2, 8 local 1689 movdqa xmm2, [eax + 32] local 1697 shufps xmm2, xmm3, 0x88 local 1699 pavgb xmm2, xmm4 local 1707 pmaddubsw xmm2, xmm7 local 1748 movdqu xmm2, [eax + 32] local 1756 shufps xmm2, xmm3, 0x88 local 1758 pavgb xmm2, xmm4 local 1766 pmaddubsw xmm2, xmm7 local 1809 movdqa xmm2, [eax + 32] local 1813 pavgb xmm2, [eax + esi + 32] local 1821 shufps xmm2, xmm3, 0x88 local 1823 pavgb xmm2, xmm4 local 1831 pmaddubsw xmm2, xmm7 local 1875 movdqu xmm2, [eax + 32] local 1882 pavgb xmm2, xmm4 local 1891 shufps xmm2, xmm3, 0x88 local 1893 pavgb xmm2, xmm4 local 1901 pmaddubsw xmm2, xmm7 local 1945 movdqa xmm2, [eax + 32] local 1949 pavgb xmm2, [eax + esi + 32] local 1957 shufps xmm2, xmm3, 0x88 local 1959 pavgb xmm2, xmm4 local 1967 pmaddubsw xmm2, xmm7 local 2011 movdqu xmm2, [eax + 32] local 2018 pavgb xmm2, xmm4 local 2027 shufps xmm2, xmm3, 0x88 local 2029 pavgb xmm2, xmm4 local 2037 pmaddubsw xmm2, xmm7 local 2081 movdqa xmm2, [eax + 32] local 2085 pavgb xmm2, [eax + esi + 32] local 2093 shufps xmm2, xmm3, 0x88 local 2095 pavgb xmm2, xmm4 local 2103 pmaddubsw xmm2, xmm7 local 2147 movdqu xmm2, [eax + 32] local 2154 pavgb xmm2, xmm4 local 2163 shufps xmm2, xmm3, 0x88 local 2165 pavgb xmm2, xmm4 local 2173 pmaddubsw xmm2, xmm7 local 2423 punpcklbw xmm2, xmm5 \/\/ RA local 2467 punpcklbw xmm2, xmm2 \/\/ RR local 2514 punpcklbw xmm2, xmm2 \/\/ RR local 2566 punpcklbw xmm2, xmm2 \/\/ RR local 2573 movdqa xmm2, xmm0 \/\/ G local 2576 psrld xmm2, 5 \/\/ G local 2579 pand xmm2, xmm6 \/\/ G local 2584 movdqa xmm2, xmm1 \/\/ G local 2587 psrld xmm2, 5 \/\/ G local 2590 pand xmm2, xmm6 \/\/ G local 2633 punpcklbw xmm2, xmm5 \/\/ RA local 2678 punpcklbw xmm2, xmm5 \/\/ RA local 2718 punpcklbw xmm2, xmm5 \/\/ RA local 2756 punpcklbw xmm2, xmm5 \/\/ RA local 2798 punpcklbw xmm2, xmm5 \/\/ RA local 2841 punpcklbw xmm2, xmm5 \/\/ RA local 2886 punpcklbw xmm2, xmm5 \/\/ RA local 2926 punpcklbw xmm2, xmm5 \/\/ RA local 2964 punpcklbw xmm2, xmm5 \/\/ RA local 3085 punpcklbw xmm2, xmm1 \/\/ RG local 3088 punpcklwd xmm2, xmm0 \/\/ RGBA first 4 pixels local 3126 punpcklbw xmm2, xmm1 \/\/ RG local 3129 punpcklwd xmm2, xmm0 \/\/ RGBA first 4 pixels local 3240 movd xmm2, eax local 3241 pshufd xmm2, xmm2,0 local 3475 movdqa xmm2, xmm0 local 3480 psrlw xmm2, 8 \/\/ odd bytes local 3482 packuswb xmm2, xmm3 local 3512 movdqa xmm2, xmm0 local 3517 psrlw xmm2, 8 \/\/ odd bytes local 3519 packuswb xmm2, xmm3 local 3588 movdqa xmm2, xmm0 local 3590 punpckhbw xmm2, xmm1 \/\/ next 8 UV pairs local 3618 movdqa xmm2, xmm0 local 3620 punpckhbw xmm2, xmm1 \/\/ next 8 UV pairs local 3740 movdqa xmm2, [eax] local 3745 pand xmm2, xmm0 local 3749 por xmm2, xmm4 local 3807 movq xmm2, qword ptr [eax] \/\/ 8 Y's local 3809 punpcklbw xmm2, xmm2 local 3811 punpcklwd xmm2, xmm2 local 3814 pand xmm2, xmm0 local 3818 por xmm2, xmm4 local 4183 movdqa xmm2, [eax + esi] local 4290 movdqu xmm2, [eax + esi] local 4395 movdqa xmm2, [eax + esi] local 4500 movdqu xmm2, [eax + esi] local 4595 movd xmm2, [esi] \/\/ _r_b local 4599 pand xmm2, xmm6 \/\/ _r_b local 4601 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4607 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4626 movdqu xmm2, [esi] \/\/ _r_b local 4630 pand xmm2, xmm6 \/\/ _r_b local 4632 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4638 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4657 movd xmm2, [esi] \/\/ _r_b local 4661 pand xmm2, xmm6 \/\/ _r_b local 4663 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4669 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4729 movd xmm2, [esi] \/\/ _r_b local 4731 pand xmm2, xmm6 \/\/ _r_b local 4733 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4739 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4763 movdqa xmm2, [esi] \/\/ _r_b local 4765 pand xmm2, xmm6 \/\/ _r_b local 4767 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4773 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4789 movdqu xmm2, [esi] \/\/ _r_b local 4791 pand xmm2, xmm6 \/\/ _r_b local 4793 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4799 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4818 movd xmm2, [esi] \/\/ _r_b local 4820 pand xmm2, xmm6 \/\/ _r_b local 4822 pmullw xmm2, xmm3 \/\/ _r_b * alpha local 4828 psrlw xmm2, 8 \/\/ _r_b convert to 8 bits again local 4862 pshufhw xmm2, xmm0, 0FFh \/\/ 8 alpha words local 4863 pshuflw xmm2, xmm2, 0FFh local 4867 pshufhw xmm2, xmm1, 0FFh \/\/ 8 alpha words local 4868 pshuflw xmm2, xmm2, 0FFh local 4870 movdqa xmm2, [eax] \/\/ alphas local 4873 pand xmm2, xmm4 local 4917 movdqu xmm2, [eax] \/\/ read 4 pixels local 4918 punpckhbw xmm2, xmm2 \/\/ next 2 pixel rgbs local 4920 movdqu xmm2, [eax] \/\/ mask original alpha local 4922 pand xmm2, xmm3 local 5000 movd xmm2, dword ptr fixed_invtbl8[esi * 4] local 5002 pshuflw xmm2, xmm2, 040h \/\/ first 4 inv_alpha words. 1, a, a, a local 5004 movlhps xmm2, xmm3 local 5011 movd xmm2, dword ptr fixed_invtbl8[esi * 4] local 5013 pshuflw xmm2, xmm2, 040h \/\/ first 4 inv_alpha words local 5015 movlhps xmm2, xmm3 local 5099 vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] \/\/ [1,a2] local 5103 vpunpckldq xmm7, xmm2, xmm3 \/\/ [1,a3,1,a2] local 5109 vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] \/\/ [1,a6] local 5111 vpunpckldq xmm2, xmm2, xmm3 \/\/ [1,a7,1,a6] local 5162 movdqa xmm2, [eax] \/\/ A local 5165 psrld xmm2, 24 local 5167 packuswb xmm2, xmm3 local 5168 packuswb xmm2, xmm2 \/\/ 8 A bytes local 5208 movdqa xmm2, kARGBToSepiaB local 5269 pshufd xmm2, xmm5, 0x00 local 5329 movd xmm2, [esp + 8] \/* scale *\/ local 5333 pshuflw xmm2, xmm2, 040h local 5334 pshufd xmm2, xmm2, 044h local 5378 movd xmm2, [esp + 16] \/\/ value local 5379 punpcklbw xmm2, xmm2 local 5380 punpcklqdq xmm2, xmm2 local 5420 movdqu xmm2, [esi] \/\/ read 4 pixels from src_argb1 local 5425 punpcklbw xmm2, xmm5 \/\/ first 2 local 5650 movq xmm2, qword ptr [eax + esi + 2] \/\/ read 8 pixels from src_y1[2] local 5652 punpcklbw xmm2, xmm5 local 5654 movq xmm2, qword ptr [eax + edi] \/\/ read 8 pixels from src_y2[0] local 5656 punpcklbw xmm2, xmm5 local 5658 psubw xmm2, xmm3 local 5704 movq xmm2, qword ptr [eax + esi + 1] \/\/ read 8 pixels from src_y1[1] local 5706 punpcklbw xmm2, xmm5 local 5708 movq xmm2, qword ptr [eax + 2] \/\/ read 8 pixels from src_y0[2] local 5710 punpcklbw xmm2, xmm5 local 5712 psubw xmm2, xmm3 local 5756 movdqa xmm2, xmm0 \/\/ GG local 5757 punpcklbw xmm2, xmm0 \/\/ First 8 local 5761 punpckhwd xmm2, xmm2 \/\/ Next 4 local 5763 por xmm2, xmm5 local 5836 movdqa xmm2, xmm0 local 5837 paddusb xmm2, xmm1 \/\/ sobel = sobelx + sobely local 5912 movdqa xmm2, [eax + 32] local 5918 psubd xmm2, [eax + edx * 4 + 32] local 5925 psubd xmm2, [esi + 32] local 5931 paddd xmm2, [esi + edx * 4 + 32] local 5936 packssdw xmm2, xmm3 local 5939 pmulhuw xmm2, xmm5 local 5955 movdqa xmm2, [eax + 32] local 5961 psubd xmm2, [eax + edx * 4 + 32] local 5968 psubd xmm2, [esi + 32] local 5974 paddd xmm2, [esi + edx * 4 + 32] local 5982 cvtdq2ps xmm2, xmm2 local 5984 mulps xmm2, xmm4 local 5988 cvtps2dq xmm2, xmm2 local 5991 packssdw xmm2, xmm3 local 6046 movdqu xmm2, [eax] \/\/ 4 argb pixels 16 bytes. local 6050 punpcklbw xmm2, xmm1 local 6052 punpcklwd xmm2, xmm1 local 6061 movdqa xmm2, [esi] \/\/ previous row above. local 6062 paddd xmm2, xmm0 local 6093 movd xmm2, dword ptr [eax] \/\/ 1 argb pixel 4 bytes. local 6095 punpcklbw xmm2, xmm1 local 6096 punpcklwd xmm2, xmm1 local 6098 movdqu xmm2, [esi] local 6100 paddd xmm2, xmm0 local 6124 movq xmm2, qword ptr [ecx] \/\/ uv local 6138 movlhps xmm2, xmm0 local 6159 addps xmm2, xmm4 \/\/ x, y += dx, dy first 2 local 6183 addps xmm2, xmm7 \/\/ x, y += dx, dy local 6336 movdqa xmm2, [esi + edx] local 6442 movdqa xmm2, [esi + edx] \/\/ row1 local 6445 punpcklbw xmm2, xmm4 local 6449 psubw xmm2, xmm0 \/\/ row1 - row0 local 6451 paddw xmm2, xmm2 \/\/ 9 bits * 15 bits = 8.16 local 6453 pmulhw xmm2, xmm5 \/\/ scale diff local 6555 movdqu xmm2, [esi + edx] local 6660 movdqu xmm2, [esi + edx] \/\/ row1 local 6663 punpcklbw xmm2, xmm4 local 6667 psubw xmm2, xmm0 \/\/ row1 - row0 local 6669 paddw xmm2, xmm2 \/\/ 9 bits * 15 bits = 8.16 local 6671 pmulhw xmm2, xmm5 \/\/ scale diff local 7072 movq xmm2, qword ptr [esi] \/\/ U local 7075 punpcklbw xmm2, xmm3 \/\/ UV local 7110 movq xmm2, qword ptr [esi] \/\/ U local 7113 punpcklbw xmm2, xmm3 \/\/ UV local 7118 punpckhbw xmm2, xmm0 local 7163 movdqa xmm2, xmm1 local 7165 mulps xmm2, xmm1 \/\/ X * X local 7169 mulps xmm2, [esi + 32] \/\/ C2 * X * X local 7310 movd xmm2, dword ptr [esp + 8 + 16] \/\/ luma table local 7312 pshufd xmm2, xmm2, 0 local [all...] |