Home | History | Annotate | Download | only in source

Lines Matching defs:xmm2

57     xmm2 = _mm_loadu_si128(&xmm0);                                             \
60 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \
63 xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \
67 xmm2 = _mm_adds_epi16(xmm2, xmm4); \
70 xmm2 = _mm_srai_epi16(xmm2, 6); \
73 xmm2 = _mm_packus_epi16(xmm2, xmm2);
78 xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \
80 xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \
81 xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \
94 __m128i xmm0, xmm1, xmm2, xmm4;
114 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
366 movdqa xmm2, xmm3
367 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
368 pshufb xmm2, xmm4
369 por xmm2, xmm5
372 movdqu [edx + 32], xmm2
405 movdqa xmm2, xmm3
406 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
407 pshufb xmm2, xmm4
408 por xmm2, xmm5
411 movdqu [edx + 32], xmm2
441 movdqu xmm2, [eax + 8]
445 pshufb xmm2, xmm5
448 movq qword ptr [edx + 16], xmm2
490 movdqa xmm2, xmm0
492 psllw xmm2, 11 // B in upper 5 bits
494 pmulhuw xmm2, xmm5 // * (256 + 8)
496 por xmm1, xmm2 // RB
500 movdqa xmm2, xmm1
502 punpckhbw xmm2, xmm0
504 movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
686 movdqa xmm2, xmm0
688 psllw xmm2, 11 // B in upper 5 bits
690 pmulhuw xmm2, xmm5 // * (256 + 8)
693 por xmm1, xmm2 // RB
694 movdqa xmm2, xmm0
696 psraw xmm2, 8 // A
698 pand xmm2, xmm7
699 por xmm0, xmm2 // AG
700 movdqa xmm2, xmm1
702 punpckhbw xmm2, xmm0
704 movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
730 movdqa xmm2, xmm0
732 pand xmm2, xmm5 // mask high nibbles
734 movdqa xmm3, xmm2
738 por xmm2, xmm3
740 punpcklbw xmm0, xmm2
741 punpckhbw xmm1, xmm2
762 movdqu xmm2, [eax + 32]
767 pshufb xmm2, xmm6
772 movdqa xmm5, xmm2 // 8 bytes from 2 for 1
777 psrldq xmm2, 8 // 4 bytes from 2
779 por xmm2, xmm3 // 12 bytes from 3 for 2
781 movdqu [edx + 32], xmm2 // store 2
800 movdqu xmm2, [eax + 32]
805 pshufb xmm2, xmm6
810 movdqa xmm5, xmm2 // 8 bytes from 2 for 1
815 psrldq xmm2, 8 // 4 bytes from 2
817 por xmm2, xmm3 // 12 bytes from 3 for 2
819 movdqu [edx + 32], xmm2 // store 2
844 movdqa xmm2, xmm0 // G
847 psrld xmm2, 5 // G
850 pand xmm2, xmm4 // G
852 por xmm1, xmm2 // BG
889 movdqa xmm2, xmm0 // G
892 psrld xmm2, 5 // G
895 pand xmm2, xmm4 // G
897 por xmm1, xmm2 // BG
971 movdqa xmm2, xmm0 // G
975 psrld xmm2, 6 // G
979 pand xmm2, xmm5 // G
982 por xmm2, xmm3 // GR
983 por xmm0, xmm2 // BGRA
1144 movdqu xmm2, [eax + 32]
1148 pmaddubsw xmm2, xmm4
1152 phaddw xmm2, xmm3
1154 psrlw xmm2, 7
1155 packuswb xmm0, xmm2
1179 movdqu xmm2, [eax + 32]
1183 pmaddubsw xmm2, xmm4
1187 phaddw xmm2, xmm3
1189 paddw xmm2, xmm5
1191 psrlw xmm2, 7
1192 packuswb xmm0, xmm2
1298 movdqu xmm2, [eax + 32]
1302 pmaddubsw xmm2, xmm4
1306 phaddw xmm2, xmm3
1308 psrlw xmm2, 7
1309 packuswb xmm0, xmm2
1331 movdqu xmm2, [eax + 32]
1335 pmaddubsw xmm2, xmm4
1339 phaddw xmm2, xmm3
1341 psrlw xmm2, 7
1342 packuswb xmm0, xmm2
1364 movdqu xmm2, [eax + 32]
1368 pmaddubsw xmm2, xmm4
1372 phaddw xmm2, xmm3
1374 psrlw xmm2, 7
1375 packuswb xmm0, xmm2
1409 movdqu xmm2, [eax + 32]
1411 pavgb xmm2, xmm4
1421 movdqa xmm4, xmm2
1422 shufps xmm2, xmm3, 0x88
1424 pavgb xmm2, xmm4
1430 movdqa xmm3, xmm2
1432 pmaddubsw xmm2, xmm7
1435 phaddw xmm0, xmm2
1479 movdqu xmm2, [eax + 32]
1481 pavgb xmm2, xmm4
1491 movdqa xmm4, xmm2
1492 shufps xmm2, xmm3, 0x88
1494 pavgb xmm2, xmm4
1500 movdqa xmm3, xmm2
1502 pmaddubsw xmm2, xmm7
1505 phaddw xmm0, xmm2
1677 movdqu xmm2, [eax + 32]
1681 pmaddubsw xmm2, xmm7
1684 phaddw xmm2, xmm3
1686 psraw xmm2, 8
1687 packsswb xmm0, xmm2
1693 movdqu xmm2, [eax + 32]
1697 pmaddubsw xmm2, xmm6
1700 phaddw xmm2, xmm3
1702 psraw xmm2, 8
1703 packsswb xmm0, xmm2
1740 movdqu xmm2, [eax + 32]
1742 pavgb xmm2, xmm4
1752 movdqa xmm4, xmm2
1753 shufps xmm2, xmm3, 0x88
1755 pavgb xmm2, xmm4
1761 movdqa xmm3, xmm2
1763 pmaddubsw xmm2, xmm7
1766 phaddw xmm0, xmm2
1810 movdqu xmm2, [eax + 32]
1812 pavgb xmm2, xmm4
1822 movdqa xmm4, xmm2
1823 shufps xmm2, xmm3, 0x88
1825 pavgb xmm2, xmm4
1831 movdqa xmm3, xmm2
1833 pmaddubsw xmm2, xmm7
1836 phaddw xmm0, xmm2
1880 movdqu xmm2, [eax + 32]
1882 pavgb xmm2, xmm4
1892 movdqa xmm4, xmm2
1893 shufps xmm2, xmm3, 0x88
1895 pavgb xmm2, xmm4
1901 movdqa xmm3, xmm2
1903 pmaddubsw xmm2, xmm7
1906 phaddw xmm0, xmm2
2513 __asm movdqa xmm2, xmm0 \
2519 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \
2520 __asm psubw xmm1, xmm2 \
2521 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
2523 __asm psubw xmm2, xmm3 \
2527 __asm paddsw xmm2, xmm4 /* R += Y */ \
2530 __asm psraw xmm2, 6 \
2533 __asm packuswb xmm2, xmm2 /* R */ \
2539 __asm punpcklbw xmm2, xmm5 /* RA */ \
2541 __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \
2542 __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
2552 __asm punpcklbw xmm5, xmm2 /* AR */ \
2564 __asm punpcklbw xmm1, xmm2 /* GR */ \
2578 __asm punpcklbw xmm2, xmm2 /* RR */ \
2580 __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
2581 __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
2595 __asm punpcklbw xmm2, xmm2 /* RR */ \
2597 __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
2598 __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
2601 __asm movdqa xmm2, xmm0 /* G */ \
2604 __asm psrld xmm2, 5 /* G */ \
2607 __asm pand xmm2, xmm6 /* G */ \
2609 __asm por xmm3, xmm2 /* BG */ \
2612 __asm movdqa xmm2, xmm1 /* G */ \
2615 __asm psrld xmm2, 5 /* G */ \
2618 __asm pand xmm2, xmm6 /* G */ \
2620 __asm por xmm3, xmm2 /* BG */ \
3025 movd xmm2, eax
3026 pshufd xmm2, xmm2,0
3042 pmulhuw xmm0, xmm2
3073 vmovd xmm2, eax
3074 vbroadcastss ymm2, xmm2
3266 movdqa xmm2, xmm0
3271 psrlw xmm2, 8 // odd bytes
3273 packuswb xmm2, xmm3
3275 movdqu [edx + edi], xmm2
3342 movdqa xmm2, xmm0
3344 punpckhbw xmm2, xmm1 // next 8 UV pairs
3346 movdqu [edi + 16], xmm2
3483 movdqu xmm2, [eax]
3488 pand xmm2, xmm0
3492 por xmm2, xmm4
3494 movdqu [edx], xmm2
3575 movq xmm2, qword ptr [eax] // 8 Y's
3577 punpcklbw xmm2, xmm2
3578 punpckhwd xmm3, xmm2
3579 punpcklwd xmm2, xmm2
3582 pand xmm2, xmm0
3586 por xmm2, xmm4
3588 movdqu [edx], xmm2
3941 movdqu xmm2, [eax + esi]
3944 pavgb xmm0, xmm2
4043 movdqu xmm2, [eax + esi]
4046 pavgb xmm0, xmm2
4141 movq xmm2, qword ptr [edx + esi] // src1
4142 punpcklbw xmm1, xmm2
4256 movdqu xmm2, [esi] // _r_b
4258 pand xmm2, xmm6 // _r_b
4260 pmullw xmm2, xmm3 // _r_b * alpha
4266 psrlw xmm2, 8 // _r_b convert to 8 bits again
4267 paddusb xmm0, xmm2 // + src argb
4285 movd xmm2, [esi] // _r_b
4287 pand xmm2, xmm6 // _r_b
4289 pmullw xmm2, xmm3 // _r_b * alpha
4295 psrlw xmm2, 8 // _r_b convert to 8 bits again
4296 paddusb xmm0, xmm2 // + src argb
4339 movdqu xmm2, [eax] // read 4 pixels
4340 punpckhbw xmm2, xmm2 // next 2 pixel rgbs
4341 pmulhuw xmm1, xmm2 // rgb * a
4342 movdqu xmm2, [eax] // mask original alpha
4344 pand xmm2, xmm3
4348 por xmm0, xmm2 // copy original alpha
4418 movd xmm2, dword ptr [ebx + esi * 4]
4420 pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
4422 movlhps xmm2, xmm3
4423 xmm2 // rgb * a
4429 movd xmm2, dword ptr [ebx + esi * 4]
4431 pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
4433 movlhps xmm2, xmm3
4434 pmulhuw xmm1, xmm2 // rgb * a
4516 vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2]
4520 vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
4526 vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6]
4528 vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
4530 vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
4579 movdqu xmm2, [eax] // A
4582 psrld xmm2, 24
4584 packuswb xmm2, xmm3
4585 packuswb xmm2, xmm2 // 8 A bytes
4588 punpcklbw xmm3, xmm2 // 8 GA words
4625 movdqa xmm2, xmmword ptr kARGBToSepiaB
4632 pmaddubsw xmm0, xmm2
4633 pmaddubsw xmm6, xmm2
4685 pshufd xmm2, xmm5, 0x00
4694 pmaddubsw xmm0, xmm2
4695 pmaddubsw xmm7, xmm2
4743 movd xmm2, [esp + 8] /* scale */
4747 pshuflw xmm2, xmm2, 040h
4748 pshufd xmm2, xmm2, 044h
4760 pmulhuw xmm0, xmm2 // pixel * scale >> 16
4763 pmulhuw xmm1, xmm2
4790 movd xmm2, [esp + 16] // value
4791 punpcklbw xmm2, xmm2
4792 punpcklqdq xmm2, xmm2
4800 pmulhuw xmm0, xmm2 // argb * value
4801 pmulhuw xmm1, xmm2 // argb * value
4830 movdqu xmm2, [esi] // read 4 pixels from src_argb1
4832 movdqu xmm3, xmm2
4835 punpcklbw xmm2, xmm5 // first 2
4837 pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2
5054 movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
5056 punpcklbw xmm2, xmm5
5057 psubw xmm1, xmm2
5058 movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0]
5060 punpcklbw xmm2, xmm5
5062 psubw xmm2, xmm3
5063 paddw xmm0, xmm2
5107 movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1]
5109 punpcklbw xmm2, xmm5
5110 psubw xmm1, xmm2
5111 movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
5113 punpcklbw xmm2, xmm5
5115 psubw xmm2, xmm3
5116 paddw xmm0, xmm2
5158 movdqa xmm2, xmm0 // GG
5159 punpcklbw xmm2, xmm0 // First 8
5161 movdqa xmm1, xmm2 // GGGG
5162 punpcklwd xmm1, xmm2 // First 4
5163 punpckhwd xmm2, xmm2 // Next 4
5165 por xmm2, xmm5
5172 movdqu [edx + 16], xmm2
5236 movdqa xmm2, xmm0
5237 paddusb xmm2, xmm1 // sobel = sobelx + sobely
5242 punpcklbw xmm4, xmm2
5243 punpckhbw xmm1, xmm2
5311 movdqu xmm2, [eax + 32]
5317 psubd xmm2, [eax + edx * 4 + 32]
5324 psubd xmm2, [esi + 32]
5330 paddd xmm2, [esi + edx * 4 + 32]
5335 packssdw xmm2, xmm3
5338 pmulhuw xmm2, xmm5
5340 packuswb xmm0, xmm2
5353 movdqu xmm2, [eax + 32]
5359 psubd xmm2, [eax + edx * 4 + 32]
5366 psubd xmm2, [esi + 32]
5372 paddd xmm2, [esi + edx * 4 + 32]
5380 cvtdq2ps xmm2, xmm2
5382 mulps xmm2, xmm4
5386 cvtps2dq xmm2, xmm2
5389 packssdw xmm2, xmm3
5390 packuswb xmm0, xmm2
5442 movdqu xmm2, [eax] // 4 argb pixels 16 bytes.
5444 movdqa xmm4, xmm2
5446 punpcklbw xmm2, xmm1
5447 movdqa xmm3, xmm2
5448 punpcklwd xmm2, xmm1
5456 paddd xmm0, xmm2
5457 movdqu xmm2, [esi] // previous row above.
5458 paddd xmm2, xmm0
5473 movdqu [edx], xmm2
5488 movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
5490 punpcklbw xmm2, xmm1
5491 punpcklwd xmm2, xmm1
5492 paddd xmm0, xmm2
5493 movdqu xmm2, [esi]
5495 paddd xmm2, xmm0
5496 movdqu [edx], xmm2
5519 movq xmm2, qword ptr [ecx] // uv
5531 movdqa xmm0, xmm2 // x0, y0, x1, y1
5533 movlhps xmm2, xmm0
5536 movdqa xmm3, xmm2 // x2, y2, x3, y3
5542 cvttps2dq xmm0, xmm2 // x, y float to int first 2
5553 addps xmm2, xmm4 // x, y += dx, dy first 2
5573 cvttps2dq xmm0, xmm2 // x, y float to int
5576 addps xmm2, xmm7 // x, y += dx, dy
5702 movdqu xmm2, [esi + edx]
5704 punpcklbw xmm0, xmm2
5705 punpckhbw xmm1, xmm2
5708 movdqa xmm2, xmm5
5710 pmaddubsw xmm2, xmm0
5712 paddw xmm2, xmm4
5714 psrlw xmm2, 8
5716 packuswb xmm2, xmm3
5717 movdqu [esi + edi], xmm2
5942 movq xmm2, qword ptr [esi] // U
5945 punpcklbw xmm2, xmm3 // UV
5949 punpcklbw xmm0, xmm2 // YUYV
5950 punpckhbw xmm1, xmm2
5979 movq xmm2, qword ptr [esi] // U
5982 punpcklbw xmm2, xmm3 // UV
5984 movdqa xmm1, xmm2
5987 punpckhbw xmm2, xmm0
5989 movdqu [edi + 16], xmm2
6031 movdqa xmm2, xmm1
6033 mulps xmm2, xmm1 // X * X
6035 mulps xmm1, xmm2 // X * X * X
6037 mulps xmm2, [esi + 32] // C2 * X * X
6041 addps xmm0, xmm2 // result += C2 * X * X
6175 movd xmm2, dword ptr [esp + 8 + 16] // luma table
6177 pshufd xmm2, xmm2, 0
6190 paddd xmm0, xmm2 // add table base