Lines Matching full:mm0
176 movq mm0,mask0
180 pand mm0,mm7
184 pcmpeqb mm0,mm6
196 pand mm4,mm0
197 movq mm6,mm0
312 movq mm0,mask0
317 pand mm0,mm7
322 pcmpeqb mm0,mm6
336 pand mm4,mm0
337 movq mm6,mm0
454 movq mm0,mask0
456 pand mm0,mm7 //nonzero if keep byte
457 pcmpeqb mm0,mm6 //zeros->1s, vice versa
467 pand mm4,mm0
468 movq mm6,mm0
724 movq mm0,mask0
727 pand mm0,mm7
730 pcmpeqb mm0,mm6
741 pand mm4,mm0
742 movq mm6,mm0
845 movq mm0,mask0
852 pand mm0,mm7
859 pcmpeqb mm0,mm6
875 pand mm7,mm0
876 movq mm6,mm0
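
The matches at lines 176-876 appear to come from the mask-merge loops: pand against a per-pass byte mask and pcmpeqb against zero build an all-ones/all-zeros byte mask ("nonzero if keep byte") that gates which source bytes survive the merge. A minimal C sketch of that branchless select, with an illustrative helper name that is not from the source:

    #include <stdint.h>

    /* Hypothetical helper: take src[i] wherever keep[i] is nonzero, else keep
     * dst[i], the way the pcmpeqb-built byte mask gates bytes without branches. */
    static void mask_merge(uint8_t *dst, const uint8_t *src,
                           const uint8_t *keep, int n)
    {
        for (int i = 0; i < n; i++) {
            uint8_t m = keep[i] ? 0xFF : 0x00;            /* pcmpeqb-style mask */
            dst[i] = (uint8_t)((src[i] & m) | (dst[i] & (uint8_t)~m));
        }
    }
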
1250 movq mm0, [esi] ; X X v2 v1 v0 v5 v4 v3
1251 movq mm7, mm0 ; X X v2 v1 v0 v5 v4 v3
1252 movq mm6, mm0 ; X X v2 v1 v0 v5 v4 v3
1253 psllq mm0, 24 ; v1 v0 v5 v4 v3 0 0 0
1256 por mm0, mm7 ; v1 v0 v5 v4 v3 v5 v4 v3
1259 movq [edi], mm0 ; move quad to memory
1297 movd mm0, [esi] ; X X X X X v2 v1 v0
1298 pand mm0, const4 ; 0 0 0 0 0 v2 v1 v0
1299 movq mm1, mm0 ; 0 0 0 0 0 v2 v1 v0
1300 psllq mm0, 16 ; 0 0 0 v2 v1 v0 0 0
1301 movq mm2, mm0 ; 0 0 0 v2 v1 v0 0 0
1302 psllq mm0, 24 ; v2 v1 v0 0 0 0 0 0
1304 por mm0, mm2 ; v2 v1 v0 v2 v1 v0 0 0
1305 por mm0, mm1 ; v2 v1 v0 v2 v1 v0 v2 v1
1306 movq [edi+4], mm0 ; move to memory
1307 psrlq mm0, 16 ; 0 0 v2 v1 v0 v2 v1 v0
1308 movd [edi], mm0 ; move to memory
1325 movd mm0, [esi] ; X X X X X v2 v1 v0
1326 pand mm0, const4 ; 0 0 0 0 0 v2 v1 v0
1327 movq mm1, mm0 ; 0 0 0 0 0 v2 v1 v0
1328 psllq mm0, 16 ; 0 0 0 v2 v1 v0 0 0
1329 movq mm2, mm0 ; 0 0 0 v2 v1 v0 0 0
1330 psllq mm0, 24 ; v2 v1 v0 0 0 0 0 0
1332 por mm0, mm2 ; v2 v1 v0 v2 v1 v0 0 0
1333 por mm0, mm1 ; v2 v1 v0 v2 v1 v0 v2 v1
1334 movq mm3, mm0 ; v2 v1 v0 v2 v1 v0 v2 v1
1335 psllq mm0, 16 ; v0 v2 v1 v0 v2 v1 0 0
1337 punpckhdq mm3, mm0 ; v0 v2 v1 v0 v2 v1 v0 v2
1339 psrlq mm0, 32 ; 0 0 0 0 v0 v2 v1 v0
1341 punpckldq mm0, mm4 ; v1 v0 v2 v1 v0 v2 v1 v0
1343 movq [edi], mm0
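
Lines 1250-1343 appear to be the pixel-replication path for 3-byte pixels (interlaced-image expansion). Because 3 bytes is not a power of two, the pixel is spread across the quadword with shift/or sequences rather than unpacks. A plain C sketch of the effect, with an illustrative function name:

    #include <stdint.h>

    /* Replicate one 3-byte pixel into `reps` adjacent output pixels. */
    static void replicate_rgb(uint8_t *dst, const uint8_t *src, int reps)
    {
        for (int r = 0; r < reps; r++) {
            dst[3 * r + 0] = src[0];
            dst[3 * r + 1] = src[1];
            dst[3 * r + 2] = src[2];
        }
    }
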
1369 movq mm0, [esi] ; v0 v1 v2 v3 v4 v5 v6 v7
1370 movq mm1, mm0 ; v0 v1 v2 v3 v4 v5 v6 v7
1371 punpcklbw mm0, mm0 ; v4 v4 v5 v5 v6 v6 v7 v7
1372 //movq mm1, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1376 movq [edi], mm0 ; move to memory v4 v5 v6 and v7
1412 movd mm0, [esi] ; X X X X v0 v1 v2 v3
1413 punpcklbw mm0, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1414 movq mm1, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1415 punpcklwd mm0, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
1417 movq [edi], mm0 ; move to memory v2 and v3
1454 movd mm0, [esi] ; X X X X v0 v1 v2 v3
1455 movq mm1, mm0 ; X X X X v0 v1 v2 v3
1456 punpcklbw mm0, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1457 movq mm2, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1458 punpcklwd mm0, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
1459 movq mm3, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
1460 punpckldq mm0, mm0 ; v3 v3 v3 v3 v3 v3 v3 v3
1462 movq [edi], mm0 ; move to memory v3
1525 movd mm0, [esi] ; X X X X v1 v0 v3 v2
1526 punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1528 movq [edi], mm0
1565 movd mm0, [esi] ; X X X X v1 v0 v3 v2
1566 punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1567 movq mm1, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1568 punpckldq mm0, mm0 ; v3 v2 v3 v2 v3 v2 v3 v2
1570 movq [edi], mm0
1610 movd mm0, [esi] ; X X X X v1 v0 v3 v2
1611 punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1612 movq mm1, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1613 punpckldq mm0, mm0 ; v3 v2 v3 v2 v3 v2 v3 v2
1615 movq [edi], mm0
1616 movq [edi + 8], mm0
1660 movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
1661 movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
1662 punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
1664 movq [edi], mm0
1703 movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
1704 movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
1705 punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
1707 movq [edi], mm0
1708 movq [edi + 8], mm0
1748 movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
1749 movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
1750 punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
1752 movq [edi], mm0
1753 movq [edi + 8], mm0
1754 movq [edi + 16], mm0
1755 movq [edi + 24], mm0
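
Lines 1369-1755 appear to cover the 1-, 2- and 4-byte pixel cases, where replication is done by unpacking a register with itself: each punpcklbw / punpcklwd / punpckldq doubles every element, so chaining them yields 2x, 4x or 8x copies. A uint64_t model of the byte-doubling step, written for illustration and not taken from the source:

    #include <stdint.h>

    /* Model of punpcklbw mm0, mm0: interleave the low four bytes of x with
     * themselves, so byte i of the input lands at bytes 2i and 2i+1.      */
    static uint64_t punpcklbw_self(uint64_t x)
    {
        uint64_t r = 0;
        for (int i = 0; i < 4; i++) {
            uint64_t b = (x >> (8 * i)) & 0xFF;
            r |= b << (16 * i);          /* copy into byte 2i   */
            r |= b << (16 * i + 8);      /* copy into byte 2i+1 */
        }
        return r;
    }
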
2021 movq mm0, [edi + ebx] // Load mm0 with Avg(x)
2030 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2039 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2043 movq mm2, mm0 // mov updated Raws to mm2
2052 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2058 movq mm2, mm0 // mov updated Raws to mm2
2070 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2074 movq [edi + ebx - 8], mm0
2077 movq mm2, mm0 // mov updated Raw(x) to mm2
2108 movq mm0, [edi + ebx]
2116 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2125 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2128 movq mm2, mm0 // mov updated Raws to mm2
2138 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2142 movq [edi + ebx - 8], mm0
2144 movq mm2, mm0 // mov updated Raws to mm2
2167 movq mm0, [edi + ebx]
2176 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2185 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2188 movq mm2, mm0 // mov updated Raws to mm2
2197 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2201 movq mm2, mm0 // mov updated Raws to mm2
2212 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2216 movq mm2, mm0 // mov updated Raws to mm2
2228 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2232 movq [edi + ebx - 8], mm0
2234 movq mm2, mm0 // mov updated Raws to mm2
2284 movq mm0, [edi + ebx]
2294 paddb mm0, mm3 // add LBCarrys to Avg for each byte
2296 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2297 paddb mm0, mm2 // add (Raw/2) to Avg for each byte
2299 movq [edi + ebx - 8], mm0
2300 movq mm2, mm0 // reuse as Raw(x-bpp)
2317 movq mm0, [edi + ebx]
2327 paddb mm0, mm3 // add LBCarrys to Avg for each byte
2329 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2331 paddb mm0, mm2 // add (Raw/2) to Avg for each byte
2333 movq [edi + ebx - 8], mm0
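
Lines 2021-2333 are the Avg defilter: Raw(x) = Avg(x) + (Raw(x-bpp) + Prior(x))/2, mod 256. Since MMX has no widening byte add, the code adds Prev_row/2 and Raw/2 as separate paddb steps and patches in the carry out of the two low bits ("LBCarrys"). A scalar sketch of the same reconstruction:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar Avg reconstruction (mod 256); `row` holds filtered bytes on
     * entry and reconstructed bytes on exit.                             */
    static void defilter_avg(uint8_t *row, const uint8_t *prior,
                             size_t rowbytes, size_t bpp)
    {
        for (size_t i = 0; i < rowbytes; i++) {
            unsigned left = (i >= bpp) ? row[i - bpp] : 0;
            row[i] = (uint8_t)(row[i] + ((left + prior[i]) >> 1));
        }
    }
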
2501 pxor mm0, mm0
2507 punpcklbw mm1, mm0 // Unpack Low bytes of a
2509 punpcklbw mm2, mm0 // Unpack Low bytes of b
2513 punpcklbw mm3, mm0 // Unpack Low bytes of c
2525 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2527 pand mm0, mm4 // Only pav bytes < 0 in mm0
2529 psubw mm4, mm0
2530 pand mm7, mm5 // Only pbv bytes < 0 in mm7
2531 psubw mm4, mm0
2533 pxor mm0, mm0
2534 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2535 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2537 psubw mm6, mm0
2540 psubw mm6, mm0
2542 movq mm0, mm7
2545 // use mm0 mask copy to merge a & b
2546 pand mm2, mm0
2548 pandn mm0, mm1
2550 paddw mm0, mm2
2555 pandn mm7, mm0
2557 pxor mm0, mm0
2563 punpcklbw mm3, mm0 // Unpack Low bytes of c
2568 punpcklbw mm1, mm0 // Unpack Low bytes of a
2570 punpcklbw mm2, mm0 // Unpack Low bytes of b
2585 pcmpgtw mm0, mm5 // Create mask pbv bytes < 0
2587 pand mm0, mm5 // Only pbv bytes < 0 in mm0
2589 psubw mm5, mm0
2591 psubw mm5, mm0
2593 pxor mm0, mm0
2594 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2595 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2596 psubw mm6, mm0
2599 psubw mm6, mm0
2601 movq mm0, mm7
2604 // use mm0 mask copy to merge a & b
2605 pand mm2, mm0
2607 pandn mm0, mm1
2609 paddw mm0, mm2
2614 pandn mm7, mm0
2617 pxor mm0, mm0
2621 punpckhbw mm2, mm0 // Unpack High bytes of b
2629 punpckhbw mm3, mm0 // Unpack High bytes of c
2634 punpckhbw mm1, mm0 // Unpack High bytes of a
2641 pxor mm0, mm0
2647 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2649 pand mm0, mm4 // Only pav bytes < 0 in mm0
2650 pand mm7, mm5 // Only pbv bytes < 0 in mm7
2651 psubw mm4, mm0
2653 psubw mm4, mm0
2655 pxor mm0, mm0
2656 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2657 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2658 psubw mm6, mm0
2661 psubw mm6, mm0
2663 movq mm0, mm7
2664 // use mm0 mask copy to merge a & b
2665 pand mm2, mm0
2668 pandn mm0, mm1
2670 paddw mm0, mm2
2675 pandn mm7, mm0
2685 pxor mm0, mm0 // pxor does not affect flags
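
The pxor/pcmpgtw/pand/psubw groups above compute a branchless 16-bit absolute value for pav, pbv and pcv: comparing 0 > v gives an all-ones mask for negative words, ANDing keeps only the negative values, and subtracting that result twice turns v into -v. A one-word C sketch of the idiom:

    #include <stdint.h>

    /* Absolute value of one signed word, done the way the MMX code does it. */
    static int16_t abs_w(int16_t v)
    {
        int16_t mask = (int16_t)((0 > v) ? -1 : 0);   /* pcmpgtw against zero */
        int16_t keep = (int16_t)(v & mask);           /* pand: v when v < 0   */
        return (int16_t)(v - keep - keep);            /* psubw twice: v - 2v  */
    }
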
2709 pxor mm0, mm0
2715 punpcklbw mm1, mm0 // Unpack Low bytes of a
2717 punpcklbw mm2, mm0 // Unpack Low bytes of b
2722 punpcklbw mm3, mm0 // Unpack Low bytes of c
2733 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2735 pand mm0, mm4 // Only pav bytes < 0 in mm0
2737 psubw mm4, mm0
2738 pand mm7, mm5 // Only pbv bytes < 0 in mm7
2739 psubw mm4, mm0
2741 pxor mm0, mm0
2742 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2743 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2745 psubw mm6, mm0
2748 psubw mm6, mm0
2750 movq mm0, mm7
2753 // use mm0 mask copy to merge a & b
2754 pand mm2, mm0
2756 pandn mm0, mm1
2758 paddw mm0, mm2
2763 pandn mm7, mm0
2765 pxor mm0, mm0
2780 punpckhbw mm3, mm0 // Unpack High bytes of c
2783 punpckhbw mm2, mm0 // Unpack High bytes of b
2784 punpckhbw mm1, mm0 // Unpack High bytes of a
2797 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2799 pand mm0, mm4 // Only pav bytes < 0 in mm0
2801 psubw mm4, mm0
2802 pand mm7, mm5 // Only pbv bytes < 0 in mm7
2803 psubw mm4, mm0
2805 pxor mm0, mm0
2806 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2807 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2809 psubw mm6, mm0
2812 psubw mm6, mm0
2814 movq mm0, mm7
2817 // use mm0 mask copy to merge a & b
2818 pand mm2, mm0
2820 pandn mm0, mm1
2822 paddw mm0, mm2
2827 pandn mm7, mm0
2830 pxor mm0, mm0
2850 pxor mm0, mm0
2857 punpckhbw mm1, mm0 // Unpack High bytes of a
2859 punpcklbw mm2, mm0 // Unpack Low bytes of b
2862 punpckhbw mm3, mm0 // Unpack High bytes of c
2873 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2875 pand mm0, mm4 // Only pav bytes < 0 in mm0
2877 psubw mm4, mm0
2878 pand mm7, mm5 // Only pbv bytes < 0 in mm7
2879 psubw mm4, mm0
2881 pxor mm0, mm0
2882 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2883 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2885 psubw mm6, mm0
2888 psubw mm6, mm0
2890 movq mm0, mm7
2893 // use mm0 mask copy to merge a & b
2894 pand mm2, mm0
2896 pandn mm0, mm1
2898 paddw mm0, mm2
2903 pandn mm7, mm0
2905 pxor mm0, mm0
2911 punpcklbw mm3, mm0 // Unpack Low bytes of c
2915 punpckhbw mm2, mm0 // Unpack High bytes of b
2916 punpcklbw mm1, mm0 // Unpack Low bytes of a
2929 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2931 pand mm0, mm4 // Only pav bytes < 0 in mm0
2933 psubw mm4, mm0
2934 pand mm7, mm5 // Only pbv bytes < 0 in mm7
2935 psubw mm4, mm0
2937 pxor mm0, mm0
2938 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2939 pand mm0, mm6 // Only pcv bytes < 0 in mm0
2941 psubw mm6, mm0
2944 psubw mm6, mm0
2946 movq mm0, mm7
2949 // use mm0 mask copy to merge a & b
2950 pand mm2, mm0
2952 pandn mm0, mm1
2954 paddw mm0, mm2
2959 pandn mm7, mm0
2962 pxor mm0, mm0
2981 pxor mm0, mm0
2988 punpcklbw mm1, mm0 // Unpack Low bytes of a
2990 punpcklbw mm2, mm0 // Unpack Low bytes of b
2993 punpcklbw mm3, mm0 // Unpack Low bytes of c
3004 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
3006 pand mm0, mm4 // Only pav bytes < 0 in mm0
3008 psubw mm4, mm0
3009 pand mm7, mm5 // Only pbv bytes < 0 in mm7
3010 psubw mm4, mm0
3012 pxor mm0, mm0
3013 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
3014 pand mm0, mm6 // Only pcv bytes < 0 in mm0
3016 psubw mm6, mm0
3019 psubw mm6, mm0
3021 movq mm0, mm7
3024 // use mm0 mask copy to merge a & b
3025 pand mm2, mm0
3027 pandn mm0, mm1
3029 paddw mm0, mm2
3034 pandn mm7, mm0
3036 pxor mm0, mm0
3042 punpckhbw mm3, mm0 // Unpack High bytes of c
3047 punpckhbw mm2, mm0 // Unpack High bytes of b
3048 punpckhbw mm1, mm0 // Unpack High bytes of a
3061 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
3063 pand mm0, mm4 // Only pav bytes < 0 in mm0
3065 psubw mm4, mm0
3066 pand mm7, mm5 // Only pbv bytes < 0 in mm7
3067 psubw mm4, mm0
3069 pxor mm0, mm0
3070 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
3071 pand mm0, mm6 // Only pcv bytes < 0 in mm0
3073 psubw mm6, mm0
3076 psubw mm6, mm0
3078 movq mm0, mm7
3081 // use mm0 mask copy to merge a & b
3082 pand mm2, mm0
3084 pandn mm0, mm1
3086 paddw mm0, mm2
3091 pandn mm7, mm0
3094 pxor mm0, mm0
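
The pand/pandn/paddw merges throughout lines 2501-3094 implement the Paeth predictor's three-way choice without branches: a compare builds a mask, pand keeps one candidate, pandn keeps the other, and paddw on the disjoint lanes combines them. The scalar predictor they reproduce, shown as a sketch:

    #include <stdlib.h>
    #include <stdint.h>

    /* Paeth predictor: pick whichever of left (a), above (b), upper-left (c)
     * is closest to p = a + b - c, preferring a, then b.                    */
    static uint8_t paeth_predictor(uint8_t a, uint8_t b, uint8_t c)
    {
        int p  = a + b - c;
        int pa = abs(p - a);
        int pb = abs(p - b);
        int pc = abs(p - c);
        if (pa <= pb && pa <= pc) return a;
        if (pb <= pc)             return b;
        return c;
    }
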
3339 movq mm0, [edi+ebx]
3340 paddb mm0, mm1
3342 movq mm1, mm0 // mov updated Raws to mm1
3345 paddb mm0, mm1
3347 movq mm1, mm0 // mov updated Raws to mm1
3351 paddb mm0, mm1
3353 movq [edi+ebx-8], mm0 // Write updated Raws back to array
3355 movq mm1, mm0
3411 movq mm0, [edi+ebx]
3412 paddb mm0, mm1
3414 movq mm1, mm0 // mov updated Raws to mm1
3419 paddb mm0, mm1
3421 movq [edi+ebx-8], mm0
3422 movq mm1, mm0 // Prep for doing 1st add at top of loop
3452 movq mm0, [edi+ebx]
3453 paddb mm0, mm1
3455 movq mm1, mm0 // mov updated Raws to mm1
3458 paddb mm0, mm1
3460 movq mm1, mm0 // mov updated Raws to mm1
3463 paddb mm0, mm1
3465 movq mm1, mm0 // mov updated Raws to mm1
3469 paddb mm0, mm1
3471 movq [edi+ebx-8], mm0 // Write updated Raws back to array
3472 movq mm1, mm0 // Prep for doing 1st add at top of loop
3489 movq mm0, [edi+ebx] // Load Sub(x) for 1st 8 bytes
3490 paddb mm0, mm7
3492 movq [edi+ebx], mm0 // Write Raw(x) for 1st 8 bytes
3493 // Now mm0 will be used as Raw(x-bpp) for
3498 paddb mm1, mm0
3524 movq mm0, [edi+ebx]
3526 paddb mm0, mm7
3528 movq [edi+ebx-8], mm0 // use -8 to offset early add to ebx
3529 movq mm7, mm0 // Move calculated Raw(x) data to mm7 to
3545 movq mm0, [edi+ebx]
3548 paddb mm0, mm1
3550 movq [edi+ebx-8], mm0 // mov does not affect flags; -8 to offset
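
Lines 3339-3550 are the Sub defilter: each reconstructed byte adds the reconstructed byte bpp positions to its left, which is why the loops carry the previous 8 output bytes in a register (mm1 or mm7) between iterations. Scalar sketch:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar Sub reconstruction (mod 256). */
    static void defilter_sub(uint8_t *row, size_t rowbytes, size_t bpp)
    {
        for (size_t i = bpp; i < rowbytes; i++)
            row[i] = (uint8_t)(row[i] + row[i - bpp]);
    }
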
3614 movq mm0, [edi+ebx]
3616 paddb mm0, mm1
3618 movq [edi+ebx], mm0
3630 movq mm0, [edi+ebx+32]
3632 paddb mm0, mm1
3634 movq [edi+ebx+32], mm0
3664 // Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously
3667 movq mm0, [edi+ebx]
3669 paddb mm0, mm1
3671 movq [edi+ebx-8], mm0 // movq does not affect flags; -8 to offset add ebx
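
The remaining matches (lines 3614-3671) are the Up defilter, where every byte simply adds the byte directly above it; with no dependence on earlier output bytes, a single paddb handles 8 bytes per iteration. Scalar sketch:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar Up reconstruction (mod 256). */
    static void defilter_up(uint8_t *row, const uint8_t *prior, size_t rowbytes)
    {
        for (size_t i = 0; i < rowbytes; i++)
            row[i] = (uint8_t)(row[i] + prior[i]);
    }
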