;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;void vp8_short_inv_walsh4x4_1_mmx(short *input, short *output)
;
; Writes (input[0] + 3) >> 3 (arithmetic shift, 16-bit lanes) to each of the
; 16 words at output. Only input[0] contributes to the result.
; No emms is issued here, so MMX state is still live on return.
global sym(vp8_short_inv_walsh4x4_1_mmx)
sym(vp8_short_inv_walsh4x4_1_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ; input
    mov         rdi, arg(1)             ; output

    ; The add is register-wide, but only the low word survives the
    ; broadcast below, so the effective value is input[0] + 3.
    mov         rax, 3
    add         rax, [rsi]

    ; Replicate word 0 of eax into all four lanes of mm0.
    ; Lane pictures list the high word first.
    movd        mm0, eax                ;  0   0   x  val
    punpcklwd   mm0, mm0                ;  x   x  val val
    punpckldq   mm0, mm0                ; val val val val

    psraw       mm0, 3                  ; (input[0] + 3) >> 3 in every lane

    ; Four 8-byte stores cover the whole 4x4 block of shorts.
    movq        [rdi +  0], mm0
    movq        [rdi +  8], mm0
    movq        [rdi + 16], mm0
    movq        [rdi + 24], mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
;
; Full 4x4 transform on 16-bit values. Two identical butterfly passes are
; applied, each followed by a 4x4 word transpose:
;     a1 = x0 + x3        b1 = x1 + x2
;     d1 = x0 - x3        c1 = x1 - x2
;     y0 = a1 + b1        y1 = d1 + c1
;     y2 = a1 - b1        y3 = d1 - c1
; Pass 1 takes x0..x3 to be the four input rows; pass 2 takes them to be the
; four vectors left in registers by the first transpose. Every result is then
; rounded as (y + 3) >> 3 and the four rows are stored to output.
; All 32 input bytes are loaded before anything is written.
; No emms is issued here, so MMX state is still live on return.
;
; Lane pictures list the high word first; "rc" is row r, column c of the
; matrix entering the transpose.
global sym(vp8_short_inv_walsh4x4_mmx)
sym(vp8_short_inv_walsh4x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ; input
    mov         rdi, arg(1)             ; output

    ; mm7 = rounding bias, 3 in each word lane
    mov         rax, 0x00030003
    movq        mm7, rax
    punpcklwd   mm7, mm7                ; 0003000300030003h

    ; fetch the four rows
    movq        mm0, [rsi +  0]         ; ip[0..3]
    movq        mm1, [rsi +  8]         ; ip[4..7]
    movq        mm2, [rsi + 16]         ; ip[8..11]
    movq        mm3, [rsi + 24]         ; ip[12..15]

    ;---------------------------------------------------------------
    ; pass 1: butterfly across rows (lane-wise)
    ;---------------------------------------------------------------
    movq        mm4, mm0
    paddw       mm4, mm3                ; a1 = row0 + row3
    movq        mm5, mm1
    paddw       mm5, mm2                ; b1 = row1 + row2

    psubw       mm0, mm3                ; d1 = row0 - row3
    psubw       mm1, mm2                ; c1 = row1 - row2

    movq        mm6, mm4
    paddw       mm4, mm5                ; a1 + b1   -> row 0
    psubw       mm6, mm5                ; a1 - b1   -> row 2

    movq        mm5, mm0
    paddw       mm0, mm1                ; d1 + c1   -> row 1
    psubw       mm5, mm1                ; d1 - c1   -> row 3

    ;---------------------------------------------------------------
    ; transpose 1
    ;   mm4 = 03 02 01 00     mm0 = 13 12 11 10
    ;   mm6 = 23 22 21 20     mm5 = 33 32 31 30
    ;---------------------------------------------------------------
    movq        mm3, mm4                ; keep row 0 for the high half
    movq        mm1, mm6                ; keep row 2 for the high half

    punpcklwd   mm4, mm0                ; 11 01 10 00
    punpckhwd   mm3, mm0                ; 13 03 12 02
    punpcklwd   mm6, mm5                ; 31 21 30 20
    punpckhwd   mm1, mm5                ; 33 23 32 22

    movq        mm0, mm4
    punpckldq   mm0, mm6                ; 30 20 10 00   column 0
    punpckhdq   mm4, mm6                ; 31 21 11 01   column 1

    movq        mm2, mm3
    punpckldq   mm2, mm1                ; 32 22 12 02   column 2
    punpckhdq   mm3, mm1                ; 33 23 13 03   column 3

    ;---------------------------------------------------------------
    ; pass 2: same butterfly on the transposed data
    ;   x0 = mm0, x1 = mm4, x2 = mm2, x3 = mm3
    ;---------------------------------------------------------------
    movq        mm1, mm0
    paddw       mm1, mm3                ; a1 = x0 + x3
    movq        mm5, mm4
    paddw       mm5, mm2                ; b1 = x1 + x2

    psubw       mm0, mm3                ; d1 = x0 - x3
    psubw       mm4, mm2                ; c1 = x1 - x2

    movq        mm6, mm1
    paddw       mm1, mm5                ; a1 + b1
    psubw       mm6, mm5                ; a1 - b1

    movq        mm5, mm0
    paddw       mm0, mm4                ; d1 + c1
    psubw       mm5, mm4                ; d1 - c1

    ;---------------------------------------------------------------
    ; transpose 2
    ;   mm1 = 03 02 01 00     mm0 = 13 12 11 10
    ;   mm6 = 23 22 21 20     mm5 = 33 32 31 30
    ;---------------------------------------------------------------
    movq        mm3, mm1
    movq        mm4, mm6

    punpcklwd   mm1, mm0                ; 11 01 10 00
    punpckhwd   mm3, mm0                ; 13 03 12 02
    punpcklwd   mm6, mm5                ; 31 21 30 20
    punpckhwd   mm4, mm5                ; 33 23 32 22

    movq        mm0, mm1
    punpckldq   mm0, mm6                ; 30 20 10 00
    punpckhdq   mm1, mm6                ; 31 21 11 01

    movq        mm2, mm3
    punpckldq   mm2, mm4                ; 32 22 12 02
    punpckhdq   mm3, mm4                ; 33 23 13 03

    ;---------------------------------------------------------------
    ; round, scale and store: out = (y + 3) >> 3
    ;---------------------------------------------------------------
    paddw       mm0, mm7
    psraw       mm0, 3
    movq        [rdi +  0], mm0

    paddw       mm1, mm7
    psraw       mm1, 3
    movq        [rdi +  8], mm1

    paddw       mm2, mm7
    psraw       mm2, 3
    movq        [rdi + 16], mm2

    paddw       mm3, mm7
    psraw       mm3, 3
    movq        [rdi + 24], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret