/*
 * Carry-less (polynomial over GF(2)) multiplication for x86-64.
 * Syntax: GNU as, AT&T operand order (source, destination).
 *
 * Contents:
 *   _mul_1x1         file-local helper, 64 x 64 -> 128 bit product
 *   bn_GF2m_mul_2x2  exported entry, 128 x 128 -> 256 bit product
 *
 * The instruction order below is deliberately interleaved; do not
 * reorder statements without re-deriving the register dependencies.
 */
.text

/*
 * _mul_1x1 -- 64 x 64 -> 128 bit carry-less multiply.
 *
 * Private register convention (not a C ABI function):
 *   In:    %rax = a
 *          %rbp = b
 *          %r8  = 4-bit window mask (the caller in this file passes 15)
 *   Out:   %rdx:%rax = a * b, addition being XOR
 *   Clobb: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags
 *   Stack: 136 bytes; 0..127(%rsp) hold a 16-entry table of 64-bit words,
 *          tab[i] = i * a1, where a1 is a with its top three bits cleared.
 *
 * The top three bits of a are handled separately so that every table
 * entry fits in 64 bits.
 */
.type	_mul_1x1,@function
.align	16
_mul_1x1:
	subq	$128+8,%rsp
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	# rsi = a << 1 (bit 62 of a moves to bit 63)
	shrq	$3,%r9			# r9 = mask of the low 61 bits
	leaq	(,%rax,4),%rdi		# rdi = a << 2 (bit 61 of a moves to bit 63)
	andq	%rax,%r9		# r9 = a1 = a with top three bits cleared
	leaq	(,%rax,8),%r12		# r12 = a1 * 8 (top three bits shifted out)
	sarq	$63,%rax		# rax = all-ones if bit 63 of a is set, else 0
	leaq	(%r9,%r9,1),%r10	# r10 = a1 * 2
	sarq	$63,%rsi		# rsi = all-ones if bit 62 of a is set, else 0
	leaq	(,%r9,4),%r11		# r11 = a1 * 4
	andq	%rbp,%rax		# rax = b if bit 63 of a is set
	sarq	$63,%rdi		# rdi = all-ones if bit 61 of a is set, else 0
	movq	%rax,%rdx
	shlq	$63,%rax		# rdx:rax = (selected b) << 63
	andq	%rbp,%rsi
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi		# rcx:rsi = (selected b) << 62
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi		# rbx:rdi = (selected b) << 61
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx		# rdx:rax = contribution of the top three bits of a

	# Build tab[0..15] at 0..120(%rsp); stores are interleaved with the XORs.
	movq	%r9,%r13
	movq	$0,0(%rsp)		# tab[0]
	xorq	%r10,%r13
	movq	%r9,8(%rsp)		# tab[1]
	movq	%r11,%r14
	movq	%r10,16(%rsp)		# tab[2]
	xorq	%r12,%r14		# r14 = a1*4 ^ a1*8
	movq	%r13,24(%rsp)		# tab[3]

	xorq	%r11,%r9
	movq	%r11,32(%rsp)		# tab[4]
	xorq	%r11,%r10
	movq	%r9,40(%rsp)		# tab[5]
	xorq	%r11,%r13
	movq	%r10,48(%rsp)		# tab[6]
	xorq	%r14,%r9
	movq	%r13,56(%rsp)		# tab[7]
	xorq	%r14,%r10

	movq	%r12,64(%rsp)		# tab[8]
	xorq	%r14,%r13
	movq	%r9,72(%rsp)		# tab[9]
	xorq	%r11,%r9
	movq	%r10,80(%rsp)		# tab[10]
	xorq	%r11,%r10
	movq	%r13,88(%rsp)		# tab[11]

	xorq	%r11,%r13
	movq	%r14,96(%rsp)		# tab[12]
	movq	%r8,%rsi
	movq	%r9,104(%rsp)		# tab[13]
	andq	%rbp,%rsi		# rsi = nibble 0 of b
	movq	%r10,112(%rsp)		# tab[14]
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		# tab[15]
	movq	%r8,%rdi
	andq	%rbp,%rdi		# rdi = nibble 1 of b
	shrq	$4,%rbp

	# Window pass over the 16 nibbles of b.
	# Even nibbles: table entry is byte-shifted and accumulated in xmm0.
	# Odd nibbles:  table entry is bit-shifted and accumulated in rdx:rax.
	movq	(%rsp,%rsi,8),%xmm0	# xmm0 = tab[nibble 0]
	movq	%r8,%rsi
	andq	%rbp,%rsi		# rsi = nibble 2
	shrq	$4,%rbp
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 1]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi		# rdi = nibble 3
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 2]
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# rsi = nibble 4
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 3]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi		# rdi = nibble 5
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 4]
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# rsi = nibble 6
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 5]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi		# rdi = nibble 7
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 6]
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# rsi = nibble 8
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 7]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi		# rdi = nibble 9
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 8]
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# rsi = nibble 10
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 9]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi		# rdi = nibble 11
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 10]
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# rsi = nibble 12
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 11]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi		# rdi = nibble 13
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 12]
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# rsi = nibble 14
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 13]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi		# rdi = nibble 15
	movq	(%rsp,%rsi,8),%xmm1	# tab[nibble 14]
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# value unused: rsi is overwritten below
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 15]
	movq	%rcx,%rbx
	shlq	$60,%rcx
.byte	102,72,15,126,198		# movq %xmm0,%rsi  (low half of SIMD accumulator)
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
.byte	102,72,15,126,199		# movq %xmm0,%rdi  (high half of SIMD accumulator)
	xorq	%rsi,%rax		# fold the even-nibble sum into the result
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
	.byte	0xf3,0xc3		# rep ret
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1

/*
 * bn_GF2m_mul_2x2 -- 128 x 128 -> 256 bit carry-less multiply.
 *
 * Register use on entry, as read by the code below:
 *   %rdi = pointer to four 64-bit result words, r[0] least significant
 *   %rsi = high word of the first operand
 *   %rdx = low  word of the first operand
 *   %rcx = high word of the second operand
 *   %r8  = low  word of the second operand
 *
 * Both paths form three 64 x 64 products (high*high, low*low, and
 * (high^low)*(high^low)) and recombine them Karatsuba-style.
 *
 * Bit 33 of the 64-bit load from OPENSSL_ia32cap_P (defined outside this
 * file) selects the path that uses PCLMULQDQ; otherwise the table-driven
 * _mul_1x1 helper is used.
 *
 * The table-driven path saves and restores %rbx, %rbp, %r12-%r14, which
 * _mul_1x1 overwrites. Frame layout (136 bytes):
 *    0..8    low*low product       16..24  high*high product
 *   32       result pointer        40..64  copies of %rsi,%rdx,%rcx,%r8
 *   80..112  saved %r14,%r13,%r12,%rbp,%rbx
 */
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
	movq	OPENSSL_ia32cap_P(%rip),%rax
	btq	$33,%rax
	jnc	.Lvanilla_mul_2x2

	# PCLMULQDQ path. Instructions are emitted as raw bytes; decoded
	# forms are given in the trailing comments.
.byte	102,72,15,110,198		# movq %rsi,%xmm0
.byte	102,72,15,110,201		# movq %rcx,%xmm1
.byte	102,72,15,110,210		# movq %rdx,%xmm2
.byte	102,73,15,110,216		# movq %r8,%xmm3
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0		# pclmulqdq $0,%xmm1,%xmm0  (high*high)
	pxor	%xmm2,%xmm4
	pxor	%xmm3,%xmm5
.byte	102,15,58,68,211,0		# pclmulqdq $0,%xmm3,%xmm2  (low*low)
.byte	102,15,58,68,229,0		# pclmulqdq $0,%xmm5,%xmm4  (cross product)
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		# xmm4 = middle term
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4		# low half of middle term -> bits 64..127
	psrldq	$8,%xmm5		# high half of middle term -> bits 128..191
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm0
	movdqu	%xmm2,0(%rdi)		# r[0], r[1]
	movdqu	%xmm0,16(%rdi)		# r[2], r[3]
	.byte	0xf3,0xc3		# rep ret

.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
	movq	%r14,80(%rsp)
	movq	%r13,88(%rsp)
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
.Lbody_mul_2x2:
	movq	%rdi,32(%rsp)
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

	movq	$15,%r8			# window mask expected by _mul_1x1
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		# high * high
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		# low * low
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1		# (high ^ low) * (high ^ low)
	movq	0(%rsp),%rbx		# rbx = low word of low*low
	movq	8(%rsp),%rcx		# rcx = high word of low*low
	movq	16(%rsp),%rdi		# rdi = low word of high*high
	movq	24(%rsp),%rsi		# rsi = high word of high*high
	movq	32(%rsp),%rbp		# rbp = result pointer

	# Karatsuba recombination; rdx:rax holds the cross product on entry.
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)		# r[0]
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)		# r[3]
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)		# r[2]
	movq	%rax,8(%rbp)		# r[1]

	movq	80(%rsp),%r14
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	.byte	0xf3,0xc3		# rep ret
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
/* NUL-terminated ASCII: "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16