#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
#
# AES-NI-CTR+GHASH stitch.
#
# February 2013
#
# OpenSSL GCM implementation is organized in such a way that its
# performance is rather close to the sum of its streamed components,
# in this context parallelized AES-NI CTR and modulo-scheduled
# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation
# was observed to perform significantly better than the sum of the
# components on contemporary CPUs, the effort was deemed impossible to
# justify. This module is based on a combination of Intel submissions,
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
# Locktyukhin of Intel Corp. who verified that it reduces shuffle
# pressure with notable relative improvement, achieving 1.0 cycle per
# byte processed with 128-bit key on Haswell processor.
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf

# Command line is "[flavour] [output]". A lone argument that contains a
# dot is taken to be the output file name rather than a flavour.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the perlasm translator next to this script or under ../../perlasm.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Probe the assembler. Each probe sums two version comparisons, so $avx
# ends up 0, 1 or 2. Only $avx>1 selects the real implementation below;
# anything less falls through to the stub entry points at the end.
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
		=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
	$avx = ($1>=2.19) + ($1>=2.22);
}

if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
	    `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
	$avx = ($1>=2.09) + ($1>=2.10);
}

if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
	$avx = ($1>=10) + ($1>=11);
}

if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
	$avx = ($2>=3.0) + ($2>3.0);
}

# Everything printed to STDOUT is piped through the translator. Stop here
# if the pipe cannot be set up instead of silently producing no output.
open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";
*STDOUT=*OUT;

if ($avx>1) {{{

# Function arguments, in prototype order (see aesni_gcm_[en|de]crypt below).
($inp,$out,$len,$key,$ivp,$Xip)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");

# %xmm0-%xmm8: GHASH input block, temporaries, hash key power,
# partial products and the running hash value $Xi.
($Ii,$T1,$T2,$Hkey,
 $Z0,$Z1,$Z2,$Z3,$Xi) = map("%xmm$_",(0..8));

# %xmm9-%xmm15: six AES blocks in flight plus the current round key.
($inout0,$inout1,$inout2,$inout3,$inout4,$inout5,$rndkey) = map("%xmm$_",(9..15));

($counter,$rounds,$ret,$const,$in0,$end0)=("%ebx","%ebp","%r10","%r11","%r14","%r15");

# _aesni_ctr32_ghash_6x is called by both public entry points. Each pass
# of .Loop6x interleaves the vaesenc stream for six counter blocks with
# the vpclmulqdq stream that folds six earlier blocks into $Xi.
$code=<<___;
.text

.type	_aesni_ctr32_ghash_6x,\@abi-omnipotent
.align	32
_aesni_ctr32_ghash_6x:
	vmovdqu	0x20($const),$T2	# borrow $T2, .Lone_msb
	sub	\$6,$len
	vpxor	$Z0,$Z0,$Z0	# $Z0 = 0
	vmovdqu	0x00-0x80($key),$rndkey
	vpaddb	$T2,$T1,$inout1
	vpaddb	$T2,$inout1,$inout2
	vpaddb	$T2,$inout2,$inout3
	vpaddb	$T2,$inout3,$inout4
	vpaddb	$T2,$inout4,$inout5
	vpxor	$rndkey,$T1,$inout0
	vmovdqu	$Z0,16+8(%rsp)	# "$Z3" = 0
	jmp	.Loop6x

.align	32
.Loop6x:
	add	\$`6<<24`,$counter
	jc	.Lhandle_ctr32	# discard $inout[1-5]?
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpaddb	$T2,$inout5,$T1	# next counter value
	vpxor	$rndkey,$inout1,$inout1
	vpxor	$rndkey,$inout2,$inout2

.Lresume_ctr32:
	vmovdqu	$T1,($ivp)	# save next counter value
	vpclmulqdq	\$0x10,$Hkey,$Z3,$Z1
	vpxor	$rndkey,$inout3,$inout3
	vmovups	0x10-0x80($key),$T2	# borrow $T2 for $rndkey
	vpclmulqdq	\$0x01,$Hkey,$Z3,$Z2
	xor	%r12,%r12
	cmp	$in0,$end0

	vaesenc	$T2,$inout0,$inout0
	vmovdqu	0x30+8(%rsp),$Ii	# I[4]
	vpxor	$rndkey,$inout4,$inout4
	vpclmulqdq	\$0x00,$Hkey,$Z3,$T1
	vaesenc	$T2,$inout1,$inout1
	vpxor	$rndkey,$inout5,$inout5
	setnc	%r12b
	vpclmulqdq	\$0x11,$Hkey,$Z3,$Z3
	vaesenc	$T2,$inout2,$inout2
	vmovdqu	0x10-0x20($Xip),$Hkey	# $Hkey^2
	neg	%r12
	vaesenc	$T2,$inout3,$inout3
	vpxor	$Z1,$Z2,$Z2
	vpclmulqdq	\$0x00,$Hkey,$Ii,$Z1
	vpxor	$Z0,$Xi,$Xi	# modulo-scheduled
	vaesenc	$T2,$inout4,$inout4
	vpxor	$Z1,$T1,$Z0
	and	\$0x60,%r12
	vmovups	0x20-0x80($key),$rndkey
	vpclmulqdq	\$0x10,$Hkey,$Ii,$T1
	vaesenc	$T2,$inout5,$inout5

	vpclmulqdq	\$0x01,$Hkey,$Ii,$T2
	lea	($in0,%r12),$in0
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	16+8(%rsp),$Xi,$Xi	# modulo-scheduled [vpxor $Z3,$Xi,$Xi]
	vpclmulqdq	\$0x11,$Hkey,$Ii,$Hkey
	vmovdqu	0x40+8(%rsp),$Ii	# I[3]
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x58($in0),%r13
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x50($in0),%r12
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x20+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x28+8(%rsp)
	vmovdqu	0x30-0x20($Xip),$Z1	# borrow $Z1 for $Hkey^3
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x30-0x80($key),$rndkey
	vpxor	$T1,$Z2,$Z2
	vpclmulqdq	\$0x00,$Z1,$Ii,$T1
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$T2,$Z2,$Z2
	vpclmulqdq	\$0x10,$Z1,$Ii,$T2
	vaesenc	$rndkey,$inout1,$inout1
	vpxor	$Hkey,$Z3,$Z3
	vpclmulqdq	\$0x01,$Z1,$Ii,$Hkey
	vaesenc	$rndkey,$inout2,$inout2
	vpclmulqdq	\$0x11,$Z1,$Ii,$Z1
	vmovdqu	0x50+8(%rsp),$Ii	# I[2]
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vpxor	$T1,$Z0,$Z0
	vmovdqu	0x40-0x20($Xip),$T1	# borrow $T1 for $Hkey^4
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x40-0x80($key),$rndkey
	vpxor	$T2,$Z2,$Z2
	vpclmulqdq	\$0x00,$T1,$Ii,$T2
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$Hkey,$Z2,$Z2
	vpclmulqdq	\$0x10,$T1,$Ii,$Hkey
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x48($in0),%r13
	vpxor	$Z1,$Z3,$Z3
	vpclmulqdq	\$0x01,$T1,$Ii,$Z1
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x40($in0),%r12
	vpclmulqdq	\$0x11,$T1,$Ii,$T1
	vmovdqu	0x60+8(%rsp),$Ii	# I[1]
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x30+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x38+8(%rsp)
	vpxor	$T2,$Z0,$Z0
	vmovdqu	0x60-0x20($Xip),$T2	# borrow $T2 for $Hkey^5
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x50-0x80($key),$rndkey
	vpxor	$Hkey,$Z2,$Z2
	vpclmulqdq	\$0x00,$T2,$Ii,$Hkey
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$Z1,$Z2,$Z2
	vpclmulqdq	\$0x10,$T2,$Ii,$Z1
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x38($in0),%r13
	vpxor	$T1,$Z3,$Z3
	vpclmulqdq	\$0x01,$T2,$Ii,$T1
	vpxor	0x70+8(%rsp),$Xi,$Xi	# accumulate I[0]
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x30($in0),%r12
	vpclmulqdq	\$0x11,$T2,$Ii,$T2
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x40+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x48+8(%rsp)
	vpxor	$Hkey,$Z0,$Z0
	vmovdqu	0x70-0x20($Xip),$Hkey	# $Hkey^6
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x60-0x80($key),$rndkey
	vpxor	$Z1,$Z2,$Z2
	vpclmulqdq	\$0x10,$Hkey,$Xi,$Z1
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$T1,$Z2,$Z2
	vpclmulqdq	\$0x01,$Hkey,$Xi,$T1
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x28($in0),%r13
	vpxor	$T2,$Z3,$Z3
	vpclmulqdq	\$0x00,$Hkey,$Xi,$T2
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x20($in0),%r12
	vpclmulqdq	\$0x11,$Hkey,$Xi,$Xi
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x50+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x58+8(%rsp)
	vpxor	$Z1,$Z2,$Z2
	vaesenc	$rndkey,$inout5,$inout5
	vpxor	$T1,$Z2,$Z2

	vmovups	0x70-0x80($key),$rndkey
	vpslldq	\$8,$Z2,$Z1
	vpxor	$T2,$Z0,$Z0
	vmovdqu	0x10($const),$Hkey	# .Lpoly

	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$Xi,$Z3,$Z3
	vaesenc	$rndkey,$inout1,$inout1
	vpxor	$Z1,$Z0,$Z0
	movbe	0x18($in0),%r13
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x10($in0),%r12
	vpalignr	\$8,$Z0,$Z0,$Ii	# 1st phase
	vpclmulqdq	\$0x10,$Hkey,$Z0,$Z0
	mov	%r13,0x60+8(%rsp)
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r12,0x68+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	vmovups	0x80-0x80($key),$T1	# borrow $T1 for $rndkey
	vaesenc	$rndkey,$inout5,$inout5

	vaesenc	$T1,$inout0,$inout0
	vmovups	0x90-0x80($key),$rndkey
	vaesenc	$T1,$inout1,$inout1
	vpsrldq	\$8,$Z2,$Z2
	vaesenc	$T1,$inout2,$inout2
	vpxor	$Z2,$Z3,$Z3
	vaesenc	$T1,$inout3,$inout3
	vpxor	$Ii,$Z0,$Z0
	movbe	0x08($in0),%r13
	vaesenc	$T1,$inout4,$inout4
	movbe	0x00($in0),%r12
	vaesenc	$T1,$inout5,$inout5
	vmovups	0xa0-0x80($key),$T1
	cmp	\$11,$rounds
	jb	.Lenc_tail	# 128-bit key

	vaesenc	$rndkey,$inout0,$inout0
	vaesenc	$rndkey,$inout1,$inout1
	vaesenc	$rndkey,$inout2,$inout2
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vaesenc	$rndkey,$inout5,$inout5

	vaesenc	$T1,$inout0,$inout0
	vaesenc	$T1,$inout1,$inout1
	vaesenc	$T1,$inout2,$inout2
	vaesenc	$T1,$inout3,$inout3
	vaesenc	$T1,$inout4,$inout4
	vmovups	0xb0-0x80($key),$rndkey
	vaesenc	$T1,$inout5,$inout5
	vmovups	0xc0-0x80($key),$T1
	je	.Lenc_tail	# 192-bit key

	vaesenc	$rndkey,$inout0,$inout0
	vaesenc	$rndkey,$inout1,$inout1
	vaesenc	$rndkey,$inout2,$inout2
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vaesenc	$rndkey,$inout5,$inout5

	vaesenc	$T1,$inout0,$inout0
	vaesenc	$T1,$inout1,$inout1
	vaesenc	$T1,$inout2,$inout2
	vaesenc	$T1,$inout3,$inout3
	vaesenc	$T1,$inout4,$inout4
	vmovups	0xd0-0x80($key),$rndkey
	vaesenc	$T1,$inout5,$inout5
	vmovups	0xe0-0x80($key),$T1
	jmp	.Lenc_tail	# 256-bit key

.align	32
.Lhandle_ctr32:
	vmovdqu	($const),$Ii	# borrow $Ii for .Lbswap_mask
	vpshufb	$Ii,$T1,$Z2	# byte-swap counter
	vmovdqu	0x30($const),$Z1	# borrow $Z1, .Ltwo_lsb
	vpaddd	0x40($const),$Z2,$inout1	# .Lone_lsb
	vpaddd	$Z1,$Z2,$inout2
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpaddd	$Z1,$inout1,$inout3
	vpshufb	$Ii,$inout1,$inout1
	vpaddd	$Z1,$inout2,$inout4
	vpshufb	$Ii,$inout2,$inout2
	vpxor	$rndkey,$inout1,$inout1
	vpaddd	$Z1,$inout3,$inout5
	vpshufb	$Ii,$inout3,$inout3
	vpxor	$rndkey,$inout2,$inout2
	vpaddd	$Z1,$inout4,$T1	# byte-swapped next counter value
	vpshufb	$Ii,$inout4,$inout4
	vpshufb	$Ii,$inout5,$inout5
	vpshufb	$Ii,$T1,$T1	# next counter value
	jmp	.Lresume_ctr32

.align	32
.Lenc_tail:
	vaesenc	$rndkey,$inout0,$inout0
	vmovdqu	$Z3,16+8(%rsp)	# postpone vpxor $Z3,$Xi,$Xi
	vpalignr	\$8,$Z0,$Z0,$Xi	# 2nd phase
	vaesenc	$rndkey,$inout1,$inout1
	vpclmulqdq	\$0x10,$Hkey,$Z0,$Z0
	vpxor	0x00($inp),$T1,$T2
	vaesenc	$rndkey,$inout2,$inout2
	vpxor	0x10($inp),$T1,$Ii
	vaesenc	$rndkey,$inout3,$inout3
	vpxor	0x20($inp),$T1,$Z1
	vaesenc	$rndkey,$inout4,$inout4
	vpxor	0x30($inp),$T1,$Z2
	vaesenc	$rndkey,$inout5,$inout5
	vpxor	0x40($inp),$T1,$Z3
	vpxor	0x50($inp),$T1,$Hkey
	vmovdqu	($ivp),$T1	# load next counter value

	vaesenclast	$T2,$inout0,$inout0
	vmovdqu	0x20($const),$T2	# borrow $T2, .Lone_msb
	vaesenclast	$Ii,$inout1,$inout1
	vpaddb	$T2,$T1,$Ii
	mov	%r13,0x70+8(%rsp)
	lea	0x60($inp),$inp
	vaesenclast	$Z1,$inout2,$inout2
	vpaddb	$T2,$Ii,$Z1
	mov	%r12,0x78+8(%rsp)
	lea	0x60($out),$out
	vmovdqu	0x00-0x80($key),$rndkey
	vaesenclast	$Z2,$inout3,$inout3
	vpaddb	$T2,$Z1,$Z2
	vaesenclast	$Z3, $inout4,$inout4
	vpaddb	$T2,$Z2,$Z3
	vaesenclast	$Hkey,$inout5,$inout5
	vpaddb	$T2,$Z3,$Hkey

	add	\$0x60,$ret
	sub	\$0x6,$len
	jc	.L6x_done

	vmovups	$inout0,-0x60($out)	# save output
	vpxor	$rndkey,$T1,$inout0
	vmovups	$inout1,-0x50($out)
	vmovdqa	$Ii,$inout1	# 0 latency
	vmovups	$inout2,-0x40($out)
	vmovdqa	$Z1,$inout2	# 0 latency
	vmovups	$inout3,-0x30($out)
	vmovdqa	$Z2,$inout3	# 0 latency
	vmovups	$inout4,-0x20($out)
	vmovdqa	$Z3,$inout4	# 0 latency
	vmovups	$inout5,-0x10($out)
	vmovdqa	$Hkey,$inout5	# 0 latency
	vmovdqu	0x20+8(%rsp),$Z3	# I[5]
	jmp	.Loop6x

.L6x_done:
	vpxor	16+8(%rsp),$Xi,$Xi	# modulo-scheduled
	vpxor	$Z0,$Xi,$Xi	# modulo-scheduled

	ret
.size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
___
######################################################################
#
# size_t aesni_gcm_[en|de]crypt(const void *inp, void *out, size_t len,
#		const AES_KEY *key, unsigned char iv[16],
#		struct { u128 Xi,H,Htbl[9]; } *Xip);
#
# Both entry points hand $ret back in %rax. $ret is zeroed on entry and
# returned unchanged when len is below the minimal accepted length
# (0x60 for decrypt, 0x60*3 for encrypt). Otherwise the 6x loop above
# adds 0x60 to it on every pass; encrypt preloads 0x60*2 before
# entering that loop.
$code.=<<___;
.globl	aesni_gcm_decrypt
.type	aesni_gcm_decrypt,\@function,6
.align	32
aesni_gcm_decrypt:
	xor	$ret,$ret
	cmp	\$0x60,$len	# minimal accepted length
	jb	.Lgcm_dec_abort

	lea	(%rsp),%rax	# save stack pointer
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
___
# Win64 only: reserve 0xa8 bytes and store %xmm6-%xmm15 at fixed negative
# offsets from %rax, which still holds the stack pointer captured before
# the pushes. The epilogue reloads them from the same offsets.
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
	movaps	%xmm6,-0xd8(%rax)
	movaps	%xmm7,-0xc8(%rax)
	movaps	%xmm8,-0xb8(%rax)
	movaps	%xmm9,-0xa8(%rax)
	movaps	%xmm10,-0x98(%rax)
movaps %xmm11,-0x88(%rax) 419 movaps %xmm12,-0x78(%rax) 420 movaps %xmm13,-0x68(%rax) 421 movaps %xmm14,-0x58(%rax) 422 movaps %xmm15,-0x48(%rax) 423 .Lgcm_dec_body: 424 ___ 425 $code.=<<___; 426 vzeroupper 427 428 vmovdqu ($ivp),$T1 # input counter value 429 add \$-128,%rsp 430 mov 12($ivp),$counter 431 lea .Lbswap_mask(%rip),$const 432 lea -0x80($key),$in0 # borrow $in0 433 mov \$0xf80,$end0 # borrow $end0 434 vmovdqu ($Xip),$Xi # load Xi 435 and \$-128,%rsp # ensure stack alignment 436 vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask 437 lea 0x80($key),$key # size optimization 438 lea 0x20+0x20($Xip),$Xip # size optimization 439 mov 0xf0-0x80($key),$rounds 440 vpshufb $Ii,$Xi,$Xi 441 442 and $end0,$in0 443 and %rsp,$end0 444 sub $in0,$end0 445 jc .Ldec_no_key_aliasing 446 cmp \$768,$end0 447 jnc .Ldec_no_key_aliasing 448 sub $end0,%rsp # avoid aliasing with key 449 .Ldec_no_key_aliasing: 450 451 vmovdqu 0x50($inp),$Z3 # I[5] 452 lea ($inp),$in0 453 vmovdqu 0x40($inp),$Z0 454 lea -0xc0($inp,$len),$end0 455 vmovdqu 0x30($inp),$Z1 456 shr \$4,$len 457 xor $ret,$ret 458 vmovdqu 0x20($inp),$Z2 459 vpshufb $Ii,$Z3,$Z3 # passed to _aesni_ctr32_ghash_6x 460 vmovdqu 0x10($inp),$T2 461 vpshufb $Ii,$Z0,$Z0 462 vmovdqu ($inp),$Hkey 463 vpshufb $Ii,$Z1,$Z1 464 vmovdqu $Z0,0x30(%rsp) 465 vpshufb $Ii,$Z2,$Z2 466 vmovdqu $Z1,0x40(%rsp) 467 vpshufb $Ii,$T2,$T2 468 vmovdqu $Z2,0x50(%rsp) 469 vpshufb $Ii,$Hkey,$Hkey 470 vmovdqu $T2,0x60(%rsp) 471 vmovdqu $Hkey,0x70(%rsp) 472 473 call _aesni_ctr32_ghash_6x 474 475 vmovups $inout0,-0x60($out) # save output 476 vmovups $inout1,-0x50($out) 477 vmovups $inout2,-0x40($out) 478 vmovups $inout3,-0x30($out) 479 vmovups $inout4,-0x20($out) 480 vmovups $inout5,-0x10($out) 481 482 vpshufb ($const),$Xi,$Xi # .Lbswap_mask 483 vmovdqu $Xi,-0x40($Xip) # output Xi 484 485 vzeroupper 486 ___ 487 $code.=<<___ if ($win64); 488 movaps -0xd8(%rax),%xmm6 489 movaps -0xd8(%rax),%xmm7 490 movaps -0xb8(%rax),%xmm8 491 movaps -0xa8(%rax),%xmm9 492 movaps 
-0x98(%rax),%xmm10 493 movaps -0x88(%rax),%xmm11 494 movaps -0x78(%rax),%xmm12 495 movaps -0x68(%rax),%xmm13 496 movaps -0x58(%rax),%xmm14 497 movaps -0x48(%rax),%xmm15 498 ___ 499 $code.=<<___; 500 mov -48(%rax),%r15 501 mov -40(%rax),%r14 502 mov -32(%rax),%r13 503 mov -24(%rax),%r12 504 mov -16(%rax),%rbp 505 mov -8(%rax),%rbx 506 lea (%rax),%rsp # restore %rsp 507 .Lgcm_dec_abort: 508 mov $ret,%rax # return value 509 ret 510 .size aesni_gcm_decrypt,.-aesni_gcm_decrypt 511 ___ 512 513 $code.=<<___; 514 .type _aesni_ctr32_6x,\@abi-omnipotent 515 .align 32 516 _aesni_ctr32_6x: 517 vmovdqu 0x00-0x80($key),$Z0 # borrow $Z0 for $rndkey 518 vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb 519 lea -1($rounds),%r13 520 vmovups 0x10-0x80($key),$rndkey 521 lea 0x20-0x80($key),%r12 522 vpxor $Z0,$T1,$inout0 523 add \$`6<<24`,$counter 524 jc .Lhandle_ctr32_2 525 vpaddb $T2,$T1,$inout1 526 vpaddb $T2,$inout1,$inout2 527 vpxor $Z0,$inout1,$inout1 528 vpaddb $T2,$inout2,$inout3 529 vpxor $Z0,$inout2,$inout2 530 vpaddb $T2,$inout3,$inout4 531 vpxor $Z0,$inout3,$inout3 532 vpaddb $T2,$inout4,$inout5 533 vpxor $Z0,$inout4,$inout4 534 vpaddb $T2,$inout5,$T1 535 vpxor $Z0,$inout5,$inout5 536 jmp .Loop_ctr32 537 538 .align 16 539 .Loop_ctr32: 540 vaesenc $rndkey,$inout0,$inout0 541 vaesenc $rndkey,$inout1,$inout1 542 vaesenc $rndkey,$inout2,$inout2 543 vaesenc $rndkey,$inout3,$inout3 544 vaesenc $rndkey,$inout4,$inout4 545 vaesenc $rndkey,$inout5,$inout5 546 vmovups (%r12),$rndkey 547 lea 0x10(%r12),%r12 548 dec %r13d 549 jnz .Loop_ctr32 550 551 vmovdqu (%r12),$Hkey # last round key 552 vaesenc $rndkey,$inout0,$inout0 553 vpxor 0x00($inp),$Hkey,$Z0 554 vaesenc $rndkey,$inout1,$inout1 555 vpxor 0x10($inp),$Hkey,$Z1 556 vaesenc $rndkey,$inout2,$inout2 557 vpxor 0x20($inp),$Hkey,$Z2 558 vaesenc $rndkey,$inout3,$inout3 559 vpxor 0x30($inp),$Hkey,$Xi 560 vaesenc $rndkey,$inout4,$inout4 561 vpxor 0x40($inp),$Hkey,$T2 562 vaesenc $rndkey,$inout5,$inout5 563 vpxor 0x50($inp),$Hkey,$Hkey 564 
	lea	0x60($inp),$inp

	vaesenclast	$Z0,$inout0,$inout0
	vaesenclast	$Z1,$inout1,$inout1
	vaesenclast	$Z2,$inout2,$inout2
	vaesenclast	$Xi,$inout3,$inout3
	vaesenclast	$T2,$inout4,$inout4
	vaesenclast	$Hkey,$inout5,$inout5
	vmovups	$inout0,0x00($out)
	vmovups	$inout1,0x10($out)
	vmovups	$inout2,0x20($out)
	vmovups	$inout3,0x30($out)
	vmovups	$inout4,0x40($out)
	vmovups	$inout5,0x50($out)
	lea	0x60($out),$out

	ret
.align	32
.Lhandle_ctr32_2:
	vpshufb	$Ii,$T1,$Z2	# byte-swap counter
	vmovdqu	0x30($const),$Z1	# borrow $Z1, .Ltwo_lsb
	vpaddd	0x40($const),$Z2,$inout1	# .Lone_lsb
	vpaddd	$Z1,$Z2,$inout2
	vpaddd	$Z1,$inout1,$inout3
	vpshufb	$Ii,$inout1,$inout1
	vpaddd	$Z1,$inout2,$inout4
	vpshufb	$Ii,$inout2,$inout2
	vpxor	$Z0,$inout1,$inout1
	vpaddd	$Z1,$inout3,$inout5
	vpshufb	$Ii,$inout3,$inout3
	vpxor	$Z0,$inout2,$inout2
	vpaddd	$Z1,$inout4,$T1	# byte-swapped next counter value
	vpshufb	$Ii,$inout4,$inout4
	vpxor	$Z0,$inout3,$inout3
	vpshufb	$Ii,$inout5,$inout5
	vpxor	$Z0,$inout4,$inout4
	vpshufb	$Ii,$T1,$T1	# next counter value
	vpxor	$Z0,$inout5,$inout5
	jmp	.Loop_ctr32
.size	_aesni_ctr32_6x,.-_aesni_ctr32_6x

.globl	aesni_gcm_encrypt
.type	aesni_gcm_encrypt,\@function,6
.align	32
aesni_gcm_encrypt:
	xor	$ret,$ret
	cmp	\$0x60*3,$len	# minimal accepted length
	jb	.Lgcm_enc_abort

	lea	(%rsp),%rax	# save stack pointer
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
___
# Win64 only: same save area layout as aesni_gcm_decrypt. 0xa8 bytes are
# reserved and %xmm6-%xmm15 are stored at -0xd8..-0x48 relative to %rax.
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
	movaps	%xmm6,-0xd8(%rax)
	movaps	%xmm7,-0xc8(%rax)
	movaps	%xmm8,-0xb8(%rax)
	movaps	%xmm9,-0xa8(%rax)
	movaps	%xmm10,-0x98(%rax)
	movaps	%xmm11,-0x88(%rax)
	movaps	%xmm12,-0x78(%rax)
	movaps	%xmm13,-0x68(%rax)
	movaps	%xmm14,-0x58(%rax)
	movaps	%xmm15,-0x48(%rax)
.Lgcm_enc_body:
___
# Encrypt body, emitted for every flavour.
$code.=<<___;
	vzeroupper

	vmovdqu	($ivp),$T1	# input counter value
	add	\$-128,%rsp
	mov	12($ivp),$counter
	lea	.Lbswap_mask(%rip),$const
	lea	-0x80($key),$in0	# borrow $in0
	mov	\$0xf80,$end0	# borrow $end0
	lea	0x80($key),$key	# size optimization
	vmovdqu	($const),$Ii	# borrow $Ii for .Lbswap_mask
	and	\$-128,%rsp	# ensure stack alignment
	mov	0xf0-0x80($key),$rounds

	and	$end0,$in0
	and	%rsp,$end0
	sub	$in0,$end0
	jc	.Lenc_no_key_aliasing
	cmp	\$768,$end0
	jnc	.Lenc_no_key_aliasing
	sub	$end0,%rsp	# avoid aliasing with key
.Lenc_no_key_aliasing:

	lea	($out),$in0
	lea	-0xc0($out,$len),$end0
	shr	\$4,$len

	call	_aesni_ctr32_6x
	vpshufb	$Ii,$inout0,$Xi	# save bswapped output on stack
	vpshufb	$Ii,$inout1,$T2
	vmovdqu	$Xi,0x70(%rsp)
	vpshufb	$Ii,$inout2,$Z0
	vmovdqu	$T2,0x60(%rsp)
	vpshufb	$Ii,$inout3,$Z1
	vmovdqu	$Z0,0x50(%rsp)
	vpshufb	$Ii,$inout4,$Z2
	vmovdqu	$Z1,0x40(%rsp)
	vpshufb	$Ii,$inout5,$Z3	# passed to _aesni_ctr32_ghash_6x
	vmovdqu	$Z2,0x30(%rsp)

	call	_aesni_ctr32_6x

	vmovdqu	($Xip),$Xi	# load Xi
	lea	0x20+0x20($Xip),$Xip	# size optimization
	sub	\$12,$len
	mov	\$0x60*2,$ret
	vpshufb	$Ii,$Xi,$Xi

	call	_aesni_ctr32_ghash_6x
	vmovdqu	0x20(%rsp),$Z3	# I[5]
	vmovdqu	($const),$Ii	# borrow $Ii for .Lbswap_mask
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpunpckhqdq	$Z3,$Z3,$T1
	vmovdqu	0x20-0x20($Xip),$rndkey	# borrow $rndkey for $HK
	vmovups	$inout0,-0x60($out)	# save output
	vpshufb	$Ii,$inout0,$inout0	# but keep bswapped copy
	vpxor	$Z3,$T1,$T1
	vmovups	$inout1,-0x50($out)
	vpshufb	$Ii,$inout1,$inout1
	vmovups	$inout2,-0x40($out)
	vpshufb	$Ii,$inout2,$inout2
	vmovups	$inout3,-0x30($out)
	vpshufb	$Ii,$inout3,$inout3
	vmovups	$inout4,-0x20($out)
	vpshufb	$Ii,$inout4,$inout4
	vmovups	$inout5,-0x10($out)
	vpshufb	$Ii,$inout5,$inout5
	vmovdqu	$inout0,0x10(%rsp)	# free $inout0
___
# Tail of encrypt: hash the blocks still outstanding after the stitched
# loop has returned. No AES is in flight any more, so two AES registers
# are reused under new names for this section only. $HK is the register
# of $rndkey, and $T3 is the register of $inout0, whose byte-swapped copy
# was just parked at 0x10(%rsp).
{ my ($HK,$T3)=($rndkey,$inout0);

$code.=<<___;
	vmovdqu	0x30(%rsp),$Z2	# I[4]
	vmovdqu	0x10-0x20($Xip),$Ii	# borrow $Ii for $Hkey^2
	vpunpckhqdq	$Z2,$Z2,$T2
	vpclmulqdq	\$0x00,$Hkey,$Z3,$Z1
	vpxor	$Z2,$T2,$T2
	vpclmulqdq	\$0x11,$Hkey,$Z3,$Z3
	vpclmulqdq	\$0x00,$HK,$T1,$T1

	vmovdqu	0x40(%rsp),$T3	# I[3]
	vpclmulqdq	\$0x00,$Ii,$Z2,$Z0
	vmovdqu	0x30-0x20($Xip),$Hkey	# $Hkey^3
	vpxor	$Z1,$Z0,$Z0
	vpunpckhqdq	$T3,$T3,$Z1
	vpclmulqdq	\$0x11,$Ii,$Z2,$Z2
	vpxor	$T3,$Z1,$Z1
	vpxor	$Z3,$Z2,$Z2
	vpclmulqdq	\$0x10,$HK,$T2,$T2
	vmovdqu	0x50-0x20($Xip),$HK
	vpxor	$T1,$T2,$T2

	vmovdqu	0x50(%rsp),$T1	# I[2]
	vpclmulqdq	\$0x00,$Hkey,$T3,$Z3
	vmovdqu	0x40-0x20($Xip),$Ii	# borrow $Ii for $Hkey^4
	vpxor	$Z0,$Z3,$Z3
	vpunpckhqdq	$T1,$T1,$Z0
	vpclmulqdq	\$0x11,$Hkey,$T3,$T3
	vpxor	$T1,$Z0,$Z0
	vpxor	$Z2,$T3,$T3
	vpclmulqdq	\$0x00,$HK,$Z1,$Z1
	vpxor	$T2,$Z1,$Z1

	vmovdqu	0x60(%rsp),$T2	# I[1]
	vpclmulqdq	\$0x00,$Ii,$T1,$Z2
	vmovdqu	0x60-0x20($Xip),$Hkey	# $Hkey^5
	vpxor	$Z3,$Z2,$Z2
	vpunpckhqdq	$T2,$T2,$Z3
	vpclmulqdq	\$0x11,$Ii,$T1,$T1
	vpxor	$T2,$Z3,$Z3
	vpxor	$T3,$T1,$T1
	vpclmulqdq	\$0x10,$HK,$Z0,$Z0
	vmovdqu	0x80-0x20($Xip),$HK
	vpxor	$Z1,$Z0,$Z0

	vpxor	0x70(%rsp),$Xi,$Xi	# accumulate I[0]
	vpclmulqdq	\$0x00,$Hkey,$T2,$Z1
	vmovdqu	0x70-0x20($Xip),$Ii	# borrow $Ii for $Hkey^6
	vpunpckhqdq	$Xi,$Xi,$T3
	vpxor	$Z2,$Z1,$Z1
	vpclmulqdq	\$0x11,$Hkey,$T2,$T2
	vpxor	$Xi,$T3,$T3
	vpxor	$T1,$T2,$T2
	vpclmulqdq	\$0x00,$HK,$Z3,$Z3
	vpxor	$Z0,$Z3,$Z0

	vpclmulqdq	\$0x00,$Ii,$Xi,$Z2
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpunpckhqdq	$inout5,$inout5,$T1
	vpclmulqdq	\$0x11,$Ii,$Xi,$Xi
	vpxor	$inout5,$T1,$T1
	vpxor	$Z1,$Z2,$Z1
	vpclmulqdq	\$0x10,$HK,$T3,$T3
	vmovdqu	0x20-0x20($Xip),$HK
	vpxor	$T2,$Xi,$Z3
	vpxor	$Z0,$T3,$Z2

	vmovdqu	0x10-0x20($Xip),$Ii	# borrow $Ii for $Hkey^2
	vpxor	$Z1,$Z3,$T3	# aggregated Karatsuba post-processing
	vpclmulqdq	\$0x00,$Hkey,$inout5,$Z0
	vpxor	$T3,$Z2,$Z2
	vpunpckhqdq	$inout4,$inout4,$T2
	vpclmulqdq	\$0x11,$Hkey,$inout5,$inout5
	vpxor	$inout4,$T2,$T2
	vpslldq	\$8,$Z2,$T3
	vpclmulqdq	\$0x00,$HK,$T1,$T1
	vpxor	$T3,$Z1,$Xi
	vpsrldq	\$8,$Z2,$Z2
	vpxor	$Z2,$Z3,$Z3

	vpclmulqdq	\$0x00,$Ii,$inout4,$Z1
	vmovdqu	0x30-0x20($Xip),$Hkey	# $Hkey^3
	vpxor	$Z0,$Z1,$Z1
	vpunpckhqdq	$inout3,$inout3,$T3
	vpclmulqdq	\$0x11,$Ii,$inout4,$inout4
	vpxor	$inout3,$T3,$T3
	vpxor	$inout5,$inout4,$inout4
	vpalignr	\$8,$Xi,$Xi,$inout5	# 1st phase
	vpclmulqdq	\$0x10,$HK,$T2,$T2
	vmovdqu	0x50-0x20($Xip),$HK
	vpxor	$T1,$T2,$T2

	vpclmulqdq	\$0x00,$Hkey,$inout3,$Z0
	vmovdqu	0x40-0x20($Xip),$Ii	# borrow $Ii for $Hkey^4
	vpxor	$Z1,$Z0,$Z0
	vpunpckhqdq	$inout2,$inout2,$T1
	vpclmulqdq	\$0x11,$Hkey,$inout3,$inout3
	vpxor	$inout2,$T1,$T1
	vpxor	$inout4,$inout3,$inout3
	vxorps	0x10(%rsp),$Z3,$Z3	# accumulate $inout0
	vpclmulqdq	\$0x00,$HK,$T3,$T3
	vpxor	$T2,$T3,$T3

	vpclmulqdq	\$0x10,0x10($const),$Xi,$Xi
	vxorps	$inout5,$Xi,$Xi

	vpclmulqdq	\$0x00,$Ii,$inout2,$Z1
	vmovdqu	0x60-0x20($Xip),$Hkey	# $Hkey^5
	vpxor	$Z0,$Z1,$Z1
	vpunpckhqdq	$inout1,$inout1,$T2
	vpclmulqdq	\$0x11,$Ii,$inout2,$inout2
	vpxor	$inout1,$T2,$T2
	vpalignr	\$8,$Xi,$Xi,$inout5	# 2nd phase
	vpxor	$inout3,$inout2,$inout2
	vpclmulqdq	\$0x10,$HK,$T1,$T1
	vmovdqu	0x80-0x20($Xip),$HK
	vpxor	$T3,$T1,$T1

	vxorps	$Z3,$inout5,$inout5
	vpclmulqdq	\$0x10,0x10($const),$Xi,$Xi
	vxorps	$inout5,$Xi,$Xi

	vpclmulqdq	\$0x00,$Hkey,$inout1,$Z0
	vmovdqu	0x70-0x20($Xip),$Ii	# borrow $Ii for $Hkey^6
	vpxor	$Z1,$Z0,$Z0
	vpunpckhqdq	$Xi,$Xi,$T3
	vpclmulqdq	\$0x11,$Hkey,$inout1,$inout1
	vpxor	$Xi,$T3,$T3
	vpxor	$inout2,$inout1,$inout1
	vpclmulqdq	\$0x00,$HK,$T2,$T2
	vpxor	$T1,$T2,$T2

	vpclmulqdq	\$0x00,$Ii,$Xi,$Z1
	vpclmulqdq	\$0x11,$Ii,$Xi,$Z3
	vpxor	$Z0,$Z1,$Z1
	vpclmulqdq	\$0x10,$HK,$T3,$Z2
	vpxor	$inout1,$Z3,$Z3
	vpxor	$T2,$Z2,$Z2

	vpxor	$Z1,$Z3,$Z0	# aggregated Karatsuba post-processing
	vpxor	$Z0,$Z2,$Z2
	vpslldq	\$8,$Z2,$T1
	vmovdqu	0x10($const),$Hkey	# .Lpoly
	vpsrldq	\$8,$Z2,$Z2
	vpxor	$T1,$Z1,$Xi
	vpxor	$Z2,$Z3,$Z3

	vpalignr	\$8,$Xi,$Xi,$T2	# 1st phase
	vpclmulqdq	\$0x10,$Hkey,$Xi,$Xi
	vpxor	$T2,$Xi,$Xi

	vpalignr	\$8,$Xi,$Xi,$T2	# 2nd phase
	vpclmulqdq	\$0x10,$Hkey,$Xi,$Xi
	vpxor	$Z3,$T2,$T2
	vpxor	$T2,$Xi,$Xi
___
}
# Store the final hash back in the caller's byte order. $Xip was advanced
# by 0x40 earlier, so -0x40($Xip) is the original Xi slot.
$code.=<<___;
	vpshufb	($const),$Xi,$Xi	# .Lbswap_mask
	vmovdqu	$Xi,-0x40($Xip)	# output Xi

	vzeroupper
___
# Win64 only: reload %xmm6-%xmm15 from the offsets used by the prologue.
$code.=<<___ if ($win64);
	movaps	-0xd8(%rax),%xmm6
	movaps	-0xc8(%rax),%xmm7
	movaps	-0xb8(%rax),%xmm8
	movaps	-0xa8(%rax),%xmm9
	movaps	-0x98(%rax),%xmm10
	movaps	-0x88(%rax),%xmm11
	movaps	-0x78(%rax),%xmm12
	movaps	-0x68(%rax),%xmm13
	movaps	-0x58(%rax),%xmm14
	movaps	-0x48(%rax),%xmm15
___
# Common epilogue: reload the six pushed registers relative to %rax and
# restore the caller's stack pointer from it.
$code.=<<___;
	mov	-48(%rax),%r15
	mov	-40(%rax),%r14
	mov	-32(%rax),%r13
	mov	-24(%rax),%r12
	mov	-16(%rax),%rbp
	mov	-8(%rax),%rbx
	lea	(%rax),%rsp	# restore %rsp
.Lgcm_enc_abort:
	mov	$ret,%rax	# return value
	ret
.size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
___

# Constant table addressed through $const (= .Lbswap_mask). Each entry is
# 16 bytes, so the code reaches .Lpoly at 0x10, .Lone_msb at 0x20,
# .Ltwo_lsb at 0x30 and .Lone_lsb at 0x40.
$code.=<<___;
.align	64
.Lbswap_mask:
	.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
	.byte	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
	.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.asciz	"AES-NI GCM module for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align	64
___
# Win64 structured exception handling support. The names below are the
# registers the handler reads its inputs from; presumably these are the
# four handler arguments in Win64 argument-register order (confirm
# against the platform documentation). Only $context and $disp are used.
if ($win64) {
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

# gcm_se_handler: if the faulting Rip lies between the body label and the
# abort label recorded in HandlerData, copy the saved GPRs and the ten
# saved XMM registers from the frame back into the CONTEXT record, then
# hand over to RtlVirtualUnwind.
$code.=<<___
.extern	__imp_RtlVirtualUnwind
.type	gcm_se_handler,\@abi-omnipotent
.align	16
gcm_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi	# disp->ImageBase
	mov	56($disp),%r11	# disp->HandlerData

	mov	0(%r11),%r10d	# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx	# context->Rip<prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d	# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx	# context->Rip>=epilogue label
	jae	.Lcommon_seh_tail

	mov	120($context),%rax	# pull context->Rax

	mov	-48(%rax),%r15
	mov	-40(%rax),%r14
	mov	-32(%rax),%r13
	mov	-24(%rax),%r12
	mov	-16(%rax),%rbp
	mov	-8(%rax),%rbx
	mov	%r15,240($context)
	mov	%r14,232($context)
	mov	%r13,224($context)
	mov	%r12,216($context)
	mov	%rbp,160($context)
	mov	%rbx,144($context)

	lea	-0xd8(%rax),%rsi	# %xmm save area
	lea	512($context),%rdi	# & context.Xmm6
	mov	\$20,%ecx	# 10*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc	# cld; rep movsq

.Lcommon_seh_tail:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi	# disp->ContextRecord
	mov	$context,%rsi	# context
	mov	\$154,%ecx	# sizeof(CONTEXT)
	.long	0xa548f3fc	# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx	# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx	# arg2, disp->ImageBase
	mov	0(%rsi),%r8	# arg3, disp->ControlPc
	mov	16(%rsi),%r9	# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10	# disp->ContextRecord
	lea	56(%rsi),%r11	# &disp->HandlerData
	lea	24(%rsi),%r12	# &disp->EstablisherFrame
	mov	%r10,32(%rsp)	# arg5
mov %r11,40(%rsp) # arg6 991 mov %r12,48(%rsp) # arg7 992 mov %rcx,56(%rsp) # arg8, (NULL) 993 call *__imp_RtlVirtualUnwind(%rip) 994 995 mov \$1,%eax # ExceptionContinueSearch 996 add \$64,%rsp 997 popfq 998 pop %r15 999 pop %r14 1000 pop %r13 1001 pop %r12 1002 pop %rbp 1003 pop %rbx 1004 pop %rdi 1005 pop %rsi 1006 ret 1007 .size gcm_se_handler,.-gcm_se_handler 1008 1009 .section .pdata 1010 .align 4 1011 .rva .LSEH_begin_aesni_gcm_decrypt 1012 .rva .LSEH_end_aesni_gcm_decrypt 1013 .rva .LSEH_gcm_dec_info 1014 1015 .rva .LSEH_begin_aesni_gcm_encrypt 1016 .rva .LSEH_end_aesni_gcm_encrypt 1017 .rva .LSEH_gcm_enc_info 1018 .section .xdata 1019 .align 8 1020 .LSEH_gcm_dec_info: 1021 .byte 9,0,0,0 1022 .rva gcm_se_handler 1023 .rva .Lgcm_dec_body,.Lgcm_dec_abort 1024 .LSEH_gcm_enc_info: 1025 .byte 9,0,0,0 1026 .rva gcm_se_handler 1027 .rva .Lgcm_enc_body,.Lgcm_enc_abort 1028 ___ 1029 } 1030 }}} else {{{ 1031 $code=<<___; # assembler is too old 1032 .text 1033 1034 .globl aesni_gcm_encrypt 1035 .type aesni_gcm_encrypt,\@abi-omnipotent 1036 aesni_gcm_encrypt: 1037 xor %eax,%eax 1038 ret 1039 .size aesni_gcm_encrypt,.-aesni_gcm_encrypt 1040 1041 .globl aesni_gcm_decrypt 1042 .type aesni_gcm_decrypt,\@abi-omnipotent 1043 aesni_gcm_decrypt: 1044 xor %eax,%eax 1045 ret 1046 .size aesni_gcm_decrypt,.-aesni_gcm_decrypt 1047 ___ 1048 }}} 1049 1050 $code =~ s/\`([^\`]*)\`/eval($1)/gem; 1051 1052 print $code; 1053 1054 close STDOUT; 1055