; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.data data align=8


ALIGN	16
one:
	DQ	1,0
two:
	DQ	2,0
three:
	DQ	3,0
four:
	DQ	4,0
five:
	DQ	5,0
six:
	DQ	6,0
seven:
	DQ	7,0
eight:
	DQ	8,0

OR_MASK:
	DD	0x00000000,0x00000000,0x00000000,0x80000000
poly:
	DQ	0x1,0xc200000000000000
mask:
	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
con1:
	DD	1,1,1,1
con2:
	DD	0x1b,0x1b,0x1b,0x1b
con3:
DB	-1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
and_mask:
	DD	0,0xffffffff,0xffffffff,0xffffffff
section	.text code align=64

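; GFMUL: carry-less multiplication of xmm0 by xmm1, reduced modulo the
; POLYVAL polynomial encoded in the poly constant above. The product is
; returned in xmm0; xmm2..xmm5 are clobbered. Local helper for the
; routines below, not an exported symbol.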
ALIGN	16
GFMUL:

	vpclmulqdq	xmm2,xmm0,xmm1,0x00
	vpclmulqdq	xmm5,xmm0,xmm1,0x11
	vpclmulqdq	xmm3,xmm0,xmm1,0x10
	vpclmulqdq	xmm4,xmm0,xmm1,0x01
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm3,8
	vpsrldq	xmm3,xmm3,8
	vpxor	xmm2,xmm2,xmm4
	vpxor	xmm5,xmm5,xmm3

	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10
	vpshufd	xmm4,xmm2,78
	vpxor	xmm2,xmm3,xmm4

	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10
	vpshufd	xmm4,xmm2,78
	vpxor	xmm2,xmm3,xmm4

	vpxor	xmm0,xmm2,xmm5
	DB	0F3h,0C3h		;repret

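; aesgcmsiv_htable_init: expands the 16-byte POLYVAL key at rsi into the
; eight consecutive powers H^1..H^8 (128 bytes) at rdi, via repeated GFMUL.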
global	aesgcmsiv_htable_init

ALIGN	16
aesgcmsiv_htable_init:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesgcmsiv_htable_init:
	mov	rdi,rcx
	mov	rsi,rdx


	vmovdqa	xmm0,XMMWORD[rsi]
	vmovdqa	xmm1,xmm0
	vmovdqa	XMMWORD[rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[16+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[32+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[48+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[64+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[80+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[96+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[112+rdi],xmm0
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aesgcmsiv_htable_init:
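; aesgcmsiv_htable6_init: same as above, but only the six powers H^1..H^6
; (96 bytes) are written.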
global	aesgcmsiv_htable6_init

ALIGN	16
aesgcmsiv_htable6_init:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesgcmsiv_htable6_init:
	mov	rdi,rcx
	mov	rsi,rdx


	vmovdqa	xmm0,XMMWORD[rsi]
	vmovdqa	xmm1,xmm0
	vmovdqa	XMMWORD[rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[16+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[32+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[48+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[64+rdi],xmm0
	call	GFMUL
	vmovdqa	XMMWORD[80+rdi],xmm0
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aesgcmsiv_htable6_init:
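; aesgcmsiv_htable_polyval: folds rdx bytes of input at rsi into the POLYVAL
; accumulator at rcx, using the table of powers of H at rdi. Any partial
; batch (rdx mod 128 bytes) is handled first; the main loop then consumes
; 128 bytes (eight blocks) per iteration.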
global	aesgcmsiv_htable_polyval

ALIGN	16
aesgcmsiv_htable_polyval:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesgcmsiv_htable_polyval:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9


	test	rdx,rdx
	jnz	NEAR $L$htable_polyval_start
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$htable_polyval_start:
	vzeroall


	mov	r11,rdx
	and	r11,127

	jz	NEAR $L$htable_polyval_no_prefix

	vpxor	xmm9,xmm9,xmm9
	vmovdqa	xmm1,XMMWORD[rcx]
	sub	rdx,r11

	sub	r11,16


	vmovdqu	xmm0,XMMWORD[rsi]
	vpxor	xmm0,xmm0,xmm1

	vpclmulqdq	xmm5,xmm0,XMMWORD[r11*1+rdi],0x01
	vpclmulqdq	xmm3,xmm0,XMMWORD[r11*1+rdi],0x00
	vpclmulqdq	xmm4,xmm0,XMMWORD[r11*1+rdi],0x11
	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
	vpxor	xmm5,xmm5,xmm6

	lea	rsi,[16+rsi]
	test	r11,r11
	jnz	NEAR $L$htable_polyval_prefix_loop
	jmp	NEAR $L$htable_polyval_prefix_complete


ALIGN	64
$L$htable_polyval_prefix_loop:
	sub	r11,16

	vmovdqu	xmm0,XMMWORD[rsi]

	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
	vpxor	xmm5,xmm5,xmm6

	test	r11,r11

	lea	rsi,[16+rsi]

	jnz	NEAR $L$htable_polyval_prefix_loop

$L$htable_polyval_prefix_complete:
	vpsrldq	xmm6,xmm5,8
	vpslldq	xmm5,xmm5,8

	vpxor	xmm9,xmm4,xmm6
	vpxor	xmm1,xmm3,xmm5

	jmp	NEAR $L$htable_polyval_main_loop

$L$htable_polyval_no_prefix:


	vpxor	xmm1,xmm1,xmm1
	vmovdqa	xmm9,XMMWORD[rcx]

ALIGN	64
$L$htable_polyval_main_loop:
	sub	rdx,0x80
	jb	NEAR $L$htable_polyval_out

	vmovdqu	xmm0,XMMWORD[112+rsi]

	vpclmulqdq	xmm5,xmm0,XMMWORD[rdi],0x01
	vpclmulqdq	xmm3,xmm0,XMMWORD[rdi],0x00
	vpclmulqdq	xmm4,xmm0,XMMWORD[rdi],0x11
	vpclmulqdq	xmm6,xmm0,XMMWORD[rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vmovdqu	xmm0,XMMWORD[96+rsi]
	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vmovdqu	xmm0,XMMWORD[80+rsi]

	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10
	vpalignr	xmm1,xmm1,xmm1,8

	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vpxor	xmm1,xmm1,xmm7

	vmovdqu	xmm0,XMMWORD[64+rsi]

	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vmovdqu	xmm0,XMMWORD[48+rsi]

	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10
	vpalignr	xmm1,xmm1,xmm1,8

	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vpxor	xmm1,xmm1,xmm7

	vmovdqu	xmm0,XMMWORD[32+rsi]

	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vpxor	xmm1,xmm1,xmm9

	vmovdqu	xmm0,XMMWORD[16+rsi]

	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vmovdqu	xmm0,XMMWORD[rsi]
	vpxor	xmm0,xmm0,xmm1

	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x01
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x00
	vpxor	xmm3,xmm3,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x11
	vpxor	xmm4,xmm4,xmm6
	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x10
	vpxor	xmm5,xmm5,xmm6


	vpsrldq	xmm6,xmm5,8
	vpslldq	xmm5,xmm5,8

	vpxor	xmm9,xmm4,xmm6
	vpxor	xmm1,xmm3,xmm5

	lea	rsi,[128+rsi]
	jmp	NEAR $L$htable_polyval_main_loop


$L$htable_polyval_out:
	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10
	vpalignr	xmm1,xmm1,xmm1,8
	vpxor	xmm1,xmm1,xmm6

	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10
	vpalignr	xmm1,xmm1,xmm1,8
	vpxor	xmm1,xmm1,xmm6
	vpxor	xmm1,xmm1,xmm9

	vmovdqu	XMMWORD[rcx],xmm1
	vzeroupper
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aesgcmsiv_htable_polyval:
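; aesgcmsiv_polyval_horner: Horner-style POLYVAL update. For each of the rcx
; 16-byte blocks at rdx, the accumulator T at rdi becomes (T xor block) * H,
; where H is the 16-byte key at rsi.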
global	aesgcmsiv_polyval_horner

ALIGN	16
aesgcmsiv_polyval_horner:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesgcmsiv_polyval_horner:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9


	test	rcx,rcx
	jnz	NEAR $L$polyval_horner_start
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$polyval_horner_start:


	xor	r10,r10
	shl	rcx,4

	vmovdqa	xmm1,XMMWORD[rsi]
	vmovdqa	xmm0,XMMWORD[rdi]

$L$polyval_horner_loop:
	vpxor	xmm0,xmm0,XMMWORD[r10*1+rdx]
	call	GFMUL

	add	r10,16
	cmp	rcx,r10
	jne	NEAR $L$polyval_horner_loop


	vmovdqa	XMMWORD[rdi],xmm0
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aesgcmsiv_polyval_horner:
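; aes128gcmsiv_aes_ks: expands the 16-byte AES-128 key at rdi into the
; 11 round keys (176 bytes) at rsi. Round constants come from con1/con2,
; and AESENCLAST plus the mask shuffle stands in for AESKEYGENASSIST.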
global	aes128gcmsiv_aes_ks

ALIGN	16
aes128gcmsiv_aes_ks:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_aes_ks:
	mov	rdi,rcx
	mov	rsi,rdx


	vmovdqu	xmm1,XMMWORD[rdi]
	vmovdqa	XMMWORD[rsi],xmm1

	vmovdqa	xmm0,XMMWORD[con1]
	vmovdqa	xmm15,XMMWORD[mask]

	mov	rax,8

$L$ks128_loop:
	add	rsi,16
	sub	rax,1
	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm3,xmm1,4
	vpxor	xmm1,xmm1,xmm3
	vpslldq	xmm3,xmm3,4
	vpxor	xmm1,xmm1,xmm3
	vpslldq	xmm3,xmm3,4
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2
	vmovdqa	XMMWORD[rsi],xmm1
	jne	NEAR $L$ks128_loop

	vmovdqa	xmm0,XMMWORD[con2]
	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm3,xmm1,4
	vpxor	xmm1,xmm1,xmm3
	vpslldq	xmm3,xmm3,4
	vpxor	xmm1,xmm1,xmm3
	vpslldq	xmm3,xmm3,4
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2
	vmovdqa	XMMWORD[16+rsi],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslldq	xmm3,xmm1,4
	vpxor	xmm1,xmm1,xmm3
	vpslldq	xmm3,xmm3,4
	vpxor	xmm1,xmm1,xmm3
	vpslldq	xmm3,xmm3,4
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2
	vmovdqa	XMMWORD[32+rsi],xmm1
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_aes_ks:
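; aes256gcmsiv_aes_ks: expands the 32-byte AES-256 key at rdi into the
; 15 round keys (240 bytes) at rsi, two round keys per loop iteration.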
global	aes256gcmsiv_aes_ks

ALIGN	16
aes256gcmsiv_aes_ks:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes256gcmsiv_aes_ks:
	mov	rdi,rcx
	mov	rsi,rdx


	vmovdqu	xmm1,XMMWORD[rdi]
	vmovdqu	xmm3,XMMWORD[16+rdi]
	vmovdqa	XMMWORD[rsi],xmm1
	vmovdqa	XMMWORD[16+rsi],xmm3
	vmovdqa	xmm0,XMMWORD[con1]
	vmovdqa	xmm15,XMMWORD[mask]
	vpxor	xmm14,xmm14,xmm14
	mov	rax,6

$L$ks256_loop:
	add	rsi,32
	sub	rax,1
	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm4,xmm1,32
	vpxor	xmm1,xmm1,xmm4
	vpshufb	xmm4,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vmovdqa	XMMWORD[rsi],xmm1
	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpsllq	xmm4,xmm3,32
	vpxor	xmm3,xmm3,xmm4
	vpshufb	xmm4,xmm3,XMMWORD[con3]
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vmovdqa	XMMWORD[16+rsi],xmm3
	jne	NEAR $L$ks256_loop

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpsllq	xmm4,xmm1,32
	vpxor	xmm1,xmm1,xmm4
	vpshufb	xmm4,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vmovdqa	XMMWORD[32+rsi],xmm1
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

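; aes128gcmsiv_aes_ks_enc_x1: fused key schedule and single-block encryption.
; Expands the AES-128 key at rcx into the round keys at rdx while encrypting
; the block at rdi with each fresh round key, writing the ciphertext to rsi.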
global	aes128gcmsiv_aes_ks_enc_x1

ALIGN	16
aes128gcmsiv_aes_ks_enc_x1:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9


	vmovdqa	xmm1,XMMWORD[rcx]
	vmovdqa	xmm4,XMMWORD[rdi]

	vmovdqa	XMMWORD[rdx],xmm1
	vpxor	xmm4,xmm4,xmm1

	vmovdqa	xmm0,XMMWORD[con1]
	vmovdqa	xmm15,XMMWORD[mask]

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[16+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[32+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[48+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[64+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[80+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[96+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[112+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[128+rdx],xmm1


	vmovdqa	xmm0,XMMWORD[con2]

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenc	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[144+rdx],xmm1

	vpshufb	xmm2,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpsllq	xmm3,xmm1,32
	vpxor	xmm1,xmm1,xmm3
	vpshufb	xmm3,xmm1,XMMWORD[con3]
	vpxor	xmm1,xmm1,xmm3
	vpxor	xmm1,xmm1,xmm2

	vaesenclast	xmm4,xmm4,xmm1
	vmovdqa	XMMWORD[160+rdx],xmm1


	vmovdqa	XMMWORD[rsi],xmm4
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_aes_ks_enc_x1:
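; aes128gcmsiv_kdf: the AES-GCM-SIV key-derivation step. Builds four
; consecutive counter blocks from the masked nonce block at rdi (and_mask
; plus the one constant), encrypts them with the expanded AES-128 key at
; rdx, and writes the four 16-byte outputs to rsi. Per RFC 8452 the caller
; keeps half of each output block as key material.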
global	aes128gcmsiv_kdf

ALIGN	16
aes128gcmsiv_kdf:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_kdf:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8


	vmovdqa	xmm1,XMMWORD[rdx]
	vmovdqa	xmm9,XMMWORD[rdi]
	vmovdqa	xmm12,XMMWORD[and_mask]
	vmovdqa	xmm13,XMMWORD[one]
	vpshufd	xmm9,xmm9,0x90
	vpand	xmm9,xmm9,xmm12
	vpaddd	xmm10,xmm9,xmm13
	vpaddd	xmm11,xmm10,xmm13
	vpaddd	xmm12,xmm11,xmm13

	vpxor	xmm9,xmm9,xmm1
	vpxor	xmm10,xmm10,xmm1
	vpxor	xmm11,xmm11,xmm1
	vpxor	xmm12,xmm12,xmm1

	vmovdqa	xmm1,XMMWORD[16+rdx]
	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1

	vmovdqa	xmm2,XMMWORD[32+rdx]
	vaesenc	xmm9,xmm9,xmm2
	vaesenc	xmm10,xmm10,xmm2
	vaesenc	xmm11,xmm11,xmm2
	vaesenc	xmm12,xmm12,xmm2

	vmovdqa	xmm1,XMMWORD[48+rdx]
	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1

	vmovdqa	xmm2,XMMWORD[64+rdx]
	vaesenc	xmm9,xmm9,xmm2
	vaesenc	xmm10,xmm10,xmm2
	vaesenc	xmm11,xmm11,xmm2
	vaesenc	xmm12,xmm12,xmm2

	vmovdqa	xmm1,XMMWORD[80+rdx]
	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1

	vmovdqa	xmm2,XMMWORD[96+rdx]
	vaesenc	xmm9,xmm9,xmm2
	vaesenc	xmm10,xmm10,xmm2
	vaesenc	xmm11,xmm11,xmm2
	vaesenc	xmm12,xmm12,xmm2

	vmovdqa	xmm1,XMMWORD[112+rdx]
	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1

	vmovdqa	xmm2,XMMWORD[128+rdx]
	vaesenc	xmm9,xmm9,xmm2
	vaesenc	xmm10,xmm10,xmm2
	vaesenc	xmm11,xmm11,xmm2
	vaesenc	xmm12,xmm12,xmm2

	vmovdqa	xmm1,XMMWORD[144+rdx]
	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1

	vmovdqa	xmm2,XMMWORD[160+rdx]
	vaesenclast	xmm9,xmm9,xmm2
	vaesenclast	xmm10,xmm10,xmm2
	vaesenclast	xmm11,xmm11,xmm2
	vaesenclast	xmm12,xmm12,xmm2


	vmovdqa	XMMWORD[rsi],xmm9
	vmovdqa	XMMWORD[16+rsi],xmm10
	vmovdqa	XMMWORD[32+rsi],xmm11
	vmovdqa	XMMWORD[48+rsi],xmm12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_kdf:
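; aes128gcmsiv_enc_msg_x4: AES-128 CTR encryption of r8 bytes from rdi to
; rsi, four blocks per iteration. The initial counter is the tag block at
; rdx with its top bit forced on via OR_MASK; remaining whole blocks are
; handled one at a time in the tail loop.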
global	aes128gcmsiv_enc_msg_x4

ALIGN	16
aes128gcmsiv_enc_msg_x4:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_enc_msg_x4:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


	test	r8,r8
	jnz	NEAR $L$128_enc_msg_x4_start
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$128_enc_msg_x4_start:
	push	r12

	push	r13


	shr	r8,4
	mov	r10,r8
	shl	r10,62
	shr	r10,62


	vmovdqa	xmm15,XMMWORD[rdx]
	vpor	xmm15,xmm15,XMMWORD[OR_MASK]

	vmovdqu	xmm4,XMMWORD[four]
	vmovdqa	xmm0,xmm15
	vpaddd	xmm1,xmm15,XMMWORD[one]
	vpaddd	xmm2,xmm15,XMMWORD[two]
	vpaddd	xmm3,xmm15,XMMWORD[three]

	shr	r8,2
	je	NEAR $L$128_enc_msg_x4_check_remainder

	sub	rsi,64
	sub	rdi,64

$L$128_enc_msg_x4_loop1:
	add	rsi,64
	add	rdi,64

	vmovdqa	xmm5,xmm0
	vmovdqa	xmm6,xmm1
	vmovdqa	xmm7,xmm2
	vmovdqa	xmm8,xmm3

	vpxor	xmm5,xmm5,XMMWORD[rcx]
	vpxor	xmm6,xmm6,XMMWORD[rcx]
	vpxor	xmm7,xmm7,XMMWORD[rcx]
	vpxor	xmm8,xmm8,XMMWORD[rcx]

	vmovdqu	xmm12,XMMWORD[16+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vpaddd	xmm0,xmm0,xmm4
	vmovdqu	xmm12,XMMWORD[32+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vpaddd	xmm1,xmm1,xmm4
	vmovdqu	xmm12,XMMWORD[48+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vpaddd	xmm2,xmm2,xmm4
	vmovdqu	xmm12,XMMWORD[64+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vpaddd	xmm3,xmm3,xmm4

	vmovdqu	xmm12,XMMWORD[80+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vmovdqu	xmm12,XMMWORD[96+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vmovdqu	xmm12,XMMWORD[112+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vmovdqu	xmm12,XMMWORD[128+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vmovdqu	xmm12,XMMWORD[144+rcx]
	vaesenc	xmm5,xmm5,xmm12
	vaesenc	xmm6,xmm6,xmm12
	vaesenc	xmm7,xmm7,xmm12
	vaesenc	xmm8,xmm8,xmm12

	vmovdqu	xmm12,XMMWORD[160+rcx]
	vaesenclast	xmm5,xmm5,xmm12
	vaesenclast	xmm6,xmm6,xmm12
	vaesenclast	xmm7,xmm7,xmm12
	vaesenclast	xmm8,xmm8,xmm12


	vpxor	xmm5,xmm5,XMMWORD[rdi]
	vpxor	xmm6,xmm6,XMMWORD[16+rdi]
	vpxor	xmm7,xmm7,XMMWORD[32+rdi]
	vpxor	xmm8,xmm8,XMMWORD[48+rdi]

	sub	r8,1

	vmovdqu	XMMWORD[rsi],xmm5
	vmovdqu	XMMWORD[16+rsi],xmm6
	vmovdqu	XMMWORD[32+rsi],xmm7
	vmovdqu	XMMWORD[48+rsi],xmm8

	jne	NEAR $L$128_enc_msg_x4_loop1

	add	rsi,64
	add	rdi,64

$L$128_enc_msg_x4_check_remainder:
	cmp	r10,0
	je	NEAR $L$128_enc_msg_x4_out

$L$128_enc_msg_x4_loop2:

	vmovdqa	xmm5,xmm0
	vpaddd	xmm0,xmm0,XMMWORD[one]

	vpxor	xmm5,xmm5,XMMWORD[rcx]
	vaesenc	xmm5,xmm5,XMMWORD[16+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[32+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[48+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[64+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[80+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[96+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[112+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[128+rcx]
	vaesenc	xmm5,xmm5,XMMWORD[144+rcx]
	vaesenclast	xmm5,xmm5,XMMWORD[160+rcx]


	vpxor	xmm5,xmm5,XMMWORD[rdi]
	vmovdqu	XMMWORD[rsi],xmm5

	add	rdi,16
	add	rsi,16

	sub	r10,1
	jne	NEAR $L$128_enc_msg_x4_loop2

$L$128_enc_msg_x4_out:
	pop	r13

	pop	r12

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_enc_msg_x4:
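; aes128gcmsiv_enc_msg_x8: same as aes128gcmsiv_enc_msg_x4 but eight blocks
; per iteration; the eighth counter lives in a 64-byte-aligned stack slot.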
global	aes128gcmsiv_enc_msg_x8

ALIGN	16
aes128gcmsiv_enc_msg_x8:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_enc_msg_x8:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


	test	r8,r8
	jnz	NEAR $L$128_enc_msg_x8_start
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$128_enc_msg_x8_start:
	push	r12

	push	r13

	push	rbp

	mov	rbp,rsp


	sub	rsp,128
	and	rsp,-64

	shr	r8,4
	mov	r10,r8
	shl	r10,61
	shr	r10,61


	vmovdqu	xmm1,XMMWORD[rdx]
	vpor	xmm1,xmm1,XMMWORD[OR_MASK]


	vpaddd	xmm0,xmm1,XMMWORD[seven]
	vmovdqu	XMMWORD[rsp],xmm0
	vpaddd	xmm9,xmm1,XMMWORD[one]
	vpaddd	xmm10,xmm1,XMMWORD[two]
	vpaddd	xmm11,xmm1,XMMWORD[three]
	vpaddd	xmm12,xmm1,XMMWORD[four]
	vpaddd	xmm13,xmm1,XMMWORD[five]
	vpaddd	xmm14,xmm1,XMMWORD[six]
	vmovdqa	xmm0,xmm1

	shr	r8,3
	je	NEAR $L$128_enc_msg_x8_check_remainder

	sub	rsi,128
	sub	rdi,128

$L$128_enc_msg_x8_loop1:
	add	rsi,128
	add	rdi,128

	vmovdqa	xmm1,xmm0
	vmovdqa	xmm2,xmm9
	vmovdqa	xmm3,xmm10
	vmovdqa	xmm4,xmm11
	vmovdqa	xmm5,xmm12
	vmovdqa	xmm6,xmm13
	vmovdqa	xmm7,xmm14

	vmovdqu	xmm8,XMMWORD[rsp]

	vpxor	xmm1,xmm1,XMMWORD[rcx]
	vpxor	xmm2,xmm2,XMMWORD[rcx]
	vpxor	xmm3,xmm3,XMMWORD[rcx]
	vpxor	xmm4,xmm4,XMMWORD[rcx]
	vpxor	xmm5,xmm5,XMMWORD[rcx]
	vpxor	xmm6,xmm6,XMMWORD[rcx]
	vpxor	xmm7,xmm7,XMMWORD[rcx]
	vpxor	xmm8,xmm8,XMMWORD[rcx]

	vmovdqu	xmm15,XMMWORD[16+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vmovdqu	xmm14,XMMWORD[rsp]
	vpaddd	xmm14,xmm14,XMMWORD[eight]
	vmovdqu	XMMWORD[rsp],xmm14
	vmovdqu	xmm15,XMMWORD[32+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpsubd	xmm14,xmm14,XMMWORD[one]
	vmovdqu	xmm15,XMMWORD[48+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpaddd	xmm0,xmm0,XMMWORD[eight]
	vmovdqu	xmm15,XMMWORD[64+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpaddd	xmm9,xmm9,XMMWORD[eight]
	vmovdqu	xmm15,XMMWORD[80+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpaddd	xmm10,xmm10,XMMWORD[eight]
	vmovdqu	xmm15,XMMWORD[96+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpaddd	xmm11,xmm11,XMMWORD[eight]
	vmovdqu	xmm15,XMMWORD[112+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpaddd	xmm12,xmm12,XMMWORD[eight]
	vmovdqu	xmm15,XMMWORD[128+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vpaddd	xmm13,xmm13,XMMWORD[eight]
	vmovdqu	xmm15,XMMWORD[144+rcx]
	vaesenc	xmm1,xmm1,xmm15
	vaesenc	xmm2,xmm2,xmm15
	vaesenc	xmm3,xmm3,xmm15
	vaesenc	xmm4,xmm4,xmm15
	vaesenc	xmm5,xmm5,xmm15
	vaesenc	xmm6,xmm6,xmm15
	vaesenc	xmm7,xmm7,xmm15
	vaesenc	xmm8,xmm8,xmm15

	vmovdqu	xmm15,XMMWORD[160+rcx]
	vaesenclast	xmm1,xmm1,xmm15
	vaesenclast	xmm2,xmm2,xmm15
	vaesenclast	xmm3,xmm3,xmm15
	vaesenclast	xmm4,xmm4,xmm15
	vaesenclast	xmm5,xmm5,xmm15
	vaesenclast	xmm6,xmm6,xmm15
	vaesenclast	xmm7,xmm7,xmm15
	vaesenclast	xmm8,xmm8,xmm15


	vpxor	xmm1,xmm1,XMMWORD[rdi]
	vpxor	xmm2,xmm2,XMMWORD[16+rdi]
	vpxor	xmm3,xmm3,XMMWORD[32+rdi]
	vpxor	xmm4,xmm4,XMMWORD[48+rdi]
	vpxor	xmm5,xmm5,XMMWORD[64+rdi]
	vpxor	xmm6,xmm6,XMMWORD[80+rdi]
	vpxor	xmm7,xmm7,XMMWORD[96+rdi]
	vpxor	xmm8,xmm8,XMMWORD[112+rdi]

	dec	r8

	vmovdqu	XMMWORD[rsi],xmm1
	vmovdqu	XMMWORD[16+rsi],xmm2
	vmovdqu	XMMWORD[32+rsi],xmm3
	vmovdqu	XMMWORD[48+rsi],xmm4
	vmovdqu	XMMWORD[64+rsi],xmm5
	vmovdqu	XMMWORD[80+rsi],xmm6
	vmovdqu	XMMWORD[96+rsi],xmm7
	vmovdqu	XMMWORD[112+rsi],xmm8

	jne	NEAR $L$128_enc_msg_x8_loop1

	add	rsi,128
	add	rdi,128

$L$128_enc_msg_x8_check_remainder:
	cmp	r10,0
	je	NEAR $L$128_enc_msg_x8_out

$L$128_enc_msg_x8_loop2:

	vmovdqa	xmm1,xmm0
	vpaddd	xmm0,xmm0,XMMWORD[one]

	vpxor	xmm1,xmm1,XMMWORD[rcx]
	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]
	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]
	vaesenclast	xmm1,xmm1,XMMWORD[160+rcx]


	vpxor	xmm1,xmm1,XMMWORD[rdi]

	vmovdqu	XMMWORD[rsi],xmm1

	add	rdi,16
	add	rsi,16

	dec	r10
	jne	NEAR $L$128_enc_msg_x8_loop2

$L$128_enc_msg_x8_out:
	mov	rsp,rbp

	pop	rbp

	pop	r13

	pop	r12

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_enc_msg_x8:
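; aes128gcmsiv_dec: CTR decryption of r9 bytes from rdi to rsi that folds
; the ciphertext into the POLYVAL accumulator at rdx as it goes, six blocks
; per main-loop iteration, using the table of powers of H at rcx and the
; AES-128 round keys at r8. The initial counter is the 16 bytes at rdi+r9
; (the block just past the ciphertext) with its top bit set via OR_MASK.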
global	aes128gcmsiv_dec

ALIGN	16
aes128gcmsiv_dec:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_dec:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	test	r9,~15
	jnz	NEAR $L$128_dec_start
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$128_dec_start:
	vzeroupper
	vmovdqa	xmm0,XMMWORD[rdx]
	mov	rax,rdx

	lea	rax,[32+rax]
	lea	rcx,[32+rcx]


	vmovdqu	xmm15,XMMWORD[r9*1+rdi]
	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
	and	r9,~15


	cmp	r9,96
	jb	NEAR $L$128_dec_loop2


	sub	r9,96
	vmovdqa	xmm7,xmm15
	vpaddd	xmm8,xmm7,XMMWORD[one]
	vpaddd	xmm9,xmm7,XMMWORD[two]
	vpaddd	xmm10,xmm9,XMMWORD[one]
	vpaddd	xmm11,xmm9,XMMWORD[two]
	vpaddd	xmm12,xmm11,XMMWORD[one]
	vpaddd	xmm15,xmm11,XMMWORD[two]

	vpxor	xmm7,xmm7,XMMWORD[r8]
	vpxor	xmm8,xmm8,XMMWORD[r8]
	vpxor	xmm9,xmm9,XMMWORD[r8]
	vpxor	xmm10,xmm10,XMMWORD[r8]
	vpxor	xmm11,xmm11,XMMWORD[r8]
	vpxor	xmm12,xmm12,XMMWORD[r8]

	vmovdqu	xmm4,XMMWORD[16+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[32+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[48+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[64+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[80+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[96+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[112+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[128+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[144+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[160+r8]
	vaesenclast	xmm7,xmm7,xmm4
	vaesenclast	xmm8,xmm8,xmm4
	vaesenclast	xmm9,xmm9,xmm4
	vaesenclast	xmm10,xmm10,xmm4
	vaesenclast	xmm11,xmm11,xmm4
	vaesenclast	xmm12,xmm12,xmm4


	vpxor	xmm7,xmm7,XMMWORD[rdi]
	vpxor	xmm8,xmm8,XMMWORD[16+rdi]
	vpxor	xmm9,xmm9,XMMWORD[32+rdi]
	vpxor	xmm10,xmm10,XMMWORD[48+rdi]
	vpxor	xmm11,xmm11,XMMWORD[64+rdi]
	vpxor	xmm12,xmm12,XMMWORD[80+rdi]

	vmovdqu	XMMWORD[rsi],xmm7
	vmovdqu	XMMWORD[16+rsi],xmm8
	vmovdqu	XMMWORD[32+rsi],xmm9
	vmovdqu	XMMWORD[48+rsi],xmm10
	vmovdqu	XMMWORD[64+rsi],xmm11
	vmovdqu	XMMWORD[80+rsi],xmm12

	add	rdi,96
	add	rsi,96
	jmp	NEAR $L$128_dec_loop1


ALIGN	64
$L$128_dec_loop1:
	cmp	r9,96
	jb	NEAR $L$128_dec_finish_96
	sub	r9,96

	vmovdqa	xmm6,xmm12
	vmovdqa	XMMWORD[(16-32)+rax],xmm11
	vmovdqa	XMMWORD[(32-32)+rax],xmm10
	vmovdqa	XMMWORD[(48-32)+rax],xmm9
	vmovdqa	XMMWORD[(64-32)+rax],xmm8
	vmovdqa	XMMWORD[(80-32)+rax],xmm7

	vmovdqa	xmm7,xmm15
	vpaddd	xmm8,xmm7,XMMWORD[one]
	vpaddd	xmm9,xmm7,XMMWORD[two]
	vpaddd	xmm10,xmm9,XMMWORD[one]
	vpaddd	xmm11,xmm9,XMMWORD[two]
	vpaddd	xmm12,xmm11,XMMWORD[one]
	vpaddd	xmm15,xmm11,XMMWORD[two]

	vmovdqa	xmm4,XMMWORD[r8]
	vpxor	xmm7,xmm7,xmm4
	vpxor	xmm8,xmm8,xmm4
	vpxor	xmm9,xmm9,xmm4
	vpxor	xmm10,xmm10,xmm4
	vpxor	xmm11,xmm11,xmm4
	vpxor	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
	vpclmulqdq	xmm2,xmm6,xmm4,0x11
	vpclmulqdq	xmm3,xmm6,xmm4,0x00
	vpclmulqdq	xmm1,xmm6,xmm4,0x01
	vpclmulqdq	xmm4,xmm6,xmm4,0x10
	vpxor	xmm1,xmm1,xmm4

	vmovdqu	xmm4,XMMWORD[16+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm6,XMMWORD[((-16))+rax]
	vmovdqu	xmm13,XMMWORD[((-16))+rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4


	vmovdqu	xmm4,XMMWORD[32+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm6,XMMWORD[rax]
	vmovdqu	xmm13,XMMWORD[rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4


	vmovdqu	xmm4,XMMWORD[48+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm6,XMMWORD[16+rax]
	vmovdqu	xmm13,XMMWORD[16+rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4


	vmovdqu	xmm4,XMMWORD[64+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm6,XMMWORD[32+rax]
	vmovdqu	xmm13,XMMWORD[32+rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4


	vmovdqu	xmm4,XMMWORD[80+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[96+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm4,XMMWORD[112+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4


	vmovdqa	xmm6,XMMWORD[((80-32))+rax]
	vpxor	xmm6,xmm6,xmm0
	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]

	vpclmulqdq	xmm4,xmm6,xmm5,0x01
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm5,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm5,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm5,0x10
	vpxor	xmm1,xmm1,xmm4

	vmovdqu	xmm4,XMMWORD[128+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4


	vpsrldq	xmm4,xmm1,8
	vpxor	xmm5,xmm2,xmm4
	vpslldq	xmm4,xmm1,8
	vpxor	xmm0,xmm3,xmm4

	vmovdqa	xmm3,XMMWORD[poly]

	vmovdqu	xmm4,XMMWORD[144+r8]
	vaesenc	xmm7,xmm7,xmm4
	vaesenc	xmm8,xmm8,xmm4
	vaesenc	xmm9,xmm9,xmm4
	vaesenc	xmm10,xmm10,xmm4
	vaesenc	xmm11,xmm11,xmm4
	vaesenc	xmm12,xmm12,xmm4

	vmovdqu	xmm6,XMMWORD[160+r8]
	vpalignr	xmm2,xmm0,xmm0,8
	vpclmulqdq	xmm0,xmm0,xmm3,0x10
	vpxor	xmm0,xmm2,xmm0

	vpxor	xmm4,xmm6,XMMWORD[rdi]
	vaesenclast	xmm7,xmm7,xmm4
	vpxor	xmm4,xmm6,XMMWORD[16+rdi]
	vaesenclast	xmm8,xmm8,xmm4
	vpxor	xmm4,xmm6,XMMWORD[32+rdi]
	vaesenclast	xmm9,xmm9,xmm4
	vpxor	xmm4,xmm6,XMMWORD[48+rdi]
	vaesenclast	xmm10,xmm10,xmm4
	vpxor	xmm4,xmm6,XMMWORD[64+rdi]
	vaesenclast	xmm11,xmm11,xmm4
	vpxor	xmm4,xmm6,XMMWORD[80+rdi]
	vaesenclast	xmm12,xmm12,xmm4

	vpalignr	xmm2,xmm0,xmm0,8
	vpclmulqdq	xmm0,xmm0,xmm3,0x10
	vpxor	xmm0,xmm2,xmm0

	vmovdqu	XMMWORD[rsi],xmm7
	vmovdqu	XMMWORD[16+rsi],xmm8
	vmovdqu	XMMWORD[32+rsi],xmm9
	vmovdqu	XMMWORD[48+rsi],xmm10
	vmovdqu	XMMWORD[64+rsi],xmm11
	vmovdqu	XMMWORD[80+rsi],xmm12

	vpxor	xmm0,xmm0,xmm5

	lea	rdi,[96+rdi]
	lea	rsi,[96+rsi]
	jmp	NEAR $L$128_dec_loop1

$L$128_dec_finish_96:
	vmovdqa	xmm6,xmm12
	vmovdqa	XMMWORD[(16-32)+rax],xmm11
	vmovdqa	XMMWORD[(32-32)+rax],xmm10
	vmovdqa	XMMWORD[(48-32)+rax],xmm9
	vmovdqa	XMMWORD[(64-32)+rax],xmm8
	vmovdqa	XMMWORD[(80-32)+rax],xmm7

	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
	vpclmulqdq	xmm1,xmm6,xmm4,0x10
	vpclmulqdq	xmm2,xmm6,xmm4,0x11
	vpclmulqdq	xmm3,xmm6,xmm4,0x00
	vpclmulqdq	xmm4,xmm6,xmm4,0x01
	vpxor	xmm1,xmm1,xmm4

	vmovdqu	xmm6,XMMWORD[((-16))+rax]
	vmovdqu	xmm13,XMMWORD[((-16))+rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4

	vmovdqu	xmm6,XMMWORD[rax]
	vmovdqu	xmm13,XMMWORD[rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4

	vmovdqu	xmm6,XMMWORD[16+rax]
	vmovdqu	xmm13,XMMWORD[16+rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4

	vmovdqu	xmm6,XMMWORD[32+rax]
	vmovdqu	xmm13,XMMWORD[32+rcx]

	vpclmulqdq	xmm4,xmm6,xmm13,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm13,0x01
	vpxor	xmm1,xmm1,xmm4


	vmovdqu	xmm6,XMMWORD[((80-32))+rax]
	vpxor	xmm6,xmm6,xmm0
	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
	vpclmulqdq	xmm4,xmm6,xmm5,0x11
	vpxor	xmm2,xmm2,xmm4
	vpclmulqdq	xmm4,xmm6,xmm5,0x00
	vpxor	xmm3,xmm3,xmm4
	vpclmulqdq	xmm4,xmm6,xmm5,0x10
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm4,xmm6,xmm5,0x01
	vpxor	xmm1,xmm1,xmm4

	vpsrldq	xmm4,xmm1,8
	vpxor	xmm5,xmm2,xmm4
	vpslldq	xmm4,xmm1,8
	vpxor	xmm0,xmm3,xmm4

	vmovdqa	xmm3,XMMWORD[poly]

	vpalignr	xmm2,xmm0,xmm0,8
	vpclmulqdq	xmm0,xmm0,xmm3,0x10
	vpxor	xmm0,xmm2,xmm0

	vpalignr	xmm2,xmm0,xmm0,8
	vpclmulqdq	xmm0,xmm0,xmm3,0x10
	vpxor	xmm0,xmm2,xmm0

	vpxor	xmm0,xmm0,xmm5

$L$128_dec_loop2:


	cmp	r9,16
	jb	NEAR $L$128_dec_out
	sub	r9,16

	vmovdqa	xmm2,xmm15
	vpaddd	xmm15,xmm15,XMMWORD[one]

	vpxor	xmm2,xmm2,XMMWORD[r8]
	vaesenc	xmm2,xmm2,XMMWORD[16+r8]
	vaesenc	xmm2,xmm2,XMMWORD[32+r8]
	vaesenc	xmm2,xmm2,XMMWORD[48+r8]
	vaesenc	xmm2,xmm2,XMMWORD[64+r8]
	vaesenc	xmm2,xmm2,XMMWORD[80+r8]
	vaesenc	xmm2,xmm2,XMMWORD[96+r8]
	vaesenc	xmm2,xmm2,XMMWORD[112+r8]
	vaesenc	xmm2,xmm2,XMMWORD[128+r8]
	vaesenc	xmm2,xmm2,XMMWORD[144+r8]
	vaesenclast	xmm2,xmm2,XMMWORD[160+r8]
	vpxor	xmm2,xmm2,XMMWORD[rdi]
	vmovdqu	XMMWORD[rsi],xmm2
	add	rdi,16
	add	rsi,16

	vpxor	xmm0,xmm0,xmm2
	vmovdqa	xmm1,XMMWORD[((-32))+rcx]
	call	GFMUL

	jmp	NEAR $L$128_dec_loop2

$L$128_dec_out:
	vmovdqu	XMMWORD[rdx],xmm0
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_dec:
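; aes128gcmsiv_ecb_enc_block: encrypts the single 16-byte block at rdi with
; the 11 AES-128 round keys at rdx, writing the result to rsi.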
global	aes128gcmsiv_ecb_enc_block

ALIGN	16
aes128gcmsiv_ecb_enc_block:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_ecb_enc_block:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8


	vmovdqa	xmm1,XMMWORD[rdi]

	vpxor	xmm1,xmm1,XMMWORD[rdx]
	vaesenc	xmm1,xmm1,XMMWORD[16+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[32+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[48+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[64+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[80+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[96+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[112+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[128+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[144+rdx]
	vaesenclast	xmm1,xmm1,XMMWORD[160+rdx]

	vmovdqa	XMMWORD[rsi],xmm1

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes128gcmsiv_ecb_enc_block:
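; aes256gcmsiv_aes_ks_enc_x1: AES-256 version of the fused key schedule and
; single-block encryption: writes the 15 round keys (240 bytes) to rdx and
; the ciphertext of the block at rdi to rsi, keyed by the 32 bytes at rcx.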
global	aes256gcmsiv_aes_ks_enc_x1

ALIGN	16
aes256gcmsiv_aes_ks_enc_x1:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9


	vmovdqa	xmm0,XMMWORD[con1]
	vmovdqa	xmm15,XMMWORD[mask]
	vmovdqa	xmm8,XMMWORD[rdi]
	vmovdqa	xmm1,XMMWORD[rcx]
	vmovdqa	xmm3,XMMWORD[16+rcx]
	vpxor	xmm8,xmm8,xmm1
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[rdx],xmm1
	vmovdqu	XMMWORD[16+rdx],xmm3
	vpxor	xmm14,xmm14,xmm14

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenc	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[32+rdx],xmm1

	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpslldq	xmm4,xmm3,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[48+rdx],xmm3

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenc	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[64+rdx],xmm1

	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpslldq	xmm4,xmm3,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[80+rdx],xmm3

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenc	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[96+rdx],xmm1

	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpslldq	xmm4,xmm3,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[112+rdx],xmm3

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenc	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[128+rdx],xmm1

	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpslldq	xmm4,xmm3,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[144+rdx],xmm3

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenc	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[160+rdx],xmm1

	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpslldq	xmm4,xmm3,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[176+rdx],xmm3

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslld	xmm0,xmm0,1
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenc	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[192+rdx],xmm1

	vpshufd	xmm2,xmm1,0xff
	vaesenclast	xmm2,xmm2,xmm14
	vpslldq	xmm4,xmm3,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm3,xmm3,xmm4
	vpxor	xmm3,xmm3,xmm2
	vaesenc	xmm8,xmm8,xmm3
	vmovdqu	XMMWORD[208+rdx],xmm3

	vpshufb	xmm2,xmm3,xmm15
	vaesenclast	xmm2,xmm2,xmm0
	vpslldq	xmm4,xmm1,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpslldq	xmm4,xmm4,4
	vpxor	xmm1,xmm1,xmm4
	vpxor	xmm1,xmm1,xmm2
	vaesenclast	xmm8,xmm8,xmm1
	vmovdqu	XMMWORD[224+rdx],xmm1

	vmovdqa	XMMWORD[rsi],xmm8
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes256gcmsiv_aes_ks_enc_x1:
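; aes256gcmsiv_ecb_enc_block: encrypts the single 16-byte block at rdi with
; the 15 AES-256 round keys at rdx (14 rounds), writing the result to rsi.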
global	aes256gcmsiv_ecb_enc_block

ALIGN	16
aes256gcmsiv_ecb_enc_block:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes256gcmsiv_ecb_enc_block:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8


	vmovdqa	xmm1,XMMWORD[rdi]
	vpxor	xmm1,xmm1,XMMWORD[rdx]
	vaesenc	xmm1,xmm1,XMMWORD[16+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[32+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[48+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[64+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[80+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[96+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[112+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[128+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[144+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[160+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[176+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[192+rdx]
	vaesenc	xmm1,xmm1,XMMWORD[208+rdx]
	vaesenclast	xmm1,xmm1,XMMWORD[224+rdx]
	vmovdqa	XMMWORD[rsi],xmm1
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aes256gcmsiv_ecb_enc_block:
   2024 global	aes256gcmsiv_enc_msg_x4
   2025 
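; 4-way AES-256-CTR encryption. From the code below: rdi = plaintext,
; rsi = ciphertext out, rdx = initial counter block (the tag), rcx =
; key schedule, r8 = length in bytes. OR_MASK appears to force the top
; bit of the counter block as GCM-SIV's CTR mode requires; whole groups
; of four blocks run in the main loop, stragglers one block at a time.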
   2026 ALIGN	16
   2027 aes256gcmsiv_enc_msg_x4:
   2028 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   2029 	mov	QWORD[16+rsp],rsi
   2030 	mov	rax,rsp
   2031 $L$SEH_begin_aes256gcmsiv_enc_msg_x4:
   2032 	mov	rdi,rcx
   2033 	mov	rsi,rdx
   2034 	mov	rdx,r8
   2035 	mov	rcx,r9
   2036 	mov	r8,QWORD[40+rsp]
   2037 
   2038 
   2039 
   2040 	test	r8,r8
   2041 	jnz	NEAR $L$256_enc_msg_x4_start
   2042 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2043 	mov	rsi,QWORD[16+rsp]
   2044 	DB	0F3h,0C3h		;repret
   2045 
   2046 $L$256_enc_msg_x4_start:
   2047 	mov	r10,r8
    2048 	shr	r8,4	; r8 = number of whole 16-byte blocks
    2049 	shl	r10,60	; does the length leave a partial trailing block?
    2050 	jz	NEAR $L$256_enc_msg_x4_start2
    2051 	add	r8,1	; yes: round the block count up
   2052 
   2053 $L$256_enc_msg_x4_start2:
   2054 	mov	r10,r8
   2055 	shl	r10,62
    2056 	shr	r10,62	; r10 = block count mod 4, for the one-at-a-time loop
   2057 
   2058 
   2059 	vmovdqa	xmm15,XMMWORD[rdx]
   2060 	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
   2061 
   2062 	vmovdqa	xmm4,XMMWORD[four]
   2063 	vmovdqa	xmm0,xmm15
   2064 	vpaddd	xmm1,xmm15,XMMWORD[one]
   2065 	vpaddd	xmm2,xmm15,XMMWORD[two]
   2066 	vpaddd	xmm3,xmm15,XMMWORD[three]
   2067 
    2068 	shr	r8,2	; r8 = number of 4-block groups
   2069 	je	NEAR $L$256_enc_msg_x4_check_remainder
   2070 
   2071 	sub	rsi,64
   2072 	sub	rdi,64
   2073 
   2074 $L$256_enc_msg_x4_loop1:
   2075 	add	rsi,64
   2076 	add	rdi,64
   2077 
   2078 	vmovdqa	xmm5,xmm0
   2079 	vmovdqa	xmm6,xmm1
   2080 	vmovdqa	xmm7,xmm2
   2081 	vmovdqa	xmm8,xmm3
   2082 
   2083 	vpxor	xmm5,xmm5,XMMWORD[rcx]
   2084 	vpxor	xmm6,xmm6,XMMWORD[rcx]
   2085 	vpxor	xmm7,xmm7,XMMWORD[rcx]
   2086 	vpxor	xmm8,xmm8,XMMWORD[rcx]
   2087 
   2088 	vmovdqu	xmm12,XMMWORD[16+rcx]
   2089 	vaesenc	xmm5,xmm5,xmm12
   2090 	vaesenc	xmm6,xmm6,xmm12
   2091 	vaesenc	xmm7,xmm7,xmm12
   2092 	vaesenc	xmm8,xmm8,xmm12
   2093 
   2094 	vpaddd	xmm0,xmm0,xmm4
   2095 	vmovdqu	xmm12,XMMWORD[32+rcx]
   2096 	vaesenc	xmm5,xmm5,xmm12
   2097 	vaesenc	xmm6,xmm6,xmm12
   2098 	vaesenc	xmm7,xmm7,xmm12
   2099 	vaesenc	xmm8,xmm8,xmm12
   2100 
   2101 	vpaddd	xmm1,xmm1,xmm4
   2102 	vmovdqu	xmm12,XMMWORD[48+rcx]
   2103 	vaesenc	xmm5,xmm5,xmm12
   2104 	vaesenc	xmm6,xmm6,xmm12
   2105 	vaesenc	xmm7,xmm7,xmm12
   2106 	vaesenc	xmm8,xmm8,xmm12
   2107 
   2108 	vpaddd	xmm2,xmm2,xmm4
   2109 	vmovdqu	xmm12,XMMWORD[64+rcx]
   2110 	vaesenc	xmm5,xmm5,xmm12
   2111 	vaesenc	xmm6,xmm6,xmm12
   2112 	vaesenc	xmm7,xmm7,xmm12
   2113 	vaesenc	xmm8,xmm8,xmm12
   2114 
   2115 	vpaddd	xmm3,xmm3,xmm4
   2116 
   2117 	vmovdqu	xmm12,XMMWORD[80+rcx]
   2118 	vaesenc	xmm5,xmm5,xmm12
   2119 	vaesenc	xmm6,xmm6,xmm12
   2120 	vaesenc	xmm7,xmm7,xmm12
   2121 	vaesenc	xmm8,xmm8,xmm12
   2122 
   2123 	vmovdqu	xmm12,XMMWORD[96+rcx]
   2124 	vaesenc	xmm5,xmm5,xmm12
   2125 	vaesenc	xmm6,xmm6,xmm12
   2126 	vaesenc	xmm7,xmm7,xmm12
   2127 	vaesenc	xmm8,xmm8,xmm12
   2128 
   2129 	vmovdqu	xmm12,XMMWORD[112+rcx]
   2130 	vaesenc	xmm5,xmm5,xmm12
   2131 	vaesenc	xmm6,xmm6,xmm12
   2132 	vaesenc	xmm7,xmm7,xmm12
   2133 	vaesenc	xmm8,xmm8,xmm12
   2134 
   2135 	vmovdqu	xmm12,XMMWORD[128+rcx]
   2136 	vaesenc	xmm5,xmm5,xmm12
   2137 	vaesenc	xmm6,xmm6,xmm12
   2138 	vaesenc	xmm7,xmm7,xmm12
   2139 	vaesenc	xmm8,xmm8,xmm12
   2140 
   2141 	vmovdqu	xmm12,XMMWORD[144+rcx]
   2142 	vaesenc	xmm5,xmm5,xmm12
   2143 	vaesenc	xmm6,xmm6,xmm12
   2144 	vaesenc	xmm7,xmm7,xmm12
   2145 	vaesenc	xmm8,xmm8,xmm12
   2146 
   2147 	vmovdqu	xmm12,XMMWORD[160+rcx]
   2148 	vaesenc	xmm5,xmm5,xmm12
   2149 	vaesenc	xmm6,xmm6,xmm12
   2150 	vaesenc	xmm7,xmm7,xmm12
   2151 	vaesenc	xmm8,xmm8,xmm12
   2152 
   2153 	vmovdqu	xmm12,XMMWORD[176+rcx]
   2154 	vaesenc	xmm5,xmm5,xmm12
   2155 	vaesenc	xmm6,xmm6,xmm12
   2156 	vaesenc	xmm7,xmm7,xmm12
   2157 	vaesenc	xmm8,xmm8,xmm12
   2158 
   2159 	vmovdqu	xmm12,XMMWORD[192+rcx]
   2160 	vaesenc	xmm5,xmm5,xmm12
   2161 	vaesenc	xmm6,xmm6,xmm12
   2162 	vaesenc	xmm7,xmm7,xmm12
   2163 	vaesenc	xmm8,xmm8,xmm12
   2164 
   2165 	vmovdqu	xmm12,XMMWORD[208+rcx]
   2166 	vaesenc	xmm5,xmm5,xmm12
   2167 	vaesenc	xmm6,xmm6,xmm12
   2168 	vaesenc	xmm7,xmm7,xmm12
   2169 	vaesenc	xmm8,xmm8,xmm12
   2170 
   2171 	vmovdqu	xmm12,XMMWORD[224+rcx]
   2172 	vaesenclast	xmm5,xmm5,xmm12
   2173 	vaesenclast	xmm6,xmm6,xmm12
   2174 	vaesenclast	xmm7,xmm7,xmm12
   2175 	vaesenclast	xmm8,xmm8,xmm12
   2176 
   2177 
   2178 
   2179 	vpxor	xmm5,xmm5,XMMWORD[rdi]
   2180 	vpxor	xmm6,xmm6,XMMWORD[16+rdi]
   2181 	vpxor	xmm7,xmm7,XMMWORD[32+rdi]
   2182 	vpxor	xmm8,xmm8,XMMWORD[48+rdi]
   2183 
   2184 	sub	r8,1
   2185 
   2186 	vmovdqu	XMMWORD[rsi],xmm5
   2187 	vmovdqu	XMMWORD[16+rsi],xmm6
   2188 	vmovdqu	XMMWORD[32+rsi],xmm7
   2189 	vmovdqu	XMMWORD[48+rsi],xmm8
   2190 
   2191 	jne	NEAR $L$256_enc_msg_x4_loop1
   2192 
   2193 	add	rsi,64
   2194 	add	rdi,64
   2195 
   2196 $L$256_enc_msg_x4_check_remainder:
   2197 	cmp	r10,0
   2198 	je	NEAR $L$256_enc_msg_x4_out
   2199 
   2200 $L$256_enc_msg_x4_loop2:
   2201 
   2202 
   2203 
   2204 	vmovdqa	xmm5,xmm0
   2205 	vpaddd	xmm0,xmm0,XMMWORD[one]
   2206 	vpxor	xmm5,xmm5,XMMWORD[rcx]
   2207 	vaesenc	xmm5,xmm5,XMMWORD[16+rcx]
   2208 	vaesenc	xmm5,xmm5,XMMWORD[32+rcx]
   2209 	vaesenc	xmm5,xmm5,XMMWORD[48+rcx]
   2210 	vaesenc	xmm5,xmm5,XMMWORD[64+rcx]
   2211 	vaesenc	xmm5,xmm5,XMMWORD[80+rcx]
   2212 	vaesenc	xmm5,xmm5,XMMWORD[96+rcx]
   2213 	vaesenc	xmm5,xmm5,XMMWORD[112+rcx]
   2214 	vaesenc	xmm5,xmm5,XMMWORD[128+rcx]
   2215 	vaesenc	xmm5,xmm5,XMMWORD[144+rcx]
   2216 	vaesenc	xmm5,xmm5,XMMWORD[160+rcx]
   2217 	vaesenc	xmm5,xmm5,XMMWORD[176+rcx]
   2218 	vaesenc	xmm5,xmm5,XMMWORD[192+rcx]
   2219 	vaesenc	xmm5,xmm5,XMMWORD[208+rcx]
   2220 	vaesenclast	xmm5,xmm5,XMMWORD[224+rcx]
   2221 
   2222 
   2223 	vpxor	xmm5,xmm5,XMMWORD[rdi]
   2224 
   2225 	vmovdqu	XMMWORD[rsi],xmm5
   2226 
   2227 	add	rdi,16
   2228 	add	rsi,16
   2229 
   2230 	sub	r10,1
   2231 	jne	NEAR $L$256_enc_msg_x4_loop2
   2232 
   2233 $L$256_enc_msg_x4_out:
   2234 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2235 	mov	rsi,QWORD[16+rsp]
   2236 	DB	0F3h,0C3h		;repret
   2237 
   2238 $L$SEH_end_aes256gcmsiv_enc_msg_x4:
   2239 global	aes256gcmsiv_enc_msg_x8
   2240 
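; 8-way variant of the CTR encryption above, same argument layout.
; With xmm15 reserved for round keys, the eighth counter (base + seven)
; is kept in a 64-byte-aligned stack slot addressed through r11 and is
; stepped by eight in place each iteration.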
   2241 ALIGN	16
   2242 aes256gcmsiv_enc_msg_x8:
   2243 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   2244 	mov	QWORD[16+rsp],rsi
   2245 	mov	rax,rsp
   2246 $L$SEH_begin_aes256gcmsiv_enc_msg_x8:
   2247 	mov	rdi,rcx
   2248 	mov	rsi,rdx
   2249 	mov	rdx,r8
   2250 	mov	rcx,r9
   2251 	mov	r8,QWORD[40+rsp]
   2252 
   2253 
   2254 
   2255 	test	r8,r8
   2256 	jnz	NEAR $L$256_enc_msg_x8_start
   2257 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2258 	mov	rsi,QWORD[16+rsp]
   2259 	DB	0F3h,0C3h		;repret
   2260 
   2261 $L$256_enc_msg_x8_start:
   2262 
   2263 	mov	r11,rsp
   2264 	sub	r11,16
   2265 	and	r11,-64
   2266 
   2267 	mov	r10,r8
    2268 	shr	r8,4	; r8 = number of whole 16-byte blocks
    2269 	shl	r10,60	; does the length leave a partial trailing block?
    2270 	jz	NEAR $L$256_enc_msg_x8_start2
    2271 	add	r8,1	; yes: round the block count up
   2272 
   2273 $L$256_enc_msg_x8_start2:
   2274 	mov	r10,r8
   2275 	shl	r10,61
    2276 	shr	r10,61	; r10 = block count mod 8, for the one-at-a-time loop
   2277 
   2278 
   2279 	vmovdqa	xmm1,XMMWORD[rdx]
   2280 	vpor	xmm1,xmm1,XMMWORD[OR_MASK]
   2281 
   2282 
   2283 	vpaddd	xmm0,xmm1,XMMWORD[seven]
   2284 	vmovdqa	XMMWORD[r11],xmm0
   2285 	vpaddd	xmm9,xmm1,XMMWORD[one]
   2286 	vpaddd	xmm10,xmm1,XMMWORD[two]
   2287 	vpaddd	xmm11,xmm1,XMMWORD[three]
   2288 	vpaddd	xmm12,xmm1,XMMWORD[four]
   2289 	vpaddd	xmm13,xmm1,XMMWORD[five]
   2290 	vpaddd	xmm14,xmm1,XMMWORD[six]
   2291 	vmovdqa	xmm0,xmm1
   2292 
    2293 	shr	r8,3	; r8 = number of 8-block groups
   2294 	jz	NEAR $L$256_enc_msg_x8_check_remainder
   2295 
   2296 	sub	rsi,128
   2297 	sub	rdi,128
   2298 
   2299 $L$256_enc_msg_x8_loop1:
   2300 	add	rsi,128
   2301 	add	rdi,128
   2302 
   2303 	vmovdqa	xmm1,xmm0
   2304 	vmovdqa	xmm2,xmm9
   2305 	vmovdqa	xmm3,xmm10
   2306 	vmovdqa	xmm4,xmm11
   2307 	vmovdqa	xmm5,xmm12
   2308 	vmovdqa	xmm6,xmm13
   2309 	vmovdqa	xmm7,xmm14
   2310 
   2311 	vmovdqa	xmm8,XMMWORD[r11]
   2312 
   2313 	vpxor	xmm1,xmm1,XMMWORD[rcx]
   2314 	vpxor	xmm2,xmm2,XMMWORD[rcx]
   2315 	vpxor	xmm3,xmm3,XMMWORD[rcx]
   2316 	vpxor	xmm4,xmm4,XMMWORD[rcx]
   2317 	vpxor	xmm5,xmm5,XMMWORD[rcx]
   2318 	vpxor	xmm6,xmm6,XMMWORD[rcx]
   2319 	vpxor	xmm7,xmm7,XMMWORD[rcx]
   2320 	vpxor	xmm8,xmm8,XMMWORD[rcx]
   2321 
   2322 	vmovdqu	xmm15,XMMWORD[16+rcx]
   2323 	vaesenc	xmm1,xmm1,xmm15
   2324 	vaesenc	xmm2,xmm2,xmm15
   2325 	vaesenc	xmm3,xmm3,xmm15
   2326 	vaesenc	xmm4,xmm4,xmm15
   2327 	vaesenc	xmm5,xmm5,xmm15
   2328 	vaesenc	xmm6,xmm6,xmm15
   2329 	vaesenc	xmm7,xmm7,xmm15
   2330 	vaesenc	xmm8,xmm8,xmm15
   2331 
   2332 	vmovdqa	xmm14,XMMWORD[r11]
   2333 	vpaddd	xmm14,xmm14,XMMWORD[eight]
   2334 	vmovdqa	XMMWORD[r11],xmm14
   2335 	vmovdqu	xmm15,XMMWORD[32+rcx]
   2336 	vaesenc	xmm1,xmm1,xmm15
   2337 	vaesenc	xmm2,xmm2,xmm15
   2338 	vaesenc	xmm3,xmm3,xmm15
   2339 	vaesenc	xmm4,xmm4,xmm15
   2340 	vaesenc	xmm5,xmm5,xmm15
   2341 	vaesenc	xmm6,xmm6,xmm15
   2342 	vaesenc	xmm7,xmm7,xmm15
   2343 	vaesenc	xmm8,xmm8,xmm15
   2344 
   2345 	vpsubd	xmm14,xmm14,XMMWORD[one]
   2346 	vmovdqu	xmm15,XMMWORD[48+rcx]
   2347 	vaesenc	xmm1,xmm1,xmm15
   2348 	vaesenc	xmm2,xmm2,xmm15
   2349 	vaesenc	xmm3,xmm3,xmm15
   2350 	vaesenc	xmm4,xmm4,xmm15
   2351 	vaesenc	xmm5,xmm5,xmm15
   2352 	vaesenc	xmm6,xmm6,xmm15
   2353 	vaesenc	xmm7,xmm7,xmm15
   2354 	vaesenc	xmm8,xmm8,xmm15
   2355 
   2356 	vpaddd	xmm0,xmm0,XMMWORD[eight]
   2357 	vmovdqu	xmm15,XMMWORD[64+rcx]
   2358 	vaesenc	xmm1,xmm1,xmm15
   2359 	vaesenc	xmm2,xmm2,xmm15
   2360 	vaesenc	xmm3,xmm3,xmm15
   2361 	vaesenc	xmm4,xmm4,xmm15
   2362 	vaesenc	xmm5,xmm5,xmm15
   2363 	vaesenc	xmm6,xmm6,xmm15
   2364 	vaesenc	xmm7,xmm7,xmm15
   2365 	vaesenc	xmm8,xmm8,xmm15
   2366 
   2367 	vpaddd	xmm9,xmm9,XMMWORD[eight]
   2368 	vmovdqu	xmm15,XMMWORD[80+rcx]
   2369 	vaesenc	xmm1,xmm1,xmm15
   2370 	vaesenc	xmm2,xmm2,xmm15
   2371 	vaesenc	xmm3,xmm3,xmm15
   2372 	vaesenc	xmm4,xmm4,xmm15
   2373 	vaesenc	xmm5,xmm5,xmm15
   2374 	vaesenc	xmm6,xmm6,xmm15
   2375 	vaesenc	xmm7,xmm7,xmm15
   2376 	vaesenc	xmm8,xmm8,xmm15
   2377 
   2378 	vpaddd	xmm10,xmm10,XMMWORD[eight]
   2379 	vmovdqu	xmm15,XMMWORD[96+rcx]
   2380 	vaesenc	xmm1,xmm1,xmm15
   2381 	vaesenc	xmm2,xmm2,xmm15
   2382 	vaesenc	xmm3,xmm3,xmm15
   2383 	vaesenc	xmm4,xmm4,xmm15
   2384 	vaesenc	xmm5,xmm5,xmm15
   2385 	vaesenc	xmm6,xmm6,xmm15
   2386 	vaesenc	xmm7,xmm7,xmm15
   2387 	vaesenc	xmm8,xmm8,xmm15
   2388 
   2389 	vpaddd	xmm11,xmm11,XMMWORD[eight]
   2390 	vmovdqu	xmm15,XMMWORD[112+rcx]
   2391 	vaesenc	xmm1,xmm1,xmm15
   2392 	vaesenc	xmm2,xmm2,xmm15
   2393 	vaesenc	xmm3,xmm3,xmm15
   2394 	vaesenc	xmm4,xmm4,xmm15
   2395 	vaesenc	xmm5,xmm5,xmm15
   2396 	vaesenc	xmm6,xmm6,xmm15
   2397 	vaesenc	xmm7,xmm7,xmm15
   2398 	vaesenc	xmm8,xmm8,xmm15
   2399 
   2400 	vpaddd	xmm12,xmm12,XMMWORD[eight]
   2401 	vmovdqu	xmm15,XMMWORD[128+rcx]
   2402 	vaesenc	xmm1,xmm1,xmm15
   2403 	vaesenc	xmm2,xmm2,xmm15
   2404 	vaesenc	xmm3,xmm3,xmm15
   2405 	vaesenc	xmm4,xmm4,xmm15
   2406 	vaesenc	xmm5,xmm5,xmm15
   2407 	vaesenc	xmm6,xmm6,xmm15
   2408 	vaesenc	xmm7,xmm7,xmm15
   2409 	vaesenc	xmm8,xmm8,xmm15
   2410 
   2411 	vpaddd	xmm13,xmm13,XMMWORD[eight]
   2412 	vmovdqu	xmm15,XMMWORD[144+rcx]
   2413 	vaesenc	xmm1,xmm1,xmm15
   2414 	vaesenc	xmm2,xmm2,xmm15
   2415 	vaesenc	xmm3,xmm3,xmm15
   2416 	vaesenc	xmm4,xmm4,xmm15
   2417 	vaesenc	xmm5,xmm5,xmm15
   2418 	vaesenc	xmm6,xmm6,xmm15
   2419 	vaesenc	xmm7,xmm7,xmm15
   2420 	vaesenc	xmm8,xmm8,xmm15
   2421 
   2422 	vmovdqu	xmm15,XMMWORD[160+rcx]
   2423 	vaesenc	xmm1,xmm1,xmm15
   2424 	vaesenc	xmm2,xmm2,xmm15
   2425 	vaesenc	xmm3,xmm3,xmm15
   2426 	vaesenc	xmm4,xmm4,xmm15
   2427 	vaesenc	xmm5,xmm5,xmm15
   2428 	vaesenc	xmm6,xmm6,xmm15
   2429 	vaesenc	xmm7,xmm7,xmm15
   2430 	vaesenc	xmm8,xmm8,xmm15
   2431 
   2432 	vmovdqu	xmm15,XMMWORD[176+rcx]
   2433 	vaesenc	xmm1,xmm1,xmm15
   2434 	vaesenc	xmm2,xmm2,xmm15
   2435 	vaesenc	xmm3,xmm3,xmm15
   2436 	vaesenc	xmm4,xmm4,xmm15
   2437 	vaesenc	xmm5,xmm5,xmm15
   2438 	vaesenc	xmm6,xmm6,xmm15
   2439 	vaesenc	xmm7,xmm7,xmm15
   2440 	vaesenc	xmm8,xmm8,xmm15
   2441 
   2442 	vmovdqu	xmm15,XMMWORD[192+rcx]
   2443 	vaesenc	xmm1,xmm1,xmm15
   2444 	vaesenc	xmm2,xmm2,xmm15
   2445 	vaesenc	xmm3,xmm3,xmm15
   2446 	vaesenc	xmm4,xmm4,xmm15
   2447 	vaesenc	xmm5,xmm5,xmm15
   2448 	vaesenc	xmm6,xmm6,xmm15
   2449 	vaesenc	xmm7,xmm7,xmm15
   2450 	vaesenc	xmm8,xmm8,xmm15
   2451 
   2452 	vmovdqu	xmm15,XMMWORD[208+rcx]
   2453 	vaesenc	xmm1,xmm1,xmm15
   2454 	vaesenc	xmm2,xmm2,xmm15
   2455 	vaesenc	xmm3,xmm3,xmm15
   2456 	vaesenc	xmm4,xmm4,xmm15
   2457 	vaesenc	xmm5,xmm5,xmm15
   2458 	vaesenc	xmm6,xmm6,xmm15
   2459 	vaesenc	xmm7,xmm7,xmm15
   2460 	vaesenc	xmm8,xmm8,xmm15
   2461 
   2462 	vmovdqu	xmm15,XMMWORD[224+rcx]
   2463 	vaesenclast	xmm1,xmm1,xmm15
   2464 	vaesenclast	xmm2,xmm2,xmm15
   2465 	vaesenclast	xmm3,xmm3,xmm15
   2466 	vaesenclast	xmm4,xmm4,xmm15
   2467 	vaesenclast	xmm5,xmm5,xmm15
   2468 	vaesenclast	xmm6,xmm6,xmm15
   2469 	vaesenclast	xmm7,xmm7,xmm15
   2470 	vaesenclast	xmm8,xmm8,xmm15
   2471 
   2472 
   2473 
   2474 	vpxor	xmm1,xmm1,XMMWORD[rdi]
   2475 	vpxor	xmm2,xmm2,XMMWORD[16+rdi]
   2476 	vpxor	xmm3,xmm3,XMMWORD[32+rdi]
   2477 	vpxor	xmm4,xmm4,XMMWORD[48+rdi]
   2478 	vpxor	xmm5,xmm5,XMMWORD[64+rdi]
   2479 	vpxor	xmm6,xmm6,XMMWORD[80+rdi]
   2480 	vpxor	xmm7,xmm7,XMMWORD[96+rdi]
   2481 	vpxor	xmm8,xmm8,XMMWORD[112+rdi]
   2482 
   2483 	sub	r8,1
   2484 
   2485 	vmovdqu	XMMWORD[rsi],xmm1
   2486 	vmovdqu	XMMWORD[16+rsi],xmm2
   2487 	vmovdqu	XMMWORD[32+rsi],xmm3
   2488 	vmovdqu	XMMWORD[48+rsi],xmm4
   2489 	vmovdqu	XMMWORD[64+rsi],xmm5
   2490 	vmovdqu	XMMWORD[80+rsi],xmm6
   2491 	vmovdqu	XMMWORD[96+rsi],xmm7
   2492 	vmovdqu	XMMWORD[112+rsi],xmm8
   2493 
   2494 	jne	NEAR $L$256_enc_msg_x8_loop1
   2495 
   2496 	add	rsi,128
   2497 	add	rdi,128
   2498 
   2499 $L$256_enc_msg_x8_check_remainder:
   2500 	cmp	r10,0
   2501 	je	NEAR $L$256_enc_msg_x8_out
   2502 
   2503 $L$256_enc_msg_x8_loop2:
   2504 
   2505 
   2506 	vmovdqa	xmm1,xmm0
   2507 	vpaddd	xmm0,xmm0,XMMWORD[one]
   2508 
   2509 	vpxor	xmm1,xmm1,XMMWORD[rcx]
   2510 	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]
   2511 	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]
   2512 	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]
   2513 	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]
   2514 	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]
   2515 	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]
   2516 	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]
   2517 	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]
   2518 	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]
   2519 	vaesenc	xmm1,xmm1,XMMWORD[160+rcx]
   2520 	vaesenc	xmm1,xmm1,XMMWORD[176+rcx]
   2521 	vaesenc	xmm1,xmm1,XMMWORD[192+rcx]
   2522 	vaesenc	xmm1,xmm1,XMMWORD[208+rcx]
   2523 	vaesenclast	xmm1,xmm1,XMMWORD[224+rcx]
   2524 
   2525 
   2526 	vpxor	xmm1,xmm1,XMMWORD[rdi]
   2527 
   2528 	vmovdqu	XMMWORD[rsi],xmm1
   2529 
   2530 	add	rdi,16
   2531 	add	rsi,16
   2532 	sub	r10,1
   2533 	jnz	NEAR $L$256_enc_msg_x8_loop2
   2534 
   2535 $L$256_enc_msg_x8_out:
   2536 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2537 	mov	rsi,QWORD[16+rsp]
   2538 	DB	0F3h,0C3h		;repret
   2539 
   2540 
   2541 $L$SEH_end_aes256gcmsiv_enc_msg_x8:
   2542 global	aes256gcmsiv_dec
   2543 
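; CTR-mode decryption fused with POLYVAL over the resulting plaintext.
; Apparent argument layout, read off the code below: rdi = ciphertext,
; rsi = plaintext out, rdx = 16-byte POLYVAL state (updated in place and
; followed by scratch space), rcx = table of powers of H, r8 = key
; schedule, r9 = length in bytes. Inputs shorter than one whole block
; return immediately.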
   2544 ALIGN	16
   2545 aes256gcmsiv_dec:
   2546 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   2547 	mov	QWORD[16+rsp],rsi
   2548 	mov	rax,rsp
   2549 $L$SEH_begin_aes256gcmsiv_dec:
   2550 	mov	rdi,rcx
   2551 	mov	rsi,rdx
   2552 	mov	rdx,r8
   2553 	mov	rcx,r9
   2554 	mov	r8,QWORD[40+rsp]
   2555 	mov	r9,QWORD[48+rsp]
   2556 
   2557 
   2558 
    2559 	test	r9,~15	; return unless at least one whole 16-byte block
   2560 	jnz	NEAR $L$256_dec_start
   2561 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2562 	mov	rsi,QWORD[16+rsp]
   2563 	DB	0F3h,0C3h		;repret
   2564 
   2565 $L$256_dec_start:
   2566 	vzeroupper
   2567 	vmovdqa	xmm0,XMMWORD[rdx]
   2568 	mov	rax,rdx
   2569 
   2570 	lea	rax,[32+rax]
   2571 	lea	rcx,[32+rcx]
   2572 
   2573 
   2574 	vmovdqu	xmm15,XMMWORD[r9*1+rdi]
   2575 	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
   2576 	and	r9,~15
   2577 
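; The load above takes the block at rdi+r9, i.e. the tag stored just
; past the ciphertext; with its top bit forced by OR_MASK it becomes
; the initial counter. r9 is then rounded down to whole blocks.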
   2578 
   2579 	cmp	r9,96
   2580 	jb	NEAR $L$256_dec_loop2
   2581 
   2582 
   2583 	sub	r9,96
   2584 	vmovdqa	xmm7,xmm15
   2585 	vpaddd	xmm8,xmm7,XMMWORD[one]
   2586 	vpaddd	xmm9,xmm7,XMMWORD[two]
   2587 	vpaddd	xmm10,xmm9,XMMWORD[one]
   2588 	vpaddd	xmm11,xmm9,XMMWORD[two]
   2589 	vpaddd	xmm12,xmm11,XMMWORD[one]
   2590 	vpaddd	xmm15,xmm11,XMMWORD[two]
   2591 
   2592 	vpxor	xmm7,xmm7,XMMWORD[r8]
   2593 	vpxor	xmm8,xmm8,XMMWORD[r8]
   2594 	vpxor	xmm9,xmm9,XMMWORD[r8]
   2595 	vpxor	xmm10,xmm10,XMMWORD[r8]
   2596 	vpxor	xmm11,xmm11,XMMWORD[r8]
   2597 	vpxor	xmm12,xmm12,XMMWORD[r8]
   2598 
   2599 	vmovdqu	xmm4,XMMWORD[16+r8]
   2600 	vaesenc	xmm7,xmm7,xmm4
   2601 	vaesenc	xmm8,xmm8,xmm4
   2602 	vaesenc	xmm9,xmm9,xmm4
   2603 	vaesenc	xmm10,xmm10,xmm4
   2604 	vaesenc	xmm11,xmm11,xmm4
   2605 	vaesenc	xmm12,xmm12,xmm4
   2606 
   2607 	vmovdqu	xmm4,XMMWORD[32+r8]
   2608 	vaesenc	xmm7,xmm7,xmm4
   2609 	vaesenc	xmm8,xmm8,xmm4
   2610 	vaesenc	xmm9,xmm9,xmm4
   2611 	vaesenc	xmm10,xmm10,xmm4
   2612 	vaesenc	xmm11,xmm11,xmm4
   2613 	vaesenc	xmm12,xmm12,xmm4
   2614 
   2615 	vmovdqu	xmm4,XMMWORD[48+r8]
   2616 	vaesenc	xmm7,xmm7,xmm4
   2617 	vaesenc	xmm8,xmm8,xmm4
   2618 	vaesenc	xmm9,xmm9,xmm4
   2619 	vaesenc	xmm10,xmm10,xmm4
   2620 	vaesenc	xmm11,xmm11,xmm4
   2621 	vaesenc	xmm12,xmm12,xmm4
   2622 
   2623 	vmovdqu	xmm4,XMMWORD[64+r8]
   2624 	vaesenc	xmm7,xmm7,xmm4
   2625 	vaesenc	xmm8,xmm8,xmm4
   2626 	vaesenc	xmm9,xmm9,xmm4
   2627 	vaesenc	xmm10,xmm10,xmm4
   2628 	vaesenc	xmm11,xmm11,xmm4
   2629 	vaesenc	xmm12,xmm12,xmm4
   2630 
   2631 	vmovdqu	xmm4,XMMWORD[80+r8]
   2632 	vaesenc	xmm7,xmm7,xmm4
   2633 	vaesenc	xmm8,xmm8,xmm4
   2634 	vaesenc	xmm9,xmm9,xmm4
   2635 	vaesenc	xmm10,xmm10,xmm4
   2636 	vaesenc	xmm11,xmm11,xmm4
   2637 	vaesenc	xmm12,xmm12,xmm4
   2638 
   2639 	vmovdqu	xmm4,XMMWORD[96+r8]
   2640 	vaesenc	xmm7,xmm7,xmm4
   2641 	vaesenc	xmm8,xmm8,xmm4
   2642 	vaesenc	xmm9,xmm9,xmm4
   2643 	vaesenc	xmm10,xmm10,xmm4
   2644 	vaesenc	xmm11,xmm11,xmm4
   2645 	vaesenc	xmm12,xmm12,xmm4
   2646 
   2647 	vmovdqu	xmm4,XMMWORD[112+r8]
   2648 	vaesenc	xmm7,xmm7,xmm4
   2649 	vaesenc	xmm8,xmm8,xmm4
   2650 	vaesenc	xmm9,xmm9,xmm4
   2651 	vaesenc	xmm10,xmm10,xmm4
   2652 	vaesenc	xmm11,xmm11,xmm4
   2653 	vaesenc	xmm12,xmm12,xmm4
   2654 
   2655 	vmovdqu	xmm4,XMMWORD[128+r8]
   2656 	vaesenc	xmm7,xmm7,xmm4
   2657 	vaesenc	xmm8,xmm8,xmm4
   2658 	vaesenc	xmm9,xmm9,xmm4
   2659 	vaesenc	xmm10,xmm10,xmm4
   2660 	vaesenc	xmm11,xmm11,xmm4
   2661 	vaesenc	xmm12,xmm12,xmm4
   2662 
   2663 	vmovdqu	xmm4,XMMWORD[144+r8]
   2664 	vaesenc	xmm7,xmm7,xmm4
   2665 	vaesenc	xmm8,xmm8,xmm4
   2666 	vaesenc	xmm9,xmm9,xmm4
   2667 	vaesenc	xmm10,xmm10,xmm4
   2668 	vaesenc	xmm11,xmm11,xmm4
   2669 	vaesenc	xmm12,xmm12,xmm4
   2670 
   2671 	vmovdqu	xmm4,XMMWORD[160+r8]
   2672 	vaesenc	xmm7,xmm7,xmm4
   2673 	vaesenc	xmm8,xmm8,xmm4
   2674 	vaesenc	xmm9,xmm9,xmm4
   2675 	vaesenc	xmm10,xmm10,xmm4
   2676 	vaesenc	xmm11,xmm11,xmm4
   2677 	vaesenc	xmm12,xmm12,xmm4
   2678 
   2679 	vmovdqu	xmm4,XMMWORD[176+r8]
   2680 	vaesenc	xmm7,xmm7,xmm4
   2681 	vaesenc	xmm8,xmm8,xmm4
   2682 	vaesenc	xmm9,xmm9,xmm4
   2683 	vaesenc	xmm10,xmm10,xmm4
   2684 	vaesenc	xmm11,xmm11,xmm4
   2685 	vaesenc	xmm12,xmm12,xmm4
   2686 
   2687 	vmovdqu	xmm4,XMMWORD[192+r8]
   2688 	vaesenc	xmm7,xmm7,xmm4
   2689 	vaesenc	xmm8,xmm8,xmm4
   2690 	vaesenc	xmm9,xmm9,xmm4
   2691 	vaesenc	xmm10,xmm10,xmm4
   2692 	vaesenc	xmm11,xmm11,xmm4
   2693 	vaesenc	xmm12,xmm12,xmm4
   2694 
   2695 	vmovdqu	xmm4,XMMWORD[208+r8]
   2696 	vaesenc	xmm7,xmm7,xmm4
   2697 	vaesenc	xmm8,xmm8,xmm4
   2698 	vaesenc	xmm9,xmm9,xmm4
   2699 	vaesenc	xmm10,xmm10,xmm4
   2700 	vaesenc	xmm11,xmm11,xmm4
   2701 	vaesenc	xmm12,xmm12,xmm4
   2702 
   2703 	vmovdqu	xmm4,XMMWORD[224+r8]
   2704 	vaesenclast	xmm7,xmm7,xmm4
   2705 	vaesenclast	xmm8,xmm8,xmm4
   2706 	vaesenclast	xmm9,xmm9,xmm4
   2707 	vaesenclast	xmm10,xmm10,xmm4
   2708 	vaesenclast	xmm11,xmm11,xmm4
   2709 	vaesenclast	xmm12,xmm12,xmm4
   2710 
   2711 
   2712 	vpxor	xmm7,xmm7,XMMWORD[rdi]
   2713 	vpxor	xmm8,xmm8,XMMWORD[16+rdi]
   2714 	vpxor	xmm9,xmm9,XMMWORD[32+rdi]
   2715 	vpxor	xmm10,xmm10,XMMWORD[48+rdi]
   2716 	vpxor	xmm11,xmm11,XMMWORD[64+rdi]
   2717 	vpxor	xmm12,xmm12,XMMWORD[80+rdi]
   2718 
   2719 	vmovdqu	XMMWORD[rsi],xmm7
   2720 	vmovdqu	XMMWORD[16+rsi],xmm8
   2721 	vmovdqu	XMMWORD[32+rsi],xmm9
   2722 	vmovdqu	XMMWORD[48+rsi],xmm10
   2723 	vmovdqu	XMMWORD[64+rsi],xmm11
   2724 	vmovdqu	XMMWORD[80+rsi],xmm12
   2725 
   2726 	add	rdi,96
   2727 	add	rsi,96
   2728 	jmp	NEAR $L$256_dec_loop1
   2729 
   2730 
   2731 ALIGN	64
   2732 $L$256_dec_loop1:
   2733 	cmp	r9,96
   2734 	jb	NEAR $L$256_dec_finish_96
   2735 	sub	r9,96
   2736 
   2737 	vmovdqa	xmm6,xmm12
   2738 	vmovdqa	XMMWORD[(16-32)+rax],xmm11
   2739 	vmovdqa	XMMWORD[(32-32)+rax],xmm10
   2740 	vmovdqa	XMMWORD[(48-32)+rax],xmm9
   2741 	vmovdqa	XMMWORD[(64-32)+rax],xmm8
   2742 	vmovdqa	XMMWORD[(80-32)+rax],xmm7
   2743 
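; The stores above stash the previous iteration's six plaintext blocks
; in the scratch area just past the POLYVAL state (rax = rdx+32); their
; POLYVAL contributions, one power of H per block, are folded in below,
; interleaved with the AES rounds of the next six counter blocks.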
   2744 	vmovdqa	xmm7,xmm15
   2745 	vpaddd	xmm8,xmm7,XMMWORD[one]
   2746 	vpaddd	xmm9,xmm7,XMMWORD[two]
   2747 	vpaddd	xmm10,xmm9,XMMWORD[one]
   2748 	vpaddd	xmm11,xmm9,XMMWORD[two]
   2749 	vpaddd	xmm12,xmm11,XMMWORD[one]
   2750 	vpaddd	xmm15,xmm11,XMMWORD[two]
   2751 
   2752 	vmovdqa	xmm4,XMMWORD[r8]
   2753 	vpxor	xmm7,xmm7,xmm4
   2754 	vpxor	xmm8,xmm8,xmm4
   2755 	vpxor	xmm9,xmm9,xmm4
   2756 	vpxor	xmm10,xmm10,xmm4
   2757 	vpxor	xmm11,xmm11,xmm4
   2758 	vpxor	xmm12,xmm12,xmm4
   2759 
   2760 	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
   2761 	vpclmulqdq	xmm2,xmm6,xmm4,0x11
   2762 	vpclmulqdq	xmm3,xmm6,xmm4,0x00
   2763 	vpclmulqdq	xmm1,xmm6,xmm4,0x01
   2764 	vpclmulqdq	xmm4,xmm6,xmm4,0x10
   2765 	vpxor	xmm1,xmm1,xmm4
   2766 
   2767 	vmovdqu	xmm4,XMMWORD[16+r8]
   2768 	vaesenc	xmm7,xmm7,xmm4
   2769 	vaesenc	xmm8,xmm8,xmm4
   2770 	vaesenc	xmm9,xmm9,xmm4
   2771 	vaesenc	xmm10,xmm10,xmm4
   2772 	vaesenc	xmm11,xmm11,xmm4
   2773 	vaesenc	xmm12,xmm12,xmm4
   2774 
   2775 	vmovdqu	xmm6,XMMWORD[((-16))+rax]
   2776 	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
   2777 
   2778 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   2779 	vpxor	xmm1,xmm1,xmm4
   2780 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   2781 	vpxor	xmm2,xmm2,xmm4
   2782 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   2783 	vpxor	xmm3,xmm3,xmm4
   2784 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   2785 	vpxor	xmm1,xmm1,xmm4
   2786 
   2787 
   2788 	vmovdqu	xmm4,XMMWORD[32+r8]
   2789 	vaesenc	xmm7,xmm7,xmm4
   2790 	vaesenc	xmm8,xmm8,xmm4
   2791 	vaesenc	xmm9,xmm9,xmm4
   2792 	vaesenc	xmm10,xmm10,xmm4
   2793 	vaesenc	xmm11,xmm11,xmm4
   2794 	vaesenc	xmm12,xmm12,xmm4
   2795 
   2796 	vmovdqu	xmm6,XMMWORD[rax]
   2797 	vmovdqu	xmm13,XMMWORD[rcx]
   2798 
   2799 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   2800 	vpxor	xmm1,xmm1,xmm4
   2801 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   2802 	vpxor	xmm2,xmm2,xmm4
   2803 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   2804 	vpxor	xmm3,xmm3,xmm4
   2805 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   2806 	vpxor	xmm1,xmm1,xmm4
   2807 
   2808 
   2809 	vmovdqu	xmm4,XMMWORD[48+r8]
   2810 	vaesenc	xmm7,xmm7,xmm4
   2811 	vaesenc	xmm8,xmm8,xmm4
   2812 	vaesenc	xmm9,xmm9,xmm4
   2813 	vaesenc	xmm10,xmm10,xmm4
   2814 	vaesenc	xmm11,xmm11,xmm4
   2815 	vaesenc	xmm12,xmm12,xmm4
   2816 
   2817 	vmovdqu	xmm6,XMMWORD[16+rax]
   2818 	vmovdqu	xmm13,XMMWORD[16+rcx]
   2819 
   2820 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   2821 	vpxor	xmm1,xmm1,xmm4
   2822 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   2823 	vpxor	xmm2,xmm2,xmm4
   2824 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   2825 	vpxor	xmm3,xmm3,xmm4
   2826 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   2827 	vpxor	xmm1,xmm1,xmm4
   2828 
   2829 
   2830 	vmovdqu	xmm4,XMMWORD[64+r8]
   2831 	vaesenc	xmm7,xmm7,xmm4
   2832 	vaesenc	xmm8,xmm8,xmm4
   2833 	vaesenc	xmm9,xmm9,xmm4
   2834 	vaesenc	xmm10,xmm10,xmm4
   2835 	vaesenc	xmm11,xmm11,xmm4
   2836 	vaesenc	xmm12,xmm12,xmm4
   2837 
   2838 	vmovdqu	xmm6,XMMWORD[32+rax]
   2839 	vmovdqu	xmm13,XMMWORD[32+rcx]
   2840 
   2841 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   2842 	vpxor	xmm1,xmm1,xmm4
   2843 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   2844 	vpxor	xmm2,xmm2,xmm4
   2845 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   2846 	vpxor	xmm3,xmm3,xmm4
   2847 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   2848 	vpxor	xmm1,xmm1,xmm4
   2849 
   2850 
   2851 	vmovdqu	xmm4,XMMWORD[80+r8]
   2852 	vaesenc	xmm7,xmm7,xmm4
   2853 	vaesenc	xmm8,xmm8,xmm4
   2854 	vaesenc	xmm9,xmm9,xmm4
   2855 	vaesenc	xmm10,xmm10,xmm4
   2856 	vaesenc	xmm11,xmm11,xmm4
   2857 	vaesenc	xmm12,xmm12,xmm4
   2858 
   2859 	vmovdqu	xmm4,XMMWORD[96+r8]
   2860 	vaesenc	xmm7,xmm7,xmm4
   2861 	vaesenc	xmm8,xmm8,xmm4
   2862 	vaesenc	xmm9,xmm9,xmm4
   2863 	vaesenc	xmm10,xmm10,xmm4
   2864 	vaesenc	xmm11,xmm11,xmm4
   2865 	vaesenc	xmm12,xmm12,xmm4
   2866 
   2867 	vmovdqu	xmm4,XMMWORD[112+r8]
   2868 	vaesenc	xmm7,xmm7,xmm4
   2869 	vaesenc	xmm8,xmm8,xmm4
   2870 	vaesenc	xmm9,xmm9,xmm4
   2871 	vaesenc	xmm10,xmm10,xmm4
   2872 	vaesenc	xmm11,xmm11,xmm4
   2873 	vaesenc	xmm12,xmm12,xmm4
   2874 
   2875 
   2876 	vmovdqa	xmm6,XMMWORD[((80-32))+rax]
   2877 	vpxor	xmm6,xmm6,xmm0
   2878 	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
   2879 
   2880 	vpclmulqdq	xmm4,xmm6,xmm5,0x01
   2881 	vpxor	xmm1,xmm1,xmm4
   2882 	vpclmulqdq	xmm4,xmm6,xmm5,0x11
   2883 	vpxor	xmm2,xmm2,xmm4
   2884 	vpclmulqdq	xmm4,xmm6,xmm5,0x00
   2885 	vpxor	xmm3,xmm3,xmm4
   2886 	vpclmulqdq	xmm4,xmm6,xmm5,0x10
   2887 	vpxor	xmm1,xmm1,xmm4
   2888 
   2889 	vmovdqu	xmm4,XMMWORD[128+r8]
   2890 	vaesenc	xmm7,xmm7,xmm4
   2891 	vaesenc	xmm8,xmm8,xmm4
   2892 	vaesenc	xmm9,xmm9,xmm4
   2893 	vaesenc	xmm10,xmm10,xmm4
   2894 	vaesenc	xmm11,xmm11,xmm4
   2895 	vaesenc	xmm12,xmm12,xmm4
   2896 
   2897 
   2898 	vpsrldq	xmm4,xmm1,8
   2899 	vpxor	xmm5,xmm2,xmm4
   2900 	vpslldq	xmm4,xmm1,8
   2901 	vpxor	xmm0,xmm3,xmm4
   2902 
   2903 	vmovdqa	xmm3,XMMWORD[poly]
   2904 
   2905 	vmovdqu	xmm4,XMMWORD[144+r8]
   2906 	vaesenc	xmm7,xmm7,xmm4
   2907 	vaesenc	xmm8,xmm8,xmm4
   2908 	vaesenc	xmm9,xmm9,xmm4
   2909 	vaesenc	xmm10,xmm10,xmm4
   2910 	vaesenc	xmm11,xmm11,xmm4
   2911 	vaesenc	xmm12,xmm12,xmm4
   2912 
   2913 	vmovdqu	xmm4,XMMWORD[160+r8]
   2914 	vaesenc	xmm7,xmm7,xmm4
   2915 	vaesenc	xmm8,xmm8,xmm4
   2916 	vaesenc	xmm9,xmm9,xmm4
   2917 	vaesenc	xmm10,xmm10,xmm4
   2918 	vaesenc	xmm11,xmm11,xmm4
   2919 	vaesenc	xmm12,xmm12,xmm4
   2920 
   2921 	vmovdqu	xmm4,XMMWORD[176+r8]
   2922 	vaesenc	xmm7,xmm7,xmm4
   2923 	vaesenc	xmm8,xmm8,xmm4
   2924 	vaesenc	xmm9,xmm9,xmm4
   2925 	vaesenc	xmm10,xmm10,xmm4
   2926 	vaesenc	xmm11,xmm11,xmm4
   2927 	vaesenc	xmm12,xmm12,xmm4
   2928 
   2929 	vmovdqu	xmm4,XMMWORD[192+r8]
   2930 	vaesenc	xmm7,xmm7,xmm4
   2931 	vaesenc	xmm8,xmm8,xmm4
   2932 	vaesenc	xmm9,xmm9,xmm4
   2933 	vaesenc	xmm10,xmm10,xmm4
   2934 	vaesenc	xmm11,xmm11,xmm4
   2935 	vaesenc	xmm12,xmm12,xmm4
   2936 
   2937 	vmovdqu	xmm4,XMMWORD[208+r8]
   2938 	vaesenc	xmm7,xmm7,xmm4
   2939 	vaesenc	xmm8,xmm8,xmm4
   2940 	vaesenc	xmm9,xmm9,xmm4
   2941 	vaesenc	xmm10,xmm10,xmm4
   2942 	vaesenc	xmm11,xmm11,xmm4
   2943 	vaesenc	xmm12,xmm12,xmm4
   2944 
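; Reduction and finish, interleaved with the last AES rounds: the two
; vpalignr/vpclmulqdq(poly) passes fold the 256-bit POLYVAL product held
; in xmm0/xmm5 back to 128 bits, and vaesenclast is fed the last round
; key pre-XORed with each ciphertext block, so the keystream XOR happens
; inside the final AES round.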
   2945 	vmovdqu	xmm6,XMMWORD[224+r8]
   2946 	vpalignr	xmm2,xmm0,xmm0,8
   2947 	vpclmulqdq	xmm0,xmm0,xmm3,0x10
   2948 	vpxor	xmm0,xmm2,xmm0
   2949 
   2950 	vpxor	xmm4,xmm6,XMMWORD[rdi]
   2951 	vaesenclast	xmm7,xmm7,xmm4
   2952 	vpxor	xmm4,xmm6,XMMWORD[16+rdi]
   2953 	vaesenclast	xmm8,xmm8,xmm4
   2954 	vpxor	xmm4,xmm6,XMMWORD[32+rdi]
   2955 	vaesenclast	xmm9,xmm9,xmm4
   2956 	vpxor	xmm4,xmm6,XMMWORD[48+rdi]
   2957 	vaesenclast	xmm10,xmm10,xmm4
   2958 	vpxor	xmm4,xmm6,XMMWORD[64+rdi]
   2959 	vaesenclast	xmm11,xmm11,xmm4
   2960 	vpxor	xmm4,xmm6,XMMWORD[80+rdi]
   2961 	vaesenclast	xmm12,xmm12,xmm4
   2962 
   2963 	vpalignr	xmm2,xmm0,xmm0,8
   2964 	vpclmulqdq	xmm0,xmm0,xmm3,0x10
   2965 	vpxor	xmm0,xmm2,xmm0
   2966 
   2967 	vmovdqu	XMMWORD[rsi],xmm7
   2968 	vmovdqu	XMMWORD[16+rsi],xmm8
   2969 	vmovdqu	XMMWORD[32+rsi],xmm9
   2970 	vmovdqu	XMMWORD[48+rsi],xmm10
   2971 	vmovdqu	XMMWORD[64+rsi],xmm11
   2972 	vmovdqu	XMMWORD[80+rsi],xmm12
   2973 
   2974 	vpxor	xmm0,xmm0,xmm5
   2975 
   2976 	lea	rdi,[96+rdi]
   2977 	lea	rsi,[96+rsi]
   2978 	jmp	NEAR $L$256_dec_loop1
   2979 
   2980 $L$256_dec_finish_96:
   2981 	vmovdqa	xmm6,xmm12
   2982 	vmovdqa	XMMWORD[(16-32)+rax],xmm11
   2983 	vmovdqa	XMMWORD[(32-32)+rax],xmm10
   2984 	vmovdqa	XMMWORD[(48-32)+rax],xmm9
   2985 	vmovdqa	XMMWORD[(64-32)+rax],xmm8
   2986 	vmovdqa	XMMWORD[(80-32)+rax],xmm7
   2987 
   2988 	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
   2989 	vpclmulqdq	xmm1,xmm6,xmm4,0x10
   2990 	vpclmulqdq	xmm2,xmm6,xmm4,0x11
   2991 	vpclmulqdq	xmm3,xmm6,xmm4,0x00
   2992 	vpclmulqdq	xmm4,xmm6,xmm4,0x01
   2993 	vpxor	xmm1,xmm1,xmm4
   2994 
   2995 	vmovdqu	xmm6,XMMWORD[((-16))+rax]
   2996 	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
   2997 
   2998 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   2999 	vpxor	xmm1,xmm1,xmm4
   3000 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   3001 	vpxor	xmm2,xmm2,xmm4
   3002 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   3003 	vpxor	xmm3,xmm3,xmm4
   3004 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   3005 	vpxor	xmm1,xmm1,xmm4
   3006 
   3007 	vmovdqu	xmm6,XMMWORD[rax]
   3008 	vmovdqu	xmm13,XMMWORD[rcx]
   3009 
   3010 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   3011 	vpxor	xmm1,xmm1,xmm4
   3012 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   3013 	vpxor	xmm2,xmm2,xmm4
   3014 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   3015 	vpxor	xmm3,xmm3,xmm4
   3016 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   3017 	vpxor	xmm1,xmm1,xmm4
   3018 
   3019 	vmovdqu	xmm6,XMMWORD[16+rax]
   3020 	vmovdqu	xmm13,XMMWORD[16+rcx]
   3021 
   3022 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   3023 	vpxor	xmm1,xmm1,xmm4
   3024 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   3025 	vpxor	xmm2,xmm2,xmm4
   3026 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   3027 	vpxor	xmm3,xmm3,xmm4
   3028 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   3029 	vpxor	xmm1,xmm1,xmm4
   3030 
   3031 	vmovdqu	xmm6,XMMWORD[32+rax]
   3032 	vmovdqu	xmm13,XMMWORD[32+rcx]
   3033 
   3034 	vpclmulqdq	xmm4,xmm6,xmm13,0x10
   3035 	vpxor	xmm1,xmm1,xmm4
   3036 	vpclmulqdq	xmm4,xmm6,xmm13,0x11
   3037 	vpxor	xmm2,xmm2,xmm4
   3038 	vpclmulqdq	xmm4,xmm6,xmm13,0x00
   3039 	vpxor	xmm3,xmm3,xmm4
   3040 	vpclmulqdq	xmm4,xmm6,xmm13,0x01
   3041 	vpxor	xmm1,xmm1,xmm4
   3042 
   3043 
   3044 	vmovdqu	xmm6,XMMWORD[((80-32))+rax]
   3045 	vpxor	xmm6,xmm6,xmm0
   3046 	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
   3047 	vpclmulqdq	xmm4,xmm6,xmm5,0x11
   3048 	vpxor	xmm2,xmm2,xmm4
   3049 	vpclmulqdq	xmm4,xmm6,xmm5,0x00
   3050 	vpxor	xmm3,xmm3,xmm4
   3051 	vpclmulqdq	xmm4,xmm6,xmm5,0x10
   3052 	vpxor	xmm1,xmm1,xmm4
   3053 	vpclmulqdq	xmm4,xmm6,xmm5,0x01
   3054 	vpxor	xmm1,xmm1,xmm4
   3055 
   3056 	vpsrldq	xmm4,xmm1,8
   3057 	vpxor	xmm5,xmm2,xmm4
   3058 	vpslldq	xmm4,xmm1,8
   3059 	vpxor	xmm0,xmm3,xmm4
   3060 
   3061 	vmovdqa	xmm3,XMMWORD[poly]
   3062 
   3063 	vpalignr	xmm2,xmm0,xmm0,8
   3064 	vpclmulqdq	xmm0,xmm0,xmm3,0x10
   3065 	vpxor	xmm0,xmm2,xmm0
   3066 
   3067 	vpalignr	xmm2,xmm0,xmm0,8
   3068 	vpclmulqdq	xmm0,xmm0,xmm3,0x10
   3069 	vpxor	xmm0,xmm2,xmm0
   3070 
   3071 	vpxor	xmm0,xmm0,xmm5
   3072 
   3073 $L$256_dec_loop2:
   3074 
   3075 
   3076 
   3077 	cmp	r9,16
   3078 	jb	NEAR $L$256_dec_out
   3079 	sub	r9,16
   3080 
   3081 	vmovdqa	xmm2,xmm15
   3082 	vpaddd	xmm15,xmm15,XMMWORD[one]
   3083 
   3084 	vpxor	xmm2,xmm2,XMMWORD[r8]
   3085 	vaesenc	xmm2,xmm2,XMMWORD[16+r8]
   3086 	vaesenc	xmm2,xmm2,XMMWORD[32+r8]
   3087 	vaesenc	xmm2,xmm2,XMMWORD[48+r8]
   3088 	vaesenc	xmm2,xmm2,XMMWORD[64+r8]
   3089 	vaesenc	xmm2,xmm2,XMMWORD[80+r8]
   3090 	vaesenc	xmm2,xmm2,XMMWORD[96+r8]
   3091 	vaesenc	xmm2,xmm2,XMMWORD[112+r8]
   3092 	vaesenc	xmm2,xmm2,XMMWORD[128+r8]
   3093 	vaesenc	xmm2,xmm2,XMMWORD[144+r8]
   3094 	vaesenc	xmm2,xmm2,XMMWORD[160+r8]
   3095 	vaesenc	xmm2,xmm2,XMMWORD[176+r8]
   3096 	vaesenc	xmm2,xmm2,XMMWORD[192+r8]
   3097 	vaesenc	xmm2,xmm2,XMMWORD[208+r8]
   3098 	vaesenclast	xmm2,xmm2,XMMWORD[224+r8]
   3099 	vpxor	xmm2,xmm2,XMMWORD[rdi]
   3100 	vmovdqu	XMMWORD[rsi],xmm2
   3101 	add	rdi,16
   3102 	add	rsi,16
   3103 
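; Horner step for the tail: fold the decrypted block into the POLYVAL
; accumulator and multiply by H (the first entry of the table, at
; rcx-32 after the earlier 32-byte advance) via GFMUL.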
   3104 	vpxor	xmm0,xmm0,xmm2
   3105 	vmovdqa	xmm1,XMMWORD[((-32))+rcx]
   3106 	call	GFMUL
   3107 
   3108 	jmp	NEAR $L$256_dec_loop2
   3109 
   3110 $L$256_dec_out:
   3111 	vmovdqu	XMMWORD[rdx],xmm0
   3112 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   3113 	mov	rsi,QWORD[16+rsp]
   3114 	DB	0F3h,0C3h		;repret
   3115 
   3116 $L$SEH_end_aes256gcmsiv_dec:
   3117 global	aes256gcmsiv_kdf
   3118 
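; AES-256-GCM-SIV key derivation: encrypts six counter blocks built
; from the nonce at rdi with the master key schedule at rdx and writes
; the 96 bytes of keystream to rsi. The caller appears to assemble the
; record-authentication and record-encryption keys from the first half
; of each of these blocks, as the GCM-SIV KDF prescribes.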
   3119 ALIGN	16
   3120 aes256gcmsiv_kdf:
   3121 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   3122 	mov	QWORD[16+rsp],rsi
   3123 	mov	rax,rsp
   3124 $L$SEH_begin_aes256gcmsiv_kdf:
   3125 	mov	rdi,rcx
   3126 	mov	rsi,rdx
   3127 	mov	rdx,r8
   3128 
   3129 
   3130 
   3131 
   3132 
   3133 
   3134 
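; The nonce is rearranged (vpshufd 0x90) and masked with and_mask so
; that its first dword becomes a zeroed counter field; adding `one`
; five times yields blocks with counters 0 through 5, all encrypted in
; parallel below.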
   3135 	vmovdqa	xmm1,XMMWORD[rdx]
   3136 	vmovdqa	xmm4,XMMWORD[rdi]
   3137 	vmovdqa	xmm11,XMMWORD[and_mask]
   3138 	vmovdqa	xmm8,XMMWORD[one]
   3139 	vpshufd	xmm4,xmm4,0x90
   3140 	vpand	xmm4,xmm4,xmm11
   3141 	vpaddd	xmm6,xmm4,xmm8
   3142 	vpaddd	xmm7,xmm6,xmm8
   3143 	vpaddd	xmm11,xmm7,xmm8
   3144 	vpaddd	xmm12,xmm11,xmm8
   3145 	vpaddd	xmm13,xmm12,xmm8
   3146 
   3147 	vpxor	xmm4,xmm4,xmm1
   3148 	vpxor	xmm6,xmm6,xmm1
   3149 	vpxor	xmm7,xmm7,xmm1
   3150 	vpxor	xmm11,xmm11,xmm1
   3151 	vpxor	xmm12,xmm12,xmm1
   3152 	vpxor	xmm13,xmm13,xmm1
   3153 
   3154 	vmovdqa	xmm1,XMMWORD[16+rdx]
   3155 	vaesenc	xmm4,xmm4,xmm1
   3156 	vaesenc	xmm6,xmm6,xmm1
   3157 	vaesenc	xmm7,xmm7,xmm1
   3158 	vaesenc	xmm11,xmm11,xmm1
   3159 	vaesenc	xmm12,xmm12,xmm1
   3160 	vaesenc	xmm13,xmm13,xmm1
   3161 
   3162 	vmovdqa	xmm2,XMMWORD[32+rdx]
   3163 	vaesenc	xmm4,xmm4,xmm2
   3164 	vaesenc	xmm6,xmm6,xmm2
   3165 	vaesenc	xmm7,xmm7,xmm2
   3166 	vaesenc	xmm11,xmm11,xmm2
   3167 	vaesenc	xmm12,xmm12,xmm2
   3168 	vaesenc	xmm13,xmm13,xmm2
   3169 
   3170 	vmovdqa	xmm1,XMMWORD[48+rdx]
   3171 	vaesenc	xmm4,xmm4,xmm1
   3172 	vaesenc	xmm6,xmm6,xmm1
   3173 	vaesenc	xmm7,xmm7,xmm1
   3174 	vaesenc	xmm11,xmm11,xmm1
   3175 	vaesenc	xmm12,xmm12,xmm1
   3176 	vaesenc	xmm13,xmm13,xmm1
   3177 
   3178 	vmovdqa	xmm2,XMMWORD[64+rdx]
   3179 	vaesenc	xmm4,xmm4,xmm2
   3180 	vaesenc	xmm6,xmm6,xmm2
   3181 	vaesenc	xmm7,xmm7,xmm2
   3182 	vaesenc	xmm11,xmm11,xmm2
   3183 	vaesenc	xmm12,xmm12,xmm2
   3184 	vaesenc	xmm13,xmm13,xmm2
   3185 
   3186 	vmovdqa	xmm1,XMMWORD[80+rdx]
   3187 	vaesenc	xmm4,xmm4,xmm1
   3188 	vaesenc	xmm6,xmm6,xmm1
   3189 	vaesenc	xmm7,xmm7,xmm1
   3190 	vaesenc	xmm11,xmm11,xmm1
   3191 	vaesenc	xmm12,xmm12,xmm1
   3192 	vaesenc	xmm13,xmm13,xmm1
   3193 
   3194 	vmovdqa	xmm2,XMMWORD[96+rdx]
   3195 	vaesenc	xmm4,xmm4,xmm2
   3196 	vaesenc	xmm6,xmm6,xmm2
   3197 	vaesenc	xmm7,xmm7,xmm2
   3198 	vaesenc	xmm11,xmm11,xmm2
   3199 	vaesenc	xmm12,xmm12,xmm2
   3200 	vaesenc	xmm13,xmm13,xmm2
   3201 
   3202 	vmovdqa	xmm1,XMMWORD[112+rdx]
   3203 	vaesenc	xmm4,xmm4,xmm1
   3204 	vaesenc	xmm6,xmm6,xmm1
   3205 	vaesenc	xmm7,xmm7,xmm1
   3206 	vaesenc	xmm11,xmm11,xmm1
   3207 	vaesenc	xmm12,xmm12,xmm1
   3208 	vaesenc	xmm13,xmm13,xmm1
   3209 
   3210 	vmovdqa	xmm2,XMMWORD[128+rdx]
   3211 	vaesenc	xmm4,xmm4,xmm2
   3212 	vaesenc	xmm6,xmm6,xmm2
   3213 	vaesenc	xmm7,xmm7,xmm2
   3214 	vaesenc	xmm11,xmm11,xmm2
   3215 	vaesenc	xmm12,xmm12,xmm2
   3216 	vaesenc	xmm13,xmm13,xmm2
   3217 
   3218 	vmovdqa	xmm1,XMMWORD[144+rdx]
   3219 	vaesenc	xmm4,xmm4,xmm1
   3220 	vaesenc	xmm6,xmm6,xmm1
   3221 	vaesenc	xmm7,xmm7,xmm1
   3222 	vaesenc	xmm11,xmm11,xmm1
   3223 	vaesenc	xmm12,xmm12,xmm1
   3224 	vaesenc	xmm13,xmm13,xmm1
   3225 
   3226 	vmovdqa	xmm2,XMMWORD[160+rdx]
   3227 	vaesenc	xmm4,xmm4,xmm2
   3228 	vaesenc	xmm6,xmm6,xmm2
   3229 	vaesenc	xmm7,xmm7,xmm2
   3230 	vaesenc	xmm11,xmm11,xmm2
   3231 	vaesenc	xmm12,xmm12,xmm2
   3232 	vaesenc	xmm13,xmm13,xmm2
   3233 
   3234 	vmovdqa	xmm1,XMMWORD[176+rdx]
   3235 	vaesenc	xmm4,xmm4,xmm1
   3236 	vaesenc	xmm6,xmm6,xmm1
   3237 	vaesenc	xmm7,xmm7,xmm1
   3238 	vaesenc	xmm11,xmm11,xmm1
   3239 	vaesenc	xmm12,xmm12,xmm1
   3240 	vaesenc	xmm13,xmm13,xmm1
   3241 
   3242 	vmovdqa	xmm2,XMMWORD[192+rdx]
   3243 	vaesenc	xmm4,xmm4,xmm2
   3244 	vaesenc	xmm6,xmm6,xmm2
   3245 	vaesenc	xmm7,xmm7,xmm2
   3246 	vaesenc	xmm11,xmm11,xmm2
   3247 	vaesenc	xmm12,xmm12,xmm2
   3248 	vaesenc	xmm13,xmm13,xmm2
   3249 
   3250 	vmovdqa	xmm1,XMMWORD[208+rdx]
   3251 	vaesenc	xmm4,xmm4,xmm1
   3252 	vaesenc	xmm6,xmm6,xmm1
   3253 	vaesenc	xmm7,xmm7,xmm1
   3254 	vaesenc	xmm11,xmm11,xmm1
   3255 	vaesenc	xmm12,xmm12,xmm1
   3256 	vaesenc	xmm13,xmm13,xmm1
   3257 
   3258 	vmovdqa	xmm2,XMMWORD[224+rdx]
   3259 	vaesenclast	xmm4,xmm4,xmm2
   3260 	vaesenclast	xmm6,xmm6,xmm2
   3261 	vaesenclast	xmm7,xmm7,xmm2
   3262 	vaesenclast	xmm11,xmm11,xmm2
   3263 	vaesenclast	xmm12,xmm12,xmm2
   3264 	vaesenclast	xmm13,xmm13,xmm2
   3265 
   3266 
   3267 	vmovdqa	XMMWORD[rsi],xmm4
   3268 	vmovdqa	XMMWORD[16+rsi],xmm6
   3269 	vmovdqa	XMMWORD[32+rsi],xmm7
   3270 	vmovdqa	XMMWORD[48+rsi],xmm11
   3271 	vmovdqa	XMMWORD[64+rsi],xmm12
   3272 	vmovdqa	XMMWORD[80+rsi],xmm13
   3273 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   3274 	mov	rsi,QWORD[16+rsp]
   3275 	DB	0F3h,0C3h		;repret
   3276 
   3277 $L$SEH_end_aes256gcmsiv_kdf:
   3278