Home | History | Annotate | Download | only in aes
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "textflag.h"
      6 
      7 // func hasAsm() bool
      8 // returns whether AES-NI is supported
      9 TEXT hasAsm(SB),NOSPLIT,$0
     10 	XORQ AX, AX
     11 	INCL AX
     12 	CPUID
     13 	SHRQ $25, CX
     14 	ANDQ $1, CX
     15 	MOVB CX, ret+0(FP)
     16 	RET
     17 
     18 // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
     19 TEXT encryptBlockAsm(SB),NOSPLIT,$0
     20 	MOVQ nr+0(FP), CX
     21 	MOVQ xk+8(FP), AX
     22 	MOVQ dst+16(FP), DX
     23 	MOVQ src+24(FP), BX
     24 	MOVUPS 0(AX), X1
     25 	MOVUPS 0(BX), X0
     26 	ADDQ $16, AX
     27 	PXOR X1, X0
     28 	SUBQ $12, CX
     29 	JE Lenc196
     30 	JB Lenc128
     31 Lenc256:
     32 	MOVUPS 0(AX), X1
     33 	AESENC X1, X0
     34 	MOVUPS 16(AX), X1
     35 	AESENC X1, X0
     36 	ADDQ $32, AX
     37 Lenc196:
     38 	MOVUPS 0(AX), X1
     39 	AESENC X1, X0
     40 	MOVUPS 16(AX), X1
     41 	AESENC X1, X0
     42 	ADDQ $32, AX
     43 Lenc128:
     44 	MOVUPS 0(AX), X1
     45 	AESENC X1, X0
     46 	MOVUPS 16(AX), X1
     47 	AESENC X1, X0
     48 	MOVUPS 32(AX), X1
     49 	AESENC X1, X0
     50 	MOVUPS 48(AX), X1
     51 	AESENC X1, X0
     52 	MOVUPS 64(AX), X1
     53 	AESENC X1, X0
     54 	MOVUPS 80(AX), X1
     55 	AESENC X1, X0
     56 	MOVUPS 96(AX), X1
     57 	AESENC X1, X0
     58 	MOVUPS 112(AX), X1
     59 	AESENC X1, X0
     60 	MOVUPS 128(AX), X1
     61 	AESENC X1, X0
     62 	MOVUPS 144(AX), X1
     63 	AESENCLAST X1, X0
     64 	MOVUPS X0, 0(DX)
     65 	RET
     66 
     67 // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
     68 TEXT decryptBlockAsm(SB),NOSPLIT,$0
     69 	MOVQ nr+0(FP), CX
     70 	MOVQ xk+8(FP), AX
     71 	MOVQ dst+16(FP), DX
     72 	MOVQ src+24(FP), BX
     73 	MOVUPS 0(AX), X1
     74 	MOVUPS 0(BX), X0
     75 	ADDQ $16, AX
     76 	PXOR X1, X0
     77 	SUBQ $12, CX
     78 	JE Ldec196
     79 	JB Ldec128
     80 Ldec256:
     81 	MOVUPS 0(AX), X1
     82 	AESDEC X1, X0
     83 	MOVUPS 16(AX), X1
     84 	AESDEC X1, X0
     85 	ADDQ $32, AX
     86 Ldec196:
     87 	MOVUPS 0(AX), X1
     88 	AESDEC X1, X0
     89 	MOVUPS 16(AX), X1
     90 	AESDEC X1, X0
     91 	ADDQ $32, AX
     92 Ldec128:
     93 	MOVUPS 0(AX), X1
     94 	AESDEC X1, X0
     95 	MOVUPS 16(AX), X1
     96 	AESDEC X1, X0
     97 	MOVUPS 32(AX), X1
     98 	AESDEC X1, X0
     99 	MOVUPS 48(AX), X1
    100 	AESDEC X1, X0
    101 	MOVUPS 64(AX), X1
    102 	AESDEC X1, X0
    103 	MOVUPS 80(AX), X1
    104 	AESDEC X1, X0
    105 	MOVUPS 96(AX), X1
    106 	AESDEC X1, X0
    107 	MOVUPS 112(AX), X1
    108 	AESDEC X1, X0
    109 	MOVUPS 128(AX), X1
    110 	AESDEC X1, X0
    111 	MOVUPS 144(AX), X1
    112 	AESDECLAST X1, X0
    113 	MOVUPS X0, 0(DX)
    114 	RET
    115 
    116 // func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
    117 // Note that round keys are stored in uint128 format, not uint32
    118 TEXT expandKeyAsm(SB),NOSPLIT,$0
    119 	MOVQ nr+0(FP), CX
    120 	MOVQ key+8(FP), AX
    121 	MOVQ enc+16(FP), BX
    122 	MOVQ dec+24(FP), DX
    123 	MOVUPS (AX), X0
    124 	// enc
    125 	MOVUPS X0, (BX)
    126 	ADDQ $16, BX
    127 	PXOR X4, X4 // _expand_key_* expect X4 to be zero
    128 	CMPL CX, $12
    129 	JE Lexp_enc196
    130 	JB Lexp_enc128
    131 Lexp_enc256:
    132 	MOVUPS 16(AX), X2
    133 	MOVUPS X2, (BX)
    134 	ADDQ $16, BX
    135 	AESKEYGENASSIST $0x01, X2, X1
    136 	CALL _expand_key_256a<>(SB)
    137 	AESKEYGENASSIST $0x01, X0, X1
    138 	CALL _expand_key_256b<>(SB)
    139 	AESKEYGENASSIST $0x02, X2, X1
    140 	CALL _expand_key_256a<>(SB)
    141 	AESKEYGENASSIST $0x02, X0, X1
    142 	CALL _expand_key_256b<>(SB)
    143 	AESKEYGENASSIST $0x04, X2, X1
    144 	CALL _expand_key_256a<>(SB)
    145 	AESKEYGENASSIST $0x04, X0, X1
    146 	CALL _expand_key_256b<>(SB)
    147 	AESKEYGENASSIST $0x08, X2, X1
    148 	CALL _expand_key_256a<>(SB)
    149 	AESKEYGENASSIST $0x08, X0, X1
    150 	CALL _expand_key_256b<>(SB)
    151 	AESKEYGENASSIST $0x10, X2, X1
    152 	CALL _expand_key_256a<>(SB)
    153 	AESKEYGENASSIST $0x10, X0, X1
    154 	CALL _expand_key_256b<>(SB)
    155 	AESKEYGENASSIST $0x20, X2, X1
    156 	CALL _expand_key_256a<>(SB)
    157 	AESKEYGENASSIST $0x20, X0, X1
    158 	CALL _expand_key_256b<>(SB)
    159 	AESKEYGENASSIST $0x40, X2, X1
    160 	CALL _expand_key_256a<>(SB)
    161 	JMP Lexp_dec
    162 Lexp_enc196:
    163 	MOVQ 16(AX), X2
    164 	AESKEYGENASSIST $0x01, X2, X1
    165 	CALL _expand_key_192a<>(SB)
    166 	AESKEYGENASSIST $0x02, X2, X1
    167 	CALL _expand_key_192b<>(SB)
    168 	AESKEYGENASSIST $0x04, X2, X1
    169 	CALL _expand_key_192a<>(SB)
    170 	AESKEYGENASSIST $0x08, X2, X1
    171 	CALL _expand_key_192b<>(SB)
    172 	AESKEYGENASSIST $0x10, X2, X1
    173 	CALL _expand_key_192a<>(SB)
    174 	AESKEYGENASSIST $0x20, X2, X1
    175 	CALL _expand_key_192b<>(SB)
    176 	AESKEYGENASSIST $0x40, X2, X1
    177 	CALL _expand_key_192a<>(SB)
    178 	AESKEYGENASSIST $0x80, X2, X1
    179 	CALL _expand_key_192b<>(SB)
    180 	JMP Lexp_dec
    181 Lexp_enc128:
    182 	AESKEYGENASSIST $0x01, X0, X1
    183 	CALL _expand_key_128<>(SB)
    184 	AESKEYGENASSIST $0x02, X0, X1
    185 	CALL _expand_key_128<>(SB)
    186 	AESKEYGENASSIST $0x04, X0, X1
    187 	CALL _expand_key_128<>(SB)
    188 	AESKEYGENASSIST $0x08, X0, X1
    189 	CALL _expand_key_128<>(SB)
    190 	AESKEYGENASSIST $0x10, X0, X1
    191 	CALL _expand_key_128<>(SB)
    192 	AESKEYGENASSIST $0x20, X0, X1
    193 	CALL _expand_key_128<>(SB)
    194 	AESKEYGENASSIST $0x40, X0, X1
    195 	CALL _expand_key_128<>(SB)
    196 	AESKEYGENASSIST $0x80, X0, X1
    197 	CALL _expand_key_128<>(SB)
    198 	AESKEYGENASSIST $0x1b, X0, X1
    199 	CALL _expand_key_128<>(SB)
    200 	AESKEYGENASSIST $0x36, X0, X1
    201 	CALL _expand_key_128<>(SB)
    202 Lexp_dec:
    203 	// dec
    204 	SUBQ $16, BX
    205 	MOVUPS (BX), X1
    206 	MOVUPS X1, (DX)
    207 	DECQ CX
    208 Lexp_dec_loop:
    209 	MOVUPS -16(BX), X1
    210 	AESIMC X1, X0
    211 	MOVUPS X0, 16(DX)
    212 	SUBQ $16, BX
    213 	ADDQ $16, DX
    214 	DECQ CX
    215 	JNZ Lexp_dec_loop
    216 	MOVUPS -16(BX), X0
    217 	MOVUPS X0, 16(DX)
    218 	RET
    219 
    220 #define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
    221 #define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
    222 TEXT _expand_key_128<>(SB),NOSPLIT,$0
    223 	PSHUFD $0xff, X1, X1
    224 	SHUFPS $0x10, X0, X4
    225 	PXOR X4, X0
    226 	SHUFPS $0x8c, X0, X4
    227 	PXOR X4, X0
    228 	PXOR X1, X0
    229 	MOVUPS X0, (BX)
    230 	ADDQ $16, BX
    231 	RET
    232 
    233 #define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
    234 #define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
    235 TEXT _expand_key_192a<>(SB),NOSPLIT,$0
    236 	PSHUFD $0x55, X1, X1
    237 	SHUFPS $0x10, X0, X4
    238 	PXOR X4, X0
    239 	SHUFPS $0x8c, X0, X4
    240 	PXOR X4, X0
    241 	PXOR X1, X0
    242 
    243 	MOVAPS X2, X5
    244 	MOVAPS X2, X6
    245 	PSLLDQ_X5_; BYTE $0x4
    246 	PSHUFD $0xff, X0, X3
    247 	PXOR X3, X2
    248 	PXOR X5, X2
    249 
    250 	MOVAPS X0, X1
    251 	SHUFPS $0x44, X0, X6
    252 	MOVUPS X6, (BX)
    253 	SHUFPS $0x4e, X2, X1
    254 	MOVUPS X1, 16(BX)
    255 	ADDQ $32, BX
    256 	RET
    257 
    258 TEXT _expand_key_192b<>(SB),NOSPLIT,$0
    259 	PSHUFD $0x55, X1, X1
    260 	SHUFPS $0x10, X0, X4
    261 	PXOR X4, X0
    262 	SHUFPS $0x8c, X0, X4
    263 	PXOR X4, X0
    264 	PXOR X1, X0
    265 
    266 	MOVAPS X2, X5
    267 	PSLLDQ_X5_; BYTE $0x4
    268 	PSHUFD $0xff, X0, X3
    269 	PXOR X3, X2
    270 	PXOR X5, X2
    271 
    272 	MOVUPS X0, (BX)
    273 	ADDQ $16, BX
    274 	RET
    275 
    276 TEXT _expand_key_256a<>(SB),NOSPLIT,$0
    277 	JMP _expand_key_128<>(SB)
    278 
    279 TEXT _expand_key_256b<>(SB),NOSPLIT,$0
    280 	PSHUFD $0xaa, X1, X1
    281 	SHUFPS $0x10, X2, X4
    282 	PXOR X4, X2
    283 	SHUFPS $0x8c, X2, X4
    284 	PXOR X4, X2
    285 	PXOR X1, X2
    286 
    287 	MOVUPS X2, (BX)
    288 	ADDQ $16, BX
    289 	RET
    290