// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Go assembler (Plan 9 syntax, operands are "src, dst"), amd64, AES-NI.
// Arguments are read from / results written to the caller's frame via FP.
// Symbols prefixed with the middle dot (·) are the package-level functions
// named in the "func ..." comments; symbols suffixed with <> are file-local.

#include "textflag.h"

// func hasAsm() bool
// returns whether AES-NI is supported
//
// Executes CPUID with AX = 1 and returns bit 25 of CX.
// Clobbers: AX, BX, CX, DX (all written by CPUID), flags.
TEXT ·hasAsm(SB),NOSPLIT,$0
	XORQ AX, AX
	INCL AX			// AX = 1: CPUID leaf 1
	CPUID
	SHRQ $25, CX		// move bit 25 down to bit 0 ...
	ANDQ $1, CX		// ... and isolate it
	MOVB CX, ret+0(FP)
	RET

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Encrypts the 16-byte block at src into dst using the round keys at xk.
// Round keys are consumed as consecutive 16-byte values starting at xk.
//
// Registers:
//	CX = nr (number of rounds), AX = round-key cursor,
//	DX = dst, BX = src, X0 = state, X1 = current round key.
//
// Dispatch on nr: nr == 12 enters at Lenc192, nr < 12 enters at Lenc128,
// anything else falls through to Lenc256. The three entry points run
// 12, 10 and 14 AES rounds respectively (the last one via AESENCLAST).
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVQ nr+0(FP), CX
	MOVQ xk+8(FP), AX
	MOVQ dst+16(FP), DX
	MOVQ src+24(FP), BX
	MOVUPS 0(AX), X1
	MOVUPS 0(BX), X0
	ADDQ $16, AX
	PXOR X1, X0		// initial AddRoundKey with round key 0
	SUBQ $12, CX
	JE Lenc192
	JB Lenc128
Lenc256:
	// two extra rounds, then fall through into the 192 path
	MOVUPS 0(AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	ADDQ $32, AX
Lenc192:
	// two extra rounds, then fall through into the 128 path
	MOVUPS 0(AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	ADDQ $32, AX
Lenc128:
	// nine full rounds followed by the final round
	MOVUPS 0(AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	MOVUPS 32(AX), X1
	AESENC X1, X0
	MOVUPS 48(AX), X1
	AESENC X1, X0
	MOVUPS 64(AX), X1
	AESENC X1, X0
	MOVUPS 80(AX), X1
	AESENC X1, X0
	MOVUPS 96(AX), X1
	AESENC X1, X0
	MOVUPS 112(AX), X1
	AESENC X1, X0
	MOVUPS 128(AX), X1
	AESENC X1, X0
	MOVUPS 144(AX), X1
	AESENCLAST X1, X0
	MOVUPS X0, 0(DX)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Decrypts the 16-byte block at src into dst using the round keys at xk.
// Same structure and register roles as encryptBlockAsm, but with
// AESDEC/AESDECLAST. The keys are consumed front to back, so xk is
// presumably the "dec" schedule written by expandKeyAsm (already reversed
// and passed through AESIMC) - confirm against the Go caller.
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVQ nr+0(FP), CX
	MOVQ xk+8(FP), AX
	MOVQ dst+16(FP), DX
	MOVQ src+24(FP), BX
	MOVUPS 0(AX), X1
	MOVUPS 0(BX), X0
	ADDQ $16, AX
	PXOR X1, X0		// initial AddRoundKey with round key 0
	SUBQ $12, CX
	JE Ldec192
	JB Ldec128
Ldec256:
	// two extra rounds, then fall through into the 192 path
	MOVUPS 0(AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	ADDQ $32, AX
Ldec192:
	// two extra rounds, then fall through into the 128 path
	MOVUPS 0(AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	ADDQ $32, AX
Ldec128:
	// nine full rounds followed by the final round
	MOVUPS 0(AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	MOVUPS 32(AX), X1
	AESDEC X1, X0
	MOVUPS 48(AX), X1
	AESDEC X1, X0
	MOVUPS 64(AX), X1
	AESDEC X1, X0
	MOVUPS 80(AX), X1
	AESDEC X1, X0
	MOVUPS 96(AX), X1
	AESDEC X1, X0
	MOVUPS 112(AX), X1
	AESDEC X1, X0
	MOVUPS 128(AX), X1
	AESDEC X1, X0
	MOVUPS 144(AX), X1
	AESDECLAST X1, X0
	MOVUPS X0, 0(DX)
	RET

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored in uint128 format, not uint32
//
// Expands the cipher key at key into nr+1 encryption round keys at enc,
// then derives nr+1 decryption round keys at dec from them.
//
// Registers:
//	CX = nr, AX = key, BX = enc write cursor, DX = dec write cursor,
//	X0/X2 = running key state, X1 = AESKEYGENASSIST result,
//	X4 = scratch shared with the _expand_key_* helpers.
//
// Dispatch on nr: nr == 12 takes the 192-bit path (reads 24 key bytes),
// nr < 12 the 128-bit path (16 key bytes), anything else the 256-bit
// path (32 key bytes).
//
// Decryption schedule layout (16-byte units):
//	dec[0]  = enc[nr]
//	dec[i]  = AESIMC(enc[nr-i])   for 0 < i < nr
//	dec[nr] = enc[0]
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVQ nr+0(FP), CX
	MOVQ key+8(FP), AX
	MOVQ enc+16(FP), BX
	MOVQ dec+24(FP), DX
	MOVUPS (AX), X0
	// enc
	MOVUPS X0, (BX)		// round key 0 is the first 16 key bytes
	ADDQ $16, BX
	PXOR X4, X4 // _expand_key_* expect X4 to be zero
	CMPL CX, $12
	JE Lexp_enc192
	JB Lexp_enc128
Lexp_enc256:
	MOVUPS 16(AX), X2
	MOVUPS X2, (BX)		// round key 1 is the second 16 key bytes
	ADDQ $16, BX
	AESKEYGENASSIST $0x01, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x01, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x02, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x02, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x04, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x04, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x08, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x08, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x10, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x10, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x20, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x20, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x40, X2, X1
	CALL _expand_key_256a<>(SB)
	JMP Lexp_dec
Lexp_enc192:
	MOVQ 16(AX), X2		// remaining 8 key bytes into the low half of X2
	AESKEYGENASSIST $0x01, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x02, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x04, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x08, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x10, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x20, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x40, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x80, X2, X1
	CALL _expand_key_192b<>(SB)
	JMP Lexp_dec
Lexp_enc128:
	AESKEYGENASSIST $0x01, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x02, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x04, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x08, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x10, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x20, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x40, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x80, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x1b, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x36, X0, X1
	CALL _expand_key_128<>(SB)
Lexp_dec:
	// dec
	// BX points one past the last encryption round key here.
	SUBQ $16, BX
	MOVUPS (BX), X1
	MOVUPS X1, (DX)		// dec[0] = enc[nr], copied unchanged
	DECQ CX			// nr-1 middle keys go through AESIMC
Lexp_dec_loop:
	MOVUPS -16(BX), X1
	AESIMC X1, X0
	MOVUPS X0, 16(DX)
	SUBQ $16, BX		// walk enc backwards ...
	ADDQ $16, DX		// ... while walking dec forwards
	DECQ CX
	JNZ Lexp_dec_loop
	MOVUPS -16(BX), X0
	MOVUPS X0, 16(DX)	// dec[nr] = enc[0], copied unchanged
	RET

// Hand-encoded PSHUFD forms (66 0F 70 /r; the imm8 must follow as a BYTE).
// Nothing in the code visible in this file references them; the PSHUFD
// mnemonic is used directly instead.
#define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
#define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9

// _expand_key_128 produces one 16-byte round key.
//
// In:	X0 = previous round key
//	X1 = AESKEYGENASSIST result for this step (dword 3 is broadcast)
//	X4 = scratch whose low dword must be zero; both shuffles below keep
//	     X4's low dword in place, so the caller only zeroes X4 once
//	BX = output cursor
// Out:	X0 = new round key, stored at (BX); BX advanced by 16.
// Clobbers: X1, X4, flags.
TEXT _expand_key_128<>(SB),NOSPLIT,$0
	PSHUFD $0xff, X1, X1
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PXOR X1, X0
	MOVUPS X0, (BX)
	ADDQ $16, BX
	RET

// PSLLDQ_X5_ is the hand-encoded prefix of "PSLLDQ $imm8, X5"
// (66 0F 73 /7 with ModRM selecting X5); the shift count must follow as
// a BYTE, e.g. "PSLLDQ_X5_; BYTE $0x4" shifts X5 left by 4 bytes.
#define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
// Hand-encoded PSHUFD X0 -> X3 prefix; not referenced in the code visible here.
#define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8

// _expand_key_192a performs one 192-bit schedule step and emits 32 bytes.
//
// In:	X0 = low 128 bits of the running key state
//	X2 = remaining key state (caller loads 8 key bytes into its low half)
//	X1 = AESKEYGENASSIST result for this step (dword 1 is broadcast)
//	X4 = scratch, same requirement as in _expand_key_128
//	BX = output cursor
// Out:	X0, X2 updated; 32 bytes stored at (BX); BX advanced by 32.
// Clobbers: X1, X3, X4, X5, X6, flags.
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
	PSHUFD $0x55, X1, X1
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PXOR X1, X0

	MOVAPS X2, X5
	MOVAPS X2, X6		// keep the pre-update X2 for the first store
	PSLLDQ_X5_; BYTE $0x4	// PSLLDQ $4, X5
	PSHUFD $0xff, X0, X3
	PXOR X3, X2
	PXOR X5, X2

	MOVAPS X0, X1
	SHUFPS $0x44, X0, X6	// X6 = old X2 low half : new X0 low half
	MOVUPS X6, (BX)
	SHUFPS $0x4e, X2, X1	// X1 = new X0 high half : new X2 low half
	MOVUPS X1, 16(BX)
	ADDQ $32, BX
	RET

// _expand_key_192b performs one 192-bit schedule step and emits 16 bytes.
//
// Same inputs and state update as _expand_key_192a, but only the new X0
// is stored at (BX) and BX is advanced by 16.
// Clobbers: X1, X3, X4, X5, flags.
TEXT _expand_key_192b<>(SB),NOSPLIT,$0
	PSHUFD $0x55, X1, X1
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PXOR X1, X0

	MOVAPS X2, X5
	PSLLDQ_X5_; BYTE $0x4	// PSLLDQ $4, X5
	PSHUFD $0xff, X0, X3
	PXOR X3, X2
	PXOR X5, X2

	MOVUPS X0, (BX)
	ADDQ $16, BX
	RET

// _expand_key_256a updates the X0 half of the 256-bit key state.
// It is identical to _expand_key_128, so it tail-jumps there; the RET in
// _expand_key_128 returns directly to this function's caller.
TEXT _expand_key_256a<>(SB),NOSPLIT,$0
	JMP _expand_key_128<>(SB)

// _expand_key_256b updates the X2 half of the 256-bit key state.
//
// In:	X2 = previous round key for this half
//	X1 = AESKEYGENASSIST result for this step (dword 2 is broadcast)
//	X4 = scratch, same requirement as in _expand_key_128
//	BX = output cursor
// Out:	X2 = new round key, stored at (BX); BX advanced by 16.
// Clobbers: X1, X4, flags.
TEXT _expand_key_256b<>(SB),NOSPLIT,$0
	PSHUFD $0xaa, X1, X1
	SHUFPS $0x10, X2, X4
	PXOR X4, X2
	SHUFPS $0x8c, X2, X4
	PXOR X4, X2
	PXOR X1, X2

	MOVUPS X2, (BX)
	ADDQ $16, BX
	RET