1 /* AesOpt.c -- Intel's AES 2 2013-11-12 : Igor Pavlov : Public domain */ 3 4 #include "Precomp.h" 5 6 #include "CpuArch.h" 7 8 #ifdef MY_CPU_X86_OR_AMD64 9 #if _MSC_VER >= 1500 10 #define USE_INTEL_AES 11 #endif 12 #endif 13 14 #ifdef USE_INTEL_AES 15 16 #include <wmmintrin.h> 17 18 void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks) 19 { 20 __m128i m = *p; 21 for (; numBlocks != 0; numBlocks--, data++) 22 { 23 UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; 24 const __m128i *w = p + 3; 25 m = _mm_xor_si128(m, *data); 26 m = _mm_xor_si128(m, p[2]); 27 do 28 { 29 m = _mm_aesenc_si128(m, w[0]); 30 m = _mm_aesenc_si128(m, w[1]); 31 w += 2; 32 } 33 while (--numRounds2 != 0); 34 m = _mm_aesenc_si128(m, w[0]); 35 m = _mm_aesenclast_si128(m, w[1]); 36 *data = m; 37 } 38 *p = m; 39 } 40 41 #define NUM_WAYS 3 42 43 #define AES_OP_W(op, n) { \ 44 const __m128i t = w[n]; \ 45 m0 = op(m0, t); \ 46 m1 = op(m1, t); \ 47 m2 = op(m2, t); \ 48 } 49 50 #define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n) 51 #define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n) 52 #define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n) 53 #define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n) 54 55 void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks) 56 { 57 __m128i iv = *p; 58 for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) 59 { 60 UInt32 numRounds2 = *(const UInt32 *)(p + 1); 61 const __m128i *w = p + numRounds2 * 2; 62 __m128i m0, m1, m2; 63 { 64 const __m128i t = w[2]; 65 m0 = _mm_xor_si128(t, data[0]); 66 m1 = _mm_xor_si128(t, data[1]); 67 m2 = _mm_xor_si128(t, data[2]); 68 } 69 numRounds2--; 70 do 71 { 72 AES_DEC(1) 73 AES_DEC(0) 74 w -= 2; 75 } 76 while (--numRounds2 != 0); 77 AES_DEC(1) 78 AES_DEC_LAST(0) 79 80 { 81 __m128i t; 82 t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; 83 t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; 84 t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; 85 } 86 } 87 for (; numBlocks != 0; numBlocks--, data++) 88 { 89 UInt32 numRounds2 = *(const UInt32 *)(p + 1); 90 const __m128i *w = p + numRounds2 * 2; 91 __m128i m = _mm_xor_si128(w[2], *data); 92 numRounds2--; 93 do 94 { 95 m = _mm_aesdec_si128(m, w[1]); 96 m = _mm_aesdec_si128(m, w[0]); 97 w -= 2; 98 } 99 while (--numRounds2 != 0); 100 m = _mm_aesdec_si128(m, w[1]); 101 m = _mm_aesdeclast_si128(m, w[0]); 102 103 m = _mm_xor_si128(m, iv); 104 iv = *data; 105 *data = m; 106 } 107 *p = iv; 108 } 109 110 void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks) 111 { 112 __m128i ctr = *p; 113 __m128i one; 114 one.m128i_u64[0] = 1; 115 one.m128i_u64[1] = 0; 116 for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) 117 { 118 UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; 119 const __m128i *w = p; 120 __m128i m0, m1, m2; 121 { 122 const __m128i t = w[2]; 123 ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); 124 ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); 125 ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); 126 } 127 w += 3; 128 do 129 { 130 AES_ENC(0) 131 AES_ENC(1) 132 w += 2; 133 } 134 while (--numRounds2 != 0); 135 AES_ENC(0) 136 AES_ENC_LAST(1) 137 data[0] = _mm_xor_si128(data[0], m0); 138 data[1] = _mm_xor_si128(data[1], m1); 139 data[2] = _mm_xor_si128(data[2], m2); 140 } 141 for (; numBlocks != 0; numBlocks--, data++) 142 { 143 UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; 144 const __m128i *w = p; 145 __m128i m; 146 ctr = _mm_add_epi64(ctr, one); 147 m = _mm_xor_si128(ctr, p[2]); 148 w += 3; 149 do 150 { 151 m = _mm_aesenc_si128(m, w[0]); 152 m = _mm_aesenc_si128(m, w[1]); 153 w += 2; 154 } 155 while (--numRounds2 != 0); 156 m = _mm_aesenc_si128(m, w[0]); 157 m = _mm_aesenclast_si128(m, w[1]); 158 *data = _mm_xor_si128(*data, m); 159 } 160 *p = ctr; 161 } 162 163 #else 164 165 void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); 166 void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); 167 void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); 168 169 void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) 170 { 171 AesCbc_Encode(p, data, numBlocks); 172 } 173 174 void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) 175 { 176 AesCbc_Decode(p, data, numBlocks); 177 } 178 179 void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) 180 { 181 AesCtr_Code(p, data, numBlocks); 182 } 183 184 #endif 185