1 /* 2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, 3 Joan Daemen, Michal Peeters, Gilles Van Assche and Ronny Van Keer, hereby 4 denoted as "the implementer". 5 6 For more information, feedback or questions, please refer to our websites: 7 http://keccak.noekeon.org/ 8 http://keyak.noekeon.org/ 9 http://ketje.noekeon.org/ 10 11 To the extent possible under law, the implementer has waived all copyright 12 and related or neighboring rights to the source code in this file. 13 http://creativecommons.org/publicdomain/zero/1.0/ 14 */ 15 16 #include <string.h> 17 #include <stdlib.h> 18 /* #include "brg_endian.h" */ 19 #include "KeccakP-1600-opt64-config.h" 20 21 #if NOT_PYTHON 22 typedef unsigned char UINT8; 23 /* typedef unsigned long long int UINT64; */ 24 #endif 25 26 #if defined(KeccakP1600_useLaneComplementing) 27 #define UseBebigokimisa 28 #endif 29 30 #if defined(_MSC_VER) 31 #define ROL64(a, offset) _rotl64(a, offset) 32 #elif defined(KeccakP1600_useSHLD) 33 #define ROL64(x,N) ({ \ 34 register UINT64 __out; \ 35 register UINT64 __in = x; \ 36 __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ 37 __out; \ 38 }) 39 #else 40 #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) 41 #endif 42 43 #include "KeccakP-1600-64.macros" 44 #ifdef KeccakP1600_fullUnrolling 45 #define FullUnrolling 46 #else 47 #define Unrolling KeccakP1600_unrolling 48 #endif 49 #include "KeccakP-1600-unrolling.macros" 50 #include "SnP-Relaned.h" 51 52 static const UINT64 KeccakF1600RoundConstants[24] = { 53 0x0000000000000001ULL, 54 0x0000000000008082ULL, 55 0x800000000000808aULL, 56 0x8000000080008000ULL, 57 0x000000000000808bULL, 58 0x0000000080000001ULL, 59 0x8000000080008081ULL, 60 0x8000000000008009ULL, 61 0x000000000000008aULL, 62 0x0000000000000088ULL, 63 0x0000000080008009ULL, 64 0x000000008000000aULL, 65 0x000000008000808bULL, 66 0x800000000000008bULL, 67 0x8000000000008089ULL, 68 0x8000000000008003ULL, 69 0x8000000000008002ULL, 70 0x8000000000000080ULL, 71 0x000000000000800aULL, 72 0x800000008000000aULL, 73 0x8000000080008081ULL, 74 0x8000000000008080ULL, 75 0x0000000080000001ULL, 76 0x8000000080008008ULL }; 77 78 /* ---------------------------------------------------------------- */ 79 80 void KeccakP1600_Initialize(void *state) 81 { 82 memset(state, 0, 200); 83 #ifdef KeccakP1600_useLaneComplementing 84 ((UINT64*)state)[ 1] = ~(UINT64)0; 85 ((UINT64*)state)[ 2] = ~(UINT64)0; 86 ((UINT64*)state)[ 8] = ~(UINT64)0; 87 ((UINT64*)state)[12] = ~(UINT64)0; 88 ((UINT64*)state)[17] = ~(UINT64)0; 89 ((UINT64*)state)[20] = ~(UINT64)0; 90 #endif 91 } 92 93 /* ---------------------------------------------------------------- */ 94 95 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) 96 { 97 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 98 UINT64 lane; 99 if (length == 0) 100 return; 101 if (length == 1) 102 lane = data[0]; 103 else { 104 lane = 0; 105 memcpy(&lane, data, length); 106 } 107 lane <<= offset*8; 108 #else 109 UINT64 lane = 0; 110 unsigned int i; 111 for(i=0; i<length; i++) 112 lane |= ((UINT64)data[i]) << ((i+offset)*8); 113 #endif 114 ((UINT64*)state)[lanePosition] ^= lane; 115 } 116 117 /* ---------------------------------------------------------------- */ 118 119 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) 120 { 121 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 122 unsigned int i = 0; 123 #ifdef NO_MISALIGNED_ACCESSES 124 /* If either pointer is misaligned, fall back to byte-wise xor. */ 125 126 if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) { 127 for (i = 0; i < laneCount * 8; i++) { 128 ((unsigned char*)state)[i] ^= data[i]; 129 } 130 } 131 else 132 #endif 133 { 134 /* Otherwise... */ 135 136 for( ; (i+8)<=laneCount; i+=8) { 137 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; 138 ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; 139 ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; 140 ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; 141 ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4]; 142 ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5]; 143 ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6]; 144 ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7]; 145 } 146 for( ; (i+4)<=laneCount; i+=4) { 147 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; 148 ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; 149 ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; 150 ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; 151 } 152 for( ; (i+2)<=laneCount; i+=2) { 153 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; 154 ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; 155 } 156 if (i<laneCount) { 157 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; 158 } 159 } 160 #else 161 unsigned int i; 162 UINT8 *curData = data; 163 for(i=0; i<laneCount; i++, curData+=8) { 164 UINT64 lane = (UINT64)curData[0] 165 | ((UINT64)curData[1] << 8) 166 | ((UINT64)curData[2] << 16) 167 | ((UINT64)curData[3] << 24) 168 | ((UINT64)curData[4] <<32) 169 | ((UINT64)curData[5] << 40) 170 | ((UINT64)curData[6] << 48) 171 | ((UINT64)curData[7] << 56); 172 ((UINT64*)state)[i] ^= lane; 173 } 174 #endif 175 } 176 177 /* ---------------------------------------------------------------- */ 178 179 #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) 180 void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) 181 { 182 UINT64 lane = byte; 183 lane <<= (offset%8)*8; 184 ((UINT64*)state)[offset/8] ^= lane; 185 } 186 #endif 187 188 /* ---------------------------------------------------------------- */ 189 190 void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) 191 { 192 SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8); 193 } 194 195 /* ---------------------------------------------------------------- */ 196 197 void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) 198 { 199 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 200 #ifdef KeccakP1600_useLaneComplementing 201 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { 202 unsigned int i; 203 for(i=0; i<length; i++) 204 ((unsigned char*)state)[lanePosition*8+offset+i] = ~data[i]; 205 } 206 else 207 #endif 208 { 209 memcpy((unsigned char*)state+lanePosition*8+offset, data, length); 210 } 211 #else 212 #error "Not yet implemented" 213 #endif 214 } 215 216 /* ---------------------------------------------------------------- */ 217 218 void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount) 219 { 220 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 221 #ifdef KeccakP1600_useLaneComplementing 222 unsigned int lanePosition; 223 224 for(lanePosition=0; lanePosition<laneCount; lanePosition++) 225 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) 226 ((UINT64*)state)[lanePosition] = ~((const UINT64*)data)[lanePosition]; 227 else 228 ((UINT64*)state)[lanePosition] = ((const UINT64*)data)[lanePosition]; 229 #else 230 memcpy(state, data, laneCount*8); 231 #endif 232 #else 233 #error "Not yet implemented" 234 #endif 235 } 236 237 /* ---------------------------------------------------------------- */ 238 239 void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) 240 { 241 SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8); 242 } 243 244 /* ---------------------------------------------------------------- */ 245 246 void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount) 247 { 248 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 249 #ifdef KeccakP1600_useLaneComplementing 250 unsigned int lanePosition; 251 252 for(lanePosition=0; lanePosition<byteCount/8; lanePosition++) 253 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) 254 ((UINT64*)state)[lanePosition] = ~0; 255 else 256 ((UINT64*)state)[lanePosition] = 0; 257 if (byteCount%8 != 0) { 258 lanePosition = byteCount/8; 259 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) 260 memset((unsigned char*)state+lanePosition*8, 0xFF, byteCount%8); 261 else 262 memset((unsigned char*)state+lanePosition*8, 0, byteCount%8); 263 } 264 #else 265 memset(state, 0, byteCount); 266 #endif 267 #else 268 #error "Not yet implemented" 269 #endif 270 } 271 272 /* ---------------------------------------------------------------- */ 273 274 void KeccakP1600_Permute_24rounds(void *state) 275 { 276 declareABCDE 277 #ifndef KeccakP1600_fullUnrolling 278 unsigned int i; 279 #endif 280 UINT64 *stateAsLanes = (UINT64*)state; 281 282 copyFromState(A, stateAsLanes) 283 rounds24 284 copyToState(stateAsLanes, A) 285 } 286 287 /* ---------------------------------------------------------------- */ 288 289 void KeccakP1600_Permute_12rounds(void *state) 290 { 291 declareABCDE 292 #ifndef KeccakP1600_fullUnrolling 293 unsigned int i; 294 #endif 295 UINT64 *stateAsLanes = (UINT64*)state; 296 297 copyFromState(A, stateAsLanes) 298 rounds12 299 copyToState(stateAsLanes, A) 300 } 301 302 /* ---------------------------------------------------------------- */ 303 304 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length) 305 { 306 UINT64 lane = ((UINT64*)state)[lanePosition]; 307 #ifdef KeccakP1600_useLaneComplementing 308 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) 309 lane = ~lane; 310 #endif 311 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 312 { 313 UINT64 lane1[1]; 314 lane1[0] = lane; 315 memcpy(data, (UINT8*)lane1+offset, length); 316 } 317 #else 318 unsigned int i; 319 lane >>= offset*8; 320 for(i=0; i<length; i++) { 321 data[i] = lane & 0xFF; 322 lane >>= 8; 323 } 324 #endif 325 } 326 327 /* ---------------------------------------------------------------- */ 328 329 #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) 330 void fromWordToBytes(UINT8 *bytes, const UINT64 word) 331 { 332 unsigned int i; 333 334 for(i=0; i<(64/8); i++) 335 bytes[i] = (word >> (8*i)) & 0xFF; 336 } 337 #endif 338 339 void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) 340 { 341 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 342 memcpy(data, state, laneCount*8); 343 #else 344 unsigned int i; 345 346 for(i=0; i<laneCount; i++) 347 fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]); 348 #endif 349 #ifdef KeccakP1600_useLaneComplementing 350 if (laneCount > 1) { 351 ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; 352 if (laneCount > 2) { 353 ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; 354 if (laneCount > 8) { 355 ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; 356 if (laneCount > 12) { 357 ((UINT64*)data)[12] = ~((UINT64*)data)[12]; 358 if (laneCount > 17) { 359 ((UINT64*)data)[17] = ~((UINT64*)data)[17]; 360 if (laneCount > 20) { 361 ((UINT64*)data)[20] = ~((UINT64*)data)[20]; 362 } 363 } 364 } 365 } 366 } 367 } 368 #endif 369 } 370 371 /* ---------------------------------------------------------------- */ 372 373 void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) 374 { 375 SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); 376 } 377 378 /* ---------------------------------------------------------------- */ 379 380 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) 381 { 382 UINT64 lane = ((UINT64*)state)[lanePosition]; 383 #ifdef KeccakP1600_useLaneComplementing 384 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) 385 lane = ~lane; 386 #endif 387 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 388 { 389 unsigned int i; 390 UINT64 lane1[1]; 391 lane1[0] = lane; 392 for(i=0; i<length; i++) 393 output[i] = input[i] ^ ((UINT8*)lane1)[offset+i]; 394 } 395 #else 396 unsigned int i; 397 lane >>= offset*8; 398 for(i=0; i<length; i++) { 399 output[i] = input[i] ^ (lane & 0xFF); 400 lane >>= 8; 401 } 402 #endif 403 } 404 405 /* ---------------------------------------------------------------- */ 406 407 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) 408 { 409 unsigned int i; 410 #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) 411 unsigned char temp[8]; 412 unsigned int j; 413 #endif 414 415 for(i=0; i<laneCount; i++) { 416 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 417 ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i]; 418 #else 419 fromWordToBytes(temp, ((const UINT64*)state)[i]); 420 for(j=0; j<8; j++) 421 output[i*8+j] = input[i*8+j] ^ temp[j]; 422 #endif 423 } 424 #ifdef KeccakP1600_useLaneComplementing 425 if (laneCount > 1) { 426 ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; 427 if (laneCount > 2) { 428 ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; 429 if (laneCount > 8) { 430 ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; 431 if (laneCount > 12) { 432 ((UINT64*)output)[12] = ~((UINT64*)output)[12]; 433 if (laneCount > 17) { 434 ((UINT64*)output)[17] = ~((UINT64*)output)[17]; 435 if (laneCount > 20) { 436 ((UINT64*)output)[20] = ~((UINT64*)output)[20]; 437 } 438 } 439 } 440 } 441 } 442 } 443 #endif 444 } 445 446 /* ---------------------------------------------------------------- */ 447 448 void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) 449 { 450 SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); 451 } 452 453 /* ---------------------------------------------------------------- */ 454 455 size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) 456 { 457 size_t originalDataByteLen = dataByteLen; 458 declareABCDE 459 #ifndef KeccakP1600_fullUnrolling 460 unsigned int i; 461 #endif 462 UINT64 *stateAsLanes = (UINT64*)state; 463 UINT64 *inDataAsLanes = (UINT64*)data; 464 465 copyFromState(A, stateAsLanes) 466 while(dataByteLen >= laneCount*8) { 467 addInput(A, inDataAsLanes, laneCount) 468 rounds24 469 inDataAsLanes += laneCount; 470 dataByteLen -= laneCount*8; 471 } 472 copyToState(stateAsLanes, A) 473 return originalDataByteLen - dataByteLen; 474 } 475