Home | History | Annotate | Download | only in sha
      1 /* crypto/sha/sha512.c */
      2 /* ====================================================================
      3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
      4  * according to the OpenSSL license [found in ../../LICENSE].
      5  * ====================================================================
      6  */
      7 #include <openssl/opensslconf.h>
      8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
      9 /*
     10  * IMPLEMENTATION NOTES.
     11  *
     12  * As you might have noticed 32-bit hash algorithms:
     13  *
     14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
     15  * - optimized versions implement two transform functions: one operating
     16  *   on [aligned] data in host byte order and one - on data in input
     17  *   stream byte order;
     18  * - share common byte-order neutral collector and padding function
     19  *   implementations, ../md32_common.h;
     20  *
     21  * None of the above applies to this SHA-512 implementation. Reasons
     22  * [in reverse order] are:
     23  *
     24  * - it's the only 64-bit hash algorithm at the moment of this writing,
     25  *   there is no need for common collector/padding implementation [yet];
     26  * - by supporting only one transform function [which operates on
     27  *   *aligned* data in input stream byte order, big-endian in this case]
     28  *   we minimize burden of maintenance in two ways: a) collector/padding
     29  *   function is simpler; b) only one transform function to stare at;
     30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
     31  *   apply a number of optimizations to mitigate potential performance
     32  *   penalties caused by previous design decision;
     33  *
     34  * Caveat lector.
     35  *
     36  * Implementation relies on the fact that "long long" is 64-bit on
     37  * both 32- and 64-bit platforms. If some compiler vendor comes up
     38  * with 128-bit long long, adjustment to sha.h would be required.
     39  * As this implementation relies on 64-bit integer type, it's totally
     40  * inappropriate for platforms which don't support it, most notably
     41  * 16-bit platforms.
     42  *					<appro (at) fy.chalmers.se>
     43  */
     44 #include <stdlib.h>
     45 #include <string.h>
     46 
     47 #include <openssl/crypto.h>
     48 #include <openssl/sha.h>
     49 #include <openssl/opensslv.h>
     50 
     51 #include "cryptlib.h"
     52 
     /*
      * Identification string for this module: the literal "SHA-512"
      * concatenated with OPENSSL_VERSION_PTEXT.
      */
     53 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
     54 
        /*
         * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined for the x86,
         * x86_64 and s390 targets listed below, and whenever SHA512_ASM is
         * defined.  When it is NOT defined, SHA512_Update() and
         * SHA512_Transform() copy input that is not aligned to
         * sizeof(c->u.d[0]) into the context buffer before handing it to
         * sha512_block_data_order().
         */
     55 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     56     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
     57     defined(__s390__) || defined(__s390x__) || \
     58     defined(SHA512_ASM)
     59 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
     60 #endif
     61 
     62 fips_md_init_ctx(SHA384, SHA512)
     63 	{
     64 	c->h[0]=U64(0xcbbb9d5dc1059ed8);
     65 	c->h[1]=U64(0x629a292a367cd507);
     66 	c->h[2]=U64(0x9159015a3070dd17);
     67 	c->h[3]=U64(0x152fecd8f70e5939);
     68 	c->h[4]=U64(0x67332667ffc00b31);
     69 	c->h[5]=U64(0x8eb44a8768581511);
     70 	c->h[6]=U64(0xdb0c2e0d64f98fa7);
     71 	c->h[7]=U64(0x47b5481dbefa4fa4);
     72 
     73         c->Nl=0;        c->Nh=0;
     74         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
     75         return 1;
     76 	}
     77 
     78 fips_md_init(SHA512)
     79 	{
     80 	c->h[0]=U64(0x6a09e667f3bcc908);
     81 	c->h[1]=U64(0xbb67ae8584caa73b);
     82 	c->h[2]=U64(0x3c6ef372fe94f82b);
     83 	c->h[3]=U64(0xa54ff53a5f1d36f1);
     84 	c->h[4]=U64(0x510e527fade682d1);
     85 	c->h[5]=U64(0x9b05688c2b3e6c1f);
     86 	c->h[6]=U64(0x1f83d9abfb41bd6b);
     87 	c->h[7]=U64(0x5be0cd19137e2179);
     88 
     89         c->Nl=0;        c->Nh=0;
     90         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
     91         return 1;
     92 	}
     93 
     /*
      * Forward declaration of the block transform used by the functions
      * below: it processes `num' consecutive blocks starting at `in' and
      * updates ctx.  It has internal linkage unless SHA512_ASM is defined;
      * the C definitions later in this file are compiled only when
      * SHA512_ASM is not defined.
      */
     94 #ifndef SHA512_ASM
     95 static
     96 #endif
     97 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
     98 
     99 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
    100 	{
    101 	unsigned char *p=(unsigned char *)c->u.p;
    102 	size_t n=c->num;
    103 
    104 	p[n]=0x80;	/* There always is a room for one */
    105 	n++;
    106 	if (n > (sizeof(c->u)-16))
    107 		memset (p+n,0,sizeof(c->u)-n), n=0,
    108 		sha512_block_data_order (c,p,1);
    109 
    110 	memset (p+n,0,sizeof(c->u)-16-n);
    111 #ifdef	B_ENDIAN
    112 	c->u.d[SHA_LBLOCK-2] = c->Nh;
    113 	c->u.d[SHA_LBLOCK-1] = c->Nl;
    114 #else
    115 	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
    116 	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
    117 	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
    118 	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
    119 	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
    120 	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
    121 	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
    122 	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
    123 	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
    124 	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
    125 	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
    126 	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
    127 	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
    128 	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
    129 	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
    130 	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
    131 #endif
    132 
    133 	sha512_block_data_order (c,p,1);
    134 
    135 	if (md==0) return 0;
    136 
    137 	switch (c->md_len)
    138 		{
    139 		/* Let compiler decide if it's appropriate to unroll... */
    140 		case SHA384_DIGEST_LENGTH:
    141 			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
    142 				{
    143 				SHA_LONG64 t = c->h[n];
    144 
    145 				*(md++)	= (unsigned char)(t>>56);
    146 				*(md++)	= (unsigned char)(t>>48);
    147 				*(md++)	= (unsigned char)(t>>40);
    148 				*(md++)	= (unsigned char)(t>>32);
    149 				*(md++)	= (unsigned char)(t>>24);
    150 				*(md++)	= (unsigned char)(t>>16);
    151 				*(md++)	= (unsigned char)(t>>8);
    152 				*(md++)	= (unsigned char)(t);
    153 				}
    154 			break;
    155 		case SHA512_DIGEST_LENGTH:
    156 			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
    157 				{
    158 				SHA_LONG64 t = c->h[n];
    159 
    160 				*(md++)	= (unsigned char)(t>>56);
    161 				*(md++)	= (unsigned char)(t>>48);
    162 				*(md++)	= (unsigned char)(t>>40);
    163 				*(md++)	= (unsigned char)(t>>32);
    164 				*(md++)	= (unsigned char)(t>>24);
    165 				*(md++)	= (unsigned char)(t>>16);
    166 				*(md++)	= (unsigned char)(t>>8);
    167 				*(md++)	= (unsigned char)(t);
    168 				}
    169 			break;
    170 		/* ... as well as make sure md_len is not abused. */
    171 		default:	return 0;
    172 		}
    173 
    174 	return 1;
    175 	}
    176 
    177 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
    178 {   return SHA512_Final (md,c);   }
    179 
    180 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
    181 	{
    182 	SHA_LONG64	l;
    183 	unsigned char  *p=c->u.p;
    184 	const unsigned char *data=(const unsigned char *)_data;
    185 
    186 	if (len==0) return  1;
    187 
    188 	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
    189 	if (l < c->Nl)		c->Nh++;
    190 	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
    191 	c->Nl=l;
    192 
    193 	if (c->num != 0)
    194 		{
    195 		size_t n = sizeof(c->u) - c->num;
    196 
    197 		if (len < n)
    198 			{
    199 			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
    200 			return 1;
    201 			}
    202 		else	{
    203 			memcpy (p+c->num,data,n), c->num = 0;
    204 			len-=n, data+=n;
    205 			sha512_block_data_order (c,p,1);
    206 			}
    207 		}
    208 
    209 	if (len >= sizeof(c->u))
    210 		{
    211 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    212 		if ((size_t)data%sizeof(c->u.d[0]) != 0)
    213 			while (len >= sizeof(c->u))
    214 				memcpy (p,data,sizeof(c->u)),
    215 				sha512_block_data_order (c,p,1),
    216 				len  -= sizeof(c->u),
    217 				data += sizeof(c->u);
    218 		else
    219 #endif
    220 			sha512_block_data_order (c,data,len/sizeof(c->u)),
    221 			data += len,
    222 			len  %= sizeof(c->u),
    223 			data -= len;
    224 		}
    225 
    226 	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
    227 
    228 	return 1;
    229 	}
    230 
    231 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
    232 {   return SHA512_Update (c,data,len);   }
    233 
    234 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
    235 	{
    236 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    237 	if ((size_t)data%sizeof(c->u.d[0]) != 0)
    238 		memcpy(c->u.p,data,sizeof(c->u.p)),
    239 		data = c->u.p;
    240 #endif
    241 	sha512_block_data_order (c,data,1);
    242 	}
    243 
    244 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
    245 	{
    246 	SHA512_CTX c;
    247 	static unsigned char m[SHA384_DIGEST_LENGTH];
    248 
    249 	if (md == NULL) md=m;
    250 	SHA384_Init(&c);
    251 	SHA512_Update(&c,d,n);
    252 	SHA512_Final(md,&c);
    253 	OPENSSL_cleanse(&c,sizeof(c));
    254 	return(md);
    255 	}
    256 
    257 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
    258 	{
    259 	SHA512_CTX c;
    260 	static unsigned char m[SHA512_DIGEST_LENGTH];
    261 
    262 	if (md == NULL) md=m;
    263 	SHA512_Init(&c);
    264 	SHA512_Update(&c,d,n);
    265 	SHA512_Final(md,&c);
    266 	OPENSSL_cleanse(&c,sizeof(c));
    267 	return(md);
    268 	}
    269 
    270 #ifndef SHA512_ASM
        /*
         * Table of the 80 64-bit round constants.  Each of the C
         * sha512_block_data_order() variants below adds K512[i] into the
         * temporary for round i.  Compiled only when SHA512_ASM is not defined.
         */
    271 static const SHA_LONG64 K512[80] = {
    272         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
    273         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
    274         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
    275         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
    276         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
    277         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
    278         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
    279         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
    280         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
    281         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
    282         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
    283         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
    284         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
    285         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
    286         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
    287         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
    288         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
    289         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
    290         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
    291         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
    292         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
    293         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
    294         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
    295         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
    296         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
    297         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
    298         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
    299         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
    300         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
    301         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
    302         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
    303         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
    304         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
    305         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
    306         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
    307         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
    308         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
    309         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
    310         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
    311         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
    312 
    313 #ifndef PEDANTIC
        /*
         * Platform-specific definitions of two helpers, skipped entirely when
         * PEDANTIC is defined:
         *
         *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
         *   PULL64(x)  - read the 8 bytes stored in object x as a big-endian
         *                64-bit value (only provided here for non-B_ENDIAN
         *                x86/x86_64 builds).
         *
         * Whatever is left undefined after this section gets the portable
         * definition that follows it.
         */
    314 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
    315 #  if defined(__x86_64) || defined(__x86_64__)
        /* x86_64, GCC inline asm: rotate with rorq, byte-swap with bswapq. */
    316 #   define ROTR(a,n)	({ SHA_LONG64 ret;		\
    317 				asm ("rorq %1,%0"	\
    318 				: "=r"(ret)		\
    319 				: "J"(n),"0"(a)		\
    320 				: "cc"); ret;		})
    321 #   if !defined(B_ENDIAN)
    322 #    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
    323 				asm ("bswapq	%0"		\
    324 				: "=r"(ret)			\
    325 				: "0"(ret)); ret;		})
    326 #   endif
    327 #  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
        /*
         * 32-bit x86, GCC inline asm: the two 32-bit halves are loaded and
         * byte-swapped separately, then combined as hi<<32|lo.  The
         * I386_ONLY variant swaps bytes with xchgb/roll instead of bswapl.
         */
    328 #   if defined(I386_ONLY)
    329 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
    330 			 unsigned int hi=p[0],lo=p[1];		\
    331 				asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
    332 				    "roll $16,%%eax; roll $16,%%edx; "\
    333 				    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
    334 				: "=a"(lo),"=d"(hi)		\
    335 				: "0"(lo),"1"(hi) : "cc");	\
    336 				((SHA_LONG64)hi)<<32|lo;	})
    337 #   else
    338 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
    339 			 unsigned int hi=p[0],lo=p[1];		\
    340 				asm ("bswapl %0; bswapl %1;"	\
    341 				: "=r"(lo),"=r"(hi)		\
    342 				: "0"(lo),"1"(hi));		\
    343 				((SHA_LONG64)hi)<<32|lo;	})
    344 #   endif
    345 #  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
        /* 64-bit PowerPC, GCC inline asm: rotate with rotrdi. */
    346 #   define ROTR(a,n)	({ SHA_LONG64 ret;		\
    347 				asm ("rotrdi %0,%1,%2"	\
    348 				: "=r"(ret)		\
    349 				: "r"(a),"K"(n)); ret;	})
    350 #  endif
    351 # elif defined(_MSC_VER)
    352 #  if defined(_WIN64)	/* applies to both IA-64 and AMD64 */
    353 #   pragma intrinsic(_rotr64)
    354 #   define ROTR(a,n)	_rotr64((a),n)
    355 #  endif
    356 #  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
        /*
         * MSVC on 32-bit x86: __pull64be() is a __fastcall helper written in
         * inline asm.  It loads the two 32-bit halves at [ecx] into edx/eax
         * and byte-swaps each; there is no C return statement, so the result
         * is whatever the asm leaves in edx:eax.  PULL64(x) calls it on &x.
         */
    357 #   if defined(I386_ONLY)
    358     static SHA_LONG64 __fastcall __pull64be(const void *x)
    359     {	_asm	mov	edx, [ecx + 0]
    360 	_asm	mov	eax, [ecx + 4]
    361 	_asm	xchg	dh,dl
    362 	_asm	xchg	ah,al
    363 	_asm	rol	edx,16
    364 	_asm	rol	eax,16
    365 	_asm	xchg	dh,dl
    366 	_asm	xchg	ah,al
    367     }
    368 #   else
    369     static SHA_LONG64 __fastcall __pull64be(const void *x)
    370     {	_asm	mov	edx, [ecx + 0]
    371 	_asm	mov	eax, [ecx + 4]
    372 	_asm	bswap	edx
    373 	_asm	bswap	eax
    374     }
    375 #   endif
    376 #   define PULL64(x) __pull64be(&(x))
    377 #   if _MSC_VER<=1200
    378 #    pragma inline_depth(0)
    379 #   endif
    380 #  endif
    381 # endif
    382 #endif
    383 
    384 #ifndef PULL64
    385 #define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
    386 #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
    387 #endif
    388 
    389 #ifndef ROTR
    390 #define ROTR(x,s)	(((x)>>s) | (x)<<(64-s))
    391 #endif
    392 
    /*
     * Bitwise building blocks of the compression function.
     *
     * Sigma0/Sigma1 are applied to the working variables A and E in every
     * round; sigma0/sigma1 are applied to earlier message-schedule words
     * when extending the schedule past the first 16 words.  Each is the XOR
     * of two or three rotations/shifts of x by the listed bit counts.
     *
     * Ch(x,y,z) takes each bit from y where x has a 1 and from z where x
     * has a 0.  Maj(x,y,z) sets each bit that is 1 in at least two of the
     * three inputs.
     *
     * All of these evaluate their arguments more than once.
     */
    393 #define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
    394 #define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
    395 #define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
    396 #define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
    397 
    398 #define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
    399 #define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
    400 
    401 
    402 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
    403 /*
    404  * This code should give better results on 32-bit CPU with less than
    405  * ~24 registers, both size and performance wise...
    406  */
    407 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
    408 	{
        	/*
        	 * Block transform, 32-bit x86 variant: processes `num' blocks of
        	 * SHA_LBLOCK 64-bit words starting at `in' and folds each into
        	 * ctx->h[].
        	 *
        	 * Only A and E are kept in named variables.  The other six
        	 * working variables and the message schedule live in X[], viewed
        	 * through the sliding pointer F, which moves down one slot per
        	 * round.  Each round stores A at F[0], E at F[4] and the schedule
        	 * word at F[8]; after F-- those values appear one index higher, so
        	 * F[1..3] and F[5..7] hold the values A and E had in the three
        	 * preceding rounds, and F[8+k] is the schedule word from k rounds
        	 * ago.
        	 */
    409 	const SHA_LONG64 *W=in;
    410 	SHA_LONG64	A,E,T;
    411 	SHA_LONG64	X[9+80],*F;
    412 	int i;
    413 
    414 			while (num--) {
    415 
        	/* Start at the top of X[]; F decrements 80 times and ends at X. */
    416 	F    = X+80;
    417 	A    = ctx->h[0];	F[1] = ctx->h[1];
    418 	F[2] = ctx->h[2];	F[3] = ctx->h[3];
    419 	E    = ctx->h[4];	F[5] = ctx->h[5];
    420 	F[6] = ctx->h[6];	F[7] = ctx->h[7];
    421 
        	/* Rounds 0..15: schedule word comes straight from the input. */
    422 	for (i=0;i<16;i++,F--)
    423 		{
    424 #ifdef B_ENDIAN
    425 		T = W[i];
    426 #else
    427 		T = PULL64(W[i]);
    428 #endif
    429 		F[0] = A;
    430 		F[4] = E;
    431 		F[8] = T;
    432 		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
    433 		E    = F[3] + T;
    434 		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
    435 		}
    436 
        	/*
        	 * Rounds 16..79: build the schedule word from the words stored
        	 * 15, 2, 16 and 7 rounds earlier, then run the same round body.
        	 */
    437 	for (;i<80;i++,F--)
    438 		{
    439 		T    = sigma0(F[8+16-1]);
    440 		T   += sigma1(F[8+16-14]);
    441 		T   += F[8+16] + F[8+16-9];
    442 
    443 		F[0] = A;
    444 		F[4] = E;
    445 		F[8] = T;
    446 		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
    447 		E    = F[3] + T;
    448 		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
    449 		}
    450 
        	/* Add the working variables back into the chaining state. */
    451 	ctx->h[0] += A;		ctx->h[1] += F[1];
    452 	ctx->h[2] += F[2];	ctx->h[3] += F[3];
    453 	ctx->h[4] += E;		ctx->h[5] += F[5];
    454 	ctx->h[6] += F[6];	ctx->h[7] += F[7];
    455 
    456 			W+=SHA_LBLOCK;
    457 			}
    458 	}
    459 
    460 #elif defined(OPENSSL_SMALL_FOOTPRINT)
    461 
    462 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
    463 	{
    464 	const SHA_LONG64 *W=in;
    465 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    466 	SHA_LONG64	X[16];
    467 	int i;
    468 
    469 			while (num--) {
    470 
    471 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
    472 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
    473 
    474 	for (i=0;i<16;i++)
    475 		{
    476 #ifdef B_ENDIAN
    477 		T1 = X[i] = W[i];
    478 #else
    479 		T1 = X[i] = PULL64(W[i]);
    480 #endif
    481 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
    482 		T2 = Sigma0(a) + Maj(a,b,c);
    483 		h = g;	g = f;	f = e;	e = d + T1;
    484 		d = c;	c = b;	b = a;	a = T1 + T2;
    485 		}
    486 
    487 	for (;i<80;i++)
    488 		{
    489 		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
    490 		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
    491 
    492 		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
    493 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
    494 		T2 = Sigma0(a) + Maj(a,b,c);
    495 		h = g;	g = f;	f = e;	e = d + T1;
    496 		d = c;	c = b;	b = a;	a = T1 + T2;
    497 		}
    498 
    499 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
    500 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
    501 
    502 			W+=SHA_LBLOCK;
    503 			}
    504 	}
    505 
    506 #else
    507 
    /*
     * One compression round for the unrolled block transform below.
     * On entry T1 must already hold the schedule word for round i.  The
     * macro adds the round function and K512[i] into T1, then updates only
     * two of its arguments: d gets T1 added and h is overwritten with
     * Sigma0(a)+Maj(a,b,c)+T1.  The caller rotates the argument order from
     * one round to the next instead of moving values between variables.
     */
    508 #define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
    509 	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
    510 	h = Sigma0(a) + Maj(a,b,c);			\
    511 	d += T1;	h += T1;		} while (0)
    512 
        /*
         * Round i+j for rounds past the first 16.  Updates slot j of the
         * 16-word ring buffer X in place from slots j+1, j+9 and j+14 (all
         * modulo 16), leaves the new word in T1, and then runs ROUND_00_15
         * for round i+j.  Uses the caller's s0, s1 and T1 variables.
         */
    513 #define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
    514 	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
    515 	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
    516 	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
    517 	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
    518 
    519 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
    520 	{
        	/*
        	 * Block transform, default (unrolled) variant: processes `num'
        	 * blocks of SHA_LBLOCK 64-bit words starting at `in' and folds
        	 * each into ctx->h[].
        	 *
        	 * The eight working variables are never shuffled.  Each round
        	 * macro is invoked with the argument list rotated by one position
        	 * relative to the previous round, and after every 8 rounds the
        	 * order is back to a,b,c,d,e,f,g,h.
        	 */
    521 	const SHA_LONG64 *W=in;
    522 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
    523 	SHA_LONG64	X[16];
    524 	int i;
    525 
    526 			while (num--) {
    527 
    528 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
    529 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
    530 
        	/*
        	 * Rounds 0..15: load each input word into X[] and T1 (as is when
        	 * B_ENDIAN is defined, through PULL64 otherwise) and run the round.
        	 */
    531 #ifdef B_ENDIAN
    532 	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
    533 	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
    534 	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
    535 	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
    536 	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
    537 	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
    538 	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
    539 	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
    540 	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
    541 	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
    542 	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
    543 	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
    544 	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
    545 	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
    546 	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
    547 	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
    548 #else
    549 	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
    550 	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
    551 	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
    552 	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
    553 	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
    554 	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
    555 	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
    556 	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
    557 	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
    558 	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
    559 	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
    560 	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
    561 	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
    562 	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
    563 	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
    564 	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
    565 #endif
    566 
        	/* Rounds 16..79, sixteen per iteration; i is the base round. */
    567 	for (i=16;i<80;i+=16)
    568 		{
    569 		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
    570 		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
    571 		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
    572 		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
    573 		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
    574 		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
    575 		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
    576 		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
    577 		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
    578 		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
    579 		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
    580 		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
    581 		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
    582 		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
    583 		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
    584 		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
    585 		}
    586 
        	/* Add the working variables back into the chaining state. */
    587 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
    588 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
    589 
    590 			W+=SHA_LBLOCK;
    591 			}
    592 	}
    593 
    594 #endif
    595 
    596 #endif /* SHA512_ASM */
    597 
    598 #else /* !OPENSSL_NO_SHA512 */
    599 
    600 #if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
        /*
         * This branch is compiled when OPENSSL_NO_SHA or OPENSSL_NO_SHA512 is
         * defined, i.e. when none of the code above is built.  On the listed
         * configurations a self-referencing static pointer is defined;
         * presumably this keeps the translation unit from being empty --
         * NOTE(review): confirm the intent.
         */
    601 static void *dummy=&dummy;
    602 #endif
    603 
    604 #endif /* !OPENSSL_NO_SHA512 */
    605