1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
7 #include <openssl/opensslconf.h>
9 # include <openssl/fips.h>
12 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
14 * IMPLEMENTATION NOTES.
16 * As you might have noticed 32-bit hash algorithms:
18 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
19 * - optimized versions implement two transform functions: one operating
20 * on [aligned] data in host byte order and one - on data in input
22 * - share common byte-order neutral collector and padding function
23 * implementations, ../md32_common.h;
25 * None of the above applies to this SHA-512 implementation. Reasons
26 * [in reverse order] are:
28 * - it's the only 64-bit hash algorithm at the time of this writing,
29 * there is no need for common collector/padding implementation [yet];
30 * - by supporting only one transform function [which operates on
31 * *aligned* data in input stream byte order, big-endian in this case]
32 * we minimize burden of maintenance in two ways: a) collector/padding
33 * function is simpler; b) only one transform function to stare at;
34 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
35 * apply a number of optimizations to mitigate potential performance
36 * penalties caused by previous design decision;
40 * Implementation relies on the fact that "long long" is 64-bit on
41 * both 32- and 64-bit platforms. If some compiler vendor comes up
42 * with 128-bit long long, adjustment to sha.h would be required.
43 * As this implementation relies on 64-bit integer type, it's totally
44 * inappropriate for platforms which don't support it, most notably
46 * <appro@fy.chalmers.se>
51 # include <openssl/crypto.h>
52 # include <openssl/sha.h>
53 # include <openssl/opensslv.h>
55 # include "cryptlib.h"
/*
 * Version string for this module: the literal "SHA-512" concatenated with
 * the OPENSSL_VERSION_PTEXT macro.
 */
57 const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;
/*
 * Define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA when one of the listed
 * x86, x86_64 or s390 compiler macros is defined. SHA512_Update tests this
 * macro with #ifndef in front of its copy-through-the-buffer path for
 * misaligned input.
 * NOTE(review): part of the condition and the closing #endif are on lines
 * absent from this listing - confirm against the full file.
 */
59 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
60 defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
61 defined(__s390__) || defined(__s390x__) || \
63 # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * SHA384_Init - prepare a SHA512_CTX for a SHA-384 computation.
 *
 * Loads the eight 64-bit chaining words c->h[0..7] with the constants
 * below and sets c->md_len to SHA384_DIGEST_LENGTH; SHA512_Final has a
 * case label for that length.
 *
 * NOTE(review): several original lines are absent from this listing
 * (braces, any other field resets, the return statement), so only the
 * visible assignments are described here.
 */
66 int SHA384_Init(SHA512_CTX *c)
/*
 * FIPS self-test hook, called before any state is written.
 * NOTE(review): presumably wrapped in a FIPS-only preprocessor guard that
 * is not visible here - confirm.
 */
69 FIPS_selftest_check();
/* Initial chaining values for the SHA-384 variant. */
71 c->h[0] = U64(0xcbbb9d5dc1059ed8);
72 c->h[1] = U64(0x629a292a367cd507);
73 c->h[2] = U64(0x9159015a3070dd17);
74 c->h[3] = U64(0x152fecd8f70e5939);
75 c->h[4] = U64(0x67332667ffc00b31);
76 c->h[5] = U64(0x8eb44a8768581511);
77 c->h[6] = U64(0xdb0c2e0d64f98fa7);
78 c->h[7] = U64(0x47b5481dbefa4fa4);
/* Record the digest length so finalisation emits the truncated output. */
82 c->md_len = SHA384_DIGEST_LENGTH;
/*
 * SHA512_Init - prepare a SHA512_CTX for a SHA-512 computation.
 *
 * Loads the eight 64-bit chaining words c->h[0..7] with the constants
 * below and sets c->md_len to SHA512_DIGEST_LENGTH; SHA512_Final has a
 * case label for that length.
 *
 * NOTE(review): several original lines are absent from this listing
 * (braces, any other field resets, the return statement), so only the
 * visible assignments are described here.
 */
86 int SHA512_Init(SHA512_CTX *c)
/*
 * FIPS self-test hook, called before any state is written.
 * NOTE(review): presumably wrapped in a FIPS-only preprocessor guard that
 * is not visible here - confirm.
 */
89 FIPS_selftest_check();
/* Initial chaining values for the full-length SHA-512 variant. */
91 c->h[0] = U64(0x6a09e667f3bcc908);
92 c->h[1] = U64(0xbb67ae8584caa73b);
93 c->h[2] = U64(0x3c6ef372fe94f82b);
94 c->h[3] = U64(0xa54ff53a5f1d36f1);
95 c->h[4] = U64(0x510e527fade682d1);
96 c->h[5] = U64(0x9b05688c2b3e6c1f);
97 c->h[6] = U64(0x1f83d9abfb41bd6b);
98 c->h[7] = U64(0x5be0cd19137e2179);
/* Record the digest length used when the hash is finalised. */
102 c->md_len = SHA512_DIGEST_LENGTH;
/*
 * Forward declaration of the block transform used by the functions below.
 * Callers in this file pass the context, a pointer to the input and a
 * block count (1, or len / sizeof(c->u) in SHA512_Update).
 * NOTE(review): any storage-class specifier or preprocessor guard around
 * this declaration is not visible in this listing.
 */
109 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
/*
 * SHA512_Final - pad the buffered data, run the final transform(s) and
 * write the digest to md.
 *
 * Visible steps:
 *   1. store the 0x80 marker byte at offset n of the block buffer c->u;
 *   2. if n is past the point where 16 bytes remain, zero the rest of the
 *      buffer, process that block and restart at offset 0;
 *   3. zero the buffer up to its last 16 bytes and store the counters
 *      c->Nh and c->Nl there;
 *   4. process the block;
 *   5. write the words of c->h[] to md, most significant byte first,
 *      6 words for SHA-384 or 8 words for SHA-512.
 *
 * NOTE(review): the declaration and initialisation of n, any update of n
 * after the marker byte, the switch head, the return statements and the
 * preprocessor conditionals are on lines absent from this listing.
 */
111 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
/* Byte view of the block buffer. */
113 unsigned char *p = (unsigned char *)c->u.p;
/* n presumably starts as the current fill level c->num - TODO confirm. */
116 p[n] = 0x80; /* There always is a room for one */
/*
 * Not enough space left for the 16-byte length field: zero the tail,
 * reset n to 0 and process this block before building the final one.
 */
118 if (n > (sizeof(c->u) - 16))
119 memset(p + n, 0, sizeof(c->u) - n), n = 0,
120 sha512_block_data_order(c, p, 1);
/* Zero everything from n up to the start of the length field. */
122 memset(p + n, 0, sizeof(c->u) - 16 - n);
/*
 * Two ways of storing the length follow back to back: whole-word stores
 * into the last two entries of c->u.d[], and byte-wise stores that place
 * the most significant byte of Nh at the lowest offset and the least
 * significant byte of Nl in the very last byte of the buffer.
 * NOTE(review): presumably alternatives selected by a preprocessor
 * conditional that is not visible here - confirm.
 */
124 c->u.d[SHA_LBLOCK - 2] = c->Nh;
125 c->u.d[SHA_LBLOCK - 1] = c->Nl;
127 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
128 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
129 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
130 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
131 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
132 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
133 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
134 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
135 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
136 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
137 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
138 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
139 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
140 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
141 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
142 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
/* Process the padded block that now ends with the length field. */
145 sha512_block_data_order(c, p, 1);
151 /* Let compiler decide if it's appropriate to unroll... */
/* SHA-384 output: the first SHA384_DIGEST_LENGTH / 8 words of c->h[]. */
152 case SHA384_DIGEST_LENGTH:
153 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
154 SHA_LONG64 t = c->h[n];
/* Emit the word one byte at a time, most significant byte first. */
156 *(md++) = (unsigned char)(t >> 56);
157 *(md++) = (unsigned char)(t >> 48);
158 *(md++) = (unsigned char)(t >> 40);
159 *(md++) = (unsigned char)(t >> 32);
160 *(md++) = (unsigned char)(t >> 24);
161 *(md++) = (unsigned char)(t >> 16);
162 *(md++) = (unsigned char)(t >> 8);
163 *(md++) = (unsigned char)(t);
/* SHA-512 output: SHA512_DIGEST_LENGTH / 8 words of c->h[]. */
166 case SHA512_DIGEST_LENGTH:
167 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
168 SHA_LONG64 t = c->h[n];
/* Same byte order as above: most significant byte first. */
170 *(md++) = (unsigned char)(t >> 56);
171 *(md++) = (unsigned char)(t >> 48);
172 *(md++) = (unsigned char)(t >> 40);
173 *(md++) = (unsigned char)(t >> 32);
174 *(md++) = (unsigned char)(t >> 24);
175 *(md++) = (unsigned char)(t >> 16);
176 *(md++) = (unsigned char)(t >> 8);
177 *(md++) = (unsigned char)(t);
180 /* ... as well as make sure md_len is not abused. */
/*
 * SHA384_Final - finalise a SHA-384 computation.
 * Thin wrapper: forwards md and c to SHA512_Final and returns its result.
 */
188 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
190 return SHA512_Final(md, c);
/*
 * SHA512_Update - absorb len bytes starting at _data into the hash state.
 *
 * Visible steps:
 *   1. advance the bit counters held in c->Nl and c->Nh;
 *   2. if the block buffer c->u is partly filled, top it up and, once
 *      full, process it;
 *   3. process all remaining whole blocks, either in place or by copying
 *      each one through the buffer when the input is misaligned and
 *      SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is not defined;
 *   4. copy the leftover tail into the buffer and record its length in
 *      c->num.
 *
 * NOTE(review): several branch conditions, the declaration of l, the
 * write-back of l and the return statements are on lines absent from this
 * listing; comments on the affected lines are hedged accordingly.
 */
193 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
/* p is the block buffer; data is a byte cursor over the caller's input. */
196 unsigned char *p = c->u.p;
197 const unsigned char *data = (const unsigned char *)_data;
/* Old Nl plus len * 8 (the added bit count), masked to 64 bits. */
202 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
/*
 * With a size_t of at least 8 bytes, the top three bits of len are lost
 * by the << 3 above; add them to the high counter word.
 */
205 if (sizeof(len) >= 8)
206 c->Nh += (((SHA_LONG64) len) >> 61);
/* Free space remaining in the block buffer. */
210 size_t n = sizeof(c->u) - c->num;
/*
 * Append all of the input to the buffer.
 * NOTE(review): presumably the branch taken when len < n - confirm.
 */
213 memcpy(p + c->num, data, len), c->num += len;
/* Otherwise fill the n free bytes, mark the buffer empty and process it. */
216 memcpy(p + c->num, data, n), c->num = 0;
218 sha512_block_data_order(c, p, 1);
/* At least one whole block of input is available. */
222 if (len >= sizeof(c->u)) {
223 # ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * Input is not aligned to the size of a buffer word: copy each block into
 * the buffer and process it from there, one block at a time.
 */
224 if ((size_t)data % sizeof(c->u.d[0]) != 0)
225 while (len >= sizeof(c->u))
226 memcpy(p, data, sizeof(c->u)),
227 sha512_block_data_order(c, p, 1),
228 len -= sizeof(c->u), data += sizeof(c->u);
/*
 * Process every whole block directly from the input. The pointer
 * arithmetic that follows leaves data at the first unprocessed byte and
 * len as the count of leftover bytes.
 */
231 sha512_block_data_order(c, data, len / sizeof(c->u)),
232 data += len, len %= sizeof(c->u), data -= len;
/* Keep the tail in the buffer for the next call and record its length. */
236 memcpy(p, data, len), c->num = (int)len;
/*
 * SHA384_Update - absorb len bytes at data into a SHA-384 computation.
 * Thin wrapper: forwards its arguments to SHA512_Update and returns its
 * result.
 */
241 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
243 return SHA512_Update(c, data, len);
/*
 * SHA512_Transform - run the block transform over the data at `data`,
 * passing a block count of 1 and updating the context c.
 */
246 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
248 sha512_block_data_order(c, data, 1);
/*
 * SHA384 - one-shot helper that hashes the n bytes at d.
 *
 * Visible steps: feed the input through SHA512_Update, finalise into md
 * with SHA512_Final, then wipe the local context with OPENSSL_cleanse.
 *
 * NOTE(review): the declaration and initialisation of the context c, any
 * handling of a NULL md and the return statement are on lines absent from
 * this listing. The static buffer m is presumably the fallback output
 * when md is NULL - confirm.
 */
251 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
/* Static digest-sized buffer; its use is not visible in this listing. */
254 static unsigned char m[SHA384_DIGEST_LENGTH];
259 SHA512_Update(&c, d, n);
260 SHA512_Final(md, &c);
/* Scrub the hash state from the local context before leaving. */
261 OPENSSL_cleanse(&c, sizeof(c));
/*
 * SHA512 - one-shot helper that hashes the n bytes at d.
 *
 * Visible steps: feed the input through SHA512_Update, finalise into md
 * with SHA512_Final, then wipe the local context with OPENSSL_cleanse.
 *
 * NOTE(review): the declaration and initialisation of the context c, any
 * handling of a NULL md and the return statement are on lines absent from
 * this listing. The static buffer m is presumably the fallback output
 * when md is NULL - confirm.
 */
265 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
/* Static digest-sized buffer; its use is not visible in this listing. */
268 static unsigned char m[SHA512_DIGEST_LENGTH];
273 SHA512_Update(&c, d, n);
274 SHA512_Final(md, &c);
/* Scrub the hash state from the local context before leaving. */
275 OPENSSL_cleanse(&c, sizeof(c));
/*
 * Table of 80 64-bit constants. The compression rounds below add K512[i]
 * into T1, where i is the round index.
 */
280 static const SHA_LONG64 K512[80] = {
281 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
282 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
283 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
284 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
285 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
286 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
287 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
288 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
289 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
290 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
291 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
292 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
293 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
294 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
295 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
296 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
297 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
298 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
299 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
300 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
301 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
302 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
303 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
304 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
305 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
306 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
307 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
308 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
309 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
310 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
311 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
312 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
313 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
314 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
315 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
316 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
317 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
318 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
319 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
320 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
/*
 * Compiler- and CPU-specific definitions of two primitives used by the
 * round code:
 *   ROTR(a,n)  - rotate a 64-bit value right by n bits;
 *   PULL64(x)  - read the 64-bit word x from the input, treating its
 *                bytes as most-significant-first.
 * Variants are given for GCC-style inline assembly (x86_64, i386, 64-bit
 * PowerPC) and for MSVC (the _rotr64 intrinsic on Win64, inline assembly
 * on 32-bit x86), followed by portable C definitions.
 * NOTE(review): several asm bodies, #else/#endif lines and the guards
 * around the portable definitions are on lines absent from this listing.
 */
324 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
325 # if defined(__x86_64) || defined(__x86_64__)
326 # define ROTR(a,n) ({ unsigned long ret; \
331 # if !defined(B_ENDIAN)
332 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
337 # elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
338 # if defined(I386_ONLY)
339 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
340 unsigned int hi=p[0],lo=p[1]; \
341 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
342 "roll $16,%%eax; roll $16,%%edx; "\
343 "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
344 : "=a"(lo),"=d"(hi) \
345 : "0"(lo),"1"(hi) : "cc"); \
346 ((SHA_LONG64)hi)<<32|lo; })
348 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
349 unsigned int hi=p[0],lo=p[1]; \
350 asm ("bswapl %0; bswapl %1;" \
351 : "=r"(lo),"=r"(hi) \
352 : "0"(lo),"1"(hi)); \
353 ((SHA_LONG64)hi)<<32|lo; })
355 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
356 # define ROTR(a,n) ({ unsigned long ret; \
357 asm ("rotrdi %0,%1,%2" \
359 : "r"(a),"K"(n)); ret; })
361 # elif defined(_MSC_VER)
362 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
363 # define ROTR(a,n) _rotr64((a),n)
365 # if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
366 # if defined(I386_ONLY)
367 static SHA_LONG64 __fastcall __pull64be(const void *x)
369 _asm mov edx,[ecx + 0]
370 _asm mov eax,[ecx + 4]
373 _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al}
375 static SHA_LONG64 __fastcall __pull64be(const void *x)
377 _asm mov edx,[ecx + 0]
378 _asm mov eax,[ecx + 4]
379 _asm bswap edx _asm bswap eax}
381 # define PULL64(x) __pull64be(&(x))
383 # pragma inline_depth(0)
/*
 * Portable PULL64: B(x,j) takes byte j of x and shifts it left by
 * (7-j)*8 bits; OR-ing the eight bytes builds the word with byte 0 as the
 * most significant.
 */
389 # define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
390 # define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
/* Portable ROTR: rotate right by s using two shifts and an OR. */
393 # define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
/*
 * Bitwise helper functions used by every round:
 *   Sigma0, Sigma1 - XOR of three right-rotations of x;
 *   sigma0, sigma1 - XOR of two right-rotations and one right shift of x;
 *   Ch(x,y,z)      - for each bit, y where x is 1 and z where x is 0;
 *   Maj(x,y,z)     - for each bit, the majority value among x, y and z.
 */
395 # define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
396 # define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
397 # define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
398 # define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
399 # define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
400 # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
/*
 * GO_FOR_SSE2(ctx,in,num) - optional hand-off to an external routine.
 * Defined when OPENSSL_IA32_SSE2 is set, assembler is allowed and
 * I386_ONLY is not defined. If bit 26 of OPENSSL_ia32cap_P is clear the
 * macro breaks out of its do-block and the caller continues with the C
 * code; otherwise it calls sha512_block_sse2 on ctx->h and returns from
 * the enclosing function.
 * NOTE(review): bit 26 is presumably the SSE2 capability flag, going by
 * the macro name - confirm. The end of the macro and any #else branch are
 * on lines absent from this listing.
 */
401 # if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
402 # define GO_FOR_SSE2(ctx,in,num) do { \
403 void sha512_block_sse2(void *,const void *,size_t); \
404 if (!(OPENSSL_ia32cap_P & (1<<26))) break; \
405 sha512_block_sse2(ctx->h,in,num); return; \
/*
 * Compact variant of the block transform, selected by
 * OPENSSL_SMALL_FOOTPRINT. The rounds run in two loops: rounds 0-15 take
 * their message word straight from the input via PULL64, and rounds 16-79
 * derive it from earlier words held in X, which is indexed modulo 16.
 *
 * NOTE(review): the rest of the parameter list, the declaration of X and
 * i, the loading and storing of the working variables, the rotation of
 * a..h after each round and the sigma0/sigma1 applications to s0/s1 are
 * on lines absent from this listing.
 */
408 # ifdef OPENSSL_SMALL_FOOTPRINT
409 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
/* View the input as an array of 64-bit words. */
412 const SHA_LONG64 *W = in;
413 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
/* May hand the whole call to the SSE2 routine and return. */
418 GO_FOR_SSE2(ctx, in, num);
/* Rounds 0-15: message word comes directly from the input block. */
432 for (i = 0; i < 16; i++) {
436 T1 = X[i] = PULL64(W[i]);
438 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
439 T2 = Sigma0(a) + Maj(a, b, c);
/* Rounds 16-79: message word is derived from earlier entries of X. */
450 for (; i < 80; i++) {
451 s0 = X[(i + 1) & 0x0f];
453 s1 = X[(i + 14) & 0x0f];
/* Update the schedule slot in place and start T1 from the new value. */
456 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
457 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
458 T2 = Sigma0(a) + Maj(a, b, c);
/*
 * Round macros for the unrolled block transform.
 *
 * ROUND_00_15(i,a,b,c,d,e,f,g,h): one round. Expects T1 to already hold
 * the message word; adds h, Sigma1(e), Ch(e,f,g) and K512[i] to it, then
 * overwrites h with Sigma0(a) + Maj(a,b,c) and adds T1 to both d and h.
 *
 * ROUND_16_80(i,...,X): computes the next message word from X (indexed
 * modulo 16) using sigma0 and sigma1, stores it back into X[i & 0x0f],
 * leaves it in T1 and then performs ROUND_00_15.
 *
 * Callers rotate the order of the a..h arguments from one round to the
 * next instead of moving values between the variables.
 */
483 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
484 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
485 h = Sigma0(a) + Maj(a,b,c); \
486 d += T1; h += T1; } while (0)
487 # define ROUND_16_80(i,a,b,c,d,e,f,g,h,X) do { \
488 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \
489 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \
490 T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
491 ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
/*
 * Unrolled variant of the block transform. Rounds 0-15 are written out
 * one by one and rounds 16-79 run eight per loop iteration, with the
 * a..h arguments rotated by one position each round.
 *
 * NOTE(review): the rest of the parameter list, the declaration of X and
 * i, the outer per-block loop, the loading of a..h from ctx->h and the
 * final accumulation into ctx->h are on lines absent from this listing.
 */
492 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
/* View the input as an array of 64-bit words. */
495 const SHA_LONG64 *W = in;
496 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
/* May hand the whole call to the SSE2 routine and return. */
501 GO_FOR_SSE2(ctx, in, num);
/*
 * First spelling of rounds 0-15. The statements that load T1 before each
 * round are not visible in this listing.
 * NOTE(review): presumably an alternative to the PULL64 sequence below,
 * selected by a preprocessor conditional (e.g. host byte order) - confirm.
 */
517 ROUND_00_15(0, a, b, c, d, e, f, g, h);
519 ROUND_00_15(1, h, a, b, c, d, e, f, g);
521 ROUND_00_15(2, g, h, a, b, c, d, e, f);
523 ROUND_00_15(3, f, g, h, a, b, c, d, e);
525 ROUND_00_15(4, e, f, g, h, a, b, c, d);
527 ROUND_00_15(5, d, e, f, g, h, a, b, c);
529 ROUND_00_15(6, c, d, e, f, g, h, a, b);
531 ROUND_00_15(7, b, c, d, e, f, g, h, a);
533 ROUND_00_15(8, a, b, c, d, e, f, g, h);
535 ROUND_00_15(9, h, a, b, c, d, e, f, g);
537 ROUND_00_15(10, g, h, a, b, c, d, e, f);
539 ROUND_00_15(11, f, g, h, a, b, c, d, e);
541 ROUND_00_15(12, e, f, g, h, a, b, c, d);
543 ROUND_00_15(13, d, e, f, g, h, a, b, c);
545 ROUND_00_15(14, c, d, e, f, g, h, a, b);
547 ROUND_00_15(15, b, c, d, e, f, g, h, a);
/*
 * Second spelling of rounds 0-15: each message word is read from the
 * input with PULL64, saved in X and placed in T1 before its round.
 */
549 T1 = X[0] = PULL64(W[0]);
550 ROUND_00_15(0, a, b, c, d, e, f, g, h);
551 T1 = X[1] = PULL64(W[1]);
552 ROUND_00_15(1, h, a, b, c, d, e, f, g);
553 T1 = X[2] = PULL64(W[2]);
554 ROUND_00_15(2, g, h, a, b, c, d, e, f);
555 T1 = X[3] = PULL64(W[3]);
556 ROUND_00_15(3, f, g, h, a, b, c, d, e);
557 T1 = X[4] = PULL64(W[4]);
558 ROUND_00_15(4, e, f, g, h, a, b, c, d);
559 T1 = X[5] = PULL64(W[5]);
560 ROUND_00_15(5, d, e, f, g, h, a, b, c);
561 T1 = X[6] = PULL64(W[6]);
562 ROUND_00_15(6, c, d, e, f, g, h, a, b);
563 T1 = X[7] = PULL64(W[7]);
564 ROUND_00_15(7, b, c, d, e, f, g, h, a);
565 T1 = X[8] = PULL64(W[8]);
566 ROUND_00_15(8, a, b, c, d, e, f, g, h);
567 T1 = X[9] = PULL64(W[9]);
568 ROUND_00_15(9, h, a, b, c, d, e, f, g);
569 T1 = X[10] = PULL64(W[10]);
570 ROUND_00_15(10, g, h, a, b, c, d, e, f);
571 T1 = X[11] = PULL64(W[11]);
572 ROUND_00_15(11, f, g, h, a, b, c, d, e);
573 T1 = X[12] = PULL64(W[12]);
574 ROUND_00_15(12, e, f, g, h, a, b, c, d);
575 T1 = X[13] = PULL64(W[13]);
576 ROUND_00_15(13, d, e, f, g, h, a, b, c);
577 T1 = X[14] = PULL64(W[14]);
578 ROUND_00_15(14, c, d, e, f, g, h, a, b);
579 T1 = X[15] = PULL64(W[15]);
580 ROUND_00_15(15, b, c, d, e, f, g, h, a);
/* Rounds 16-79, eight per iteration, using the schedule held in X. */
583 for (i = 16; i < 80; i += 8) {
584 ROUND_16_80(i + 0, a, b, c, d, e, f, g, h, X);
585 ROUND_16_80(i + 1, h, a, b, c, d, e, f, g, X);
586 ROUND_16_80(i + 2, g, h, a, b, c, d, e, f, X);
587 ROUND_16_80(i + 3, f, g, h, a, b, c, d, e, X);
588 ROUND_16_80(i + 4, e, f, g, h, a, b, c, d, X);
589 ROUND_16_80(i + 5, d, e, f, g, h, a, b, c, X);
590 ROUND_16_80(i + 6, c, d, e, f, g, h, a, b, X);
591 ROUND_16_80(i + 7, b, c, d, e, f, g, h, a, X);
609 # endif /* SHA512_ASM */
611 #else /* OPENSSL_NO_SHA512 */
614 * Sensitive compilers ("Compaq C V6.4-005 on OpenVMS VAX V7.3", for example)
615 * dislike a statement-free file, complaining: "%CC-W-EMPTYFILE, Source file
616 * does not contain any declarations."
621 #endif /* OPENSSL_NO_SHA512 */