/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
19 #include "blake2-impl.h"
21 #include "blake2-config.h"
27 #if defined(HAVE_SSE2)
28 #include <emmintrin.h>
29 // MSVC only defines _mm_set_epi64x for x86_64...
#if defined(_MSC_VER) && !defined(_M_X64)
/* Replacement for the intrinsic of the same name on 32-bit MSVC builds:
   returns a vector whose upper 64-bit lane is u1 and lower 64-bit lane is u0. */
static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
{
  /* _mm_set_epi32 lists its lanes from most to least significant; the casts
     make the intended truncation of each half to 32 bits explicit. */
  return _mm_set_epi32( ( int )( uint32_t )( u1 >> 32 ), ( int )( uint32_t )u1,
                        ( int )( uint32_t )( u0 >> 32 ), ( int )( uint32_t )u0 );
}
#endif
38 #if defined(HAVE_SSSE3)
39 #include <tmmintrin.h>
41 #if defined(HAVE_SSE4_1)
42 #include <smmintrin.h>
45 #include <immintrin.h>
47 #if defined(HAVE_XOP) && !defined(_MSC_VER)
48 #include <x86intrin.h>
53 #include "blake2b-round.h"
/* BLAKE2b initialization vector.  blake2b_init_param XORs it with the
   parameter block to form the initial chaining value, and blake2b_compress
   loads it into rows 3 and 4 of the working state. */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/* Message-word permutation table: 12 rows of 16 indices, each row a
   permutation of 0..15.  Rows 10 and 11 repeat rows 0 and 1.
   NOTE(review): no code visible in this file section indexes this table;
   presumably the round macros hard-code the same schedule - confirm. */
static const uint8_t blake2b_sigma[12][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 } ,
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
80 /* Some helper functions, not necessarily useful */
81 static inline int blake2b_set_lastnode( blake2b_state *S )
87 static inline int blake2b_clear_lastnode( blake2b_state *S )
93 static inline int blake2b_set_lastblock( blake2b_state *S )
95 if( S->last_node ) blake2b_set_lastnode( S );
101 static inline int blake2b_clear_lastblock( blake2b_state *S )
103 if( S->last_node ) blake2b_clear_lastnode( S );
110 static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
112 #if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
114 __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
116 S->t[0] = ( uint64_t )( t >> 0 );
117 S->t[1] = ( uint64_t )( t >> 64 );
120 S->t[1] += ( S->t[0] < inc );
126 // Parameter-related functions
127 static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
129 P->digest_length = digest_length;
133 static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
139 static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
145 static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
147 P->leaf_length = leaf_length;
151 static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
153 P->node_offset = node_offset;
157 static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
159 P->node_depth = node_depth;
163 static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
165 P->inner_length = inner_length;
169 static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
171 memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
175 static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
177 memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
181 static inline int blake2b_init0( blake2b_state *S )
183 memset( S, 0, sizeof( blake2b_state ) );
185 for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
192 #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init)
193 #define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param)
194 #define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key)
195 #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update)
196 #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final)
197 #define blake2b BLAKE2_IMPL_NAME(blake2b)
199 #if defined(__cplusplus)
202 int blake2b_init( blake2b_state *S, size_t outlen );
203 int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
204 int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
205 int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
206 int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
207 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
208 #if defined(__cplusplus)
212 /* init xors IV with input parameter block */
213 int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
216 //blake2b_init0( S );
217 v = ( uint8_t * )( blake2b_IV );
218 h = ( uint8_t * )( S->h );
219 p = ( uint8_t * )( P );
220 /* IV XOR ParamBlock */
221 memset( S, 0, sizeof( blake2b_state ) );
223 for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
225 S->outlen = P->digest_length;
230 /* Some sort of default parameter block initialization, for sequential blake2b */
232 int blake2b_init( blake2b_state *S, size_t outlen )
234 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
236 const blake2b_param P =
250 return blake2b_init_param( S, &P );
253 int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
255 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
257 if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
259 const blake2b_param P =
274 if( blake2b_init_param( S, &P ) < 0 )
278 uint8_t block[BLAKE2B_BLOCKBYTES];
279 memset( block, 0, BLAKE2B_BLOCKBYTES );
280 memcpy( block, key, keylen );
281 blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
282 secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
287 static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
289 __m128i row1l, row1h;
290 __m128i row2l, row2h;
291 __m128i row3l, row3h;
292 __m128i row4l, row4h;
295 #if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
296 const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
297 const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
299 #if defined(HAVE_SSE4_1)
300 const __m128i m0 = LOADU( block + 00 );
301 const __m128i m1 = LOADU( block + 16 );
302 const __m128i m2 = LOADU( block + 32 );
303 const __m128i m3 = LOADU( block + 48 );
304 const __m128i m4 = LOADU( block + 64 );
305 const __m128i m5 = LOADU( block + 80 );
306 const __m128i m6 = LOADU( block + 96 );
307 const __m128i m7 = LOADU( block + 112 );
309 const uint64_t m0 = ( ( uint64_t * )block )[ 0];
310 const uint64_t m1 = ( ( uint64_t * )block )[ 1];
311 const uint64_t m2 = ( ( uint64_t * )block )[ 2];
312 const uint64_t m3 = ( ( uint64_t * )block )[ 3];
313 const uint64_t m4 = ( ( uint64_t * )block )[ 4];
314 const uint64_t m5 = ( ( uint64_t * )block )[ 5];
315 const uint64_t m6 = ( ( uint64_t * )block )[ 6];
316 const uint64_t m7 = ( ( uint64_t * )block )[ 7];
317 const uint64_t m8 = ( ( uint64_t * )block )[ 8];
318 const uint64_t m9 = ( ( uint64_t * )block )[ 9];
319 const uint64_t m10 = ( ( uint64_t * )block )[10];
320 const uint64_t m11 = ( ( uint64_t * )block )[11];
321 const uint64_t m12 = ( ( uint64_t * )block )[12];
322 const uint64_t m13 = ( ( uint64_t * )block )[13];
323 const uint64_t m14 = ( ( uint64_t * )block )[14];
324 const uint64_t m15 = ( ( uint64_t * )block )[15];
326 row1l = LOADU( &S->h[0] );
327 row1h = LOADU( &S->h[2] );
328 row2l = LOADU( &S->h[4] );
329 row2h = LOADU( &S->h[6] );
330 row3l = LOADU( &blake2b_IV[0] );
331 row3h = LOADU( &blake2b_IV[2] );
332 row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
333 row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
346 row1l = _mm_xor_si128( row3l, row1l );
347 row1h = _mm_xor_si128( row3h, row1h );
348 STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
349 STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
350 row2l = _mm_xor_si128( row4l, row2l );
351 row2h = _mm_xor_si128( row4h, row2h );
352 STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
353 STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
358 int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
362 uint32_t left = S->buflen;
363 uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
367 memcpy( S->buf + left, in, fill ); // Fill buffer
369 blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
370 blake2b_compress( S, S->buf ); // Compress
371 memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
372 S->buflen -= BLAKE2B_BLOCKBYTES;
376 else // inlen <= fill
378 memcpy( S->buf + left, in, inlen );
379 S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
389 int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
391 if(S->outlen != outlen) return -1;
393 if( S->buflen > BLAKE2B_BLOCKBYTES )
395 blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
396 blake2b_compress( S, S->buf );
397 S->buflen -= BLAKE2B_BLOCKBYTES;
398 memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
401 blake2b_increment_counter( S, S->buflen );
402 blake2b_set_lastblock( S );
403 memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
404 blake2b_compress( S, S->buf );
405 memcpy( out, &S->h[0], outlen );
410 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
414 /* Verify parameters */
415 if ( NULL == in && inlen > 0 ) return -1;
417 if ( NULL == out ) return -1;
419 if( NULL == key && keylen > 0 ) return -1;
421 if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
423 if( keylen > BLAKE2B_KEYBYTES ) return -1;
427 if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
431 if( blake2b_init( S, outlen ) < 0 ) return -1;
434 if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
435 return blake2b_final( S, out, outlen );
#if defined(SUPERCOP)
/* SUPERCOP entry point: unkeyed BLAKE2b with a BLAKE2B_OUTBYTES-byte digest.
   Returns the result of blake2b, or -1 when inlen cannot be represented as a
   size_t (possible where size_t is narrower than unsigned long long). */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  const size_t len = ( size_t )inlen;

  if( ( unsigned long long )len != inlen ) return -1;

  return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, len, 0 );
}
#endif