From eb961c25243e0e1e224deabccd3ee300f1bd65f3 Mon Sep 17 00:00:00 2001 From: delphij Date: Thu, 3 Mar 2016 07:30:55 +0000 Subject: [PATCH] Fix multiple OpenSSL vulnerabilities. Security: FreeBSD-SA-16:12.openssl Approved by: so git-svn-id: https://svn.freebsd.org/base/releng/10.2@296341 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- UPDATING | 4 + crypto/openssl/apps/s_server.c | 50 +- crypto/openssl/crypto/bio/b_print.c | 187 +++-- crypto/openssl/crypto/bn/asm/x86_64-mont5.pl | 513 ++++++++----- crypto/openssl/crypto/bn/bn.h | 14 +- crypto/openssl/crypto/bn/bn_exp.c | 75 +- crypto/openssl/crypto/bn/bn_print.c | 17 +- crypto/openssl/crypto/dsa/dsa_ameth.c | 20 +- crypto/openssl/crypto/perlasm/x86_64-xlate.pl | 2 +- crypto/openssl/crypto/srp/srp.h | 10 + crypto/openssl/crypto/srp/srp_vfy.c | 57 +- crypto/openssl/ssl/s2_lib.c | 6 + crypto/openssl/ssl/s3_lib.c | 54 ++ crypto/openssl/ssl/ssl_lib.c | 7 + crypto/openssl/util/libeay.num | 2 + secure/lib/libcrypto/amd64/x86_64-mont5.S | 681 ++++++++++++++---- sys/conf/newvers.sh | 2 +- 17 files changed, 1232 insertions(+), 469 deletions(-) diff --git a/UPDATING b/UPDATING index d3b3cb920..0ec21dd9c 100644 --- a/UPDATING +++ b/UPDATING @@ -16,6 +16,10 @@ from older versions of FreeBSD, try WITHOUT_CLANG to bootstrap to the tip of stable/10, and then rebuild without this option. The bootstrap process from older version of current is a bit fragile. +20160303 p13 FreeBSD-SA-16:12.openssl + + Fix multiple vulnerabilities of OpenSSL. + 20160130 p12 FreeBSD-SA-16:11.openssl Fix OpenSSL SSLv2 ciphersuite downgrade vulnerability. [SA-16:11] diff --git a/crypto/openssl/apps/s_server.c b/crypto/openssl/apps/s_server.c index b58e5e07c..a53cadd66 100644 --- a/crypto/openssl/apps/s_server.c +++ b/crypto/openssl/apps/s_server.c @@ -416,6 +416,8 @@ typedef struct srpsrvparm_st { static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) { srpsrvparm *p = (srpsrvparm *) arg; + int ret = SSL3_AL_FATAL; + if (p->login == NULL && p->user == NULL) { p->login = SSL_get_srp_username(s); BIO_printf(bio_err, "SRP username = \"%s\"\n", p->login); @@ -424,21 +426,25 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) if (p->user == NULL) { BIO_printf(bio_err, "User %s doesn't exist\n", p->login); - return SSL3_AL_FATAL; + goto err; } + if (SSL_set_srp_server_param (s, p->user->N, p->user->g, p->user->s, p->user->v, p->user->info) < 0) { *ad = SSL_AD_INTERNAL_ERROR; - return SSL3_AL_FATAL; + goto err; } BIO_printf(bio_err, "SRP parameters set: username = \"%s\" info=\"%s\" \n", p->login, p->user->info); - /* need to check whether there are memory leaks */ + ret = SSL_ERROR_NONE; + +err: + SRP_user_pwd_free(p->user); p->user = NULL; p->login = NULL; - return SSL_ERROR_NONE; + return ret; } #endif @@ -2244,9 +2250,10 @@ static int sv_body(char *hostname, int s, unsigned char *context) #ifndef OPENSSL_NO_SRP while (SSL_get_error(con, k) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during write\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2300,9 +2307,10 @@ static int sv_body(char *hostname, int s, unsigned char *context) #ifndef OPENSSL_NO_SRP while (SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during read\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2387,9 +2395,10 @@ static int init_ssl_connection(SSL *con) while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP during accept %s\n", srp_callback_parm.login); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2616,9 +2625,10 @@ static int www_body(char *hostname, int s, unsigned char *context) && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP during accept %s\n", srp_callback_parm.login); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2654,6 +2664,22 @@ static int www_body(char *hostname, int s, unsigned char *context) goto err; } else { BIO_printf(bio_s_out, "read R BLOCK\n"); +#ifndef OPENSSL_NO_SRP + if (BIO_should_io_special(io) + && BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) { + BIO_printf(bio_s_out, "LOOKUP renego during read\n"); + SRP_user_pwd_free(srp_callback_parm.user); + srp_callback_parm.user = + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); + if (srp_callback_parm.user) + BIO_printf(bio_s_out, "LOOKUP done %s\n", + srp_callback_parm.user->info); + else + BIO_printf(bio_s_out, "LOOKUP not successful\n"); + continue; + } +#endif #if defined(OPENSSL_SYS_NETWARE) delay(1000); #elif !defined(OPENSSL_SYS_MSDOS) && !defined(__DJGPP__) diff --git a/crypto/openssl/crypto/bio/b_print.c b/crypto/openssl/crypto/bio/b_print.c index 7c81e25d4..90248fa2a 100644 --- a/crypto/openssl/crypto/bio/b_print.c +++ b/crypto/openssl/crypto/bio/b_print.c @@ -125,16 +125,16 @@ # define LLONG long #endif -static void fmtstr(char **, char **, size_t *, size_t *, - const char *, int, int, int); -static void fmtint(char **, char **, size_t *, size_t *, - LLONG, int, int, int, int); -static void fmtfp(char **, char **, size_t *, size_t *, - LDOUBLE, int, int, int); -static void doapr_outch(char **, char **, size_t *, size_t *, int); -static void _dopr(char **sbuffer, char **buffer, - size_t *maxlen, size_t *retlen, int *truncated, - const char *format, va_list args); +static int fmtstr(char **, char **, size_t *, size_t *, + const char *, int, int, int); +static int fmtint(char **, char **, size_t *, size_t *, + LLONG, int, int, int, int); +static int fmtfp(char **, char **, size_t *, size_t *, + LDOUBLE, int, int, int); +static int doapr_outch(char **, char **, size_t *, size_t *, int); +static int _dopr(char **sbuffer, char **buffer, + size_t *maxlen, size_t *retlen, int *truncated, + const char *format, va_list args); /* format read states */ #define DP_S_DEFAULT 0 @@ -165,7 +165,7 @@ static void _dopr(char **sbuffer, char **buffer, #define char_to_int(p) (p - '0') #define OSSL_MAX(p,q) ((p >= q) ? p : q) -static void +static int _dopr(char **sbuffer, char **buffer, size_t *maxlen, @@ -196,7 +196,8 @@ _dopr(char **sbuffer, if (ch == '%') state = DP_S_FLAGS; else - doapr_outch(sbuffer, buffer, &currlen, maxlen, ch); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, ch)) + return 0; ch = *format++; break; case DP_S_FLAGS: @@ -302,8 +303,9 @@ _dopr(char **sbuffer, value = va_arg(args, int); break; } - fmtint(sbuffer, buffer, &currlen, maxlen, - value, 10, min, max, flags); + if (!fmtint(sbuffer, buffer, &currlen, maxlen, value, 10, min, + max, flags)) + return 0; break; case 'X': flags |= DP_F_UP; @@ -326,17 +328,19 @@ _dopr(char **sbuffer, value = (LLONG) va_arg(args, unsigned int); break; } - fmtint(sbuffer, buffer, &currlen, maxlen, value, - ch == 'o' ? 8 : (ch == 'u' ? 10 : 16), - min, max, flags); + if (!fmtint(sbuffer, buffer, &currlen, maxlen, value, + ch == 'o' ? 8 : (ch == 'u' ? 10 : 16), + min, max, flags)) + return 0; break; case 'f': if (cflags == DP_C_LDOUBLE) fvalue = va_arg(args, LDOUBLE); else fvalue = va_arg(args, double); - fmtfp(sbuffer, buffer, &currlen, maxlen, - fvalue, min, max, flags); + if (!fmtfp(sbuffer, buffer, &currlen, maxlen, fvalue, min, max, + flags)) + return 0; break; case 'E': flags |= DP_F_UP; @@ -355,8 +359,9 @@ _dopr(char **sbuffer, fvalue = va_arg(args, double); break; case 'c': - doapr_outch(sbuffer, buffer, &currlen, maxlen, - va_arg(args, int)); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, + va_arg(args, int))) + return 0; break; case 's': strvalue = va_arg(args, char *); @@ -366,13 +371,15 @@ _dopr(char **sbuffer, else max = *maxlen; } - fmtstr(sbuffer, buffer, &currlen, maxlen, strvalue, - flags, min, max); + if (!fmtstr(sbuffer, buffer, &currlen, maxlen, strvalue, + flags, min, max)) + return 0; break; case 'p': value = (long)va_arg(args, void *); - fmtint(sbuffer, buffer, &currlen, maxlen, - value, 16, min, max, flags | DP_F_NUM); + if (!fmtint(sbuffer, buffer, &currlen, maxlen, + value, 16, min, max, flags | DP_F_NUM)) + return 0; break; case 'n': /* XXX */ if (cflags == DP_C_SHORT) { @@ -394,7 +401,8 @@ _dopr(char **sbuffer, } break; case '%': - doapr_outch(sbuffer, buffer, &currlen, maxlen, ch); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, ch)) + return 0; break; case 'w': /* not supported yet, treat as next char */ @@ -418,46 +426,56 @@ _dopr(char **sbuffer, *truncated = (currlen > *maxlen - 1); if (*truncated) currlen = *maxlen - 1; - doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0'); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0')) + return 0; *retlen = currlen - 1; - return; + return 1; } -static void +static int fmtstr(char **sbuffer, char **buffer, size_t *currlen, size_t *maxlen, const char *value, int flags, int min, int max) { - int padlen, strln; + int padlen; + size_t strln; int cnt = 0; if (value == 0) value = ""; - for (strln = 0; value[strln]; ++strln) ; + + strln = strlen(value); + if (strln > INT_MAX) + strln = INT_MAX; + padlen = min - strln; - if (padlen < 0) + if (min < 0 || padlen < 0) padlen = 0; if (flags & DP_F_MINUS) padlen = -padlen; while ((padlen > 0) && (cnt < max)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; --padlen; ++cnt; } while (*value && (cnt < max)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, *value++); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, *value++)) + return 0; ++cnt; } while ((padlen < 0) && (cnt < max)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; ++padlen; ++cnt; } + return 1; } -static void +static int fmtint(char **sbuffer, char **buffer, size_t *currlen, @@ -517,37 +535,44 @@ fmtint(char **sbuffer, /* spaces */ while (spadlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; --spadlen; } /* sign */ if (signvalue) - doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue)) + return 0; /* prefix */ while (*prefix) { - doapr_outch(sbuffer, buffer, currlen, maxlen, *prefix); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, *prefix)) + return 0; prefix++; } /* zeros */ if (zpadlen > 0) { while (zpadlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '0'); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, '0')) + return 0; --zpadlen; } } /* digits */ - while (place > 0) - doapr_outch(sbuffer, buffer, currlen, maxlen, convert[--place]); + while (place > 0) { + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, convert[--place])) + return 0; + } /* left justified spaces */ while (spadlen < 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; ++spadlen; } - return; + return 1; } static LDOUBLE abs_val(LDOUBLE value) @@ -578,7 +603,7 @@ static long roundv(LDOUBLE value) return intpart; } -static void +static int fmtfp(char **sbuffer, char **buffer, size_t *currlen, @@ -657,47 +682,61 @@ fmtfp(char **sbuffer, if ((flags & DP_F_ZERO) && (padlen > 0)) { if (signvalue) { - doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue)) + return 0; --padlen; signvalue = 0; } while (padlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '0'); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '0')) + return 0; --padlen; } } while (padlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; --padlen; } - if (signvalue) - doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue); + if (signvalue && !doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue)) + return 0; - while (iplace > 0) - doapr_outch(sbuffer, buffer, currlen, maxlen, iconvert[--iplace]); + while (iplace > 0) { + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, iconvert[--iplace])) + return 0; + } /* * Decimal point. This should probably use locale to find the correct * char to print out. */ if (max > 0 || (flags & DP_F_NUM)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '.'); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '.')) + return 0; - while (fplace > 0) - doapr_outch(sbuffer, buffer, currlen, maxlen, fconvert[--fplace]); + while (fplace > 0) { + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, + fconvert[--fplace])) + return 0; + } } while (zpadlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '0'); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '0')) + return 0; --zpadlen; } while (padlen < 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; ++padlen; } + return 1; } -static void +#define BUFFER_INC 1024 + +static int doapr_outch(char **sbuffer, char **buffer, size_t *currlen, size_t *maxlen, int c) { @@ -708,24 +747,25 @@ doapr_outch(char **sbuffer, assert(*currlen <= *maxlen); if (buffer && *currlen == *maxlen) { - *maxlen += 1024; + if (*maxlen > INT_MAX - BUFFER_INC) + return 0; + + *maxlen += BUFFER_INC; if (*buffer == NULL) { *buffer = OPENSSL_malloc(*maxlen); - if (!*buffer) { - /* Panic! Can't really do anything sensible. Just return */ - return; - } + if (*buffer == NULL) + return 0; if (*currlen > 0) { assert(*sbuffer != NULL); memcpy(*buffer, *sbuffer, *currlen); } *sbuffer = NULL; } else { - *buffer = OPENSSL_realloc(*buffer, *maxlen); - if (!*buffer) { - /* Panic! Can't really do anything sensible. Just return */ - return; - } + char *tmpbuf; + tmpbuf = OPENSSL_realloc(*buffer, *maxlen); + if (tmpbuf == NULL) + return 0; + *buffer = tmpbuf; } } @@ -736,7 +776,7 @@ doapr_outch(char **sbuffer, (*buffer)[(*currlen)++] = (char)c; } - return; + return 1; } /***************************************************************************/ @@ -768,7 +808,11 @@ int BIO_vprintf(BIO *bio, const char *format, va_list args) dynbuf = NULL; CRYPTO_push_info("doapr()"); - _dopr(&hugebufp, &dynbuf, &hugebufsize, &retlen, &ignored, format, args); + if (!_dopr(&hugebufp, &dynbuf, &hugebufsize, &retlen, &ignored, format, + args)) { + OPENSSL_free(dynbuf); + return -1; + } if (dynbuf) { ret = BIO_write(bio, dynbuf, (int)retlen); OPENSSL_free(dynbuf); @@ -803,7 +847,8 @@ int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args) size_t retlen; int truncated; - _dopr(&buf, NULL, &n, &retlen, &truncated, format, args); + if(!_dopr(&buf, NULL, &n, &retlen, &truncated, format, args)) + return -1; if (truncated) /* diff --git a/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl b/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl index dae0fe245..235979181 100755 --- a/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -66,60 +66,113 @@ bn_mul_mont_gather5: .align 16 .Lmul_enter: mov ${num}d,${num}d - mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument + lea .Linc(%rip),%r10 push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) + .Lmul_alloca: -___ -$code.=<<___; mov %rsp,%rax lea 2($num),%r11 neg %r11 - lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)) + lea -264(%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)+256+8) and \$-1024,%rsp # minimize TLB usage mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul_body: - mov $bp,%r12 # reassign $bp + lea 128($bp),%r12 # reassign $bp (+size optimization) ___ $bp="%r12"; $STRIDE=2**5*8; # 5 is "window size" $N=$STRIDE/4; # should match cache line size $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bp # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 + lea 24-112(%rsp,$num,8),%r10# place the mask after tp[num+3] (+ICache optimization) + and \$-16,%r10 + + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 + .byte 0x67 + movdqa %xmm4,%xmm3 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($k+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($k+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($k+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($k+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($k+0)+112`(%r10) + + paddd %xmm2,%xmm3 + .byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($k+1)+112`(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($k+2)+112`(%r10) + pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register + + pand `16*($k+1)-128`($bp),%xmm1 + pand `16*($k+2)-128`($bp),%xmm2 + movdqa %xmm3,`16*($k+3)+112`(%r10) + pand `16*($k+3)-128`($bp),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm4 + movdqa `16*($k+1)-128`($bp),%xmm5 + movdqa `16*($k+2)-128`($bp),%xmm2 + pand `16*($k+0)+112`(%r10),%xmm4 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($k+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($k+3)+112`(%r10),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + por %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - movq %xmm0,$m0 # m0=bp[0] mov ($n0),$n0 # pull n0[0] value @@ -128,29 +181,14 @@ $code.=<<___; xor $i,$i # i=0 xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$lo0 mov ($np),%rax - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $lo0,$m1 # "tp[0]"*n0 mov %rdx,$hi0 - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$lo0 # discarded mov 8($ap),%rax @@ -183,8 +221,6 @@ $code.=<<___; cmp $num,$j jne .L1st - movq %xmm0,$m0 # bp[1] - add %rax,$hi1 mov ($ap),%rax # ap[0] adc \$0,%rdx @@ -204,33 +240,46 @@ $code.=<<___; jmp .Louter .align 16 .Louter: + lea 24+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) + and \$-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($k=0;$k<$STRIDE/16;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm0 + movdqa `16*($k+1)-128`($bp),%xmm1 + movdqa `16*($k+2)-128`($bp),%xmm2 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+0)-128`(%rdx),%xmm0 + pand `16*($k+1)-128`(%rdx),%xmm1 + por %xmm0,%xmm4 + pand `16*($k+2)-128`(%rdx),%xmm2 + por %xmm1,%xmm5 + pand `16*($k+3)-128`(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bp),$bp + movq %xmm0,$m0 # m0=bp[i] + xor $j,$j # j=0 mov $n0,$m1 mov (%rsp),$lo0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mulq $m0 # ap[0]*bp[i] add %rax,$lo0 # ap[0]*bp[i]+tp[0] mov ($np),%rax adc \$0,%rdx - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $lo0,$m1 # tp[0]*n0 mov %rdx,$hi0 - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$lo0 # discarded mov 8($ap),%rax @@ -266,8 +315,6 @@ $code.=<<___; cmp $num,$j jne .Linner - movq %xmm0,$m0 # bp[i+1] - add %rax,$hi1 mov ($ap),%rax # ap[0] adc \$0,%rdx @@ -321,13 +368,7 @@ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps (%rsi),%xmm6 - movaps 0x10(%rsi),%xmm7 - lea 0x28(%rsi),%rsi -___ -$code.=<<___; + mov (%rsi),%r15 mov 8(%rsi),%r14 mov 16(%rsi),%r13 @@ -348,91 +389,130 @@ $code.=<<___; bn_mul4x_mont_gather5: .Lmul4x_enter: mov ${num}d,${num}d - mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument + lea .Linc(%rip),%r10 push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) + .Lmul4x_alloca: -___ -$code.=<<___; mov %rsp,%rax lea 4($num),%r11 neg %r11 - lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)) + lea -256(%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)+256) and \$-1024,%rsp # minimize TLB usage mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul4x_body: mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp - mov %rdx,%r12 # reassign $bp + lea 128(%rdx),%r12 # reassign $bp (+size optimization) ___ $bp="%r12"; $STRIDE=2**5*8; # 5 is "window size" $N=$STRIDE/4; # should match cache line size $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bp # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 + lea 32-112(%rsp,$num,8),%r10# place the mask after tp[num+4] (+ICache optimization) + + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + .byte 0x67,0x67 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 + .byte 0x67 + movdqa %xmm4,%xmm3 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($k+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($k+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($k+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($k+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($k+0)+112`(%r10) + + paddd %xmm2,%xmm3 + .byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($k+1)+112`(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($k+2)+112`(%r10) + pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register + + pand `16*($k+1)-128`($bp),%xmm1 + pand `16*($k+2)-128`($bp),%xmm2 + movdqa %xmm3,`16*($k+3)+112`(%r10) + pand `16*($k+3)-128`($bp),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm4 + movdqa `16*($k+1)-128`($bp),%xmm5 + movdqa `16*($k+2)-128`($bp),%xmm2 + pand `16*($k+0)+112`(%r10),%xmm4 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($k+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($k+3)+112`(%r10),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + por %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - movq %xmm0,$m0 # m0=bp[0] + mov ($n0),$n0 # pull n0[0] value mov ($ap),%rax xor $i,$i # i=0 xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$A[0] mov ($np),%rax - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $A[0],$m1 # "tp[0]"*n0 mov %rdx,$A[1] - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$A[0] # discarded mov 8($ap),%rax @@ -550,8 +630,6 @@ $code.=<<___; mov $N[1],-16(%rsp,$j,8) # tp[j-1] mov %rdx,$N[0] - movq %xmm0,$m0 # bp[1] - xor $N[1],$N[1] add $A[0],$N[0] adc \$0,$N[1] @@ -561,12 +639,34 @@ $code.=<<___; lea 1($i),$i # i++ .align 4 .Louter4x: + lea 32+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($k=0;$k<$STRIDE/16;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm0 + movdqa `16*($k+1)-128`($bp),%xmm1 + movdqa `16*($k+2)-128`($bp),%xmm2 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+0)-128`(%rdx),%xmm0 + pand `16*($k+1)-128`(%rdx),%xmm1 + por %xmm0,%xmm4 + pand `16*($k+2)-128`(%rdx),%xmm2 + por %xmm1,%xmm5 + pand `16*($k+3)-128`(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bp),$bp + movq %xmm0,$m0 # m0=bp[i] + xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 mov (%rsp),$A[0] mov $n0,$m1 @@ -575,18 +675,9 @@ $code.=<<___; mov ($np),%rax adc \$0,%rdx - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $A[0],$m1 # tp[0]*n0 mov %rdx,$A[1] - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$A[0] # "$N[0]", discarded mov 8($ap),%rax @@ -718,7 +809,6 @@ $code.=<<___; mov $N[0],-24(%rsp,$j,8) # tp[j-1] mov %rdx,$N[0] - movq %xmm0,$m0 # bp[i+1] mov $N[1],-16(%rsp,$j,8) # tp[j-1] xor $N[1],$N[1] @@ -809,13 +899,7 @@ ___ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps (%rsi),%xmm6 - movaps 0x10(%rsi),%xmm7 - lea 0x28(%rsi),%rsi -___ -$code.=<<___; + mov (%rsi),%r15 mov 8(%rsi),%r14 mov 16(%rsi),%r13 @@ -830,8 +914,8 @@ ___ }}} { -my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order +my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9d") : # Win64 order + ("%rdi","%rsi","%rdx","%ecx"); # Unix order my $out=$inp; my $STRIDE=2**5*8; my $N=$STRIDE/4; @@ -859,53 +943,89 @@ bn_scatter5: .type bn_gather5,\@abi-omnipotent .align 16 bn_gather5: -___ -$code.=<<___ if ($win64); -.LSEH_begin_bn_gather5: +.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases # I can't trust assembler to use specific encoding:-( - .byte 0x48,0x83,0xec,0x28 #sub \$0x28,%rsp - .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) - .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) + .byte 0x4c,0x8d,0x14,0x24 # lea (%rsp),%r10 + .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 # sub $0x108,%rsp + lea .Linc(%rip),%rax + and \$-16,%rsp # shouldn't be formally required + + movd $idx,%xmm5 + movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 + lea 128($tbl),%r11 # size optimization + lea 128(%rsp),%rax # size optimization + + pshufd \$0,%xmm5,%xmm5 # broadcast $idx + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 ___ +######################################################################## +# calculate mask by comparing 0..31 to $idx and save result to stack +# +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 +___ +$code.=<<___ if ($i); + movdqa %xmm3,`16*($i-1)-128`(%rax) +___ +$code.=<<___; + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($i+0)-128`(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($i+1)-128`(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($i+2)-128`(%rax) + movdqa %xmm4,%xmm2 +___ +} $code.=<<___; - mov $idx,%r11 - shr \$`log($N/8)/log(2)`,$idx - and \$`$N/8-1`,%r11 - not $idx - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,$idx # 5 is "window size" - lea 96($tbl,%r11,8),$tbl # pointer within 1st cache line - movq 0(%rax,$idx,8),%xmm4 # set of masks denoting which - movq 8(%rax,$idx,8),%xmm5 # cache line contains element - movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument - movq 24(%rax,$idx,8),%xmm7 + movdqa %xmm3,`16*($i-1)-128`(%rax) jmp .Lgather -.align 16 -.Lgather: - movq `0*$STRIDE/4-96`($tbl),%xmm0 - movq `1*$STRIDE/4-96`($tbl),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($tbl),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($tbl),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - por %xmm2,%xmm0 - lea $STRIDE($tbl),$tbl - por %xmm3,%xmm0 +.align 32 +.Lgather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`(%r11),%xmm0 + movdqa `16*($i+1)-128`(%r11),%xmm1 + movdqa `16*($i+2)-128`(%r11),%xmm2 + pand `16*($i+0)-128`(%rax),%xmm0 + movdqa `16*($i+3)-128`(%r11),%xmm3 + pand `16*($i+1)-128`(%rax),%xmm1 + por %xmm0,%xmm4 + pand `16*($i+2)-128`(%rax),%xmm2 + por %xmm1,%xmm5 + pand `16*($i+3)-128`(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + lea $STRIDE(%r11),%r11 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 movq %xmm0,($out) # m0=bp[0] lea 8($out),$out sub \$1,$num jnz .Lgather -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - lea 0x28(%rsp),%rsp -___ -$code.=<<___; + + lea (%r10),%rsp ret .LSEH_end_bn_gather5: .size bn_gather5,.-bn_gather5 @@ -913,9 +1033,9 @@ ___ } $code.=<<___; .align 64 -.Lmagic_masks: - .long 0,0, 0,0, 0,0, -1,-1 - .long 0,0, 0,0, 0,0, 0,0 +.Linc: + .long 0,0, 1,1 + .long 2,2, 2,2 .asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " ___ @@ -954,7 +1074,7 @@ mul_handler: cmp %r10,%rbx # context->RipR13 mov %r14,232($context) # restore context->R14 mov %r15,240($context) # restore context->R15 - movups %xmm0,512($context) # restore context->Xmm6 - movups %xmm1,528($context) # restore context->Xmm7 .Lcommon_seh_tail: mov 8(%rax),%rdi @@ -1057,10 +1173,9 @@ mul_handler: .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] .align 8 .LSEH_info_bn_gather5: - .byte 0x01,0x0d,0x05,0x00 - .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 - .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 - .byte 0x04,0x42,0x00,0x00 #sub rsp,0x28 + .byte 0x01,0x0b,0x03,0x0a + .byte 0x0b,0x01,0x21,0x00 # sub rsp,0x108 + .byte 0x04,0xa3,0x00,0x00 # lea r10,(rsp), set_frame r10 .align 8 ___ } diff --git a/crypto/openssl/crypto/bn/bn.h b/crypto/openssl/crypto/bn/bn.h index 47d8c71d9..b39258d4d 100644 --- a/crypto/openssl/crypto/bn/bn.h +++ b/crypto/openssl/crypto/bn/bn.h @@ -125,6 +125,7 @@ #ifndef HEADER_BN_H # define HEADER_BN_H +# include # include # ifndef OPENSSL_NO_FP_API # include /* FILE */ @@ -739,8 +740,17 @@ const BIGNUM *BN_get0_nist_prime_521(void); /* library internal functions */ -# define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ - (a):bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2)) +# define bn_expand(a,bits) \ + ( \ + bits > (INT_MAX - BN_BITS2 + 1) ? \ + NULL \ + : \ + (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) ? \ + (a) \ + : \ + bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2) \ + ) + # define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) BIGNUM *bn_expand2(BIGNUM *a, int words); # ifndef OPENSSL_NO_DEPRECATED diff --git a/crypto/openssl/crypto/bn/bn_exp.c b/crypto/openssl/crypto/bn/bn_exp.c index 27146c89e..52ab0b23f 100644 --- a/crypto/openssl/crypto/bn/bn_exp.c +++ b/crypto/openssl/crypto/bn/bn_exp.c @@ -110,6 +110,7 @@ */ #include "cryptlib.h" +#include "constant_time_locl.h" #include "bn_lcl.h" #include @@ -535,15 +536,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, - int width) + int window) { - size_t i, j; + int i, j; + int width = 1 << window; + BN_ULONG *table = (BN_ULONG *)buf; if (top > b->top) top = b->top; /* this works because 'buf' is explicitly * zeroed */ - for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { - buf[j] = ((unsigned char *)b->d)[i]; + for (i = 0, j = idx; i < top; i++, j += width) { + table[j] = b->d[i]; } return 1; @@ -551,15 +554,51 @@ static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, - int width) + int window) { - size_t i, j; + int i, j; + int width = 1 << window; + volatile BN_ULONG *table = (volatile BN_ULONG *)buf; if (bn_wexpand(b, top) == NULL) return 0; - for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { - ((unsigned char *)b->d)[i] = buf[j]; + if (window <= 3) { + for (i = 0; i < top; i++, table += width) { + BN_ULONG acc = 0; + + for (j = 0; j < width; j++) { + acc |= table[j] & + ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); + } + + b->d[i] = acc; + } + } else { + int xstride = 1 << (window - 2); + BN_ULONG y0, y1, y2, y3; + + i = idx >> (window - 2); /* equivalent of idx / xstride */ + idx &= xstride - 1; /* equivalent of idx % xstride */ + + y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); + y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); + y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); + y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); + + for (i = 0; i < top; i++, table += width) { + BN_ULONG acc = 0; + + for (j = 0; j < xstride; j++) { + acc |= ( (table[j + 0 * xstride] & y0) | + (table[j + 1 * xstride] & y1) | + (table[j + 2 * xstride] & y2) | + (table[j + 3 * xstride] & y3) ) + & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); + } + + b->d[i] = acc; + } } b->top = top; @@ -782,9 +821,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } else #endif { - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) goto err; /* @@ -796,15 +835,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (window > 1) { if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF - (&tmp, top, powerbuf, 2, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, + window)) goto err; for (i = 3; i < numPowers; i++) { /* Calculate a^i = a^(i-1) * a */ if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF - (&tmp, top, powerbuf, i, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, + window)) goto err; } } @@ -812,8 +851,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, bits--; for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - if (!MOD_EXP_CTIME_COPY_FROM_PREBUF - (&tmp, top, powerbuf, wvalue, numPowers)) + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, + window)) goto err; /* @@ -833,8 +872,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, /* * Fetch the appropriate pre-computed value from the pre-buf */ - if (!MOD_EXP_CTIME_COPY_FROM_PREBUF - (&am, top, powerbuf, wvalue, numPowers)) + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, + window)) goto err; /* Multiply the result into the intermediate result */ diff --git a/crypto/openssl/crypto/bn/bn_print.c b/crypto/openssl/crypto/bn/bn_print.c index ab10b957b..bfa31efc5 100644 --- a/crypto/openssl/crypto/bn/bn_print.c +++ b/crypto/openssl/crypto/bn/bn_print.c @@ -58,6 +58,7 @@ #include #include +#include #include "cryptlib.h" #include #include "bn_lcl.h" @@ -189,7 +190,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a) a++; } - for (i = 0; isxdigit((unsigned char)a[i]); i++) ; + for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++) + continue; + + if (i > INT_MAX/4) + goto err; num = i + neg; if (bn == NULL) @@ -204,7 +209,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a) BN_zero(ret); } - /* i is the number of hex digests; */ + /* i is the number of hex digits */ if (bn_expand(ret, i * 4) == NULL) goto err; @@ -260,7 +265,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a) a++; } - for (i = 0; isdigit((unsigned char)a[i]); i++) ; + for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++) + continue; + + if (i > INT_MAX/4) + goto err; num = i + neg; if (bn == NULL) @@ -278,7 +287,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) BN_zero(ret); } - /* i is the number of digests, a bit of an over expand; */ + /* i is the number of digits, a bit of an over expand */ if (bn_expand(ret, i * 4) == NULL) goto err; diff --git a/crypto/openssl/crypto/dsa/dsa_ameth.c b/crypto/openssl/crypto/dsa/dsa_ameth.c index a2840eaed..d94b377b2 100644 --- a/crypto/openssl/crypto/dsa/dsa_ameth.c +++ b/crypto/openssl/crypto/dsa/dsa_ameth.c @@ -191,6 +191,8 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) STACK_OF(ASN1_TYPE) *ndsa = NULL; DSA *dsa = NULL; + int ret = 0; + if (!PKCS8_pkey_get0(NULL, &p, &pklen, &palg, p8)) return 0; X509_ALGOR_get0(NULL, &ptype, &pval, palg); @@ -262,23 +264,21 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) } EVP_PKEY_assign_DSA(pkey, dsa); - BN_CTX_free(ctx); - if (ndsa) - sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free); - else - ASN1_STRING_clear_free(privkey); - return 1; + ret = 1; + goto done; decerr: DSAerr(DSA_F_DSA_PRIV_DECODE, EVP_R_DECODE_ERROR); dsaerr: + DSA_free(dsa); + done: BN_CTX_free(ctx); - if (privkey) + if (ndsa) + sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free); + else ASN1_STRING_clear_free(privkey); - sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free); - DSA_free(dsa); - return 0; + return ret; } static int dsa_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) diff --git a/crypto/openssl/crypto/perlasm/x86_64-xlate.pl b/crypto/openssl/crypto/perlasm/x86_64-xlate.pl index 56d9b64b6..b262b8dae 100755 --- a/crypto/openssl/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/openssl/crypto/perlasm/x86_64-xlate.pl @@ -121,7 +121,7 @@ my %globals; $self->{sz} = ""; } elsif ($self->{op} =~ /^v/) { # VEX $self->{sz} = ""; - } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { + } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { $self->{sz} = ""; } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { $self->{op} = $1; diff --git a/crypto/openssl/crypto/srp/srp.h b/crypto/openssl/crypto/srp/srp.h index d072536fe..028892a1f 100644 --- a/crypto/openssl/crypto/srp/srp.h +++ b/crypto/openssl/crypto/srp/srp.h @@ -82,16 +82,21 @@ typedef struct SRP_gN_cache_st { DECLARE_STACK_OF(SRP_gN_cache) typedef struct SRP_user_pwd_st { + /* Owned by us. */ char *id; BIGNUM *s; BIGNUM *v; + /* Not owned by us. */ const BIGNUM *g; const BIGNUM *N; + /* Owned by us. */ char *info; } SRP_user_pwd; DECLARE_STACK_OF(SRP_user_pwd) +void SRP_user_pwd_free(SRP_user_pwd *user_pwd); + typedef struct SRP_VBASE_st { STACK_OF(SRP_user_pwd) *users_pwd; STACK_OF(SRP_gN_cache) *gN_cache; @@ -115,7 +120,12 @@ DECLARE_STACK_OF(SRP_gN) SRP_VBASE *SRP_VBASE_new(char *seed_key); int SRP_VBASE_free(SRP_VBASE *vb); int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file); + +/* This method ignores the configured seed and fails for an unknown user. */ SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +/* NOTE: unlike in SRP_VBASE_get_by_user, caller owns the returned pointer.*/ +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username); + char *SRP_create_verifier(const char *user, const char *pass, char **salt, char **verifier, const char *N, const char *g); int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, diff --git a/crypto/openssl/crypto/srp/srp_vfy.c b/crypto/openssl/crypto/srp/srp_vfy.c index 50f75d7e4..20989ef0f 100644 --- a/crypto/openssl/crypto/srp/srp_vfy.c +++ b/crypto/openssl/crypto/srp/srp_vfy.c @@ -185,7 +185,7 @@ static char *t_tob64(char *dst, const unsigned char *src, int size) return olddst; } -static void SRP_user_pwd_free(SRP_user_pwd *user_pwd) +void SRP_user_pwd_free(SRP_user_pwd *user_pwd) { if (user_pwd == NULL) return; @@ -247,6 +247,24 @@ static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v) return (vinfo->s != NULL && vinfo->v != NULL); } +static SRP_user_pwd *srp_user_pwd_dup(SRP_user_pwd *src) +{ + SRP_user_pwd *ret; + + if (src == NULL) + return NULL; + if ((ret = SRP_user_pwd_new()) == NULL) + return NULL; + + SRP_user_pwd_set_gN(ret, src->g, src->N); + if (!SRP_user_pwd_set_ids(ret, src->id, src->info) + || !SRP_user_pwd_set_sv_BN(ret, BN_dup(src->s), BN_dup(src->v))) { + SRP_user_pwd_free(ret); + return NULL; + } + return ret; +} + SRP_VBASE *SRP_VBASE_new(char *seed_key) { SRP_VBASE *vb = (SRP_VBASE *)OPENSSL_malloc(sizeof(SRP_VBASE)); @@ -468,21 +486,50 @@ int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file) } -SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username) +static SRP_user_pwd *find_user(SRP_VBASE *vb, char *username) { int i; SRP_user_pwd *user; - unsigned char digv[SHA_DIGEST_LENGTH]; - unsigned char digs[SHA_DIGEST_LENGTH]; - EVP_MD_CTX ctxt; if (vb == NULL) return NULL; + for (i = 0; i < sk_SRP_user_pwd_num(vb->users_pwd); i++) { user = sk_SRP_user_pwd_value(vb->users_pwd, i); if (strcmp(user->id, username) == 0) return user; } + + return NULL; +} + +/* + * This method ignores the configured seed and fails for an unknown user. + * Ownership of the returned pointer is not released to the caller. + * In other words, caller must not free the result. + */ +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username) +{ + return find_user(vb, username); +} + +/* + * Ownership of the returned pointer is released to the caller. + * In other words, caller must free the result once done. + */ +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username) +{ + SRP_user_pwd *user; + unsigned char digv[SHA_DIGEST_LENGTH]; + unsigned char digs[SHA_DIGEST_LENGTH]; + EVP_MD_CTX ctxt; + + if (vb == NULL) + return NULL; + + if ((user = find_user(vb, username)) != NULL) + return srp_user_pwd_dup(user); + if ((vb->seed_key == NULL) || (vb->default_g == NULL) || (vb->default_N == NULL)) return NULL; diff --git a/crypto/openssl/ssl/s2_lib.c b/crypto/openssl/ssl/s2_lib.c index 7e3674a68..82c173154 100644 --- a/crypto/openssl/ssl/s2_lib.c +++ b/crypto/openssl/ssl/s2_lib.c @@ -156,6 +156,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 128, }, +# if 0 /* RC4_128_EXPORT40_WITH_MD5 */ { 1, @@ -171,6 +172,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 40, 128, }, +# endif /* RC2_128_CBC_WITH_MD5 */ { @@ -188,6 +190,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 128, }, +# if 0 /* RC2_128_CBC_EXPORT40_WITH_MD5 */ { 1, @@ -203,6 +206,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 40, 128, }, +# endif # ifndef OPENSSL_NO_IDEA /* IDEA_128_CBC_WITH_MD5 */ @@ -222,6 +226,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { }, # endif +# if 0 /* DES_64_CBC_WITH_MD5 */ { 1, @@ -237,6 +242,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 56, 56, }, +# endif /* DES_192_EDE3_CBC_WITH_MD5 */ { diff --git a/crypto/openssl/ssl/s3_lib.c b/crypto/openssl/ssl/s3_lib.c index de917d3f8..f0682ad96 100644 --- a/crypto/openssl/ssl/s3_lib.c +++ b/crypto/openssl/ssl/s3_lib.c @@ -203,6 +203,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 03 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_RC4_40_MD5, @@ -217,6 +218,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 04 */ { @@ -251,6 +253,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 06 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_RC2_40_MD5, @@ -265,6 +268,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 07 */ #ifndef OPENSSL_NO_IDEA @@ -285,6 +289,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { #endif /* Cipher 08 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_DES_40_CBC_SHA, @@ -299,8 +304,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 09 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_DES_64_CBC_SHA, @@ -315,6 +322,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 0A */ { @@ -334,6 +342,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* The DH ciphers */ /* Cipher 0B */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 0, SSL3_TXT_DH_DSS_DES_40_CBC_SHA, @@ -348,8 +357,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 0C */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 0, /* not implemented (non-ephemeral DH) */ SSL3_TXT_DH_DSS_DES_64_CBC_SHA, @@ -364,6 +375,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 0D */ { @@ -382,6 +394,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 0E */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 0, /* not implemented (non-ephemeral DH) */ SSL3_TXT_DH_RSA_DES_40_CBC_SHA, @@ -396,8 +409,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 0F */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 0, /* not implemented (non-ephemeral DH) */ SSL3_TXT_DH_RSA_DES_64_CBC_SHA, @@ -412,6 +427,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 10 */ { @@ -431,6 +447,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* The Ephemeral DH ciphers */ /* Cipher 11 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_DSS_DES_40_CBC_SHA, @@ -445,8 +462,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 12 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_DSS_DES_64_CBC_SHA, @@ -461,6 +480,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 13 */ { @@ -479,6 +499,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 14 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_RSA_DES_40_CBC_SHA, @@ -493,8 +514,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 15 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_RSA_DES_64_CBC_SHA, @@ -509,6 +532,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 16 */ { @@ -527,6 +551,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 17 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_ADH_RC4_40_MD5, @@ -541,6 +566,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 18 */ { @@ -559,6 +585,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 19 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_ADH_DES_40_CBC_SHA, @@ -573,8 +600,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 1A */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_ADH_DES_64_CBC_SHA, @@ -589,6 +618,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 1B */ { @@ -660,6 +690,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { #ifndef OPENSSL_NO_KRB5 /* The Kerberos ciphers*/ /* Cipher 1E */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_64_CBC_SHA, @@ -674,6 +705,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 1F */ { @@ -724,6 +756,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 22 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_64_CBC_MD5, @@ -738,6 +771,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 23 */ { @@ -788,6 +822,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 26 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_40_CBC_SHA, @@ -802,8 +837,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +# endif /* Cipher 27 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC2_40_CBC_SHA, @@ -818,8 +855,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif /* Cipher 28 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC4_40_SHA, @@ -834,8 +873,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif /* Cipher 29 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_40_CBC_MD5, @@ -850,8 +891,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +# endif /* Cipher 2A */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC2_40_CBC_MD5, @@ -866,8 +909,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif /* Cipher 2B */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC4_40_MD5, @@ -882,6 +927,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif #endif /* OPENSSL_NO_KRB5 */ /* New AES ciphersuites */ @@ -1305,6 +1351,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { # endif /* Cipher 62 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_RSA_EXPORT1024_WITH_DES_CBC_SHA, @@ -1319,8 +1366,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 63 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_DHE_DSS_EXPORT1024_WITH_DES_CBC_SHA, @@ -1335,8 +1384,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 64 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_RSA_EXPORT1024_WITH_RC4_56_SHA, @@ -1351,8 +1402,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 128, }, +# endif /* Cipher 65 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_DHE_DSS_EXPORT1024_WITH_RC4_56_SHA, @@ -1367,6 +1420,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 128, }, +# endif /* Cipher 66 */ { diff --git a/crypto/openssl/ssl/ssl_lib.c b/crypto/openssl/ssl/ssl_lib.c index e11746a69..e8558d59b 100644 --- a/crypto/openssl/ssl/ssl_lib.c +++ b/crypto/openssl/ssl/ssl_lib.c @@ -1896,6 +1896,13 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) */ ret->options |= SSL_OP_LEGACY_SERVER_CONNECT; + /* + * Disable SSLv2 by default, callers that want to enable SSLv2 will have to + * explicitly clear this option via either of SSL_CTX_clear_options() or + * SSL_clear_options(). + */ + ret->options |= SSL_OP_NO_SSLv2; + return (ret); err: SSLerr(SSL_F_SSL_CTX_NEW, ERR_R_MALLOC_FAILURE); diff --git a/crypto/openssl/util/libeay.num b/crypto/openssl/util/libeay.num index b594caf2c..a83c3beda 100644 --- a/crypto/openssl/util/libeay.num +++ b/crypto/openssl/util/libeay.num @@ -1807,6 +1807,8 @@ ASN1_UTCTIME_get 2350 NOEXIST::FUNCTION: X509_REQ_digest 2362 EXIST::FUNCTION:EVP X509_CRL_digest 2391 EXIST::FUNCTION:EVP ASN1_STRING_clear_free 2392 EXIST::FUNCTION: +SRP_VBASE_get1_by_user 2393 EXIST::FUNCTION:SRP +SRP_user_pwd_free 2394 EXIST::FUNCTION:SRP d2i_ASN1_SET_OF_PKCS7 2397 NOEXIST::FUNCTION: X509_ALGOR_cmp 2398 EXIST::FUNCTION: EVP_CIPHER_CTX_set_key_length 2399 EXIST::FUNCTION: diff --git a/secure/lib/libcrypto/amd64/x86_64-mont5.S b/secure/lib/libcrypto/amd64/x86_64-mont5.S index b0b34423b..5820e12ec 100644 --- a/secure/lib/libcrypto/amd64/x86_64-mont5.S +++ b/secure/lib/libcrypto/amd64/x86_64-mont5.S @@ -14,47 +14,153 @@ bn_mul_mont_gather5: .align 16 .Lmul_enter: movl %r9d,%r9d - movl 8(%rsp),%r10d + movd 8(%rsp),%xmm5 + leaq .Linc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 + +.Lmul_alloca: movq %rsp,%rax leaq 2(%r9),%r11 negq %r11 - leaq (%rsp,%r11,8),%rsp + leaq -264(%rsp,%r11,8),%rsp andq $-1024,%rsp movq %rax,8(%rsp,%r9,8) .Lmul_body: - movq %rdx,%r12 - movq %r10,%r11 - shrq $3,%r10 - andq $7,%r11 - notq %r10 - leaq .Lmagic_masks(%rip),%rax - andq $3,%r10 - leaq 96(%r12,%r11,8),%r12 - movq 0(%rax,%r10,8),%xmm4 - movq 8(%rax,%r10,8),%xmm5 - movq 16(%rax,%r10,8),%xmm6 - movq 24(%rax,%r10,8),%xmm7 - - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + leaq 128(%rdx),%r12 + movdqa 0(%r10),%xmm0 + movdqa 16(%r10),%xmm1 + leaq 24-112(%rsp,%r9,8),%r10 + andq $-16,%r10 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + pshufd $78,%xmm0,%xmm1 + por %xmm1,%xmm0 leaq 256(%r12),%r12 - por %xmm3,%xmm0 - .byte 102,72,15,126,195 movq (%r8),%r8 @@ -63,29 +169,14 @@ bn_mul_mont_gather5: xorq %r14,%r14 xorq %r15,%r15 - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 - movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq %r10,%rbp movq %rdx,%r11 - por %xmm2,%xmm0 - leaq 256(%r12),%r12 - por %xmm3,%xmm0 - mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax @@ -118,8 +209,6 @@ bn_mul_mont_gather5: cmpq %r9,%r15 jne .L1st -.byte 102,72,15,126,195 - addq %rax,%r13 movq (%rsi),%rax adcq $0,%rdx @@ -139,33 +228,76 @@ bn_mul_mont_gather5: jmp .Louter .align 16 .Louter: + leaq 24+128(%rsp,%r9,8),%rdx + andq $-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + pshufd $78,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + xorq %r15,%r15 movq %r8,%rbp movq (%rsp),%r10 - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 - mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq %r10,%rbp movq %rdx,%r11 - por %xmm2,%xmm0 - leaq 256(%r12),%r12 - por %xmm3,%xmm0 - mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax @@ -201,8 +333,6 @@ bn_mul_mont_gather5: cmpq %r9,%r15 jne .Linner -.byte 102,72,15,126,195 - addq %rax,%r13 movq (%rsi),%rax adcq $0,%rdx @@ -256,6 +386,7 @@ bn_mul_mont_gather5: movq 8(%rsp,%r9,8),%rsi movq $1,%rax + movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 @@ -271,78 +402,170 @@ bn_mul_mont_gather5: bn_mul4x_mont_gather5: .Lmul4x_enter: movl %r9d,%r9d - movl 8(%rsp),%r10d + movd 8(%rsp),%xmm5 + leaq .Linc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 + +.Lmul4x_alloca: movq %rsp,%rax leaq 4(%r9),%r11 negq %r11 - leaq (%rsp,%r11,8),%rsp + leaq -256(%rsp,%r11,8),%rsp andq $-1024,%rsp movq %rax,8(%rsp,%r9,8) .Lmul4x_body: movq %rdi,16(%rsp,%r9,8) - movq %rdx,%r12 - movq %r10,%r11 - shrq $3,%r10 - andq $7,%r11 - notq %r10 - leaq .Lmagic_masks(%rip),%rax - andq $3,%r10 - leaq 96(%r12,%r11,8),%r12 - movq 0(%rax,%r10,8),%xmm4 - movq 8(%rax,%r10,8),%xmm5 - movq 16(%rax,%r10,8),%xmm6 - movq 24(%rax,%r10,8),%xmm7 - - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + leaq 128(%rdx),%r12 + movdqa 0(%r10),%xmm0 + movdqa 16(%r10),%xmm1 + leaq 32-112(%rsp,%r9,8),%r10 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67,0x67 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + pshufd $78,%xmm0,%xmm1 + por %xmm1,%xmm0 leaq 256(%r12),%r12 - por %xmm3,%xmm0 - .byte 102,72,15,126,195 + movq (%r8),%r8 movq (%rsi),%rax xorq %r14,%r14 xorq %r15,%r15 - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 - movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq %r10,%rbp movq %rdx,%r11 - por %xmm2,%xmm0 - leaq 256(%r12),%r12 - por %xmm3,%xmm0 - mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax @@ -460,8 +683,6 @@ bn_mul4x_mont_gather5: movq %rdi,-16(%rsp,%r15,8) movq %rdx,%r13 -.byte 102,72,15,126,195 - xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi @@ -471,12 +692,64 @@ bn_mul4x_mont_gather5: leaq 1(%r14),%r14 .align 4 .Louter4x: + leaq 32+128(%rsp,%r9,8),%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + pshufd $78,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + xorq %r15,%r15 - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 movq (%rsp),%r10 movq %r8,%rbp @@ -485,18 +758,9 @@ bn_mul4x_mont_gather5: movq (%rcx),%rax adcq $0,%rdx - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq %r10,%rbp movq %rdx,%r11 - por %xmm2,%xmm0 - leaq 256(%r12),%r12 - por %xmm3,%xmm0 - mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax @@ -628,7 +892,6 @@ bn_mul4x_mont_gather5: movq %r13,-24(%rsp,%r15,8) movq %rdx,%r13 -.byte 102,72,15,126,195 movq %rdi,-16(%rsp,%r15,8) xorq %rdi,%rdi @@ -712,6 +975,7 @@ bn_mul4x_mont_gather5: movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax + movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 @@ -744,42 +1008,167 @@ bn_scatter5: .type bn_gather5,@function .align 16 bn_gather5: - movq %rcx,%r11 - shrq $3,%rcx - andq $7,%r11 - notq %rcx - leaq .Lmagic_masks(%rip),%rax - andq $3,%rcx - leaq 96(%rdx,%r11,8),%rdx - movq 0(%rax,%rcx,8),%xmm4 - movq 8(%rax,%rcx,8),%xmm5 - movq 16(%rax,%rcx,8),%xmm6 - movq 24(%rax,%rcx,8),%xmm7 +.LSEH_begin_bn_gather5: + +.byte 0x4c,0x8d,0x14,0x24 +.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 + leaq .Linc(%rip),%rax + andq $-16,%rsp + + movd %ecx,%xmm5 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 128(%rdx),%r11 + leaq 128(%rsp),%rax + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-128(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-112(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-96(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-80(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-48(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-16(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,0(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,16(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,48(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,80(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,96(%rax) + movdqa %xmm4,%xmm2 + movdqa %xmm3,112(%rax) jmp .Lgather -.align 16 -.Lgather: - movq -96(%rdx),%xmm0 - movq -32(%rdx),%xmm1 - pand %xmm4,%xmm0 - movq 32(%rdx),%xmm2 - pand %xmm5,%xmm1 - movq 96(%rdx),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - por %xmm2,%xmm0 - leaq 256(%rdx),%rdx - por %xmm3,%xmm0 +.align 32 +.Lgather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r11),%xmm0 + movdqa -112(%r11),%xmm1 + movdqa -96(%r11),%xmm2 + pand -128(%rax),%xmm0 + movdqa -80(%r11),%xmm3 + pand -112(%rax),%xmm1 + por %xmm0,%xmm4 + pand -96(%rax),%xmm2 + por %xmm1,%xmm5 + pand -80(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r11),%xmm0 + movdqa -48(%r11),%xmm1 + movdqa -32(%r11),%xmm2 + pand -64(%rax),%xmm0 + movdqa -16(%r11),%xmm3 + pand -48(%rax),%xmm1 + por %xmm0,%xmm4 + pand -32(%rax),%xmm2 + por %xmm1,%xmm5 + pand -16(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + pand 0(%rax),%xmm0 + movdqa 48(%r11),%xmm3 + pand 16(%rax),%xmm1 + por %xmm0,%xmm4 + pand 32(%rax),%xmm2 + por %xmm1,%xmm5 + pand 48(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r11),%xmm0 + movdqa 80(%r11),%xmm1 + movdqa 96(%r11),%xmm2 + pand 64(%rax),%xmm0 + movdqa 112(%r11),%xmm3 + pand 80(%rax),%xmm1 + por %xmm0,%xmm4 + pand 96(%rax),%xmm2 + por %xmm1,%xmm5 + pand 112(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + leaq 256(%r11),%r11 + pshufd $78,%xmm4,%xmm0 + por %xmm4,%xmm0 movq %xmm0,(%rdi) leaq 8(%rdi),%rdi subq $1,%rsi jnz .Lgather + + leaq (%r10),%rsp .byte 0xf3,0xc3 .LSEH_end_bn_gather5: .size bn_gather5,.-bn_gather5 .align 64 -.Lmagic_masks: -.long 0,0, 0,0, 0,0, -1,-1 -.long 0,0, 0,0, 0,0, 0,0 +.Linc: +.long 0,0, 1,1 +.long 2,2, 2,2 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh index eb1b8c575..c3cdf52a0 100644 --- a/sys/conf/newvers.sh +++ b/sys/conf/newvers.sh @@ -32,7 +32,7 @@ TYPE="FreeBSD" REVISION="10.2" -BRANCH="RELEASE-p12" +BRANCH="RELEASE-p13" if [ "X${BRANCH_OVERRIDE}" != "X" ]; then BRANCH=${BRANCH_OVERRIDE} fi -- 2.42.0