From 72d32bf80dfdcfe0e69da200b66f195e919653f7 Mon Sep 17 00:00:00 2001 From: jkim Date: Tue, 1 Mar 2016 17:57:01 +0000 Subject: Import OpenSSL 1.0.2g. --- CHANGES | 134 +++- Configure | 8 +- Makefile | 6 +- Makefile.shared | 6 +- NEWS | 13 + README | 2 +- apps/apps.c | 8 +- apps/apps.h | 2 +- apps/pkeyutl.c | 90 ++- apps/req.c | 4 +- apps/rsautl.c | 6 +- apps/s_client.c | 2 - apps/s_server.c | 49 +- config | 3 +- crypto/asn1/tasn_dec.c | 14 +- crypto/bio/b_print.c | 187 +++-- crypto/bio/bio.h | 4 +- crypto/bio/bss_mem.c | 6 +- crypto/bn/Makefile | 4 +- crypto/bn/asm/rsaz-avx2.pl | 219 +++--- crypto/bn/asm/rsaz-x86_64.pl | 375 +++++++--- crypto/bn/asm/x86_64-mont.pl | 227 +++--- crypto/bn/asm/x86_64-mont5.pl | 1276 +++++++++++++++++++--------------- crypto/bn/bn.h | 14 +- crypto/bn/bn_exp.c | 103 ++- crypto/bn/bn_print.c | 17 +- crypto/bn/bn_recp.c | 1 + crypto/cmac/cmac.c | 8 + crypto/cryptlib.c | 6 +- crypto/crypto.h | 2 +- crypto/dh/dh.h | 2 +- crypto/dh/dh_check.c | 7 +- crypto/dsa/dsa_ameth.c | 22 +- crypto/dso/dso_lib.c | 1 + crypto/ec/asm/ecp_nistz256-x86_64.pl | 11 +- crypto/ec/ecp_nistp224.c | 4 +- crypto/ec/ecp_nistp256.c | 4 +- crypto/ec/ecp_nistp521.c | 4 +- crypto/ec/ectest.c | 9 + crypto/engine/eng_dyn.c | 4 +- crypto/evp/e_des.c | 11 +- crypto/evp/e_des3.c | 13 +- crypto/modes/asm/aesni-gcm-x86_64.pl | 4 +- crypto/modes/asm/ghash-x86_64.pl | 2 +- crypto/modes/ctr128.c | 41 +- crypto/opensslconf.h | 12 + crypto/opensslv.h | 6 +- crypto/perlasm/x86_64-xlate.pl | 7 +- crypto/pkcs7/pk7_smime.c | 17 + crypto/rsa/rsa_sign.c | 4 +- crypto/srp/srp.h | 10 + crypto/srp/srp_vfy.c | 57 +- crypto/stack/stack.c | 2 +- crypto/x509/x509_vfy.c | 70 +- doc/apps/ciphers.pod | 59 +- doc/apps/pkeyutl.pod | 13 + doc/apps/req.pod | 9 +- doc/apps/s_client.pod | 12 +- doc/apps/s_server.pod | 8 +- doc/crypto/BIO_s_mem.pod | 4 +- doc/ssl/SSL_CONF_cmd.pod | 33 +- doc/ssl/SSL_CTX_new.pod | 168 +++-- doc/ssl/SSL_CTX_set_options.pod | 10 + doc/ssl/ssl.pod | 77 +- engines/e_capi.c | 32 + ssl/Makefile | 69 +- ssl/s2_lib.c | 6 + ssl/s3_lib.c | 69 +- ssl/ssl.h | 1 - ssl/ssl_conf.c | 10 +- ssl/ssl_err.c | 1 - ssl/ssl_lib.c | 14 +- ssl/sslv2conftest.c | 231 ++++++ util/libeay.num | 2 + util/mk1mf.pl | 4 +- util/pl/BC-32.pl | 4 +- util/pl/Mingw32.pl | 2 +- util/pl/OS2-EMX.pl | 4 +- util/pl/VC-32.pl | 10 +- util/pl/linux.pl | 2 +- util/pl/netware.pl | 8 +- util/pl/ultrix.pl | 2 +- util/pl/unix.pl | 2 +- 83 files changed, 2668 insertions(+), 1318 deletions(-) create mode 100644 ssl/sslv2conftest.c diff --git a/CHANGES b/CHANGES index 18693f7..7578f7e 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,138 @@ OpenSSL CHANGES _______________ + Changes between 1.0.2f and 1.0.2g [1 Mar 2016] + + * Disable weak ciphers in SSLv3 and up in default builds of OpenSSL. + Builds that are not configured with "enable-weak-ssl-ciphers" will not + provide any "EXPORT" or "LOW" strength ciphers. + [Viktor Dukhovni] + + * Disable SSLv2 default build, default negotiation and weak ciphers. SSLv2 + is by default disabled at build-time. Builds that are not configured with + "enable-ssl2" will not support SSLv2. Even if "enable-ssl2" is used, + users who want to negotiate SSLv2 via the version-flexible SSLv23_method() + will need to explicitly call either of: + + SSL_CTX_clear_options(ctx, SSL_OP_NO_SSLv2); + or + SSL_clear_options(ssl, SSL_OP_NO_SSLv2); + + as appropriate. Even if either of those is used, or the application + explicitly uses the version-specific SSLv2_method() or its client and + server variants, SSLv2 ciphers vulnerable to exhaustive search key + recovery have been removed. Specifically, the SSLv2 40-bit EXPORT + ciphers, and SSLv2 56-bit DES are no longer available. + (CVE-2016-0800) + [Viktor Dukhovni] + + *) Fix a double-free in DSA code + + A double free bug was discovered when OpenSSL parses malformed DSA private + keys and could lead to a DoS attack or memory corruption for applications + that receive DSA private keys from untrusted sources. This scenario is + considered rare. + + This issue was reported to OpenSSL by Adam Langley(Google/BoringSSL) using + libFuzzer. + (CVE-2016-0705) + [Stephen Henson] + + *) Disable SRP fake user seed to address a server memory leak. + + Add a new method SRP_VBASE_get1_by_user that handles the seed properly. + + SRP_VBASE_get_by_user had inconsistent memory management behaviour. + In order to fix an unavoidable memory leak, SRP_VBASE_get_by_user + was changed to ignore the "fake user" SRP seed, even if the seed + is configured. + + Users should use SRP_VBASE_get1_by_user instead. Note that in + SRP_VBASE_get1_by_user, caller must free the returned value. Note + also that even though configuring the SRP seed attempts to hide + invalid usernames by continuing the handshake with fake + credentials, this behaviour is not constant time and no strong + guarantees are made that the handshake is indistinguishable from + that of a valid user. + (CVE-2016-0798) + [Emilia Käsper] + + *) Fix BN_hex2bn/BN_dec2bn NULL pointer deref/heap corruption + + In the BN_hex2bn function the number of hex digits is calculated using an + int value |i|. Later |bn_expand| is called with a value of |i * 4|. For + large values of |i| this can result in |bn_expand| not allocating any + memory because |i * 4| is negative. This can leave the internal BIGNUM data + field as NULL leading to a subsequent NULL ptr deref. For very large values + of |i|, the calculation |i * 4| could be a positive value smaller than |i|. + In this case memory is allocated to the internal BIGNUM data field, but it + is insufficiently sized leading to heap corruption. A similar issue exists + in BN_dec2bn. This could have security consequences if BN_hex2bn/BN_dec2bn + is ever called by user applications with very large untrusted hex/dec data. + This is anticipated to be a rare occurrence. + + All OpenSSL internal usage of these functions use data that is not expected + to be untrusted, e.g. config file data or application command line + arguments. If user developed applications generate config file data based + on untrusted data then it is possible that this could also lead to security + consequences. This is also anticipated to be rare. + + This issue was reported to OpenSSL by Guido Vranken. + (CVE-2016-0797) + [Matt Caswell] + + *) Fix memory issues in BIO_*printf functions + + The internal |fmtstr| function used in processing a "%s" format string in + the BIO_*printf functions could overflow while calculating the length of a + string and cause an OOB read when printing very long strings. + + Additionally the internal |doapr_outch| function can attempt to write to an + OOB memory location (at an offset from the NULL pointer) in the event of a + memory allocation failure. In 1.0.2 and below this could be caused where + the size of a buffer to be allocated is greater than INT_MAX. E.g. this + could be in processing a very long "%s" format string. Memory leaks can + also occur. + + The first issue may mask the second issue dependent on compiler behaviour. + These problems could enable attacks where large amounts of untrusted data + is passed to the BIO_*printf functions. If applications use these functions + in this way then they could be vulnerable. OpenSSL itself uses these + functions when printing out human-readable dumps of ASN.1 data. Therefore + applications that print this data could be vulnerable if the data is from + untrusted sources. OpenSSL command line applications could also be + vulnerable where they print out ASN.1 data, or if untrusted data is passed + as command line arguments. + + Libssl is not considered directly vulnerable. Additionally certificates etc + received via remote connections via libssl are also unlikely to be able to + trigger these issues because of message size limits enforced within libssl. + + This issue was reported to OpenSSL Guido Vranken. + (CVE-2016-0799) + [Matt Caswell] + + *) Side channel attack on modular exponentiation + + A side-channel attack was found which makes use of cache-bank conflicts on + the Intel Sandy-Bridge microarchitecture which could lead to the recovery + of RSA keys. The ability to exploit this issue is limited as it relies on + an attacker who has control of code in a thread running on the same + hyper-threaded core as the victim thread which is performing decryptions. + + This issue was reported to OpenSSL by Yuval Yarom, The University of + Adelaide and NICTA, Daniel Genkin, Technion and Tel Aviv University, and + Nadia Heninger, University of Pennsylvania with more information at + http://cachebleed.info. + (CVE-2016-0702) + [Andy Polyakov] + + *) Change the req app to generate a 2048-bit RSA/DSA key by default, + if no keysize is specified with default_bits. This fixes an + omission in an earlier change that changed all RSA/DSA key generation + apps to use 2048 bits by default. + [Emilia Käsper] + Changes between 1.0.2e and 1.0.2f [28 Jan 2016] *) DH small subgroups @@ -105,7 +237,7 @@ [Emilia Käsper] *) In DSA_generate_parameters_ex, if the provided seed is too short, - return an error + use a random seed, as already documented. [Rich Salz and Ismo Puustinen ] Changes between 1.0.2c and 1.0.2d [9 Jul 2015] diff --git a/Configure b/Configure index 4a715dc..c98107a 100755 --- a/Configure +++ b/Configure @@ -58,6 +58,10 @@ my $usage="Usage: Configure [no- ...] [enable- ...] [experimenta # library and will be loaded in run-time by the OpenSSL library. # sctp include SCTP support # 386 generate 80386 code +# enable-weak-ssl-ciphers +# Enable EXPORT and LOW SSLv3 ciphers that are disabled by +# default. Note, weak SSLv2 ciphers are unconditionally +# disabled. # no-sse2 disables IA-32 SSE2 code, above option implies no-sse2 # no- build without specified algorithm (rsa, idea, rc5, ...) # - + compiler options are passed through @@ -781,11 +785,13 @@ my %disabled = ( # "what" => "comment" [or special keyword "experimental "md2" => "default", "rc5" => "default", "rfc3779" => "default", - "sctp" => "default", + "sctp" => "default", "shared" => "default", "ssl-trace" => "default", + "ssl2" => "default", "store" => "experimental", "unit-test" => "default", + "weak-ssl-ciphers" => "default", "zlib" => "default", "zlib-dynamic" => "default" ); diff --git a/Makefile b/Makefile index ee04c02..190d064 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ ## Makefile for OpenSSL ## -VERSION=1.0.2f +VERSION=1.0.2g MAJOR=1 MINOR=0.2 SHLIB_VERSION_NUMBER=1.0.0 @@ -13,7 +13,7 @@ SHLIB_MAJOR=1 SHLIB_MINOR=0.0 SHLIB_EXT= PLATFORM=dist -OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-store no-unit-test no-zlib no-zlib-dynamic static-engine +OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-ssl2 no-store no-unit-test no-weak-ssl-ciphers no-zlib no-zlib-dynamic static-engine CONFIGURE_ARGS=dist SHLIB_TARGET= @@ -61,7 +61,7 @@ OPENSSLDIR=/usr/local/ssl CC= cc CFLAG= -O -DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST +DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_SSL2 -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST -DOPENSSL_NO_WEAK_SSL_CIPHERS PEX_LIBS= EX_LIBS= EXE_EXT= diff --git a/Makefile.shared b/Makefile.shared index e753f44..a2aa980 100644 --- a/Makefile.shared +++ b/Makefile.shared @@ -272,7 +272,7 @@ link_o.cygwin: SHLIB_SOVER=${LIBVERSION:+"-$(LIBVERSION)"}; \ ALLSYMSFLAGS='-Wl,--whole-archive'; \ NOALLSYMSFLAGS='-Wl,--no-whole-archive'; \ - SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base $$deffile -Wl,-s,-Bsymbolic"; \ + SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base $$deffile -Wl,-Bsymbolic"; \ $(LINK_SO_O) #for mingw target if def-file is in use dll-name should match library-name link_a.cygwin: @@ -289,7 +289,7 @@ link_a.cygwin: SHLIB_SOVER=32; \ extras="$(LIBNAME).def"; \ $(PERL) util/mkdef.pl 32 $$SHLIB > $$extras; \ - base=; [ $(LIBNAME) = "crypto" ] && base=-Wl,--image-base,0x63000000; \ + base=; [ $(LIBNAME) = "crypto" -a -n "$(FIPSCANLIB)" ] && base=-Wl,--image-base,0x63000000; \ fi; \ dll_name=$$SHLIB$$SHLIB_SOVER$$SHLIB_SUFFIX; \ $(PERL) util/mkrc.pl $$dll_name | \ @@ -297,7 +297,7 @@ link_a.cygwin: extras="$$extras rc.o"; \ ALLSYMSFLAGS='-Wl,--whole-archive'; \ NOALLSYMSFLAGS='-Wl,--no-whole-archive'; \ - SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base -Wl,-s,-Bsymbolic -Wl,--out-implib,lib$(LIBNAME).dll.a $$extras"; \ + SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base -Wl,-Bsymbolic -Wl,--out-implib,lib$(LIBNAME).dll.a $$extras"; \ [ -f apps/$$dll_name ] && rm apps/$$dll_name; \ [ -f test/$$dll_name ] && rm test/$$dll_name; \ $(LINK_SO_A) || exit 1; \ diff --git a/NEWS b/NEWS index 06c7702..33242c8 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,19 @@ This file gives a brief overview of the major changes between each OpenSSL release. For more details please read the CHANGES file. + Major changes between OpenSSL 1.0.2f and OpenSSL 1.0.2g [1 Mar 2016] + + o Disable weak ciphers in SSLv3 and up in default builds of OpenSSL. + o Disable SSLv2 default build, default negotiation and weak ciphers + (CVE-2016-0800) + o Fix a double-free in DSA code (CVE-2016-0705) + o Disable SRP fake user seed to address a server memory leak + (CVE-2016-0798) + o Fix BN_hex2bn/BN_dec2bn NULL pointer deref/heap corruption + (CVE-2016-0797) + o Fix memory issues in BIO_*printf functions (CVE-2016-0799) + o Fix side channel attack on modular exponentiation (CVE-2016-0702) + Major changes between OpenSSL 1.0.2e and OpenSSL 1.0.2f [28 Jan 2016] o DH small subgroups (CVE-2016-0701) diff --git a/README b/README index 1e9869d..2077b04 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ - OpenSSL 1.0.2f 28 Jan 2016 + OpenSSL 1.0.2g 1 Mar 2016 Copyright (c) 1998-2015 The OpenSSL Project Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson diff --git a/apps/apps.c b/apps/apps.c index 2e77805..b1dd970 100644 --- a/apps/apps.c +++ b/apps/apps.c @@ -2442,7 +2442,11 @@ int bio_to_mem(unsigned char **out, int maxlen, BIO *in) else len = 1024; len = BIO_read(in, tbuf, len); - if (len <= 0) + if (len < 0) { + BIO_free(mem); + return -1; + } + if (len == 0) break; if (BIO_write(mem, tbuf, len) != len) { BIO_free(mem); @@ -2459,7 +2463,7 @@ int bio_to_mem(unsigned char **out, int maxlen, BIO *in) return ret; } -int pkey_ctrl_string(EVP_PKEY_CTX *ctx, char *value) +int pkey_ctrl_string(EVP_PKEY_CTX *ctx, const char *value) { int rv; char *stmp, *vtmp = NULL; diff --git a/apps/apps.h b/apps/apps.h index 8276e70..19bf5cc 100644 --- a/apps/apps.h +++ b/apps/apps.h @@ -321,7 +321,7 @@ int args_verify(char ***pargs, int *pargc, int *badarg, BIO *err, X509_VERIFY_PARAM **pm); void policies_print(BIO *out, X509_STORE_CTX *ctx); int bio_to_mem(unsigned char **out, int maxlen, BIO *in); -int pkey_ctrl_string(EVP_PKEY_CTX *ctx, char *value); +int pkey_ctrl_string(EVP_PKEY_CTX *ctx, const char *value); int init_gen_str(BIO *err, EVP_PKEY_CTX **pctx, const char *algname, ENGINE *e, int do_param); int do_X509_sign(BIO *err, X509 *x, EVP_PKEY *pkey, const EVP_MD *md, diff --git a/apps/pkeyutl.c b/apps/pkeyutl.c index c8d513b..e47206c 100644 --- a/apps/pkeyutl.c +++ b/apps/pkeyutl.c @@ -73,7 +73,7 @@ static void usage(void); #define PROG pkeyutl_main static EVP_PKEY_CTX *init_ctx(int *pkeysize, - char *keyfile, int keyform, int key_type, + const char *keyfile, int keyform, int key_type, char *passargin, int pkey_op, ENGINE *e, int impl); @@ -99,10 +99,12 @@ int MAIN(int argc, char **argv) char *passargin = NULL; int keysize = -1; int engine_impl = 0; - unsigned char *buf_in = NULL, *buf_out = NULL, *sig = NULL; - size_t buf_outlen; + size_t buf_outlen = 0; int buf_inlen = 0, siglen = -1; + const char *inkey = NULL; + const char *peerkey = NULL; + STACK_OF(OPENSSL_STRING) *pkeyopts = NULL; int ret = 1, rv = -1; @@ -136,21 +138,13 @@ int MAIN(int argc, char **argv) } else if (!strcmp(*argv, "-inkey")) { if (--argc < 1) badarg = 1; - else { - ctx = init_ctx(&keysize, - *(++argv), keyform, key_type, - passargin, pkey_op, e, engine_impl); - if (!ctx) { - BIO_puts(bio_err, "Error initializing context\n"); - ERR_print_errors(bio_err); - badarg = 1; - } - } + else + inkey = *++argv; } else if (!strcmp(*argv, "-peerkey")) { if (--argc < 1) badarg = 1; - else if (!setup_peer(bio_err, ctx, peerform, *(++argv), e)) - badarg = 1; + else + peerkey = *++argv; } else if (!strcmp(*argv, "-passin")) { if (--argc < 1) badarg = 1; @@ -191,23 +185,21 @@ int MAIN(int argc, char **argv) pkey_op = EVP_PKEY_OP_VERIFY; else if (!strcmp(*argv, "-verifyrecover")) pkey_op = EVP_PKEY_OP_VERIFYRECOVER; - else if (!strcmp(*argv, "-rev")) - rev = 1; else if (!strcmp(*argv, "-encrypt")) pkey_op = EVP_PKEY_OP_ENCRYPT; else if (!strcmp(*argv, "-decrypt")) pkey_op = EVP_PKEY_OP_DECRYPT; else if (!strcmp(*argv, "-derive")) pkey_op = EVP_PKEY_OP_DERIVE; + else if (!strcmp(*argv, "-rev")) + rev = 1; else if (strcmp(*argv, "-pkeyopt") == 0) { if (--argc < 1) badarg = 1; - else if (!ctx) { - BIO_puts(bio_err, "-pkeyopt command before -inkey\n"); - badarg = 1; - } else if (pkey_ctrl_string(ctx, *(++argv)) <= 0) { - BIO_puts(bio_err, "parameter setting error\n"); - ERR_print_errors(bio_err); + else if ((pkeyopts == NULL && + (pkeyopts = sk_OPENSSL_STRING_new_null()) == NULL) || + sk_OPENSSL_STRING_push(pkeyopts, *++argv) == 0) { + BIO_puts(bio_err, "out of memory\n"); goto end; } } else @@ -220,10 +212,37 @@ int MAIN(int argc, char **argv) argv++; } - if (!ctx) { + if (inkey == NULL || + (peerkey != NULL && pkey_op != EVP_PKEY_OP_DERIVE)) { usage(); goto end; } + ctx = init_ctx(&keysize, inkey, keyform, key_type, + passargin, pkey_op, e, engine_impl); + if (!ctx) { + BIO_puts(bio_err, "Error initializing context\n"); + ERR_print_errors(bio_err); + goto end; + } + if (peerkey != NULL && !setup_peer(bio_err, ctx, peerform, peerkey, e)) { + BIO_puts(bio_err, "Error setting up peer key\n"); + ERR_print_errors(bio_err); + goto end; + } + if (pkeyopts != NULL) { + int num = sk_OPENSSL_STRING_num(pkeyopts); + int i; + + for (i = 0; i < num; ++i) { + const char *opt = sk_OPENSSL_STRING_value(pkeyopts, i); + + if (pkey_ctrl_string(ctx, opt) <= 0) { + BIO_puts(bio_err, "parameter setting error\n"); + ERR_print_errors(bio_err); + goto end; + } + } + } if (sigfile && (pkey_op != EVP_PKEY_OP_VERIFY)) { BIO_puts(bio_err, "Signature file specified for non verify\n"); @@ -273,7 +292,7 @@ int MAIN(int argc, char **argv) } siglen = bio_to_mem(&sig, keysize * 10, sigbio); BIO_free(sigbio); - if (siglen <= 0) { + if (siglen < 0) { BIO_printf(bio_err, "Error reading signature data\n"); goto end; } @@ -282,7 +301,7 @@ int MAIN(int argc, char **argv) if (in) { /* Read the input data */ buf_inlen = bio_to_mem(&buf_in, keysize * 10, in); - if (buf_inlen <= 0) { + if (buf_inlen < 0) { BIO_printf(bio_err, "Error reading input Data\n"); exit(1); } @@ -310,7 +329,7 @@ int MAIN(int argc, char **argv) } else { rv = do_keyop(ctx, pkey_op, NULL, (size_t *)&buf_outlen, buf_in, (size_t)buf_inlen); - if (rv > 0) { + if (rv > 0 && buf_outlen != 0) { buf_out = OPENSSL_malloc(buf_outlen); if (!buf_out) rv = -1; @@ -340,12 +359,14 @@ int MAIN(int argc, char **argv) EVP_PKEY_CTX_free(ctx); BIO_free(in); BIO_free_all(out); - if (buf_in) + if (buf_in != NULL) OPENSSL_free(buf_in); - if (buf_out) + if (buf_out != NULL) OPENSSL_free(buf_out); - if (sig) + if (sig != NULL) OPENSSL_free(sig); + if (pkeyopts != NULL) + sk_OPENSSL_STRING_free(pkeyopts); return ret; } @@ -380,7 +401,7 @@ static void usage() } static EVP_PKEY_CTX *init_ctx(int *pkeysize, - char *keyfile, int keyform, int key_type, + const char *keyfile, int keyform, int key_type, char *passargin, int pkey_op, ENGINE *e, int engine_impl) { @@ -484,14 +505,9 @@ static int setup_peer(BIO *err, EVP_PKEY_CTX *ctx, int peerform, EVP_PKEY *peer = NULL; ENGINE* engine = NULL; int ret; - if (!ctx) { - BIO_puts(err, "-peerkey command before -inkey\n"); - return 0; - } if (peerform == FORMAT_ENGINE) - engine = e; - + engine = e; peer = load_pubkey(bio_err, file, peerform, 0, NULL, engine, "Peer Key"); if (!peer) { diff --git a/apps/req.c b/apps/req.c index 57781c9..e818bd2 100644 --- a/apps/req.c +++ b/apps/req.c @@ -101,8 +101,8 @@ #define STRING_MASK "string_mask" #define UTF8_IN "utf8" -#define DEFAULT_KEY_LENGTH 512 -#define MIN_KEY_LENGTH 384 +#define DEFAULT_KEY_LENGTH 2048 +#define MIN_KEY_LENGTH 512 #undef PROG #define PROG req_main diff --git a/apps/rsautl.c b/apps/rsautl.c index d642f9a..5b6f849 100644 --- a/apps/rsautl.c +++ b/apps/rsautl.c @@ -250,7 +250,7 @@ int MAIN(int argc, char **argv) if (outfile) { if (!(out = BIO_new_file(outfile, "wb"))) { - BIO_printf(bio_err, "Error Reading Output File\n"); + BIO_printf(bio_err, "Error Writing Output File\n"); ERR_print_errors(bio_err); goto end; } @@ -276,7 +276,7 @@ int MAIN(int argc, char **argv) /* Read the input data */ rsa_inlen = BIO_read(in, rsa_in, keysize * 2); - if (rsa_inlen <= 0) { + if (rsa_inlen < 0) { BIO_printf(bio_err, "Error reading input Data\n"); exit(1); } @@ -311,7 +311,7 @@ int MAIN(int argc, char **argv) } - if (rsa_outlen <= 0) { + if (rsa_outlen < 0) { BIO_printf(bio_err, "RSA operation error\n"); ERR_print_errors(bio_err); goto end; diff --git a/apps/s_client.c b/apps/s_client.c index caf76d3..0c1102b 100644 --- a/apps/s_client.c +++ b/apps/s_client.c @@ -391,8 +391,6 @@ static void sc_usage(void) BIO_printf(bio_err, " -bugs - Switch on all SSL implementation bug workarounds\n"); BIO_printf(bio_err, - " -serverpref - Use server's cipher preferences (only SSLv2)\n"); - BIO_printf(bio_err, " -cipher - preferred cipher to use, use the 'openssl ciphers'\n"); BIO_printf(bio_err, " command to see what is available\n"); diff --git a/apps/s_server.c b/apps/s_server.c index 65cbaaf..09c755b 100644 --- a/apps/s_server.c +++ b/apps/s_server.c @@ -429,6 +429,8 @@ typedef struct srpsrvparm_st { static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) { srpsrvparm *p = (srpsrvparm *) arg; + int ret = SSL3_AL_FATAL; + if (p->login == NULL && p->user == NULL) { p->login = SSL_get_srp_username(s); BIO_printf(bio_err, "SRP username = \"%s\"\n", p->login); @@ -437,21 +439,25 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) if (p->user == NULL) { BIO_printf(bio_err, "User %s doesn't exist\n", p->login); - return SSL3_AL_FATAL; + goto err; } + if (SSL_set_srp_server_param (s, p->user->N, p->user->g, p->user->s, p->user->v, p->user->info) < 0) { *ad = SSL_AD_INTERNAL_ERROR; - return SSL3_AL_FATAL; + goto err; } BIO_printf(bio_err, "SRP parameters set: username = \"%s\" info=\"%s\" \n", p->login, p->user->info); - /* need to check whether there are memory leaks */ + ret = SSL_ERROR_NONE; + +err: + SRP_user_pwd_free(p->user); p->user = NULL; p->login = NULL; - return SSL_ERROR_NONE; + return ret; } #endif @@ -2452,9 +2458,10 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context) #ifndef OPENSSL_NO_SRP while (SSL_get_error(con, k) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during write\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2508,9 +2515,10 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context) #ifndef OPENSSL_NO_SRP while (SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during read\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2605,9 +2613,10 @@ static int init_ssl_connection(SSL *con) while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP during accept %s\n", srp_callback_parm.login); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2849,9 +2858,10 @@ static int www_body(char *hostname, int s, int stype, unsigned char *context) && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP during accept %s\n", srp_callback_parm.login); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -2891,9 +2901,10 @@ static int www_body(char *hostname, int s, int stype, unsigned char *context) if (BIO_should_io_special(io) && BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during read\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -3236,9 +3247,10 @@ static int rev_body(char *hostname, int s, int stype, unsigned char *context) if (BIO_should_io_special(io) && BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during accept\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); @@ -3264,9 +3276,10 @@ static int rev_body(char *hostname, int s, int stype, unsigned char *context) if (BIO_should_io_special(io) && BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP renego during read\n"); + SRP_user_pwd_free(srp_callback_parm.user); srp_callback_parm.user = - SRP_VBASE_get_by_user(srp_callback_parm.vb, - srp_callback_parm.login); + SRP_VBASE_get1_by_user(srp_callback_parm.vb, + srp_callback_parm.login); if (srp_callback_parm.user) BIO_printf(bio_s_out, "LOOKUP done %s\n", srp_callback_parm.user->info); diff --git a/config b/config index 77f730f..bba370c 100755 --- a/config +++ b/config @@ -852,7 +852,8 @@ case "$GUESSOS" in # *-dgux) OUT="dgux" ;; mips-sony-newsos4) OUT="newsos4-gcc" ;; *-*-cygwin_pre1.3) OUT="Cygwin-pre1.3" ;; - *-*-cygwin) OUT="Cygwin" ;; + i[3456]86-*-cygwin) OUT="Cygwin" ;; + *-*-cygwin) OUT="Cygwin-${MACHINE}" ;; t3e-cray-unicosmk) OUT="cray-t3e" ;; j90-cray-unicos) OUT="cray-j90" ;; nsr-tandem-nsk) OUT="tandem-c89" ;; diff --git a/crypto/asn1/tasn_dec.c b/crypto/asn1/tasn_dec.c index 9256049..5a50796 100644 --- a/crypto/asn1/tasn_dec.c +++ b/crypto/asn1/tasn_dec.c @@ -717,7 +717,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, long plen; char cst, inf, free_cont = 0; const unsigned char *p; - BUF_MEM buf; + BUF_MEM buf = { 0, NULL, 0 }; const unsigned char *cont = NULL; long len; if (!pval) { @@ -793,7 +793,6 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, } else { len = p - cont + plen; p += plen; - buf.data = NULL; } } else if (cst) { if (utype == V_ASN1_NULL || utype == V_ASN1_BOOLEAN @@ -802,9 +801,9 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, ASN1_R_TYPE_NOT_PRIMITIVE); return 0; } - buf.length = 0; - buf.max = 0; - buf.data = NULL; + + /* Free any returned 'buf' content */ + free_cont = 1; /* * Should really check the internal tags are correct but some things * may get this wrong. The relevant specs say that constructed string @@ -812,18 +811,16 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, * So instead just check for UNIVERSAL class and ignore the tag. */ if (!asn1_collect(&buf, &p, plen, inf, -1, V_ASN1_UNIVERSAL, 0)) { - free_cont = 1; goto err; } len = buf.length; /* Append a final null to string */ if (!BUF_MEM_grow_clean(&buf, len + 1)) { ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, ERR_R_MALLOC_FAILURE); - return 0; + goto err; } buf.data[len] = 0; cont = (const unsigned char *)buf.data; - free_cont = 1; } else { cont = p; len = plen; @@ -831,6 +828,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, } /* We now have content length and type: translate into a structure */ + /* asn1_ex_c2i may reuse allocated buffer, and so sets free_cont to 0 */ if (!asn1_ex_c2i(pval, cont, len, utype, &free_cont, it)) goto err; diff --git a/crypto/bio/b_print.c b/crypto/bio/b_print.c index 7c81e25..90248fa 100644 --- a/crypto/bio/b_print.c +++ b/crypto/bio/b_print.c @@ -125,16 +125,16 @@ # define LLONG long #endif -static void fmtstr(char **, char **, size_t *, size_t *, - const char *, int, int, int); -static void fmtint(char **, char **, size_t *, size_t *, - LLONG, int, int, int, int); -static void fmtfp(char **, char **, size_t *, size_t *, - LDOUBLE, int, int, int); -static void doapr_outch(char **, char **, size_t *, size_t *, int); -static void _dopr(char **sbuffer, char **buffer, - size_t *maxlen, size_t *retlen, int *truncated, - const char *format, va_list args); +static int fmtstr(char **, char **, size_t *, size_t *, + const char *, int, int, int); +static int fmtint(char **, char **, size_t *, size_t *, + LLONG, int, int, int, int); +static int fmtfp(char **, char **, size_t *, size_t *, + LDOUBLE, int, int, int); +static int doapr_outch(char **, char **, size_t *, size_t *, int); +static int _dopr(char **sbuffer, char **buffer, + size_t *maxlen, size_t *retlen, int *truncated, + const char *format, va_list args); /* format read states */ #define DP_S_DEFAULT 0 @@ -165,7 +165,7 @@ static void _dopr(char **sbuffer, char **buffer, #define char_to_int(p) (p - '0') #define OSSL_MAX(p,q) ((p >= q) ? p : q) -static void +static int _dopr(char **sbuffer, char **buffer, size_t *maxlen, @@ -196,7 +196,8 @@ _dopr(char **sbuffer, if (ch == '%') state = DP_S_FLAGS; else - doapr_outch(sbuffer, buffer, &currlen, maxlen, ch); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, ch)) + return 0; ch = *format++; break; case DP_S_FLAGS: @@ -302,8 +303,9 @@ _dopr(char **sbuffer, value = va_arg(args, int); break; } - fmtint(sbuffer, buffer, &currlen, maxlen, - value, 10, min, max, flags); + if (!fmtint(sbuffer, buffer, &currlen, maxlen, value, 10, min, + max, flags)) + return 0; break; case 'X': flags |= DP_F_UP; @@ -326,17 +328,19 @@ _dopr(char **sbuffer, value = (LLONG) va_arg(args, unsigned int); break; } - fmtint(sbuffer, buffer, &currlen, maxlen, value, - ch == 'o' ? 8 : (ch == 'u' ? 10 : 16), - min, max, flags); + if (!fmtint(sbuffer, buffer, &currlen, maxlen, value, + ch == 'o' ? 8 : (ch == 'u' ? 10 : 16), + min, max, flags)) + return 0; break; case 'f': if (cflags == DP_C_LDOUBLE) fvalue = va_arg(args, LDOUBLE); else fvalue = va_arg(args, double); - fmtfp(sbuffer, buffer, &currlen, maxlen, - fvalue, min, max, flags); + if (!fmtfp(sbuffer, buffer, &currlen, maxlen, fvalue, min, max, + flags)) + return 0; break; case 'E': flags |= DP_F_UP; @@ -355,8 +359,9 @@ _dopr(char **sbuffer, fvalue = va_arg(args, double); break; case 'c': - doapr_outch(sbuffer, buffer, &currlen, maxlen, - va_arg(args, int)); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, + va_arg(args, int))) + return 0; break; case 's': strvalue = va_arg(args, char *); @@ -366,13 +371,15 @@ _dopr(char **sbuffer, else max = *maxlen; } - fmtstr(sbuffer, buffer, &currlen, maxlen, strvalue, - flags, min, max); + if (!fmtstr(sbuffer, buffer, &currlen, maxlen, strvalue, + flags, min, max)) + return 0; break; case 'p': value = (long)va_arg(args, void *); - fmtint(sbuffer, buffer, &currlen, maxlen, - value, 16, min, max, flags | DP_F_NUM); + if (!fmtint(sbuffer, buffer, &currlen, maxlen, + value, 16, min, max, flags | DP_F_NUM)) + return 0; break; case 'n': /* XXX */ if (cflags == DP_C_SHORT) { @@ -394,7 +401,8 @@ _dopr(char **sbuffer, } break; case '%': - doapr_outch(sbuffer, buffer, &currlen, maxlen, ch); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, ch)) + return 0; break; case 'w': /* not supported yet, treat as next char */ @@ -418,46 +426,56 @@ _dopr(char **sbuffer, *truncated = (currlen > *maxlen - 1); if (*truncated) currlen = *maxlen - 1; - doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0'); + if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0')) + return 0; *retlen = currlen - 1; - return; + return 1; } -static void +static int fmtstr(char **sbuffer, char **buffer, size_t *currlen, size_t *maxlen, const char *value, int flags, int min, int max) { - int padlen, strln; + int padlen; + size_t strln; int cnt = 0; if (value == 0) value = ""; - for (strln = 0; value[strln]; ++strln) ; + + strln = strlen(value); + if (strln > INT_MAX) + strln = INT_MAX; + padlen = min - strln; - if (padlen < 0) + if (min < 0 || padlen < 0) padlen = 0; if (flags & DP_F_MINUS) padlen = -padlen; while ((padlen > 0) && (cnt < max)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; --padlen; ++cnt; } while (*value && (cnt < max)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, *value++); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, *value++)) + return 0; ++cnt; } while ((padlen < 0) && (cnt < max)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; ++padlen; ++cnt; } + return 1; } -static void +static int fmtint(char **sbuffer, char **buffer, size_t *currlen, @@ -517,37 +535,44 @@ fmtint(char **sbuffer, /* spaces */ while (spadlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; --spadlen; } /* sign */ if (signvalue) - doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue)) + return 0; /* prefix */ while (*prefix) { - doapr_outch(sbuffer, buffer, currlen, maxlen, *prefix); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, *prefix)) + return 0; prefix++; } /* zeros */ if (zpadlen > 0) { while (zpadlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '0'); + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, '0')) + return 0; --zpadlen; } } /* digits */ - while (place > 0) - doapr_outch(sbuffer, buffer, currlen, maxlen, convert[--place]); + while (place > 0) { + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, convert[--place])) + return 0; + } /* left justified spaces */ while (spadlen < 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; ++spadlen; } - return; + return 1; } static LDOUBLE abs_val(LDOUBLE value) @@ -578,7 +603,7 @@ static long roundv(LDOUBLE value) return intpart; } -static void +static int fmtfp(char **sbuffer, char **buffer, size_t *currlen, @@ -657,47 +682,61 @@ fmtfp(char **sbuffer, if ((flags & DP_F_ZERO) && (padlen > 0)) { if (signvalue) { - doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue)) + return 0; --padlen; signvalue = 0; } while (padlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '0'); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '0')) + return 0; --padlen; } } while (padlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; --padlen; } - if (signvalue) - doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue); + if (signvalue && !doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue)) + return 0; - while (iplace > 0) - doapr_outch(sbuffer, buffer, currlen, maxlen, iconvert[--iplace]); + while (iplace > 0) { + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, iconvert[--iplace])) + return 0; + } /* * Decimal point. This should probably use locale to find the correct * char to print out. */ if (max > 0 || (flags & DP_F_NUM)) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '.'); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '.')) + return 0; - while (fplace > 0) - doapr_outch(sbuffer, buffer, currlen, maxlen, fconvert[--fplace]); + while (fplace > 0) { + if(!doapr_outch(sbuffer, buffer, currlen, maxlen, + fconvert[--fplace])) + return 0; + } } while (zpadlen > 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, '0'); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '0')) + return 0; --zpadlen; } while (padlen < 0) { - doapr_outch(sbuffer, buffer, currlen, maxlen, ' '); + if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' ')) + return 0; ++padlen; } + return 1; } -static void +#define BUFFER_INC 1024 + +static int doapr_outch(char **sbuffer, char **buffer, size_t *currlen, size_t *maxlen, int c) { @@ -708,24 +747,25 @@ doapr_outch(char **sbuffer, assert(*currlen <= *maxlen); if (buffer && *currlen == *maxlen) { - *maxlen += 1024; + if (*maxlen > INT_MAX - BUFFER_INC) + return 0; + + *maxlen += BUFFER_INC; if (*buffer == NULL) { *buffer = OPENSSL_malloc(*maxlen); - if (!*buffer) { - /* Panic! Can't really do anything sensible. Just return */ - return; - } + if (*buffer == NULL) + return 0; if (*currlen > 0) { assert(*sbuffer != NULL); memcpy(*buffer, *sbuffer, *currlen); } *sbuffer = NULL; } else { - *buffer = OPENSSL_realloc(*buffer, *maxlen); - if (!*buffer) { - /* Panic! Can't really do anything sensible. Just return */ - return; - } + char *tmpbuf; + tmpbuf = OPENSSL_realloc(*buffer, *maxlen); + if (tmpbuf == NULL) + return 0; + *buffer = tmpbuf; } } @@ -736,7 +776,7 @@ doapr_outch(char **sbuffer, (*buffer)[(*currlen)++] = (char)c; } - return; + return 1; } /***************************************************************************/ @@ -768,7 +808,11 @@ int BIO_vprintf(BIO *bio, const char *format, va_list args) dynbuf = NULL; CRYPTO_push_info("doapr()"); - _dopr(&hugebufp, &dynbuf, &hugebufsize, &retlen, &ignored, format, args); + if (!_dopr(&hugebufp, &dynbuf, &hugebufsize, &retlen, &ignored, format, + args)) { + OPENSSL_free(dynbuf); + return -1; + } if (dynbuf) { ret = BIO_write(bio, dynbuf, (int)retlen); OPENSSL_free(dynbuf); @@ -803,7 +847,8 @@ int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args) size_t retlen; int truncated; - _dopr(&buf, NULL, &n, &retlen, &truncated, format, args); + if(!_dopr(&buf, NULL, &n, &retlen, &truncated, format, args)) + return -1; if (truncated) /* diff --git a/crypto/bio/bio.h b/crypto/bio/bio.h index 6e2293b..6790aed 100644 --- a/crypto/bio/bio.h +++ b/crypto/bio/bio.h @@ -479,7 +479,7 @@ struct bio_dgram_sctp_prinfo { # define BIO_get_conn_hostname(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,0) # define BIO_get_conn_port(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,1) # define BIO_get_conn_ip(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,2) -# define BIO_get_conn_int_port(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,0,NULL) +# define BIO_get_conn_int_port(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,NULL) # define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) @@ -689,7 +689,7 @@ long BIO_debug_callback(BIO *bio, int cmd, const char *argp, int argi, long argl, long ret); BIO_METHOD *BIO_s_mem(void); -BIO *BIO_new_mem_buf(void *buf, int len); +BIO *BIO_new_mem_buf(const void *buf, int len); BIO_METHOD *BIO_s_socket(void); BIO_METHOD *BIO_s_connect(void); BIO_METHOD *BIO_s_accept(void); diff --git a/crypto/bio/bss_mem.c b/crypto/bio/bss_mem.c index d190765..b0394a9 100644 --- a/crypto/bio/bss_mem.c +++ b/crypto/bio/bss_mem.c @@ -91,7 +91,8 @@ BIO_METHOD *BIO_s_mem(void) return (&mem_method); } -BIO *BIO_new_mem_buf(void *buf, int len) + +BIO *BIO_new_mem_buf(const void *buf, int len) { BIO *ret; BUF_MEM *b; @@ -105,7 +106,8 @@ BIO *BIO_new_mem_buf(void *buf, int len) if (!(ret = BIO_new(BIO_s_mem()))) return NULL; b = (BUF_MEM *)ret->ptr; - b->data = buf; + /* Cast away const and trust in the MEM_RDONLY flag. */ + b->data = (void *)buf; b->length = sz; b->max = sz; ret->flags |= BIO_FLAGS_MEM_RDONLY; diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile index 215855e..c4c6409 100644 --- a/crypto/bn/Makefile +++ b/crypto/bn/Makefile @@ -252,8 +252,8 @@ bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp.c bn_lcl.h -bn_exp.o: rsaz_exp.h +bn_exp.o: ../../include/openssl/symhacks.h ../constant_time_locl.h +bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h rsaz_exp.h bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 3b6ccf8..712a77f 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -443,7 +443,7 @@ $TEMP2 = $B2; $TEMP3 = $Y1; $TEMP4 = $Y2; $code.=<<___; - #we need to fix indexes 32-39 to avoid overflow + # we need to fix indices 32-39 to avoid overflow vmovdqu 32*8(%rsp), $ACC8 # 32*8-192($tp0), vmovdqu 32*9(%rsp), $ACC1 # 32*9-192($tp0) vmovdqu 32*10(%rsp), $ACC2 # 32*10-192($tp0) @@ -1592,68 +1592,128 @@ rsaz_1024_scatter5_avx2: .type rsaz_1024_gather5_avx2,\@abi-omnipotent .align 32 rsaz_1024_gather5_avx2: + vzeroupper + mov %rsp,%r11 ___ $code.=<<___ if ($win64); lea -0x88(%rsp),%rax - vzeroupper .LSEH_begin_rsaz_1024_gather5: # I can't trust assembler to use specific encoding:-( - .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax),%rsp - .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6,-0x20(%rax) - .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7,-0x10(%rax) - .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8,0(%rax) - .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9,0x10(%rax) - .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10,0x20(%rax) - .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11,0x30(%rax) - .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12,0x40(%rax) - .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13,0x50(%rax) - .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14,0x60(%rax) - .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15,0x70(%rax) + .byte 0x48,0x8d,0x60,0xe0 # lea -0x20(%rax),%rsp + .byte 0xc5,0xf8,0x29,0x70,0xe0 # vmovaps %xmm6,-0x20(%rax) + .byte 0xc5,0xf8,0x29,0x78,0xf0 # vmovaps %xmm7,-0x10(%rax) + .byte 0xc5,0x78,0x29,0x40,0x00 # vmovaps %xmm8,0(%rax) + .byte 0xc5,0x78,0x29,0x48,0x10 # vmovaps %xmm9,0x10(%rax) + .byte 0xc5,0x78,0x29,0x50,0x20 # vmovaps %xmm10,0x20(%rax) + .byte 0xc5,0x78,0x29,0x58,0x30 # vmovaps %xmm11,0x30(%rax) + .byte 0xc5,0x78,0x29,0x60,0x40 # vmovaps %xmm12,0x40(%rax) + .byte 0xc5,0x78,0x29,0x68,0x50 # vmovaps %xmm13,0x50(%rax) + .byte 0xc5,0x78,0x29,0x70,0x60 # vmovaps %xmm14,0x60(%rax) + .byte 0xc5,0x78,0x29,0x78,0x70 # vmovaps %xmm15,0x70(%rax) ___ $code.=<<___; - lea .Lgather_table(%rip),%r11 - mov $power,%eax - and \$3,$power - shr \$2,%eax # cache line number - shl \$4,$power # offset within cache line - - vmovdqu -32(%r11),%ymm7 # .Lgather_permd - vpbroadcastb 8(%r11,%rax), %xmm8 - vpbroadcastb 7(%r11,%rax), %xmm9 - vpbroadcastb 6(%r11,%rax), %xmm10 - vpbroadcastb 5(%r11,%rax), %xmm11 - vpbroadcastb 4(%r11,%rax), %xmm12 - vpbroadcastb 3(%r11,%rax), %xmm13 - vpbroadcastb 2(%r11,%rax), %xmm14 - vpbroadcastb 1(%r11,%rax), %xmm15 - - lea 64($inp,$power),$inp - mov \$64,%r11 # size optimization - mov \$9,%eax - jmp .Loop_gather_1024 + lea -0x100(%rsp),%rsp + and \$-32, %rsp + lea .Linc(%rip), %r10 + lea -128(%rsp),%rax # control u-op density + + vmovd $power, %xmm4 + vmovdqa (%r10),%ymm0 + vmovdqa 32(%r10),%ymm1 + vmovdqa 64(%r10),%ymm5 + vpbroadcastd %xmm4,%ymm4 + + vpaddd %ymm5, %ymm0, %ymm2 + vpcmpeqd %ymm4, %ymm0, %ymm0 + vpaddd %ymm5, %ymm1, %ymm3 + vpcmpeqd %ymm4, %ymm1, %ymm1 + vmovdqa %ymm0, 32*0+128(%rax) + vpaddd %ymm5, %ymm2, %ymm0 + vpcmpeqd %ymm4, %ymm2, %ymm2 + vmovdqa %ymm1, 32*1+128(%rax) + vpaddd %ymm5, %ymm3, %ymm1 + vpcmpeqd %ymm4, %ymm3, %ymm3 + vmovdqa %ymm2, 32*2+128(%rax) + vpaddd %ymm5, %ymm0, %ymm2 + vpcmpeqd %ymm4, %ymm0, %ymm0 + vmovdqa %ymm3, 32*3+128(%rax) + vpaddd %ymm5, %ymm1, %ymm3 + vpcmpeqd %ymm4, %ymm1, %ymm1 + vmovdqa %ymm0, 32*4+128(%rax) + vpaddd %ymm5, %ymm2, %ymm8 + vpcmpeqd %ymm4, %ymm2, %ymm2 + vmovdqa %ymm1, 32*5+128(%rax) + vpaddd %ymm5, %ymm3, %ymm9 + vpcmpeqd %ymm4, %ymm3, %ymm3 + vmovdqa %ymm2, 32*6+128(%rax) + vpaddd %ymm5, %ymm8, %ymm10 + vpcmpeqd %ymm4, %ymm8, %ymm8 + vmovdqa %ymm3, 32*7+128(%rax) + vpaddd %ymm5, %ymm9, %ymm11 + vpcmpeqd %ymm4, %ymm9, %ymm9 + vpaddd %ymm5, %ymm10, %ymm12 + vpcmpeqd %ymm4, %ymm10, %ymm10 + vpaddd %ymm5, %ymm11, %ymm13 + vpcmpeqd %ymm4, %ymm11, %ymm11 + vpaddd %ymm5, %ymm12, %ymm14 + vpcmpeqd %ymm4, %ymm12, %ymm12 + vpaddd %ymm5, %ymm13, %ymm15 + vpcmpeqd %ymm4, %ymm13, %ymm13 + vpcmpeqd %ymm4, %ymm14, %ymm14 + vpcmpeqd %ymm4, %ymm15, %ymm15 + + vmovdqa -32(%r10),%ymm7 # .Lgather_permd + lea 128($inp), $inp + mov \$9,$power -.align 32 .Loop_gather_1024: - vpand -64($inp), %xmm8,%xmm0 - vpand ($inp), %xmm9,%xmm1 - vpand 64($inp), %xmm10,%xmm2 - vpand ($inp,%r11,2), %xmm11,%xmm3 - vpor %xmm0,%xmm1,%xmm1 - vpand 64($inp,%r11,2), %xmm12,%xmm4 - vpor %xmm2,%xmm3,%xmm3 - vpand ($inp,%r11,4), %xmm13,%xmm5 - vpor %xmm1,%xmm3,%xmm3 - vpand 64($inp,%r11,4), %xmm14,%xmm6 - vpor %xmm4,%xmm5,%xmm5 - vpand -128($inp,%r11,8), %xmm15,%xmm2 - lea ($inp,%r11,8),$inp - vpor %xmm3,%xmm5,%xmm5 - vpor %xmm2,%xmm6,%xmm6 - vpor %xmm5,%xmm6,%xmm6 - vpermd %ymm6,%ymm7,%ymm6 - vmovdqu %ymm6,($out) + vmovdqa 32*0-128($inp), %ymm0 + vmovdqa 32*1-128($inp), %ymm1 + vmovdqa 32*2-128($inp), %ymm2 + vmovdqa 32*3-128($inp), %ymm3 + vpand 32*0+128(%rax), %ymm0, %ymm0 + vpand 32*1+128(%rax), %ymm1, %ymm1 + vpand 32*2+128(%rax), %ymm2, %ymm2 + vpor %ymm0, %ymm1, %ymm4 + vpand 32*3+128(%rax), %ymm3, %ymm3 + vmovdqa 32*4-128($inp), %ymm0 + vmovdqa 32*5-128($inp), %ymm1 + vpor %ymm2, %ymm3, %ymm5 + vmovdqa 32*6-128($inp), %ymm2 + vmovdqa 32*7-128($inp), %ymm3 + vpand 32*4+128(%rax), %ymm0, %ymm0 + vpand 32*5+128(%rax), %ymm1, %ymm1 + vpand 32*6+128(%rax), %ymm2, %ymm2 + vpor %ymm0, %ymm4, %ymm4 + vpand 32*7+128(%rax), %ymm3, %ymm3 + vpand 32*8-128($inp), %ymm8, %ymm0 + vpor %ymm1, %ymm5, %ymm5 + vpand 32*9-128($inp), %ymm9, %ymm1 + vpor %ymm2, %ymm4, %ymm4 + vpand 32*10-128($inp),%ymm10, %ymm2 + vpor %ymm3, %ymm5, %ymm5 + vpand 32*11-128($inp),%ymm11, %ymm3 + vpor %ymm0, %ymm4, %ymm4 + vpand 32*12-128($inp),%ymm12, %ymm0 + vpor %ymm1, %ymm5, %ymm5 + vpand 32*13-128($inp),%ymm13, %ymm1 + vpor %ymm2, %ymm4, %ymm4 + vpand 32*14-128($inp),%ymm14, %ymm2 + vpor %ymm3, %ymm5, %ymm5 + vpand 32*15-128($inp),%ymm15, %ymm3 + lea 32*16($inp), $inp + vpor %ymm0, %ymm4, %ymm4 + vpor %ymm1, %ymm5, %ymm5 + vpor %ymm2, %ymm4, %ymm4 + vpor %ymm3, %ymm5, %ymm5 + + vpor %ymm5, %ymm4, %ymm4 + vextracti128 \$1, %ymm4, %xmm5 # upper half is cleared + vpor %xmm4, %xmm5, %xmm5 + vpermd %ymm5,%ymm7,%ymm5 + vmovdqu %ymm5,($out) lea 32($out),$out - dec %eax + dec $power jnz .Loop_gather_1024 vpxor %ymm0,%ymm0,%ymm0 @@ -1661,20 +1721,20 @@ $code.=<<___; vzeroupper ___ $code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - movaps 0x40(%rsp),%xmm10 - movaps 0x50(%rsp),%xmm11 - movaps 0x60(%rsp),%xmm12 - movaps 0x70(%rsp),%xmm13 - movaps 0x80(%rsp),%xmm14 - movaps 0x90(%rsp),%xmm15 - lea 0xa8(%rsp),%rsp + movaps -0xa8(%r11),%xmm6 + movaps -0x98(%r11),%xmm7 + movaps -0x88(%r11),%xmm8 + movaps -0x78(%r11),%xmm9 + movaps -0x68(%r11),%xmm10 + movaps -0x58(%r11),%xmm11 + movaps -0x48(%r11),%xmm12 + movaps -0x38(%r11),%xmm13 + movaps -0x28(%r11),%xmm14 + movaps -0x18(%r11),%xmm15 .LSEH_end_rsaz_1024_gather5: ___ $code.=<<___; + lea (%r11),%rsp ret .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 ___ @@ -1708,8 +1768,10 @@ $code.=<<___; .long 0,2,4,6,7,7,7,7 .Lgather_permd: .long 0,7,1,7,2,7,3,7 -.Lgather_table: - .byte 0,0,0,0,0,0,0,0, 0xff,0,0,0,0,0,0,0 +.Linc: + .long 0,0,0,0, 1,1,1,1 + .long 2,2,2,2, 3,3,3,3 + .long 4,4,4,4, 4,4,4,4 .align 64 ___ @@ -1837,18 +1899,19 @@ rsaz_se_handler: .rva rsaz_se_handler .rva .Lmul_1024_body,.Lmul_1024_epilogue .LSEH_info_rsaz_1024_gather5: - .byte 0x01,0x33,0x16,0x00 - .byte 0x36,0xf8,0x09,0x00 #vmovaps 0x90(rsp),xmm15 - .byte 0x31,0xe8,0x08,0x00 #vmovaps 0x80(rsp),xmm14 - .byte 0x2c,0xd8,0x07,0x00 #vmovaps 0x70(rsp),xmm13 - .byte 0x27,0xc8,0x06,0x00 #vmovaps 0x60(rsp),xmm12 - .byte 0x22,0xb8,0x05,0x00 #vmovaps 0x50(rsp),xmm11 - .byte 0x1d,0xa8,0x04,0x00 #vmovaps 0x40(rsp),xmm10 - .byte 0x18,0x98,0x03,0x00 #vmovaps 0x30(rsp),xmm9 - .byte 0x13,0x88,0x02,0x00 #vmovaps 0x20(rsp),xmm8 - .byte 0x0e,0x78,0x01,0x00 #vmovaps 0x10(rsp),xmm7 - .byte 0x09,0x68,0x00,0x00 #vmovaps 0x00(rsp),xmm6 - .byte 0x04,0x01,0x15,0x00 #sub rsp,0xa8 + .byte 0x01,0x36,0x17,0x0b + .byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15 + .byte 0x31,0xe8,0x08,0x00 # vmovaps 0x80(rsp),xmm14 + .byte 0x2c,0xd8,0x07,0x00 # vmovaps 0x70(rsp),xmm13 + .byte 0x27,0xc8,0x06,0x00 # vmovaps 0x60(rsp),xmm12 + .byte 0x22,0xb8,0x05,0x00 # vmovaps 0x50(rsp),xmm11 + .byte 0x1d,0xa8,0x04,0x00 # vmovaps 0x40(rsp),xmm10 + .byte 0x18,0x98,0x03,0x00 # vmovaps 0x30(rsp),xmm9 + .byte 0x13,0x88,0x02,0x00 # vmovaps 0x20(rsp),xmm8 + .byte 0x0e,0x78,0x01,0x00 # vmovaps 0x10(rsp),xmm7 + .byte 0x09,0x68,0x00,0x00 # vmovaps 0x00(rsp),xmm6 + .byte 0x04,0x01,0x15,0x00 # sub rsp,0xa8 + .byte 0x00,0xb3,0x00,0x00 # set_frame r11 ___ } diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 091cdc2..87ce2c3 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -915,9 +915,76 @@ rsaz_512_mul_gather4: push %r14 push %r15 - mov $pwr, $pwr - subq \$128+24, %rsp + subq \$`128+24+($win64?0xb0:0)`, %rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,0xa0(%rsp) + movaps %xmm7,0xb0(%rsp) + movaps %xmm8,0xc0(%rsp) + movaps %xmm9,0xd0(%rsp) + movaps %xmm10,0xe0(%rsp) + movaps %xmm11,0xf0(%rsp) + movaps %xmm12,0x100(%rsp) + movaps %xmm13,0x110(%rsp) + movaps %xmm14,0x120(%rsp) + movaps %xmm15,0x130(%rsp) +___ +$code.=<<___; .Lmul_gather4_body: + movd $pwr,%xmm8 + movdqa .Linc+16(%rip),%xmm1 # 00000002000000020000000200000002 + movdqa .Linc(%rip),%xmm0 # 00000001000000010000000000000000 + + pshufd \$0,%xmm8,%xmm8 # broadcast $power + movdqa %xmm1,%xmm7 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..15 to $power +# +for($i=0;$i<4;$i++) { +$code.=<<___; + paddd %xmm`$i`,%xmm`$i+1` + pcmpeqd %xmm8,%xmm`$i` + movdqa %xmm7,%xmm`$i+3` +___ +} +for(;$i<7;$i++) { +$code.=<<___; + paddd %xmm`$i`,%xmm`$i+1` + pcmpeqd %xmm8,%xmm`$i` +___ +} +$code.=<<___; + pcmpeqd %xmm8,%xmm7 + + movdqa 16*0($bp),%xmm8 + movdqa 16*1($bp),%xmm9 + movdqa 16*2($bp),%xmm10 + movdqa 16*3($bp),%xmm11 + pand %xmm0,%xmm8 + movdqa 16*4($bp),%xmm12 + pand %xmm1,%xmm9 + movdqa 16*5($bp),%xmm13 + pand %xmm2,%xmm10 + movdqa 16*6($bp),%xmm14 + pand %xmm3,%xmm11 + movdqa 16*7($bp),%xmm15 + leaq 128($bp), %rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd \$0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 ___ $code.=<<___ if ($addx); movl \$0x80100,%r11d @@ -926,45 +993,38 @@ $code.=<<___ if ($addx); je .Lmulx_gather ___ $code.=<<___; - movl 64($bp,$pwr,4), %eax - movq $out, %xmm0 # off-load arguments - movl ($bp,$pwr,4), %ebx - movq $mod, %xmm1 - movq $n0, 128(%rsp) + movq %xmm8,%rbx + + movq $n0, 128(%rsp) # off-load arguments + movq $out, 128+8(%rsp) + movq $mod, 128+16(%rsp) - shlq \$32, %rax - or %rax, %rbx movq ($ap), %rax movq 8($ap), %rcx - leaq 128($bp,$pwr,4), %rbp mulq %rbx # 0 iteration movq %rax, (%rsp) movq %rcx, %rax movq %rdx, %r8 mulq %rbx - movd (%rbp), %xmm4 addq %rax, %r8 movq 16($ap), %rax movq %rdx, %r9 adcq \$0, %r9 mulq %rbx - movd 64(%rbp), %xmm5 addq %rax, %r9 movq 24($ap), %rax movq %rdx, %r10 adcq \$0, %r10 mulq %rbx - pslldq \$4, %xmm5 addq %rax, %r10 movq 32($ap), %rax movq %rdx, %r11 adcq \$0, %r11 mulq %rbx - por %xmm5, %xmm4 addq %rax, %r11 movq 40($ap), %rax movq %rdx, %r12 @@ -977,14 +1037,12 @@ $code.=<<___; adcq \$0, %r13 mulq %rbx - leaq 128(%rbp), %rbp addq %rax, %r13 movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 mulq %rbx - movq %xmm4, %rbx addq %rax, %r14 movq ($ap), %rax movq %rdx, %r15 @@ -996,6 +1054,35 @@ $code.=<<___; .align 32 .Loop_mul_gather: + movdqa 16*0(%rbp),%xmm8 + movdqa 16*1(%rbp),%xmm9 + movdqa 16*2(%rbp),%xmm10 + movdqa 16*3(%rbp),%xmm11 + pand %xmm0,%xmm8 + movdqa 16*4(%rbp),%xmm12 + pand %xmm1,%xmm9 + movdqa 16*5(%rbp),%xmm13 + pand %xmm2,%xmm10 + movdqa 16*6(%rbp),%xmm14 + pand %xmm3,%xmm11 + movdqa 16*7(%rbp),%xmm15 + leaq 128(%rbp), %rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd \$0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 + movq %xmm8,%rbx + mulq %rbx addq %rax, %r8 movq 8($ap), %rax @@ -1004,7 +1091,6 @@ $code.=<<___; adcq \$0, %r8 mulq %rbx - movd (%rbp), %xmm4 addq %rax, %r9 movq 16($ap), %rax adcq \$0, %rdx @@ -1013,7 +1099,6 @@ $code.=<<___; adcq \$0, %r9 mulq %rbx - movd 64(%rbp), %xmm5 addq %rax, %r10 movq 24($ap), %rax adcq \$0, %rdx @@ -1022,7 +1107,6 @@ $code.=<<___; adcq \$0, %r10 mulq %rbx - pslldq \$4, %xmm5 addq %rax, %r11 movq 32($ap), %rax adcq \$0, %rdx @@ -1031,7 +1115,6 @@ $code.=<<___; adcq \$0, %r11 mulq %rbx - por %xmm5, %xmm4 addq %rax, %r12 movq 40($ap), %rax adcq \$0, %rdx @@ -1056,7 +1139,6 @@ $code.=<<___; adcq \$0, %r14 mulq %rbx - movq %xmm4, %rbx addq %rax, %r15 movq ($ap), %rax adcq \$0, %rdx @@ -1064,7 +1146,6 @@ $code.=<<___; movq %rdx, %r15 adcq \$0, %r15 - leaq 128(%rbp), %rbp leaq 8(%rdi), %rdi decl %ecx @@ -1079,8 +1160,8 @@ $code.=<<___; movq %r14, 48(%rdi) movq %r15, 56(%rdi) - movq %xmm0, $out - movq %xmm1, %rbp + movq 128+8(%rsp), $out + movq 128+16(%rsp), %rbp movq (%rsp), %r8 movq 8(%rsp), %r9 @@ -1098,45 +1179,37 @@ $code.=<<___ if ($addx); .align 32 .Lmulx_gather: - mov 64($bp,$pwr,4), %eax - movq $out, %xmm0 # off-load arguments - lea 128($bp,$pwr,4), %rbp - mov ($bp,$pwr,4), %edx - movq $mod, %xmm1 - mov $n0, 128(%rsp) + movq %xmm8,%rdx + + mov $n0, 128(%rsp) # off-load arguments + mov $out, 128+8(%rsp) + mov $mod, 128+16(%rsp) - shl \$32, %rax - or %rax, %rdx mulx ($ap), %rbx, %r8 # 0 iteration mov %rbx, (%rsp) xor %edi, %edi # cf=0, of=0 mulx 8($ap), %rax, %r9 - movd (%rbp), %xmm4 mulx 16($ap), %rbx, %r10 - movd 64(%rbp), %xmm5 adcx %rax, %r8 mulx 24($ap), %rax, %r11 - pslldq \$4, %xmm5 adcx %rbx, %r9 mulx 32($ap), %rbx, %r12 - por %xmm5, %xmm4 adcx %rax, %r10 mulx 40($ap), %rax, %r13 adcx %rbx, %r11 mulx 48($ap), %rbx, %r14 - lea 128(%rbp), %rbp adcx %rax, %r12 mulx 56($ap), %rax, %r15 - movq %xmm4, %rdx adcx %rbx, %r13 adcx %rax, %r14 + .byte 0x67 mov %r8, %rbx adcx %rdi, %r15 # %rdi is 0 @@ -1145,24 +1218,48 @@ $code.=<<___ if ($addx); .align 32 .Loop_mulx_gather: - mulx ($ap), %rax, %r8 + movdqa 16*0(%rbp),%xmm8 + movdqa 16*1(%rbp),%xmm9 + movdqa 16*2(%rbp),%xmm10 + movdqa 16*3(%rbp),%xmm11 + pand %xmm0,%xmm8 + movdqa 16*4(%rbp),%xmm12 + pand %xmm1,%xmm9 + movdqa 16*5(%rbp),%xmm13 + pand %xmm2,%xmm10 + movdqa 16*6(%rbp),%xmm14 + pand %xmm3,%xmm11 + movdqa 16*7(%rbp),%xmm15 + leaq 128(%rbp), %rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd \$0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 + movq %xmm8,%rdx + + .byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 # mulx ($ap), %rax, %r8 adcx %rax, %rbx adox %r9, %r8 mulx 8($ap), %rax, %r9 - .byte 0x66,0x0f,0x6e,0xa5,0x00,0x00,0x00,0x00 # movd (%rbp), %xmm4 adcx %rax, %r8 adox %r10, %r9 mulx 16($ap), %rax, %r10 - movd 64(%rbp), %xmm5 - lea 128(%rbp), %rbp adcx %rax, %r9 adox %r11, %r10 .byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 # mulx 24($ap), %rax, %r11 - pslldq \$4, %xmm5 - por %xmm5, %xmm4 adcx %rax, %r10 adox %r12, %r11 @@ -1176,10 +1273,10 @@ $code.=<<___ if ($addx); .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 # mulx 48($ap), %rax, %r14 adcx %rax, %r13 + .byte 0x67 adox %r15, %r14 mulx 56($ap), %rax, %r15 - movq %xmm4, %rdx mov %rbx, 64(%rsp,%rcx,8) adcx %rax, %r14 adox %rdi, %r15 @@ -1198,10 +1295,10 @@ $code.=<<___ if ($addx); mov %r14, 64+48(%rsp) mov %r15, 64+56(%rsp) - movq %xmm0, $out - movq %xmm1, %rbp + mov 128(%rsp), %rdx # pull arguments + mov 128+8(%rsp), $out + mov 128+16(%rsp), %rbp - mov 128(%rsp), %rdx # pull $n0 mov (%rsp), %r8 mov 8(%rsp), %r9 mov 16(%rsp), %r10 @@ -1229,6 +1326,21 @@ $code.=<<___; call __rsaz_512_subtract leaq 128+24+48(%rsp), %rax +___ +$code.=<<___ if ($win64); + movaps 0xa0-0xc8(%rax),%xmm6 + movaps 0xb0-0xc8(%rax),%xmm7 + movaps 0xc0-0xc8(%rax),%xmm8 + movaps 0xd0-0xc8(%rax),%xmm9 + movaps 0xe0-0xc8(%rax),%xmm10 + movaps 0xf0-0xc8(%rax),%xmm11 + movaps 0x100-0xc8(%rax),%xmm12 + movaps 0x110-0xc8(%rax),%xmm13 + movaps 0x120-0xc8(%rax),%xmm14 + movaps 0x130-0xc8(%rax),%xmm15 + lea 0xb0(%rax),%rax +___ +$code.=<<___; movq -48(%rax), %r15 movq -40(%rax), %r14 movq -32(%rax), %r13 @@ -1258,7 +1370,7 @@ rsaz_512_mul_scatter4: mov $pwr, $pwr subq \$128+24, %rsp .Lmul_scatter4_body: - leaq ($tbl,$pwr,4), $tbl + leaq ($tbl,$pwr,8), $tbl movq $out, %xmm0 # off-load arguments movq $mod, %xmm1 movq $tbl, %xmm2 @@ -1329,30 +1441,14 @@ $code.=<<___; call __rsaz_512_subtract - movl %r8d, 64*0($inp) # scatter - shrq \$32, %r8 - movl %r9d, 64*2($inp) - shrq \$32, %r9 - movl %r10d, 64*4($inp) - shrq \$32, %r10 - movl %r11d, 64*6($inp) - shrq \$32, %r11 - movl %r12d, 64*8($inp) - shrq \$32, %r12 - movl %r13d, 64*10($inp) - shrq \$32, %r13 - movl %r14d, 64*12($inp) - shrq \$32, %r14 - movl %r15d, 64*14($inp) - shrq \$32, %r15 - movl %r8d, 64*1($inp) - movl %r9d, 64*3($inp) - movl %r10d, 64*5($inp) - movl %r11d, 64*7($inp) - movl %r12d, 64*9($inp) - movl %r13d, 64*11($inp) - movl %r14d, 64*13($inp) - movl %r15d, 64*15($inp) + movq %r8, 128*0($inp) # scatter + movq %r9, 128*1($inp) + movq %r10, 128*2($inp) + movq %r11, 128*3($inp) + movq %r12, 128*4($inp) + movq %r13, 128*5($inp) + movq %r14, 128*6($inp) + movq %r15, 128*7($inp) leaq 128+24+48(%rsp), %rax movq -48(%rax), %r15 @@ -1956,16 +2052,14 @@ $code.=<<___; .type rsaz_512_scatter4,\@abi-omnipotent .align 16 rsaz_512_scatter4: - leaq ($out,$power,4), $out + leaq ($out,$power,8), $out movl \$8, %r9d jmp .Loop_scatter .align 16 .Loop_scatter: movq ($inp), %rax leaq 8($inp), $inp - movl %eax, ($out) - shrq \$32, %rax - movl %eax, 64($out) + movq %rax, ($out) leaq 128($out), $out decl %r9d jnz .Loop_scatter @@ -1976,22 +2070,106 @@ rsaz_512_scatter4: .type rsaz_512_gather4,\@abi-omnipotent .align 16 rsaz_512_gather4: - leaq ($inp,$power,4), $inp +___ +$code.=<<___ if ($win64); +.LSEH_begin_rsaz_512_gather4: + .byte 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 # sub $0xa8,%rsp + .byte 0x0f,0x29,0x34,0x24 # movaps %xmm6,(%rsp) + .byte 0x0f,0x29,0x7c,0x24,0x10 # movaps %xmm7,0x10(%rsp) + .byte 0x44,0x0f,0x29,0x44,0x24,0x20 # movaps %xmm8,0x20(%rsp) + .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 # movaps %xmm9,0x30(%rsp) + .byte 0x44,0x0f,0x29,0x54,0x24,0x40 # movaps %xmm10,0x40(%rsp) + .byte 0x44,0x0f,0x29,0x5c,0x24,0x50 # movaps %xmm11,0x50(%rsp) + .byte 0x44,0x0f,0x29,0x64,0x24,0x60 # movaps %xmm12,0x60(%rsp) + .byte 0x44,0x0f,0x29,0x6c,0x24,0x70 # movaps %xmm13,0x70(%rsp) + .byte 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 # movaps %xmm14,0x80(%rsp) + .byte 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 # movaps %xmm15,0x90(%rsp) +___ +$code.=<<___; + movd $power,%xmm8 + movdqa .Linc+16(%rip),%xmm1 # 00000002000000020000000200000002 + movdqa .Linc(%rip),%xmm0 # 00000001000000010000000000000000 + + pshufd \$0,%xmm8,%xmm8 # broadcast $power + movdqa %xmm1,%xmm7 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..15 to $power +# +for($i=0;$i<4;$i++) { +$code.=<<___; + paddd %xmm`$i`,%xmm`$i+1` + pcmpeqd %xmm8,%xmm`$i` + movdqa %xmm7,%xmm`$i+3` +___ +} +for(;$i<7;$i++) { +$code.=<<___; + paddd %xmm`$i`,%xmm`$i+1` + pcmpeqd %xmm8,%xmm`$i` +___ +} +$code.=<<___; + pcmpeqd %xmm8,%xmm7 movl \$8, %r9d jmp .Loop_gather .align 16 .Loop_gather: - movl ($inp), %eax - movl 64($inp), %r8d + movdqa 16*0($inp),%xmm8 + movdqa 16*1($inp),%xmm9 + movdqa 16*2($inp),%xmm10 + movdqa 16*3($inp),%xmm11 + pand %xmm0,%xmm8 + movdqa 16*4($inp),%xmm12 + pand %xmm1,%xmm9 + movdqa 16*5($inp),%xmm13 + pand %xmm2,%xmm10 + movdqa 16*6($inp),%xmm14 + pand %xmm3,%xmm11 + movdqa 16*7($inp),%xmm15 leaq 128($inp), $inp - shlq \$32, %r8 - or %r8, %rax - movq %rax, ($out) + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd \$0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 + movq %xmm8,($out) leaq 8($out), $out decl %r9d jnz .Loop_gather +___ +$code.=<<___ if ($win64); + movaps 0x00(%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + movaps 0x40(%rsp),%xmm10 + movaps 0x50(%rsp),%xmm11 + movaps 0x60(%rsp),%xmm12 + movaps 0x70(%rsp),%xmm13 + movaps 0x80(%rsp),%xmm14 + movaps 0x90(%rsp),%xmm15 + add \$0xa8,%rsp +___ +$code.=<<___; ret +.LSEH_end_rsaz_512_gather4: .size rsaz_512_gather4,.-rsaz_512_gather4 + +.align 64 +.Linc: + .long 0,0, 1,1 + .long 2,2, 2,2 ___ } @@ -2039,6 +2217,18 @@ se_handler: lea 128+24+48(%rax),%rax + lea .Lmul_gather4_epilogue(%rip),%rbx + cmp %r10,%rbx + jne .Lse_not_in_mul_gather4 + + lea 0xb0(%rax),%rax + + lea -48-0xa8(%rax),%rsi + lea 512($context),%rdi + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lse_not_in_mul_gather4: mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 @@ -2090,7 +2280,7 @@ se_handler: pop %rdi pop %rsi ret -.size sqr_handler,.-sqr_handler +.size se_handler,.-se_handler .section .pdata .align 4 @@ -2114,6 +2304,10 @@ se_handler: .rva .LSEH_end_rsaz_512_mul_by_one .rva .LSEH_info_rsaz_512_mul_by_one + .rva .LSEH_begin_rsaz_512_gather4 + .rva .LSEH_end_rsaz_512_gather4 + .rva .LSEH_info_rsaz_512_gather4 + .section .xdata .align 8 .LSEH_info_rsaz_512_sqr: @@ -2136,6 +2330,19 @@ se_handler: .byte 9,0,0,0 .rva se_handler .rva .Lmul_by_one_body,.Lmul_by_one_epilogue # HandlerData[] +.LSEH_info_rsaz_512_gather4: + .byte 0x01,0x46,0x16,0x00 + .byte 0x46,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15 + .byte 0x3d,0xe8,0x08,0x00 # vmovaps 0x80(rsp),xmm14 + .byte 0x34,0xd8,0x07,0x00 # vmovaps 0x70(rsp),xmm13 + .byte 0x2e,0xc8,0x06,0x00 # vmovaps 0x60(rsp),xmm12 + .byte 0x28,0xb8,0x05,0x00 # vmovaps 0x50(rsp),xmm11 + .byte 0x22,0xa8,0x04,0x00 # vmovaps 0x40(rsp),xmm10 + .byte 0x1c,0x98,0x03,0x00 # vmovaps 0x30(rsp),xmm9 + .byte 0x16,0x88,0x02,0x00 # vmovaps 0x20(rsp),xmm8 + .byte 0x10,0x78,0x01,0x00 # vmovaps 0x10(rsp),xmm7 + .byte 0x0b,0x68,0x00,0x00 # vmovaps 0x00(rsp),xmm6 + .byte 0x07,0x01,0x15,0x00 # sub rsp,0xa8 ___ } diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index e82e451..29ba122 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -775,100 +775,126 @@ bn_sqr8x_mont: # 4096. this is done to allow memory disambiguation logic # do its job. # - lea -64(%rsp,$num,4),%r11 + lea -64(%rsp,$num,2),%r11 mov ($n0),$n0 # *n0 sub $aptr,%r11 and \$4095,%r11 cmp %r11,%r10 jb .Lsqr8x_sp_alt sub %r11,%rsp # align with $aptr - lea -64(%rsp,$num,4),%rsp # alloca(frame+4*$num) + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) jmp .Lsqr8x_sp_done .align 32 .Lsqr8x_sp_alt: - lea 4096-64(,$num,4),%r10 # 4096-frame-4*$num - lea -64(%rsp,$num,4),%rsp # alloca(frame+4*$num) + lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) sub %r10,%r11 mov \$0,%r10 cmovc %r10,%r11 sub %r11,%rsp .Lsqr8x_sp_done: and \$-64,%rsp - mov $num,%r10 + mov $num,%r10 neg $num - lea 64(%rsp,$num,2),%r11 # copy of modulus mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp .Lsqr8x_body: - mov $num,$i - movq %r11, %xmm2 # save pointer to modulus copy - shr \$3+2,$i - mov OPENSSL_ia32cap_P+8(%rip),%eax - jmp .Lsqr8x_copy_n - -.align 32 -.Lsqr8x_copy_n: - movq 8*0($nptr),%xmm0 - movq 8*1($nptr),%xmm1 - movq 8*2($nptr),%xmm3 - movq 8*3($nptr),%xmm4 - lea 8*4($nptr),$nptr - movdqa %xmm0,16*0(%r11) - movdqa %xmm1,16*1(%r11) - movdqa %xmm3,16*2(%r11) - movdqa %xmm4,16*3(%r11) - lea 16*4(%r11),%r11 - dec $i - jnz .Lsqr8x_copy_n - + movq $nptr, %xmm2 # save pointer to modulus pxor %xmm0,%xmm0 movq $rptr,%xmm1 # save $rptr movq %r10, %xmm3 # -$num ___ $code.=<<___ if ($addx); + mov OPENSSL_ia32cap_P+8(%rip),%eax and \$0x80100,%eax cmp \$0x80100,%eax jne .Lsqr8x_nox call bn_sqrx8x_internal # see x86_64-mont5 module - - pxor %xmm0,%xmm0 - lea 48(%rsp),%rax - lea 64(%rsp,$num,2),%rdx - shr \$3+2,$num - mov 40(%rsp),%rsi # restore %rsp - jmp .Lsqr8x_zero + # %rax top-most carry + # %rbp nptr + # %rcx -8*num + # %r8 end of tp[2*num] + lea (%r8,%rcx),%rbx + mov %rcx,$num + mov %rcx,%rdx + movq %xmm1,$rptr + sar \$3+2,%rcx # %cf=0 + jmp .Lsqr8x_sub .align 32 .Lsqr8x_nox: ___ $code.=<<___; call bn_sqr8x_internal # see x86_64-mont5 module + # %rax top-most carry + # %rbp nptr + # %r8 -8*num + # %rdi end of tp[2*num] + lea (%rdi,$num),%rbx + mov $num,%rcx + mov $num,%rdx + movq %xmm1,$rptr + sar \$3+2,%rcx # %cf=0 + jmp .Lsqr8x_sub +.align 32 +.Lsqr8x_sub: + mov 8*0(%rbx),%r12 + mov 8*1(%rbx),%r13 + mov 8*2(%rbx),%r14 + mov 8*3(%rbx),%r15 + lea 8*4(%rbx),%rbx + sbb 8*0(%rbp),%r12 + sbb 8*1(%rbp),%r13 + sbb 8*2(%rbp),%r14 + sbb 8*3(%rbp),%r15 + lea 8*4(%rbp),%rbp + mov %r12,8*0($rptr) + mov %r13,8*1($rptr) + mov %r14,8*2($rptr) + mov %r15,8*3($rptr) + lea 8*4($rptr),$rptr + inc %rcx # preserves %cf + jnz .Lsqr8x_sub + + sbb \$0,%rax # top-most carry + lea (%rbx,$num),%rbx # rewind + lea ($rptr,$num),$rptr # rewind + + movq %rax,%xmm1 pxor %xmm0,%xmm0 - lea 48(%rsp),%rax - lea 64(%rsp,$num,2),%rdx - shr \$3+2,$num + pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp - jmp .Lsqr8x_zero + jmp .Lsqr8x_cond_copy .align 32 -.Lsqr8x_zero: - movdqa %xmm0,16*0(%rax) # wipe t - movdqa %xmm0,16*1(%rax) - movdqa %xmm0,16*2(%rax) - movdqa %xmm0,16*3(%rax) - lea 16*4(%rax),%rax - movdqa %xmm0,16*0(%rdx) # wipe n - movdqa %xmm0,16*1(%rdx) - movdqa %xmm0,16*2(%rdx) - movdqa %xmm0,16*3(%rdx) - lea 16*4(%rdx),%rdx - dec $num - jnz .Lsqr8x_zero +.Lsqr8x_cond_copy: + movdqa 16*0(%rbx),%xmm2 + movdqa 16*1(%rbx),%xmm3 + lea 16*2(%rbx),%rbx + movdqu 16*0($rptr),%xmm4 + movdqu 16*1($rptr),%xmm5 + lea 16*2($rptr),$rptr + movdqa %xmm0,-16*2(%rbx) # zero tp + movdqa %xmm0,-16*1(%rbx) + movdqa %xmm0,-16*2(%rbx,%rdx) + movdqa %xmm0,-16*1(%rbx,%rdx) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-16*2($rptr) + movdqu %xmm5,-16*1($rptr) + add \$32,$num + jnz .Lsqr8x_cond_copy mov \$1,%rax mov -48(%rsi),%r15 @@ -1135,64 +1161,75 @@ $code.=<<___; adc $zero,%r15 # modulo-scheduled sub 0*8($tptr),$zero # pull top-most carry adc %r15,%r14 - mov -8($nptr),$mi sbb %r15,%r15 # top-most carry mov %r14,-1*8($tptr) cmp 16(%rsp),$bptr jne .Lmulx4x_outer - sub %r14,$mi # compare top-most words - sbb $mi,$mi - or $mi,%r15 - - neg $num - xor %rdx,%rdx + lea 64(%rsp),$tptr + sub $num,$nptr # rewind $nptr + neg %r15 + mov $num,%rdx + shr \$3+2,$num # %cf=0 mov 32(%rsp),$rptr # restore rp + jmp .Lmulx4x_sub + +.align 32 +.Lmulx4x_sub: + mov 8*0($tptr),%r11 + mov 8*1($tptr),%r12 + mov 8*2($tptr),%r13 + mov 8*3($tptr),%r14 + lea 8*4($tptr),$tptr + sbb 8*0($nptr),%r11 + sbb 8*1($nptr),%r12 + sbb 8*2($nptr),%r13 + sbb 8*3($nptr),%r14 + lea 8*4($nptr),$nptr + mov %r11,8*0($rptr) + mov %r12,8*1($rptr) + mov %r13,8*2($rptr) + mov %r14,8*3($rptr) + lea 8*4($rptr),$rptr + dec $num # preserves %cf + jnz .Lmulx4x_sub + + sbb \$0,%r15 # top-most carry lea 64(%rsp),$tptr + sub %rdx,$rptr # rewind + movq %r15,%xmm1 pxor %xmm0,%xmm0 - mov 0*8($nptr,$num),%r8 - mov 1*8($nptr,$num),%r9 - neg %r8 - jmp .Lmulx4x_sub_entry + pshufd \$0,%xmm1,%xmm1 + mov 40(%rsp),%rsi # restore %rsp + jmp .Lmulx4x_cond_copy .align 32 -.Lmulx4x_sub: - mov 0*8($nptr,$num),%r8 - mov 1*8($nptr,$num),%r9 - not %r8 -.Lmulx4x_sub_entry: - mov 2*8($nptr,$num),%r10 - not %r9 - and %r15,%r8 - mov 3*8($nptr,$num),%r11 - not %r10 - and %r15,%r9 - not %r11 - and %r15,%r10 - and %r15,%r11 - - neg %rdx # mov %rdx,%cf - adc 0*8($tptr),%r8 - adc 1*8($tptr),%r9 - movdqa %xmm0,($tptr) - adc 2*8($tptr),%r10 - adc 3*8($tptr),%r11 - movdqa %xmm0,16($tptr) - lea 4*8($tptr),$tptr - sbb %rdx,%rdx # mov %cf,%rdx +.Lmulx4x_cond_copy: + movdqa 16*0($tptr),%xmm2 + movdqa 16*1($tptr),%xmm3 + lea 16*2($tptr),$tptr + movdqu 16*0($rptr),%xmm4 + movdqu 16*1($rptr),%xmm5 + lea 16*2($rptr),$rptr + movdqa %xmm0,-16*2($tptr) # zero tp + movdqa %xmm0,-16*1($tptr) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-16*2($rptr) + movdqu %xmm5,-16*1($rptr) + sub \$32,%rdx + jnz .Lmulx4x_cond_copy - mov %r8,0*8($rptr) - mov %r9,1*8($rptr) - mov %r10,2*8($rptr) - mov %r11,3*8($rptr) - lea 4*8($rptr),$rptr + mov %rdx,($tptr) - add \$32,$num - jnz .Lmulx4x_sub - - mov 40(%rsp),%rsi # restore %rsp mov \$1,%rax mov -48(%rsi),%r15 mov -40(%rsi),%r14 diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 292409c..2e8c9db 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -99,58 +99,111 @@ $code.=<<___; .Lmul_enter: mov ${num}d,${num}d mov %rsp,%rax - mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument + lea .Linc(%rip),%r10 push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) -___ -$code.=<<___; + lea 2($num),%r11 neg %r11 - lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)) + lea -264(%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)+256+8) and \$-1024,%rsp # minimize TLB usage mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul_body: - mov $bp,%r12 # reassign $bp + lea 128($bp),%r12 # reassign $bp (+size optimization) ___ $bp="%r12"; $STRIDE=2**5*8; # 5 is "window size" $N=$STRIDE/4; # should match cache line size $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bp # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 + lea 24-112(%rsp,$num,8),%r10# place the mask after tp[num+3] (+ICache optimization) + and \$-16,%r10 + + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 + .byte 0x67 + movdqa %xmm4,%xmm3 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($k+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($k+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($k+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($k+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($k+0)+112`(%r10) + + paddd %xmm2,%xmm3 + .byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($k+1)+112`(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($k+2)+112`(%r10) + pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register + + pand `16*($k+1)-128`($bp),%xmm1 + pand `16*($k+2)-128`($bp),%xmm2 + movdqa %xmm3,`16*($k+3)+112`(%r10) + pand `16*($k+3)-128`($bp),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm4 + movdqa `16*($k+1)-128`($bp),%xmm5 + movdqa `16*($k+2)-128`($bp),%xmm2 + pand `16*($k+0)+112`(%r10),%xmm4 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($k+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($k+3)+112`(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + por %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - movq %xmm0,$m0 # m0=bp[0] mov ($n0),$n0 # pull n0[0] value @@ -159,29 +212,14 @@ $code.=<<___; xor $i,$i # i=0 xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$lo0 mov ($np),%rax - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $lo0,$m1 # "tp[0]"*n0 mov %rdx,$hi0 - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$lo0 # discarded mov 8($ap),%rax @@ -212,16 +250,14 @@ $code.=<<___; mulq $m1 # np[j]*m1 cmp $num,$j - jne .L1st - - movq %xmm0,$m0 # bp[1] + jne .L1st # note that upon exit $j==$num, so + # they can be used interchangeably add %rax,$hi1 - mov ($ap),%rax # ap[0] adc \$0,%rdx add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov $hi1,-16(%rsp,$num,8) # tp[num-1] mov %rdx,$hi1 mov $lo0,$hi0 @@ -235,33 +271,48 @@ $code.=<<___; jmp .Louter .align 16 .Louter: + lea 24+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) + and \$-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($k=0;$k<$STRIDE/16;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm0 + movdqa `16*($k+1)-128`($bp),%xmm1 + movdqa `16*($k+2)-128`($bp),%xmm2 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+0)-128`(%rdx),%xmm0 + pand `16*($k+1)-128`(%rdx),%xmm1 + por %xmm0,%xmm4 + pand `16*($k+2)-128`(%rdx),%xmm2 + por %xmm1,%xmm5 + pand `16*($k+3)-128`(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bp),$bp + + mov ($ap),%rax # ap[0] + movq %xmm0,$m0 # m0=bp[i] + xor $j,$j # j=0 mov $n0,$m1 mov (%rsp),$lo0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mulq $m0 # ap[0]*bp[i] add %rax,$lo0 # ap[0]*bp[i]+tp[0] mov ($np),%rax adc \$0,%rdx - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $lo0,$m1 # tp[0]*n0 mov %rdx,$hi0 - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$lo0 # discarded mov 8($ap),%rax @@ -295,17 +346,14 @@ $code.=<<___; mulq $m1 # np[j]*m1 cmp $num,$j - jne .Linner - - movq %xmm0,$m0 # bp[i+1] - + jne .Linner # note that upon exit $j==$num, so + # they can be used interchangeably add %rax,$hi1 - mov ($ap),%rax # ap[0] adc \$0,%rdx add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] - mov (%rsp,$j,8),$lo0 + mov (%rsp,$num,8),$lo0 adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov $hi1,-16(%rsp,$num,8) # tp[num-1] mov %rdx,$hi1 xor %rdx,%rdx @@ -352,12 +400,7 @@ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps -88(%rsi),%xmm6 - movaps -72(%rsi),%xmm7 -___ -$code.=<<___; + mov -48(%rsi),%r15 mov -40(%rsi),%r14 mov -32(%rsi),%r13 @@ -379,8 +422,8 @@ bn_mul4x_mont_gather5: .Lmul4x_enter: ___ $code.=<<___ if ($addx); - and \$0x80100,%r11d - cmp \$0x80100,%r11d + and \$0x80108,%r11d + cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 je .Lmulx4x_enter ___ $code.=<<___; @@ -392,39 +435,34 @@ $code.=<<___; push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) -___ -$code.=<<___; + .byte 0x67 - mov ${num}d,%r10d - shl \$3,${num}d - shl \$3+2,%r10d # 4*$num + shl \$3,${num}d # convert $num to bytes + lea ($num,$num,2),%r10 # 3*$num in bytes neg $num # -$num ############################################################## - # ensure that stack frame doesn't alias with $aptr+4*$num - # modulo 4096, which covers ret[num], am[num] and n[2*num] - # (see bn_exp.c). this is done to allow memory disambiguation - # logic do its magic. [excessive frame is allocated in order - # to allow bn_from_mont8x to clear it.] + # Ensure that stack frame doesn't alias with $rptr+3*$num + # modulo 4096, which covers ret[num], am[num] and n[num] + # (see bn_exp.c). This is done to allow memory disambiguation + # logic do its magic. [Extra [num] is allocated in order + # to align with bn_power5's frame, which is cleansed after + # completing exponentiation. Extra 256 bytes is for power mask + # calculated from 7th argument, the index.] # - lea -64(%rsp,$num,2),%r11 - sub $ap,%r11 + lea -320(%rsp,$num,2),%r11 + sub $rp,%r11 and \$4095,%r11 cmp %r11,%r10 jb .Lmul4xsp_alt - sub %r11,%rsp # align with $ap - lea -64(%rsp,$num,2),%rsp # alloca(128+num*8) + sub %r11,%rsp # align with $rp + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256) jmp .Lmul4xsp_done .align 32 .Lmul4xsp_alt: - lea 4096-64(,$num,2),%r10 - lea -64(%rsp,$num,2),%rsp # alloca(128+num*8) + lea 4096-320(,$num,2),%r10 + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256) sub %r10,%r11 mov \$0,%r10 cmovc %r10,%r11 @@ -440,12 +478,7 @@ $code.=<<___; mov 40(%rsp),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps -88(%rsi),%xmm6 - movaps -72(%rsi),%xmm7 -___ -$code.=<<___; + mov -48(%rsi),%r15 mov -40(%rsi),%r14 mov -32(%rsi),%r13 @@ -460,9 +493,10 @@ $code.=<<___; .type mul4x_internal,\@abi-omnipotent .align 32 mul4x_internal: - shl \$5,$num - mov `($win64?56:8)`(%rax),%r10d # load 7th argument - lea 256(%rdx,$num),%r13 + shl \$5,$num # $num was in bytes + movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument, index + lea .Linc(%rip),%rax + lea 128(%rdx,$num),%r13 # end of powers table (+size optimization) shr \$5,$num # restore $num ___ $bp="%r12"; @@ -470,44 +504,92 @@ ___ $N=$STRIDE/4; # should match cache line size $tp=$i; $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96(%rdx,%r11,8),$bp # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - add \$7,%r11 - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - and \$7,%r11 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - lea $STRIDE($bp),$tp # borrow $tp - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - .byte 0x67 - por %xmm1,%xmm0 - movq `0*$STRIDE/4-96`($tp),%xmm1 - .byte 0x67 - pand %xmm7,%xmm3 - .byte 0x67 - por %xmm2,%xmm0 - movq `1*$STRIDE/4-96`($tp),%xmm2 + movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 + lea 88-112(%rsp,$num),%r10 # place the mask after tp[num+1] (+ICache optimization) + lea 128(%rdx),$bp # size optimization + + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + .byte 0x67,0x67 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 .byte 0x67 - pand %xmm4,%xmm1 + movdqa %xmm4,%xmm3 +___ +for($i=0;$i<$STRIDE/16-4;$i+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($i+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($i+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($i+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($i+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($i+0)+112`(%r10) + + paddd %xmm2,%xmm3 .byte 0x67 - por %xmm3,%xmm0 - movq `2*$STRIDE/4-96`($tp),%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($i+1)+112`(%r10) + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($i+2)+112`(%r10) + pand `16*($i+0)-128`($bp),%xmm0 # while it's still in register + + pand `16*($i+1)-128`($bp),%xmm1 + pand `16*($i+2)-128`($bp),%xmm2 + movdqa %xmm3,`16*($i+3)+112`(%r10) + pand `16*($i+3)-128`($bp),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($i=0;$i<$STRIDE/16-4;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`($bp),%xmm4 + movdqa `16*($i+1)-128`($bp),%xmm5 + movdqa `16*($i+2)-128`($bp),%xmm2 + pand `16*($i+0)+112`(%r10),%xmm4 + movdqa `16*($i+3)-128`($bp),%xmm3 + pand `16*($i+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($i+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($i+3)+112`(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + por %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + lea $STRIDE($bp),$bp movq %xmm0,$m0 # m0=bp[0] - movq `3*$STRIDE/4-96`($tp),%xmm0 + mov %r13,16+8(%rsp) # save end of b[num] mov $rp, 56+8(%rsp) # save $rp @@ -521,26 +603,10 @@ $code.=<<___; mov %rax,$A[0] mov ($np),%rax - pand %xmm5,%xmm2 - pand %xmm6,%xmm3 - por %xmm2,%xmm1 - imulq $A[0],$m1 # "tp[0]"*n0 - ############################################################## - # $tp is chosen so that writing to top-most element of the - # vector occurs just "above" references to powers table, - # "above" modulo cache-line size, which effectively precludes - # possibility of memory disambiguation logic failure when - # accessing the table. - # - lea 64+8(%rsp,%r11,8),$tp + lea 64+8(%rsp),$tp mov %rdx,$A[1] - pand %xmm7,%xmm0 - por %xmm3,%xmm1 - lea 2*$STRIDE($bp),$bp - por %xmm1,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$A[0] # discarded mov 8($ap,$num),%rax @@ -549,7 +615,7 @@ $code.=<<___; mulq $m0 add %rax,$A[1] - mov 16*1($np),%rax # interleaved with 0, therefore 16*n + mov 8*1($np),%rax adc \$0,%rdx mov %rdx,$A[0] @@ -559,7 +625,7 @@ $code.=<<___; adc \$0,%rdx add $A[1],$N[1] lea 4*8($num),$j # j=4 - lea 16*4($np),$np + lea 8*4($np),$np adc \$0,%rdx mov $N[1],($tp) mov %rdx,$N[0] @@ -569,7 +635,7 @@ $code.=<<___; .L1st4x: mulq $m0 # ap[j]*bp[0] add %rax,$A[0] - mov -16*2($np),%rax + mov -8*2($np),%rax lea 32($tp),$tp adc \$0,%rdx mov %rdx,$A[1] @@ -585,7 +651,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[0] add %rax,$A[1] - mov -16*1($np),%rax + mov -8*1($np),%rax adc \$0,%rdx mov %rdx,$A[0] @@ -600,7 +666,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[0] add %rax,$A[0] - mov 16*0($np),%rax + mov 8*0($np),%rax adc \$0,%rdx mov %rdx,$A[1] @@ -615,7 +681,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[0] add %rax,$A[1] - mov 16*1($np),%rax + mov 8*1($np),%rax adc \$0,%rdx mov %rdx,$A[0] @@ -624,7 +690,7 @@ $code.=<<___; mov 16($ap,$j),%rax adc \$0,%rdx add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - lea 16*4($np),$np + lea 8*4($np),$np adc \$0,%rdx mov $N[1],($tp) # tp[j-1] mov %rdx,$N[0] @@ -634,7 +700,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[0] add %rax,$A[0] - mov -16*2($np),%rax + mov -8*2($np),%rax lea 32($tp),$tp adc \$0,%rdx mov %rdx,$A[1] @@ -650,7 +716,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[0] add %rax,$A[1] - mov -16*1($np),%rax + mov -8*1($np),%rax adc \$0,%rdx mov %rdx,$A[0] @@ -663,8 +729,7 @@ $code.=<<___; mov $N[1],-16($tp) # tp[j-1] mov %rdx,$N[0] - movq %xmm0,$m0 # bp[1] - lea ($np,$num,2),$np # rewind $np + lea ($np,$num),$np # rewind $np xor $N[1],$N[1] add $A[0],$N[0] @@ -675,6 +740,33 @@ $code.=<<___; .align 32 .Louter4x: + lea 16+128($tp),%rdx # where 256-byte mask is (+size optimization) + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`($bp),%xmm0 + movdqa `16*($i+1)-128`($bp),%xmm1 + movdqa `16*($i+2)-128`($bp),%xmm2 + movdqa `16*($i+3)-128`($bp),%xmm3 + pand `16*($i+0)-128`(%rdx),%xmm0 + pand `16*($i+1)-128`(%rdx),%xmm1 + por %xmm0,%xmm4 + pand `16*($i+2)-128`(%rdx),%xmm2 + por %xmm1,%xmm5 + pand `16*($i+3)-128`(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bp),$bp + movq %xmm0,$m0 # m0=bp[i] + mov ($tp,$num),$A[0] mov $n0,$m1 mulq $m0 # ap[0]*bp[i] @@ -682,25 +774,11 @@ $code.=<<___; mov ($np),%rax adc \$0,%rdx - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - imulq $A[0],$m1 # tp[0]*n0 - .byte 0x67 mov %rdx,$A[1] mov $N[1],($tp) # store upmost overflow bit - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - por %xmm2,%xmm0 lea ($tp,$num),$tp # rewind $tp - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 mulq $m1 # np[0]*m1 add %rax,$A[0] # "$N[0]", discarded @@ -710,7 +788,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov 16*1($np),%rax # interleaved with 0, therefore 16*n + mov 8*1($np),%rax adc \$0,%rdx add 8($tp),$A[1] # +tp[1] adc \$0,%rdx @@ -722,7 +800,7 @@ $code.=<<___; adc \$0,%rdx add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j] lea 4*8($num),$j # j=4 - lea 16*4($np),$np + lea 8*4($np),$np adc \$0,%rdx mov %rdx,$N[0] jmp .Linner4x @@ -731,7 +809,7 @@ $code.=<<___; .Linner4x: mulq $m0 # ap[j]*bp[i] add %rax,$A[0] - mov -16*2($np),%rax + mov -8*2($np),%rax adc \$0,%rdx add 16($tp),$A[0] # ap[j]*bp[i]+tp[j] lea 32($tp),$tp @@ -749,7 +827,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov -16*1($np),%rax + mov -8*1($np),%rax adc \$0,%rdx add -8($tp),$A[1] adc \$0,%rdx @@ -766,7 +844,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[i] add %rax,$A[0] - mov 16*0($np),%rax + mov 8*0($np),%rax adc \$0,%rdx add ($tp),$A[0] # ap[j]*bp[i]+tp[j] adc \$0,%rdx @@ -783,7 +861,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov 16*1($np),%rax + mov 8*1($np),%rax adc \$0,%rdx add 8($tp),$A[1] adc \$0,%rdx @@ -794,7 +872,7 @@ $code.=<<___; mov 16($ap,$j),%rax adc \$0,%rdx add $A[1],$N[1] - lea 16*4($np),$np + lea 8*4($np),$np adc \$0,%rdx mov $N[0],-8($tp) # tp[j-1] mov %rdx,$N[0] @@ -804,7 +882,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[i] add %rax,$A[0] - mov -16*2($np),%rax + mov -8*2($np),%rax adc \$0,%rdx add 16($tp),$A[0] # ap[j]*bp[i]+tp[j] lea 32($tp),$tp @@ -823,7 +901,7 @@ $code.=<<___; mulq $m0 # ap[j]*bp[i] add %rax,$A[1] mov $m1,%rax - mov -16*1($np),$m1 + mov -8*1($np),$m1 adc \$0,%rdx add -8($tp),$A[1] adc \$0,%rdx @@ -838,9 +916,8 @@ $code.=<<___; mov $N[0],-24($tp) # tp[j-1] mov %rdx,$N[0] - movq %xmm0,$m0 # bp[i+1] mov $N[1],-16($tp) # tp[j-1] - lea ($np,$num,2),$np # rewind $np + lea ($np,$num),$np # rewind $np xor $N[1],$N[1] add $A[0],$N[0] @@ -854,16 +931,23 @@ $code.=<<___; ___ if (1) { $code.=<<___; + xor %rax,%rax sub $N[0],$m1 # compare top-most words adc $j,$j # $j is zero or $j,$N[1] - xor \$1,$N[1] + sub $N[1],%rax # %rax=-$N[1] lea ($tp,$num),%rbx # tptr in .sqr4x_sub - lea ($np,$N[1],8),%rbp # nptr in .sqr4x_sub + mov ($np),%r12 + lea ($np),%rbp # nptr in .sqr4x_sub mov %r9,%rcx - sar \$3+2,%rcx # cf=0 + sar \$3+2,%rcx mov 56+8(%rsp),%rdi # rptr in .sqr4x_sub - jmp .Lsqr4x_sub + dec %r12 # so that after 'not' we get -n[0] + xor %r10,%r10 + mov 8*1(%rbp),%r13 + mov 8*2(%rbp),%r14 + mov 8*3(%rbp),%r15 + jmp .Lsqr4x_sub_entry ___ } else { my @ri=("%rax",$bp,$m0,$m1); @@ -930,8 +1014,8 @@ bn_power5: ___ $code.=<<___ if ($addx); mov OPENSSL_ia32cap_P+8(%rip),%r11d - and \$0x80100,%r11d - cmp \$0x80100,%r11d + and \$0x80108,%r11d + cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 je .Lpowerx5_enter ___ $code.=<<___; @@ -942,38 +1026,32 @@ $code.=<<___; push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) -___ -$code.=<<___; - mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes - shl \$3+2,%r10d # 4*$num + lea ($num,$num,2),%r10d # 3*$num neg $num mov ($n0),$n0 # *n0 ############################################################## - # ensure that stack frame doesn't alias with $aptr+4*$num - # modulo 4096, which covers ret[num], am[num] and n[2*num] - # (see bn_exp.c). this is done to allow memory disambiguation - # logic do its magic. + # Ensure that stack frame doesn't alias with $rptr+3*$num + # modulo 4096, which covers ret[num], am[num] and n[num] + # (see bn_exp.c). This is done to allow memory disambiguation + # logic do its magic. [Extra 256 bytes is for power mask + # calculated from 7th argument, the index.] # - lea -64(%rsp,$num,2),%r11 - sub $aptr,%r11 + lea -320(%rsp,$num,2),%r11 + sub $rptr,%r11 and \$4095,%r11 cmp %r11,%r10 jb .Lpwr_sp_alt sub %r11,%rsp # align with $aptr - lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256) jmp .Lpwr_sp_done .align 32 .Lpwr_sp_alt: - lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num - lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + lea 4096-320(,$num,2),%r10 + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256) sub %r10,%r11 mov \$0,%r10 cmovc %r10,%r11 @@ -995,16 +1073,21 @@ $code.=<<___; mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp .Lpower5_body: - movq $rptr,%xmm1 # save $rptr + movq $rptr,%xmm1 # save $rptr, used in sqr8x movq $nptr,%xmm2 # save $nptr - movq %r10, %xmm3 # -$num + movq %r10, %xmm3 # -$num, used in sqr8x movq $bptr,%xmm4 call __bn_sqr8x_internal + call __bn_post4x_internal call __bn_sqr8x_internal + call __bn_post4x_internal call __bn_sqr8x_internal + call __bn_post4x_internal call __bn_sqr8x_internal + call __bn_post4x_internal call __bn_sqr8x_internal + call __bn_post4x_internal movq %xmm2,$nptr movq %xmm4,$bptr @@ -1565,9 +1648,9 @@ my ($nptr,$tptr,$carry,$m0)=("%rbp","%rdi","%rsi","%rbx"); $code.=<<___; movq %xmm2,$nptr -sqr8x_reduction: +__bn_sqr8x_reduction: xor %rax,%rax - lea ($nptr,$num,2),%rcx # end of n[] + lea ($nptr,$num),%rcx # end of n[] lea 48+8(%rsp,$num,2),%rdx # end of t[] buffer mov %rcx,0+8(%rsp) lea 48+8(%rsp,$num),$tptr # end of initial t[] window @@ -1593,21 +1676,21 @@ sqr8x_reduction: .byte 0x67 mov $m0,%r8 imulq 32+8(%rsp),$m0 # n0*a[0] - mov 16*0($nptr),%rax # n[0] + mov 8*0($nptr),%rax # n[0] mov \$8,%ecx jmp .L8x_reduce .align 32 .L8x_reduce: mulq $m0 - mov 16*1($nptr),%rax # n[1] + mov 8*1($nptr),%rax # n[1] neg %r8 mov %rdx,%r8 adc \$0,%r8 mulq $m0 add %rax,%r9 - mov 16*2($nptr),%rax + mov 8*2($nptr),%rax adc \$0,%rdx add %r9,%r8 mov $m0,48-8+8(%rsp,%rcx,8) # put aside n0*a[i] @@ -1616,7 +1699,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r10 - mov 16*3($nptr),%rax + mov 8*3($nptr),%rax adc \$0,%rdx add %r10,%r9 mov 32+8(%rsp),$carry # pull n0, borrow $carry @@ -1625,7 +1708,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r11 - mov 16*4($nptr),%rax + mov 8*4($nptr),%rax adc \$0,%rdx imulq %r8,$carry # modulo-scheduled add %r11,%r10 @@ -1634,7 +1717,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r12 - mov 16*5($nptr),%rax + mov 8*5($nptr),%rax adc \$0,%rdx add %r12,%r11 mov %rdx,%r12 @@ -1642,7 +1725,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r13 - mov 16*6($nptr),%rax + mov 8*6($nptr),%rax adc \$0,%rdx add %r13,%r12 mov %rdx,%r13 @@ -1650,7 +1733,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r14 - mov 16*7($nptr),%rax + mov 8*7($nptr),%rax adc \$0,%rdx add %r14,%r13 mov %rdx,%r14 @@ -1659,7 +1742,7 @@ sqr8x_reduction: mulq $m0 mov $carry,$m0 # n0*a[i] add %rax,%r15 - mov 16*0($nptr),%rax # n[0] + mov 8*0($nptr),%rax # n[0] adc \$0,%rdx add %r15,%r14 mov %rdx,%r15 @@ -1668,7 +1751,7 @@ sqr8x_reduction: dec %ecx jnz .L8x_reduce - lea 16*8($nptr),$nptr + lea 8*8($nptr),$nptr xor %rax,%rax mov 8+8(%rsp),%rdx # pull end of t[] cmp 0+8(%rsp),$nptr # end of n[]? @@ -1687,21 +1770,21 @@ sqr8x_reduction: mov 48+56+8(%rsp),$m0 # pull n0*a[0] mov \$8,%ecx - mov 16*0($nptr),%rax + mov 8*0($nptr),%rax jmp .L8x_tail .align 32 .L8x_tail: mulq $m0 add %rax,%r8 - mov 16*1($nptr),%rax + mov 8*1($nptr),%rax mov %r8,($tptr) # save result mov %rdx,%r8 adc \$0,%r8 mulq $m0 add %rax,%r9 - mov 16*2($nptr),%rax + mov 8*2($nptr),%rax adc \$0,%rdx add %r9,%r8 lea 8($tptr),$tptr # $tptr++ @@ -1710,7 +1793,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r10 - mov 16*3($nptr),%rax + mov 8*3($nptr),%rax adc \$0,%rdx add %r10,%r9 mov %rdx,%r10 @@ -1718,7 +1801,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r11 - mov 16*4($nptr),%rax + mov 8*4($nptr),%rax adc \$0,%rdx add %r11,%r10 mov %rdx,%r11 @@ -1726,7 +1809,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r12 - mov 16*5($nptr),%rax + mov 8*5($nptr),%rax adc \$0,%rdx add %r12,%r11 mov %rdx,%r12 @@ -1734,7 +1817,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r13 - mov 16*6($nptr),%rax + mov 8*6($nptr),%rax adc \$0,%rdx add %r13,%r12 mov %rdx,%r13 @@ -1742,7 +1825,7 @@ sqr8x_reduction: mulq $m0 add %rax,%r14 - mov 16*7($nptr),%rax + mov 8*7($nptr),%rax adc \$0,%rdx add %r14,%r13 mov %rdx,%r14 @@ -1753,14 +1836,14 @@ sqr8x_reduction: add %rax,%r15 adc \$0,%rdx add %r15,%r14 - mov 16*0($nptr),%rax # pull n[0] + mov 8*0($nptr),%rax # pull n[0] mov %rdx,%r15 adc \$0,%r15 dec %ecx jnz .L8x_tail - lea 16*8($nptr),$nptr + lea 8*8($nptr),$nptr mov 8+8(%rsp),%rdx # pull end of t[] cmp 0+8(%rsp),$nptr # end of n[]? jae .L8x_tail_done # break out of loop @@ -1806,7 +1889,7 @@ sqr8x_reduction: adc 8*6($tptr),%r14 adc 8*7($tptr),%r15 adc \$0,%rax # top-most carry - mov -16($nptr),%rcx # np[num-1] + mov -8($nptr),%rcx # np[num-1] xor $carry,$carry movq %xmm2,$nptr # restore $nptr @@ -1824,6 +1907,8 @@ sqr8x_reduction: cmp %rdx,$tptr # end of t[]? jb .L8x_reduction_loop + ret +.size bn_sqr8x_internal,.-bn_sqr8x_internal ___ } ############################################################## @@ -1832,48 +1917,62 @@ ___ { my ($tptr,$nptr)=("%rbx","%rbp"); $code.=<<___; - #xor %rsi,%rsi # %rsi was $carry above - sub %r15,%rcx # compare top-most words +.type __bn_post4x_internal,\@abi-omnipotent +.align 32 +__bn_post4x_internal: + mov 8*0($nptr),%r12 lea (%rdi,$num),$tptr # %rdi was $tptr above - adc %rsi,%rsi mov $num,%rcx - or %rsi,%rax movq %xmm1,$rptr # restore $rptr - xor \$1,%rax + neg %rax movq %xmm1,$aptr # prepare for back-to-back call - lea ($nptr,%rax,8),$nptr - sar \$3+2,%rcx # cf=0 - jmp .Lsqr4x_sub + sar \$3+2,%rcx + dec %r12 # so that after 'not' we get -n[0] + xor %r10,%r10 + mov 8*1($nptr),%r13 + mov 8*2($nptr),%r14 + mov 8*3($nptr),%r15 + jmp .Lsqr4x_sub_entry -.align 32 +.align 16 .Lsqr4x_sub: - .byte 0x66 - mov 8*0($tptr),%r12 - mov 8*1($tptr),%r13 - sbb 16*0($nptr),%r12 - mov 8*2($tptr),%r14 - sbb 16*1($nptr),%r13 - mov 8*3($tptr),%r15 - lea 8*4($tptr),$tptr - sbb 16*2($nptr),%r14 + mov 8*0($nptr),%r12 + mov 8*1($nptr),%r13 + mov 8*2($nptr),%r14 + mov 8*3($nptr),%r15 +.Lsqr4x_sub_entry: + lea 8*4($nptr),$nptr + not %r12 + not %r13 + not %r14 + not %r15 + and %rax,%r12 + and %rax,%r13 + and %rax,%r14 + and %rax,%r15 + + neg %r10 # mov %r10,%cf + adc 8*0($tptr),%r12 + adc 8*1($tptr),%r13 + adc 8*2($tptr),%r14 + adc 8*3($tptr),%r15 mov %r12,8*0($rptr) - sbb 16*3($nptr),%r15 - lea 16*4($nptr),$nptr + lea 8*4($tptr),$tptr mov %r13,8*1($rptr) + sbb %r10,%r10 # mov %cf,%r10 mov %r14,8*2($rptr) mov %r15,8*3($rptr) lea 8*4($rptr),$rptr inc %rcx # pass %cf jnz .Lsqr4x_sub -___ -} -$code.=<<___; + mov $num,%r10 # prepare for back-to-back call neg $num # restore $num ret -.size bn_sqr8x_internal,.-bn_sqr8x_internal +.size __bn_post4x_internal,.-__bn_post4x_internal ___ +} { $code.=<<___; .globl bn_from_montgomery @@ -1897,39 +1996,32 @@ bn_from_mont8x: push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) -___ -$code.=<<___; - .byte 0x67 - mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes - shl \$3+2,%r10d # 4*$num + lea ($num,$num,2),%r10 # 3*$num in bytes neg $num mov ($n0),$n0 # *n0 ############################################################## - # ensure that stack frame doesn't alias with $aptr+4*$num - # modulo 4096, which covers ret[num], am[num] and n[2*num] - # (see bn_exp.c). this is done to allow memory disambiguation - # logic do its magic. + # Ensure that stack frame doesn't alias with $rptr+3*$num + # modulo 4096, which covers ret[num], am[num] and n[num] + # (see bn_exp.c). The stack is allocated to aligned with + # bn_power5's frame, and as bn_from_montgomery happens to be + # last operation, we use the opportunity to cleanse it. # - lea -64(%rsp,$num,2),%r11 - sub $aptr,%r11 + lea -320(%rsp,$num,2),%r11 + sub $rptr,%r11 and \$4095,%r11 cmp %r11,%r10 jb .Lfrom_sp_alt sub %r11,%rsp # align with $aptr - lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256) jmp .Lfrom_sp_done .align 32 .Lfrom_sp_alt: - lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num - lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + lea 4096-320(,$num,2),%r10 + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256) sub %r10,%r11 mov \$0,%r10 cmovc %r10,%r11 @@ -1983,12 +2075,13 @@ $code.=<<___; ___ $code.=<<___ if ($addx); mov OPENSSL_ia32cap_P+8(%rip),%r11d - and \$0x80100,%r11d - cmp \$0x80100,%r11d + and \$0x80108,%r11d + cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 jne .Lfrom_mont_nox lea (%rax,$num),$rptr - call sqrx8x_reduction + call __bn_sqrx8x_reduction + call __bn_postx4x_internal pxor %xmm0,%xmm0 lea 48(%rsp),%rax @@ -1999,7 +2092,8 @@ $code.=<<___ if ($addx); .Lfrom_mont_nox: ___ $code.=<<___; - call sqr8x_reduction + call __bn_sqr8x_reduction + call __bn_post4x_internal pxor %xmm0,%xmm0 lea 48(%rsp),%rax @@ -2039,7 +2133,6 @@ $code.=<<___; .align 32 bn_mulx4x_mont_gather5: .Lmulx4x_enter: - .byte 0x67 mov %rsp,%rax push %rbx push %rbp @@ -2047,40 +2140,33 @@ bn_mulx4x_mont_gather5: push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) -___ -$code.=<<___; - .byte 0x67 - mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes - shl \$3+2,%r10d # 4*$num + lea ($num,$num,2),%r10 # 3*$num in bytes neg $num # -$num mov ($n0),$n0 # *n0 ############################################################## - # ensure that stack frame doesn't alias with $aptr+4*$num - # modulo 4096, which covers a[num], ret[num] and n[2*num] - # (see bn_exp.c). this is done to allow memory disambiguation - # logic do its magic. [excessive frame is allocated in order - # to allow bn_from_mont8x to clear it.] + # Ensure that stack frame doesn't alias with $rptr+3*$num + # modulo 4096, which covers ret[num], am[num] and n[num] + # (see bn_exp.c). This is done to allow memory disambiguation + # logic do its magic. [Extra [num] is allocated in order + # to align with bn_power5's frame, which is cleansed after + # completing exponentiation. Extra 256 bytes is for power mask + # calculated from 7th argument, the index.] # - lea -64(%rsp,$num,2),%r11 - sub $ap,%r11 + lea -320(%rsp,$num,2),%r11 + sub $rp,%r11 and \$4095,%r11 cmp %r11,%r10 jb .Lmulx4xsp_alt sub %r11,%rsp # align with $aptr - lea -64(%rsp,$num,2),%rsp # alloca(frame+$num) + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256) jmp .Lmulx4xsp_done -.align 32 .Lmulx4xsp_alt: - lea 4096-64(,$num,2),%r10 # 4096-frame-$num - lea -64(%rsp,$num,2),%rsp # alloca(frame+$num) + lea 4096-320(,$num,2),%r10 + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256) sub %r10,%r11 mov \$0,%r10 cmovc %r10,%r11 @@ -2106,12 +2192,7 @@ $code.=<<___; mov 40(%rsp),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps -88(%rsi),%xmm6 - movaps -72(%rsi),%xmm7 -___ -$code.=<<___; + mov -48(%rsi),%r15 mov -40(%rsi),%r14 mov -32(%rsi),%r13 @@ -2126,14 +2207,16 @@ $code.=<<___; .type mulx4x_internal,\@abi-omnipotent .align 32 mulx4x_internal: - .byte 0x4c,0x89,0x8c,0x24,0x08,0x00,0x00,0x00 # mov $num,8(%rsp) # save -$num - .byte 0x67 + mov $num,8(%rsp) # save -$num (it was in bytes) + mov $num,%r10 neg $num # restore $num shl \$5,$num - lea 256($bp,$num),%r13 + neg %r10 # restore $num + lea 128($bp,$num),%r13 # end of powers table (+size optimization) shr \$5+5,$num - mov `($win64?56:8)`(%rax),%r10d # load 7th argument + movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument sub \$1,$num + lea .Linc(%rip),%rax mov %r13,16+8(%rsp) # end of b[num] mov $num,24+8(%rsp) # inner counter mov $rp, 56+8(%rsp) # save $rp @@ -2144,52 +2227,92 @@ my $rptr=$bptr; my $STRIDE=2**5*8; # 5 is "window size" my $N=$STRIDE/4; # should match cache line size $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bptr # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - add \$7,%r11 - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - and \$7,%r11 - - movq `0*$STRIDE/4-96`($bptr),%xmm0 - lea $STRIDE($bptr),$tptr # borrow $tptr - movq `1*$STRIDE/4-96`($bptr),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bptr),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bptr),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - movq `0*$STRIDE/4-96`($tptr),%xmm1 - pand %xmm7,%xmm3 - por %xmm2,%xmm0 - movq `1*$STRIDE/4-96`($tptr),%xmm2 - por %xmm3,%xmm0 - .byte 0x67,0x67 - pand %xmm4,%xmm1 - movq `2*$STRIDE/4-96`($tptr),%xmm3 + movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 + lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton) + lea 128($bp),$bptr # size optimization + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + .byte 0x67 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + .byte 0x67 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 + movdqa %xmm4,%xmm3 +___ +for($i=0;$i<$STRIDE/16-4;$i+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($i+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($i+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($i+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($i+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + .byte 0x67 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($i+0)+112`(%r10) + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($i+1)+112`(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($i+2)+112`(%r10) + + pand `16*($i+0)-128`($bptr),%xmm0 # while it's still in register + pand `16*($i+1)-128`($bptr),%xmm1 + pand `16*($i+2)-128`($bptr),%xmm2 + movdqa %xmm3,`16*($i+3)+112`(%r10) + pand `16*($i+3)-128`($bptr),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($i=0;$i<$STRIDE/16-4;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`($bptr),%xmm4 + movdqa `16*($i+1)-128`($bptr),%xmm5 + movdqa `16*($i+2)-128`($bptr),%xmm2 + pand `16*($i+0)+112`(%r10),%xmm4 + movdqa `16*($i+3)-128`($bptr),%xmm3 + pand `16*($i+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($i+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($i+3)+112`(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + pxor %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + lea $STRIDE($bptr),$bptr movq %xmm0,%rdx # bp[0] - movq `3*$STRIDE/4-96`($tptr),%xmm0 - lea 2*$STRIDE($bptr),$bptr # next &b[i] - pand %xmm5,%xmm2 - .byte 0x67,0x67 - pand %xmm6,%xmm3 - ############################################################## - # $tptr is chosen so that writing to top-most element of the - # vector occurs just "above" references to powers table, - # "above" modulo cache-line size, which effectively precludes - # possibility of memory disambiguation logic failure when - # accessing the table. - # - lea 64+8*4+8(%rsp,%r11,8),$tptr + lea 64+8*4+8(%rsp),$tptr mov %rdx,$bi mulx 0*8($aptr),$mi,%rax # a[0]*b[0] @@ -2205,37 +2328,31 @@ $code.=<<___; xor $zero,$zero # cf=0, of=0 mov $mi,%rdx - por %xmm2,%xmm1 - pand %xmm7,%xmm0 - por %xmm3,%xmm1 mov $bptr,8+8(%rsp) # off-load &b[i] - por %xmm1,%xmm0 - .byte 0x48,0x8d,0xb6,0x20,0x00,0x00,0x00 # lea 4*8($aptr),$aptr + lea 4*8($aptr),$aptr adcx %rax,%r13 adcx $zero,%r14 # cf=0 - mulx 0*16($nptr),%rax,%r10 + mulx 0*8($nptr),%rax,%r10 adcx %rax,%r15 # discarded adox %r11,%r10 - mulx 1*16($nptr),%rax,%r11 + mulx 1*8($nptr),%rax,%r11 adcx %rax,%r10 adox %r12,%r11 - mulx 2*16($nptr),%rax,%r12 + mulx 2*8($nptr),%rax,%r12 mov 24+8(%rsp),$bptr # counter value - .byte 0x66 mov %r10,-8*4($tptr) adcx %rax,%r11 adox %r13,%r12 - mulx 3*16($nptr),%rax,%r15 - .byte 0x67,0x67 + mulx 3*8($nptr),%rax,%r15 mov $bi,%rdx mov %r11,-8*3($tptr) adcx %rax,%r12 adox $zero,%r15 # of=0 - .byte 0x48,0x8d,0x89,0x40,0x00,0x00,0x00 # lea 4*16($nptr),$nptr + lea 4*8($nptr),$nptr mov %r12,-8*2($tptr) - #jmp .Lmulx4x_1st + jmp .Lmulx4x_1st .align 32 .Lmulx4x_1st: @@ -2255,30 +2372,29 @@ $code.=<<___; lea 4*8($tptr),$tptr adox %r15,%r10 - mulx 0*16($nptr),%rax,%r15 + mulx 0*8($nptr),%rax,%r15 adcx %rax,%r10 adox %r15,%r11 - mulx 1*16($nptr),%rax,%r15 + mulx 1*8($nptr),%rax,%r15 adcx %rax,%r11 adox %r15,%r12 - mulx 2*16($nptr),%rax,%r15 + mulx 2*8($nptr),%rax,%r15 mov %r10,-5*8($tptr) adcx %rax,%r12 mov %r11,-4*8($tptr) adox %r15,%r13 - mulx 3*16($nptr),%rax,%r15 + mulx 3*8($nptr),%rax,%r15 mov $bi,%rdx mov %r12,-3*8($tptr) adcx %rax,%r13 adox $zero,%r15 - lea 4*16($nptr),$nptr + lea 4*8($nptr),$nptr mov %r13,-2*8($tptr) dec $bptr # of=0, pass cf jnz .Lmulx4x_1st mov 8(%rsp),$num # load -num - movq %xmm0,%rdx # bp[1] adc $zero,%r15 # modulo-scheduled lea ($aptr,$num),$aptr # rewind $aptr add %r15,%r14 @@ -2289,6 +2405,34 @@ $code.=<<___; .align 32 .Lmulx4x_outer: + lea 16-256($tptr),%r10 # where 256-byte mask is (+density control) + pxor %xmm4,%xmm4 + .byte 0x67,0x67 + pxor %xmm5,%xmm5 +___ +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`($bptr),%xmm0 + movdqa `16*($i+1)-128`($bptr),%xmm1 + movdqa `16*($i+2)-128`($bptr),%xmm2 + pand `16*($i+0)+256`(%r10),%xmm0 + movdqa `16*($i+3)-128`($bptr),%xmm3 + pand `16*($i+1)+256`(%r10),%xmm1 + por %xmm0,%xmm4 + pand `16*($i+2)+256`(%r10),%xmm2 + por %xmm1,%xmm5 + pand `16*($i+3)+256`(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bptr),$bptr + movq %xmm0,%rdx # m0=bp[i] + mov $zero,($tptr) # save top-most carry lea 4*8($tptr,$num),$tptr # rewind $tptr mulx 0*8($aptr),$mi,%r11 # a[0]*b[i] @@ -2303,54 +2447,37 @@ $code.=<<___; mulx 3*8($aptr),%rdx,%r14 adox -2*8($tptr),%r12 adcx %rdx,%r13 - lea ($nptr,$num,2),$nptr # rewind $nptr + lea ($nptr,$num),$nptr # rewind $nptr lea 4*8($aptr),$aptr adox -1*8($tptr),%r13 adcx $zero,%r14 adox $zero,%r14 - .byte 0x67 mov $mi,%r15 imulq 32+8(%rsp),$mi # "t[0]"*n0 - movq `0*$STRIDE/4-96`($bptr),%xmm0 - .byte 0x67,0x67 mov $mi,%rdx - movq `1*$STRIDE/4-96`($bptr),%xmm1 - .byte 0x67 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bptr),%xmm2 - .byte 0x67 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bptr),%xmm3 - add \$$STRIDE,$bptr # next &b[i] - .byte 0x67 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 xor $zero,$zero # cf=0, of=0 mov $bptr,8+8(%rsp) # off-load &b[i] - mulx 0*16($nptr),%rax,%r10 + mulx 0*8($nptr),%rax,%r10 adcx %rax,%r15 # discarded adox %r11,%r10 - mulx 1*16($nptr),%rax,%r11 + mulx 1*8($nptr),%rax,%r11 adcx %rax,%r10 adox %r12,%r11 - mulx 2*16($nptr),%rax,%r12 + mulx 2*8($nptr),%rax,%r12 adcx %rax,%r11 adox %r13,%r12 - mulx 3*16($nptr),%rax,%r15 + mulx 3*8($nptr),%rax,%r15 mov $bi,%rdx - por %xmm2,%xmm0 mov 24+8(%rsp),$bptr # counter value mov %r10,-8*4($tptr) - por %xmm3,%xmm0 adcx %rax,%r12 mov %r11,-8*3($tptr) adox $zero,%r15 # of=0 mov %r12,-8*2($tptr) - lea 4*16($nptr),$nptr + lea 4*8($nptr),$nptr jmp .Lmulx4x_inner .align 32 @@ -2375,20 +2502,20 @@ $code.=<<___; adcx $zero,%r14 # cf=0 adox %r15,%r10 - mulx 0*16($nptr),%rax,%r15 + mulx 0*8($nptr),%rax,%r15 adcx %rax,%r10 adox %r15,%r11 - mulx 1*16($nptr),%rax,%r15 + mulx 1*8($nptr),%rax,%r15 adcx %rax,%r11 adox %r15,%r12 - mulx 2*16($nptr),%rax,%r15 + mulx 2*8($nptr),%rax,%r15 mov %r10,-5*8($tptr) adcx %rax,%r12 adox %r15,%r13 mov %r11,-4*8($tptr) - mulx 3*16($nptr),%rax,%r15 + mulx 3*8($nptr),%rax,%r15 mov $bi,%rdx - lea 4*16($nptr),$nptr + lea 4*8($nptr),$nptr mov %r12,-3*8($tptr) adcx %rax,%r13 adox $zero,%r15 @@ -2398,7 +2525,6 @@ $code.=<<___; jnz .Lmulx4x_inner mov 0+8(%rsp),$num # load -num - movq %xmm0,%rdx # bp[i+1] adc $zero,%r15 # modulo-scheduled sub 0*8($tptr),$bptr # pull top-most carry to %cf mov 8+8(%rsp),$bptr # re-load &b[i] @@ -2411,20 +2537,26 @@ $code.=<<___; cmp %r10,$bptr jb .Lmulx4x_outer - mov -16($nptr),%r10 + mov -8($nptr),%r10 + mov $zero,%r8 + mov ($nptr,$num),%r12 + lea ($nptr,$num),%rbp # rewind $nptr + mov $num,%rcx + lea ($tptr,$num),%rdi # rewind $tptr + xor %eax,%eax xor %r15,%r15 sub %r14,%r10 # compare top-most words adc %r15,%r15 - or %r15,$zero - xor \$1,$zero - lea ($tptr,$num),%rdi # rewind $tptr - lea ($nptr,$num,2),$nptr # rewind $nptr - .byte 0x67,0x67 - sar \$3+2,$num # cf=0 - lea ($nptr,$zero,8),%rbp + or %r15,%r8 + sar \$3+2,%rcx + sub %r8,%rax # %rax=-%r8 mov 56+8(%rsp),%rdx # restore rp - mov $num,%rcx - jmp .Lsqrx4x_sub # common post-condition + dec %r12 # so that after 'not' we get -n[0] + mov 8*1(%rbp),%r13 + xor %r8,%r8 + mov 8*2(%rbp),%r14 + mov 8*3(%rbp),%r15 + jmp .Lsqrx4x_sub_entry # common post-condition .size mulx4x_internal,.-mulx4x_internal ___ } { @@ -2448,7 +2580,6 @@ $code.=<<___; .align 32 bn_powerx5: .Lpowerx5_enter: - .byte 0x67 mov %rsp,%rax push %rbx push %rbp @@ -2456,39 +2587,32 @@ bn_powerx5: push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) -___ -$code.=<<___; - .byte 0x67 - mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes - shl \$3+2,%r10d # 4*$num + lea ($num,$num,2),%r10 # 3*$num in bytes neg $num mov ($n0),$n0 # *n0 ############################################################## - # ensure that stack frame doesn't alias with $aptr+4*$num - # modulo 4096, which covers ret[num], am[num] and n[2*num] - # (see bn_exp.c). this is done to allow memory disambiguation - # logic do its magic. + # Ensure that stack frame doesn't alias with $rptr+3*$num + # modulo 4096, which covers ret[num], am[num] and n[num] + # (see bn_exp.c). This is done to allow memory disambiguation + # logic do its magic. [Extra 256 bytes is for power mask + # calculated from 7th argument, the index.] # - lea -64(%rsp,$num,2),%r11 - sub $aptr,%r11 + lea -320(%rsp,$num,2),%r11 + sub $rptr,%r11 and \$4095,%r11 cmp %r11,%r10 jb .Lpwrx_sp_alt sub %r11,%rsp # align with $aptr - lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256) jmp .Lpwrx_sp_done .align 32 .Lpwrx_sp_alt: - lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num - lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + lea 4096-320(,$num,2),%r10 + lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256) sub %r10,%r11 mov \$0,%r10 cmovc %r10,%r11 @@ -2519,10 +2643,15 @@ $code.=<<___; .Lpowerx5_body: call __bn_sqrx8x_internal + call __bn_postx4x_internal call __bn_sqrx8x_internal + call __bn_postx4x_internal call __bn_sqrx8x_internal + call __bn_postx4x_internal call __bn_sqrx8x_internal + call __bn_postx4x_internal call __bn_sqrx8x_internal + call __bn_postx4x_internal mov %r10,$num # -num mov $aptr,$rptr @@ -2534,12 +2663,7 @@ $code.=<<___; mov 40(%rsp),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps -88(%rsi),%xmm6 - movaps -72(%rsi),%xmm7 -___ -$code.=<<___; + mov -48(%rsi),%r15 mov -40(%rsi),%r14 mov -32(%rsi),%r13 @@ -2973,11 +3097,11 @@ my ($nptr,$carry,$m0)=("%rbp","%rsi","%rdx"); $code.=<<___; movq %xmm2,$nptr -sqrx8x_reduction: +__bn_sqrx8x_reduction: xor %eax,%eax # initial top-most carry bit mov 32+8(%rsp),%rbx # n0 mov 48+8(%rsp),%rdx # "%r8", 8*0($tptr) - lea -128($nptr,$num,2),%rcx # end of n[] + lea -8*8($nptr,$num),%rcx # end of n[] #lea 48+8(%rsp,$num,2),$tptr # end of t[] buffer mov %rcx, 0+8(%rsp) # save end of n[] mov $tptr,8+8(%rsp) # save end of t[] @@ -3006,23 +3130,23 @@ sqrx8x_reduction: .align 32 .Lsqrx8x_reduce: mov %r8, %rbx - mulx 16*0($nptr),%rax,%r8 # n[0] + mulx 8*0($nptr),%rax,%r8 # n[0] adcx %rbx,%rax # discarded adox %r9,%r8 - mulx 16*1($nptr),%rbx,%r9 # n[1] + mulx 8*1($nptr),%rbx,%r9 # n[1] adcx %rbx,%r8 adox %r10,%r9 - mulx 16*2($nptr),%rbx,%r10 + mulx 8*2($nptr),%rbx,%r10 adcx %rbx,%r9 adox %r11,%r10 - mulx 16*3($nptr),%rbx,%r11 + mulx 8*3($nptr),%rbx,%r11 adcx %rbx,%r10 adox %r12,%r11 - .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x40,0x00,0x00,0x00 # mulx 16*4($nptr),%rbx,%r12 + .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 # mulx 8*4($nptr),%rbx,%r12 mov %rdx,%rax mov %r8,%rdx adcx %rbx,%r11 @@ -3032,15 +3156,15 @@ sqrx8x_reduction: mov %rax,%rdx mov %rax,64+48+8(%rsp,%rcx,8) # put aside n0*a[i] - mulx 16*5($nptr),%rax,%r13 + mulx 8*5($nptr),%rax,%r13 adcx %rax,%r12 adox %r14,%r13 - mulx 16*6($nptr),%rax,%r14 + mulx 8*6($nptr),%rax,%r14 adcx %rax,%r13 adox %r15,%r14 - mulx 16*7($nptr),%rax,%r15 + mulx 8*7($nptr),%rax,%r15 mov %rbx,%rdx adcx %rax,%r14 adox $carry,%r15 # $carry is 0 @@ -3056,7 +3180,7 @@ sqrx8x_reduction: mov 48+8(%rsp),%rdx # pull n0*a[0] add 8*0($tptr),%r8 - lea 16*8($nptr),$nptr + lea 8*8($nptr),$nptr mov \$-8,%rcx adcx 8*1($tptr),%r9 adcx 8*2($tptr),%r10 @@ -3075,35 +3199,35 @@ sqrx8x_reduction: .align 32 .Lsqrx8x_tail: mov %r8,%rbx - mulx 16*0($nptr),%rax,%r8 + mulx 8*0($nptr),%rax,%r8 adcx %rax,%rbx adox %r9,%r8 - mulx 16*1($nptr),%rax,%r9 + mulx 8*1($nptr),%rax,%r9 adcx %rax,%r8 adox %r10,%r9 - mulx 16*2($nptr),%rax,%r10 + mulx 8*2($nptr),%rax,%r10 adcx %rax,%r9 adox %r11,%r10 - mulx 16*3($nptr),%rax,%r11 + mulx 8*3($nptr),%rax,%r11 adcx %rax,%r10 adox %r12,%r11 - .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x40,0x00,0x00,0x00 # mulx 16*4($nptr),%rax,%r12 + .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 # mulx 8*4($nptr),%rax,%r12 adcx %rax,%r11 adox %r13,%r12 - mulx 16*5($nptr),%rax,%r13 + mulx 8*5($nptr),%rax,%r13 adcx %rax,%r12 adox %r14,%r13 - mulx 16*6($nptr),%rax,%r14 + mulx 8*6($nptr),%rax,%r14 adcx %rax,%r13 adox %r15,%r14 - mulx 16*7($nptr),%rax,%r15 + mulx 8*7($nptr),%rax,%r15 mov 72+48+8(%rsp,%rcx,8),%rdx # pull n0*a[i] adcx %rax,%r14 adox $carry,%r15 @@ -3119,7 +3243,7 @@ sqrx8x_reduction: sub 16+8(%rsp),$carry # mov 16(%rsp),%cf mov 48+8(%rsp),%rdx # pull n0*a[0] - lea 16*8($nptr),$nptr + lea 8*8($nptr),$nptr adc 8*0($tptr),%r8 adc 8*1($tptr),%r9 adc 8*2($tptr),%r10 @@ -3155,7 +3279,7 @@ sqrx8x_reduction: adc 8*0($tptr),%r8 movq %xmm3,%rcx adc 8*1($tptr),%r9 - mov 16*7($nptr),$carry + mov 8*7($nptr),$carry movq %xmm2,$nptr # restore $nptr adc 8*2($tptr),%r10 adc 8*3($tptr),%r11 @@ -3181,6 +3305,8 @@ sqrx8x_reduction: lea 8*8($tptr,%rcx),$tptr # start of current t[] window cmp 8+8(%rsp),%r8 # end of t[]? jb .Lsqrx8x_reduction_loop + ret +.size bn_sqrx8x_internal,.-bn_sqrx8x_internal ___ } ############################################################## @@ -3188,52 +3314,59 @@ ___ # { my ($rptr,$nptr)=("%rdx","%rbp"); -my @ri=map("%r$_",(10..13)); -my @ni=map("%r$_",(14..15)); $code.=<<___; - xor %ebx,%ebx - sub %r15,%rsi # compare top-most words - adc %rbx,%rbx +.align 32 +__bn_postx4x_internal: + mov 8*0($nptr),%r12 mov %rcx,%r10 # -$num - or %rbx,%rax mov %rcx,%r9 # -$num - xor \$1,%rax - sar \$3+2,%rcx # cf=0 + neg %rax + sar \$3+2,%rcx #lea 48+8(%rsp,%r9),$tptr - lea ($nptr,%rax,8),$nptr movq %xmm1,$rptr # restore $rptr movq %xmm1,$aptr # prepare for back-to-back call - jmp .Lsqrx4x_sub + dec %r12 # so that after 'not' we get -n[0] + mov 8*1($nptr),%r13 + xor %r8,%r8 + mov 8*2($nptr),%r14 + mov 8*3($nptr),%r15 + jmp .Lsqrx4x_sub_entry -.align 32 +.align 16 .Lsqrx4x_sub: - .byte 0x66 - mov 8*0($tptr),%r12 - mov 8*1($tptr),%r13 - sbb 16*0($nptr),%r12 - mov 8*2($tptr),%r14 - sbb 16*1($nptr),%r13 - mov 8*3($tptr),%r15 - lea 8*4($tptr),$tptr - sbb 16*2($nptr),%r14 + mov 8*0($nptr),%r12 + mov 8*1($nptr),%r13 + mov 8*2($nptr),%r14 + mov 8*3($nptr),%r15 +.Lsqrx4x_sub_entry: + andn %rax,%r12,%r12 + lea 8*4($nptr),$nptr + andn %rax,%r13,%r13 + andn %rax,%r14,%r14 + andn %rax,%r15,%r15 + + neg %r8 # mov %r8,%cf + adc 8*0($tptr),%r12 + adc 8*1($tptr),%r13 + adc 8*2($tptr),%r14 + adc 8*3($tptr),%r15 mov %r12,8*0($rptr) - sbb 16*3($nptr),%r15 - lea 16*4($nptr),$nptr + lea 8*4($tptr),$tptr mov %r13,8*1($rptr) + sbb %r8,%r8 # mov %cf,%r8 mov %r14,8*2($rptr) mov %r15,8*3($rptr) lea 8*4($rptr),$rptr inc %rcx jnz .Lsqrx4x_sub -___ -} -$code.=<<___; + neg %r9 # restore $num ret -.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +.size __bn_postx4x_internal,.-__bn_postx4x_internal ___ +} }}} { my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%edx","%r8", "%r9d") : # Win64 order @@ -3282,56 +3415,91 @@ bn_scatter5: .globl bn_gather5 .type bn_gather5,\@abi-omnipotent -.align 16 +.align 32 bn_gather5: -___ -$code.=<<___ if ($win64); -.LSEH_begin_bn_gather5: +.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases # I can't trust assembler to use specific encoding:-( - .byte 0x48,0x83,0xec,0x28 #sub \$0x28,%rsp - .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) - .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) + .byte 0x4c,0x8d,0x14,0x24 #lea (%rsp),%r10 + .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 #sub $0x108,%rsp + lea .Linc(%rip),%rax + and \$-16,%rsp # shouldn't be formally required + + movd $idx,%xmm5 + movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 + lea 128($tbl),%r11 # size optimization + lea 128(%rsp),%rax # size optimization + + pshufd \$0,%xmm5,%xmm5 # broadcast $idx + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 ___ +######################################################################## +# calculate mask by comparing 0..31 to $idx and save result to stack +# +for($i=0;$i<$STRIDE/16;$i+=4) { $code.=<<___; - mov $idx,%r11d - shr \$`log($N/8)/log(2)`,$idx - and \$`$N/8-1`,%r11 - not $idx - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,$idx # 5 is "window size" - lea 128($tbl,%r11,8),$tbl # pointer within 1st cache line - movq 0(%rax,$idx,8),%xmm4 # set of masks denoting which - movq 8(%rax,$idx,8),%xmm5 # cache line contains element - movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument - movq 24(%rax,$idx,8),%xmm7 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 +___ +$code.=<<___ if ($i); + movdqa %xmm3,`16*($i-1)-128`(%rax) +___ +$code.=<<___; + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($i+0)-128`(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($i+1)-128`(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($i+2)-128`(%rax) + movdqa %xmm4,%xmm2 +___ +} +$code.=<<___; + movdqa %xmm3,`16*($i-1)-128`(%rax) jmp .Lgather -.align 16 -.Lgather: - movq `0*$STRIDE/4-128`($tbl),%xmm0 - movq `1*$STRIDE/4-128`($tbl),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-128`($tbl),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-128`($tbl),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - .byte 0x67,0x67 - por %xmm2,%xmm0 - lea $STRIDE($tbl),$tbl - por %xmm3,%xmm0 +.align 32 +.Lgather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`(%r11),%xmm0 + movdqa `16*($i+1)-128`(%r11),%xmm1 + movdqa `16*($i+2)-128`(%r11),%xmm2 + pand `16*($i+0)-128`(%rax),%xmm0 + movdqa `16*($i+3)-128`(%r11),%xmm3 + pand `16*($i+1)-128`(%rax),%xmm1 + por %xmm0,%xmm4 + pand `16*($i+2)-128`(%rax),%xmm2 + por %xmm1,%xmm5 + pand `16*($i+3)-128`(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + lea $STRIDE(%r11),%r11 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 movq %xmm0,($out) # m0=bp[0] lea 8($out),$out sub \$1,$num jnz .Lgather -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - lea 0x28(%rsp),%rsp -___ -$code.=<<___; + + lea (%r10),%rsp ret .LSEH_end_bn_gather5: .size bn_gather5,.-bn_gather5 @@ -3339,9 +3507,9 @@ ___ } $code.=<<___; .align 64 -.Lmagic_masks: - .long 0,0, 0,0, 0,0, -1,-1 - .long 0,0, 0,0, 0,0, 0,0 +.Linc: + .long 0,0, 1,1 + .long 2,2, 2,2 .asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " ___ @@ -3389,19 +3557,16 @@ mul_handler: lea .Lmul_epilogue(%rip),%r10 cmp %r10,%rbx - jb .Lbody_40 + ja .Lbody_40 mov 192($context),%r10 # pull $num mov 8(%rax,%r10,8),%rax # pull saved stack pointer + jmp .Lbody_proceed .Lbody_40: mov 40(%rax),%rax # pull saved stack pointer .Lbody_proceed: - - movaps -88(%rax),%xmm0 - movaps -72(%rax),%xmm1 - mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 @@ -3414,8 +3579,6 @@ mul_handler: mov %r13,224($context) # restore context->R13 mov %r14,232($context) # restore context->R14 mov %r15,240($context) # restore context->R15 - movups %xmm0,512($context) # restore context->Xmm6 - movups %xmm1,528($context) # restore context->Xmm7 .Lcommon_seh_tail: mov 8(%rax),%rdi @@ -3526,10 +3689,9 @@ ___ $code.=<<___; .align 8 .LSEH_info_bn_gather5: - .byte 0x01,0x0d,0x05,0x00 - .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 - .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 - .byte 0x04,0x42,0x00,0x00 #sub rsp,0x28 + .byte 0x01,0x0b,0x03,0x0a + .byte 0x0b,0x01,0x21,0x00 # sub rsp,0x108 + .byte 0x04,0xa3,0x00,0x00 # lea r10,(rsp) .align 8 ___ } diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h index 5696965..86264ae 100644 --- a/crypto/bn/bn.h +++ b/crypto/bn/bn.h @@ -125,6 +125,7 @@ #ifndef HEADER_BN_H # define HEADER_BN_H +# include # include # ifndef OPENSSL_NO_FP_API # include /* FILE */ @@ -721,8 +722,17 @@ const BIGNUM *BN_get0_nist_prime_521(void); /* library internal functions */ -# define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ - (a):bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2)) +# define bn_expand(a,bits) \ + ( \ + bits > (INT_MAX - BN_BITS2 + 1) ? \ + NULL \ + : \ + (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) ? \ + (a) \ + : \ + bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2) \ + ) + # define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) BIGNUM *bn_expand2(BIGNUM *a, int words); # ifndef OPENSSL_NO_DEPRECATED diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c index 6d30d1e..1670f01 100644 --- a/crypto/bn/bn_exp.c +++ b/crypto/bn/bn_exp.c @@ -110,6 +110,7 @@ */ #include "cryptlib.h" +#include "constant_time_locl.h" #include "bn_lcl.h" #include @@ -606,15 +607,17 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, - int width) + int window) { - size_t i, j; + int i, j; + int width = 1 << window; + BN_ULONG *table = (BN_ULONG *)buf; if (top > b->top) top = b->top; /* this works because 'buf' is explicitly * zeroed */ - for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { - buf[j] = ((unsigned char *)b->d)[i]; + for (i = 0, j = idx; i < top; i++, j += width) { + table[j] = b->d[i]; } return 1; @@ -622,15 +625,51 @@ static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, - int width) + int window) { - size_t i, j; + int i, j; + int width = 1 << window; + volatile BN_ULONG *table = (volatile BN_ULONG *)buf; if (bn_wexpand(b, top) == NULL) return 0; - for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { - ((unsigned char *)b->d)[i] = buf[j]; + if (window <= 3) { + for (i = 0; i < top; i++, table += width) { + BN_ULONG acc = 0; + + for (j = 0; j < width; j++) { + acc |= table[j] & + ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); + } + + b->d[i] = acc; + } + } else { + int xstride = 1 << (window - 2); + BN_ULONG y0, y1, y2, y3; + + i = idx >> (window - 2); /* equivalent of idx / xstride */ + idx &= xstride - 1; /* equivalent of idx % xstride */ + + y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); + y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); + y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); + y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); + + for (i = 0; i < top; i++, table += width) { + BN_ULONG acc = 0; + + for (j = 0; j < xstride; j++) { + acc |= ( (table[j + 0 * xstride] & y0) | + (table[j + 1 * xstride] & y1) | + (table[j + 2 * xstride] & y2) | + (table[j + 3 * xstride] & y3) ) + & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); + } + + b->d[i] = acc; + } } b->top = top; @@ -749,8 +788,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (window >= 5) { window = 5; /* ~5% improvement for RSA2048 sign, and even * for RSA4096 */ - if ((top & 7) == 0) - powerbufLen += 2 * top * sizeof(m->d[0]); + /* reserve space for mont->N.d[] copy */ + powerbufLen += top * sizeof(mont->N.d[0]); } #endif (void)0; @@ -971,7 +1010,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BN_ULONG *not_used, const BN_ULONG *np, const BN_ULONG *n0, int num); - BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2; + BN_ULONG *n0 = mont->n0, *np; /* * BN_to_montgomery can contaminate words above .top [in @@ -982,11 +1021,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, for (i = tmp.top; i < top; i++) tmp.d[i] = 0; - if (top & 7) - np2 = np; - else - for (np2 = am.d + top, i = 0; i < top; i++) - np2[2 * i] = np[i]; + /* + * copy mont->N.d[] to improve cache locality + */ + for (np = am.d + top, i = 0; i < top; i++) + np[i] = mont->N.d[i]; bn_scatter5(tmp.d, top, powerbuf, 0); bn_scatter5(am.d, am.top, powerbuf, 1); @@ -996,7 +1035,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, # if 0 for (i = 3; i < 32; i++) { /* Calculate a^i = a^(i-1) * a */ - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); } # else @@ -1007,7 +1046,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } for (i = 3; i < 8; i += 2) { int j; - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); for (j = 2 * i; j < 32; j *= 2) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); @@ -1015,13 +1054,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } for (; i < 16; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_scatter5(tmp.d, top, powerbuf, 2 * i); } for (; i < 32; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); } # endif @@ -1050,11 +1089,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, while (bits >= 0) { wvalue = bn_get_bits5(p->d, bits - 4); bits -= 5; - bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); } } - ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top); + ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top); tmp.top = top; bn_correct_top(&tmp); if (ret) { @@ -1065,9 +1104,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } else #endif { - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) goto err; /* @@ -1079,15 +1118,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (window > 1) { if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF - (&tmp, top, powerbuf, 2, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, + window)) goto err; for (i = 3; i < numPowers; i++) { /* Calculate a^i = a^(i-1) * a */ if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF - (&tmp, top, powerbuf, i, numPowers)) + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, + window)) goto err; } } @@ -1095,8 +1134,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, bits--; for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - if (!MOD_EXP_CTIME_COPY_FROM_PREBUF - (&tmp, top, powerbuf, wvalue, numPowers)) + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, + window)) goto err; /* @@ -1116,8 +1155,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, /* * Fetch the appropriate pre-computed value from the pre-buf */ - if (!MOD_EXP_CTIME_COPY_FROM_PREBUF - (&am, top, powerbuf, wvalue, numPowers)) + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, + window)) goto err; /* Multiply the result into the intermediate result */ diff --git a/crypto/bn/bn_print.c b/crypto/bn/bn_print.c index ab10b95..bfa31ef 100644 --- a/crypto/bn/bn_print.c +++ b/crypto/bn/bn_print.c @@ -58,6 +58,7 @@ #include #include +#include #include "cryptlib.h" #include #include "bn_lcl.h" @@ -189,7 +190,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a) a++; } - for (i = 0; isxdigit((unsigned char)a[i]); i++) ; + for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++) + continue; + + if (i > INT_MAX/4) + goto err; num = i + neg; if (bn == NULL) @@ -204,7 +209,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a) BN_zero(ret); } - /* i is the number of hex digests; */ + /* i is the number of hex digits */ if (bn_expand(ret, i * 4) == NULL) goto err; @@ -260,7 +265,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a) a++; } - for (i = 0; isdigit((unsigned char)a[i]); i++) ; + for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++) + continue; + + if (i > INT_MAX/4) + goto err; num = i + neg; if (bn == NULL) @@ -278,7 +287,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) BN_zero(ret); } - /* i is the number of digests, a bit of an over expand; */ + /* i is the number of digits, a bit of an over expand */ if (bn_expand(ret, i * 4) == NULL) goto err; diff --git a/crypto/bn/bn_recp.c b/crypto/bn/bn_recp.c index 7497ac6..f047040 100644 --- a/crypto/bn/bn_recp.c +++ b/crypto/bn/bn_recp.c @@ -65,6 +65,7 @@ void BN_RECP_CTX_init(BN_RECP_CTX *recp) BN_init(&(recp->N)); BN_init(&(recp->Nr)); recp->num_bits = 0; + recp->shift = 0; recp->flags = 0; } diff --git a/crypto/cmac/cmac.c b/crypto/cmac/cmac.c index 774e6dc..2954b6e 100644 --- a/crypto/cmac/cmac.c +++ b/crypto/cmac/cmac.c @@ -160,6 +160,14 @@ int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, EVPerr(EVP_F_CMAC_INIT, EVP_R_DISABLED_FOR_FIPS); return 0; } + + /* Switch to FIPS cipher implementation if possible */ + if (cipher != NULL) { + const EVP_CIPHER *fcipher; + fcipher = FIPS_get_cipherbynid(EVP_CIPHER_nid(cipher)); + if (fcipher != NULL) + cipher = fcipher; + } /* * Other algorithm blocking will be done in FIPS_cmac_init, via * FIPS_cipherinit(). diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index c9f674b..1925428 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -1016,11 +1016,11 @@ void *OPENSSL_stderr(void) return stderr; } -int CRYPTO_memcmp(const void *in_a, const void *in_b, size_t len) +int CRYPTO_memcmp(const volatile void *in_a, const volatile void *in_b, size_t len) { size_t i; - const unsigned char *a = in_a; - const unsigned char *b = in_b; + const volatile unsigned char *a = in_a; + const volatile unsigned char *b = in_b; unsigned char x = 0; for (i = 0; i < len; i++) diff --git a/crypto/crypto.h b/crypto/crypto.h index c450d7a..6c644ce 100644 --- a/crypto/crypto.h +++ b/crypto/crypto.h @@ -628,7 +628,7 @@ void OPENSSL_init(void); * into a defined order as the return value when a != b is undefined, other * than to be non-zero. */ -int CRYPTO_memcmp(const void *a, const void *b, size_t len); +int CRYPTO_memcmp(const volatile void *a, const volatile void *b, size_t len); /* BEGIN ERROR CODES */ /* diff --git a/crypto/dh/dh.h b/crypto/dh/dh.h index 5498a9d..a5bd901 100644 --- a/crypto/dh/dh.h +++ b/crypto/dh/dh.h @@ -174,7 +174,7 @@ struct dh_st { /* DH_check_pub_key error codes */ # define DH_CHECK_PUBKEY_TOO_SMALL 0x01 # define DH_CHECK_PUBKEY_TOO_LARGE 0x02 -# define DH_CHECK_PUBKEY_INVALID 0x03 +# define DH_CHECK_PUBKEY_INVALID 0x04 /* * primes p where (p-1)/2 is prime too are called "safe"; we define this for diff --git a/crypto/dh/dh_check.c b/crypto/dh/dh_check.c index 5adedc0..0277041 100644 --- a/crypto/dh/dh_check.c +++ b/crypto/dh/dh_check.c @@ -160,13 +160,12 @@ int DH_check_pub_key(const DH *dh, const BIGNUM *pub_key, int *ret) goto err; BN_CTX_start(ctx); tmp = BN_CTX_get(ctx); - if (tmp == NULL) + if (tmp == NULL || !BN_set_word(tmp, 1)) goto err; - BN_set_word(tmp, 1); if (BN_cmp(pub_key, tmp) <= 0) *ret |= DH_CHECK_PUBKEY_TOO_SMALL; - BN_copy(tmp, dh->p); - BN_sub_word(tmp, 1); + if (BN_copy(tmp, dh->p) == NULL || !BN_sub_word(tmp, 1)) + goto err; if (BN_cmp(pub_key, tmp) >= 0) *ret |= DH_CHECK_PUBKEY_TOO_LARGE; diff --git a/crypto/dsa/dsa_ameth.c b/crypto/dsa/dsa_ameth.c index c40e177..cc83d6e 100644 --- a/crypto/dsa/dsa_ameth.c +++ b/crypto/dsa/dsa_ameth.c @@ -191,6 +191,8 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) STACK_OF(ASN1_TYPE) *ndsa = NULL; DSA *dsa = NULL; + int ret = 0; + if (!PKCS8_pkey_get0(NULL, &p, &pklen, &palg, p8)) return 0; X509_ALGOR_get0(NULL, &ptype, &pval, palg); @@ -262,23 +264,21 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) } EVP_PKEY_assign_DSA(pkey, dsa); - BN_CTX_free(ctx); - if (ndsa) - sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free); - else - ASN1_STRING_clear_free(privkey); - return 1; + ret = 1; + goto done; decerr: - DSAerr(DSA_F_DSA_PRIV_DECODE, EVP_R_DECODE_ERROR); + DSAerr(DSA_F_DSA_PRIV_DECODE, DSA_R_DECODE_ERROR); dsaerr: + DSA_free(dsa); + done: BN_CTX_free(ctx); - if (privkey) + if (ndsa) + sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free); + else ASN1_STRING_clear_free(privkey); - sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free); - DSA_free(dsa); - return 0; + return ret; } static int dsa_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) diff --git a/crypto/dso/dso_lib.c b/crypto/dso/dso_lib.c index 3312450..2beb7c1 100644 --- a/crypto/dso/dso_lib.c +++ b/crypto/dso/dso_lib.c @@ -122,6 +122,7 @@ DSO *DSO_new_method(DSO_METHOD *meth) ret->meth = meth; ret->references = 1; if ((ret->meth->init != NULL) && !ret->meth->init(ret)) { + sk_void_free(ret->meth_data); OPENSSL_free(ret); ret = NULL; } diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl index e6acfd5..7140860 100755 --- a/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -2001,6 +2001,7 @@ $code.=<<___; push %r15 sub \$32*5+8, %rsp +.Lpoint_double_shortcut$x: movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x mov $a_ptr, $b_ptr # backup copy movdqu 0x10($a_ptr), %xmm1 @@ -2291,6 +2292,7 @@ $code.=<<___; mov 0x40+8*1($b_ptr), $acc6 mov 0x40+8*2($b_ptr), $acc7 mov 0x40+8*3($b_ptr), $acc0 + movq $b_ptr, %xmm1 lea 0x40-$bias($b_ptr), $a_ptr lea $Z1sqr(%rsp), $r_ptr # Z1^2 @@ -2346,7 +2348,7 @@ $code.=<<___; test $acc0, $acc0 jnz .Ladd_proceed$x # (in1infty || in2infty)? test $acc1, $acc1 - jz .Ladd_proceed$x # is_equal(S1,S2)? + jz .Ladd_double$x # is_equal(S1,S2)? movq %xmm0, $r_ptr # restore $r_ptr pxor %xmm0, %xmm0 @@ -2359,6 +2361,13 @@ $code.=<<___; jmp .Ladd_done$x .align 32 +.Ladd_double$x: + movq %xmm1, $a_ptr # restore $a_ptr + movq %xmm0, $r_ptr # restore $r_ptr + add \$`32*(18-5)`, %rsp # difference in frame sizes + jmp .Lpoint_double_shortcut$x + +.align 32 .Ladd_proceed$x: `&load_for_sqr("$R(%rsp)", "$src0")` lea $Rsqr(%rsp), $r_ptr # R^2 diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c index ed09f97..d81cc9c 100644 --- a/crypto/ec/ecp_nistp224.c +++ b/crypto/ec/ecp_nistp224.c @@ -1657,8 +1657,7 @@ int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) */ if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); - ret = 1; - goto err; + goto done; } if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || @@ -1736,6 +1735,7 @@ int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) } make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); + done: if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, nistp224_pre_comp_free, nistp224_pre_comp_clear_free)) diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c index a588708..78d191a 100644 --- a/crypto/ec/ecp_nistp256.c +++ b/crypto/ec/ecp_nistp256.c @@ -2249,8 +2249,7 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx) */ if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); - ret = 1; - goto err; + goto done; } if ((!BN_to_felem(x_tmp, &group->generator->X)) || (!BN_to_felem(y_tmp, &group->generator->Y)) || @@ -2337,6 +2336,7 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx) } make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems); + done: if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup, nistp256_pre_comp_free, nistp256_pre_comp_clear_free)) diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c index 360b9a3..c53a61b 100644 --- a/crypto/ec/ecp_nistp521.c +++ b/crypto/ec/ecp_nistp521.c @@ -2056,8 +2056,7 @@ int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) */ if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); - ret = 1; - goto err; + goto done; } if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) || (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) || @@ -2115,6 +2114,7 @@ int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) } make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems); + done: if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup, nistp521_pre_comp_free, nistp521_pre_comp_clear_free)) diff --git a/crypto/ec/ectest.c b/crypto/ec/ectest.c index efab0b0..40a1f00 100644 --- a/crypto/ec/ectest.c +++ b/crypto/ec/ectest.c @@ -1758,9 +1758,18 @@ static void nistp_single_test(const struct nistp_test_params *test) if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + /* + * We have not performed precomputation so have_precompute mult should be + * false + */ + if (EC_GROUP_have_precompute_mult(NISTP)) + ABORT; + /* now repeat all tests with precomputation */ if (!EC_GROUP_precompute_mult(NISTP, ctx)) ABORT; + if (!EC_GROUP_have_precompute_mult(NISTP)) + ABORT; /* fixed point multiplication */ EC_POINT_mul(NISTP, Q, m, NULL, NULL, ctx); diff --git a/crypto/engine/eng_dyn.c b/crypto/engine/eng_dyn.c index 3169b09..40f30e9 100644 --- a/crypto/engine/eng_dyn.c +++ b/crypto/engine/eng_dyn.c @@ -243,8 +243,10 @@ static int dynamic_set_data_ctx(ENGINE *e, dynamic_data_ctx **ctx) * If we lost the race to set the context, c is non-NULL and *ctx is the * context of the thread that won. */ - if (c) + if (c) { + sk_OPENSSL_STRING_free(c->dirs); OPENSSL_free(c); + } return 1; } diff --git a/crypto/evp/e_des.c b/crypto/evp/e_des.c index aae13a6..8ca65cd 100644 --- a/crypto/evp/e_des.c +++ b/crypto/evp/e_des.c @@ -71,12 +71,13 @@ typedef struct { DES_key_schedule ks; } ks; union { - void (*cbc) (const void *, void *, size_t, const void *, void *); + void (*cbc) (const void *, void *, size_t, + const DES_key_schedule *, unsigned char *); } stream; } EVP_DES_KEY; # if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) -/* ---------^^^ this is not a typo, just a way to detect that +/* ----------^^^ this is not a typo, just a way to detect that * assembler support was in general requested... */ # include "sparc_arch.h" @@ -86,9 +87,9 @@ extern unsigned int OPENSSL_sparcv9cap_P[]; void des_t4_key_expand(const void *key, DES_key_schedule *ks); void des_t4_cbc_encrypt(const void *inp, void *out, size_t len, - DES_key_schedule *ks, unsigned char iv[8]); + const DES_key_schedule *ks, unsigned char iv[8]); void des_t4_cbc_decrypt(const void *inp, void *out, size_t len, - DES_key_schedule *ks, unsigned char iv[8]); + const DES_key_schedule *ks, unsigned char iv[8]); # endif static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, @@ -130,7 +131,7 @@ static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, { EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data; - if (dat->stream.cbc) { + if (dat->stream.cbc != NULL) { (*dat->stream.cbc) (in, out, inl, &dat->ks.ks, ctx->iv); return 1; } diff --git a/crypto/evp/e_des3.c b/crypto/evp/e_des3.c index bf6c1d2..0e910d6 100644 --- a/crypto/evp/e_des3.c +++ b/crypto/evp/e_des3.c @@ -75,7 +75,8 @@ typedef struct { DES_key_schedule ks[3]; } ks; union { - void (*cbc) (const void *, void *, size_t, const void *, void *); + void (*cbc) (const void *, void *, size_t, + const DES_key_schedule *, unsigned char *); } stream; } DES_EDE_KEY; # define ks1 ks.ks[0] @@ -93,9 +94,9 @@ extern unsigned int OPENSSL_sparcv9cap_P[]; void des_t4_key_expand(const void *key, DES_key_schedule *ks); void des_t4_ede3_cbc_encrypt(const void *inp, void *out, size_t len, - DES_key_schedule *ks, unsigned char iv[8]); + const DES_key_schedule ks[3], unsigned char iv[8]); void des_t4_ede3_cbc_decrypt(const void *inp, void *out, size_t len, - DES_key_schedule *ks, unsigned char iv[8]); + const DES_key_schedule ks[3], unsigned char iv[8]); # endif static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, @@ -162,7 +163,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } # endif /* KSSL_DEBUG */ if (dat->stream.cbc) { - (*dat->stream.cbc) (in, out, inl, &dat->ks, ctx->iv); + (*dat->stream.cbc) (in, out, inl, dat->ks.ks, ctx->iv); return 1; } @@ -395,7 +396,7 @@ static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out, int rv = -1; if (inl < 24) return -1; - if (!out) + if (out == NULL) return inl - 16; memcpy(ctx->iv, wrap_iv, 8); /* Decrypt first block which will end up as icv */ @@ -438,7 +439,7 @@ static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { unsigned char sha1tmp[SHA_DIGEST_LENGTH]; - if (!out) + if (out == NULL) return inl + 16; /* Copy input to output buffer + 8 so we have space for IV */ memmove(out + 8, in, inl); diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl index bd6bf72..980cfd2 100755 --- a/crypto/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -43,7 +43,7 @@ die "can't locate x86_64-xlate.pl"; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` =~ /GNU assembler version ([2-9]\.[0-9]+)/) { - $avx = ($1>=2.19) + ($1>=2.22); + $avx = ($1>=2.20) + ($1>=2.22); } if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && @@ -489,7 +489,7 @@ $code.=<<___; ___ $code.=<<___ if ($win64); movaps -0xd8(%rax),%xmm6 - movaps -0xd8(%rax),%xmm7 + movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 movaps -0xa8(%rax),%xmm9 movaps -0x98(%rax),%xmm10 diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 4ff2d39..f889f20 100755 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -92,7 +92,7 @@ die "can't locate x86_64-xlate.pl"; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` =~ /GNU assembler version ([2-9]\.[0-9]+)/) { - $avx = ($1>=2.19) + ($1>=2.22); + $avx = ($1>=2.20) + ($1>=2.22); } if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && diff --git a/crypto/modes/ctr128.c b/crypto/modes/ctr128.c index f3bbcbf..bcafd6b 100644 --- a/crypto/modes/ctr128.c +++ b/crypto/modes/ctr128.c @@ -67,23 +67,20 @@ /* increment counter (128-bit int) by 1 */ static void ctr128_inc(unsigned char *counter) { - u32 n = 16; - u8 c; + u32 n = 16, c = 1; do { --n; - c = counter[n]; - ++c; - counter[n] = c; - if (c) - return; + c += counter[n]; + counter[n] = (u8)c; + c >>= 8; } while (n); } #if !defined(OPENSSL_SMALL_FOOTPRINT) static void ctr128_inc_aligned(unsigned char *counter) { - size_t *data, c, n; + size_t *data, c, d, n; const union { long one; char little; @@ -91,20 +88,19 @@ static void ctr128_inc_aligned(unsigned char *counter) 1 }; - if (is_endian.little) { + if (is_endian.little || ((size_t)counter % sizeof(size_t)) != 0) { ctr128_inc(counter); return; } data = (size_t *)counter; + c = 1; n = 16 / sizeof(size_t); do { --n; - c = data[n]; - ++c; - data[n] = c; - if (c) - return; + d = data[n] += c; + /* did addition carry? */ + c = ((d - c) ^ d) >> (sizeof(size_t) * 8 - 1); } while (n); } #endif @@ -144,14 +140,14 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, } # if defined(STRICT_ALIGNMENT) - if (((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != - 0) + if (((size_t)in | (size_t)out | (size_t)ecount_buf) + % sizeof(size_t) != 0) break; # endif while (len >= 16) { (*block) (ivec, ecount_buf, key); ctr128_inc_aligned(ivec); - for (; n < 16; n += sizeof(size_t)) + for (n = 0; n < 16; n += sizeof(size_t)) *(size_t *)(out + n) = *(size_t *)(in + n) ^ *(size_t *)(ecount_buf + n); len -= 16; @@ -189,16 +185,13 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, /* increment upper 96 bits of 128-bit counter by 1 */ static void ctr96_inc(unsigned char *counter) { - u32 n = 12; - u8 c; + u32 n = 12, c = 1; do { --n; - c = counter[n]; - ++c; - counter[n] = c; - if (c) - return; + c += counter[n]; + counter[n] = (u8)c; + c >>= 8; } while (n); } diff --git a/crypto/opensslconf.h b/crypto/opensslconf.h index b4d522e..f533508 100644 --- a/crypto/opensslconf.h +++ b/crypto/opensslconf.h @@ -38,12 +38,18 @@ extern "C" { #ifndef OPENSSL_NO_SSL_TRACE # define OPENSSL_NO_SSL_TRACE #endif +#ifndef OPENSSL_NO_SSL2 +# define OPENSSL_NO_SSL2 +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif #ifndef OPENSSL_NO_UNIT_TEST # define OPENSSL_NO_UNIT_TEST #endif +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS +# define OPENSSL_NO_WEAK_SSL_CIPHERS +#endif #endif /* OPENSSL_DOING_MAKEDEPEND */ @@ -86,12 +92,18 @@ extern "C" { # if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) # define NO_SSL_TRACE # endif +# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2) +# define NO_SSL2 +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif # if defined(OPENSSL_NO_UNIT_TEST) && !defined(NO_UNIT_TEST) # define NO_UNIT_TEST # endif +# if defined(OPENSSL_NO_WEAK_SSL_CIPHERS) && !defined(NO_WEAK_SSL_CIPHERS) +# define NO_WEAK_SSL_CIPHERS +# endif #endif /* crypto/opensslconf.h.in */ diff --git a/crypto/opensslv.h b/crypto/opensslv.h index 03b8c48..4334fd1 100644 --- a/crypto/opensslv.h +++ b/crypto/opensslv.h @@ -30,11 +30,11 @@ extern "C" { * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -# define OPENSSL_VERSION_NUMBER 0x1000206fL +# define OPENSSL_VERSION_NUMBER 0x1000207fL # ifdef OPENSSL_FIPS -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2f-fips 28 Jan 2016" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2g-fips 1 Mar 2016" # else -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2f 28 Jan 2016" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2g 1 Mar 2016" # endif # define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index 9c70b8c..ee04221 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -198,8 +198,11 @@ my %globals; if ($gas) { # Solaris /usr/ccs/bin/as can't handle multiplications # in $self->{value} - $self->{value} =~ s/(?{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + my $value = $self->{value}; + $value =~ s/(?{value} = $value; + } sprintf "\$%s",$self->{value}; } else { $self->{value} =~ s/(0b[0-1]+)/oct($1)/eig; diff --git a/crypto/pkcs7/pk7_smime.c b/crypto/pkcs7/pk7_smime.c index c4d3724..dc9b484 100644 --- a/crypto/pkcs7/pk7_smime.c +++ b/crypto/pkcs7/pk7_smime.c @@ -274,12 +274,29 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, PKCS7err(PKCS7_F_PKCS7_VERIFY, PKCS7_R_NO_CONTENT); return 0; } +#if 0 + /* + * NB: this test commented out because some versions of Netscape + * illegally include zero length content when signing data. Also + * Microsoft Authenticode includes a SpcIndirectDataContent data + * structure which describes the content to be protected by the + * signature, rather than directly embedding that content. So + * Authenticode implementations are also expected to use + * PKCS7_verify() with explicit external data, on non-detached + * PKCS#7 signatures. + * + * In OpenSSL 1.1 a new flag PKCS7_NO_DUAL_CONTENT has been + * introduced to disable this sanity check. For the 1.0.2 branch + * this change is not acceptable, so the check remains completely + * commented out (as it has been for a long time). + */ /* Check for data and content: two sets of data */ if (!PKCS7_get_detached(p7) && indata) { PKCS7err(PKCS7_F_PKCS7_VERIFY, PKCS7_R_CONTENT_AND_DATA_PRESENT); return 0; } +#endif sinfos = PKCS7_get_signer_info(p7); diff --git a/crypto/rsa/rsa_sign.c b/crypto/rsa/rsa_sign.c index ed63a1d..82ca832 100644 --- a/crypto/rsa/rsa_sign.c +++ b/crypto/rsa/rsa_sign.c @@ -84,7 +84,7 @@ int RSA_sign(int type, const unsigned char *m, unsigned int m_len, return 0; } #endif - if (rsa->meth->rsa_sign) { + if ((rsa->flags & RSA_FLAG_SIGN_VER) && rsa->meth->rsa_sign) { return rsa->meth->rsa_sign(type, m, m_len, sigret, siglen, rsa); } /* Special case: SSL signature, just check the length */ @@ -293,7 +293,7 @@ int RSA_verify(int dtype, const unsigned char *m, unsigned int m_len, const unsigned char *sigbuf, unsigned int siglen, RSA *rsa) { - if (rsa->meth->rsa_verify) { + if ((rsa->flags & RSA_FLAG_SIGN_VER) && rsa->meth->rsa_verify) { return rsa->meth->rsa_verify(dtype, m, m_len, sigbuf, siglen, rsa); } diff --git a/crypto/srp/srp.h b/crypto/srp/srp.h index d072536..028892a 100644 --- a/crypto/srp/srp.h +++ b/crypto/srp/srp.h @@ -82,16 +82,21 @@ typedef struct SRP_gN_cache_st { DECLARE_STACK_OF(SRP_gN_cache) typedef struct SRP_user_pwd_st { + /* Owned by us. */ char *id; BIGNUM *s; BIGNUM *v; + /* Not owned by us. */ const BIGNUM *g; const BIGNUM *N; + /* Owned by us. */ char *info; } SRP_user_pwd; DECLARE_STACK_OF(SRP_user_pwd) +void SRP_user_pwd_free(SRP_user_pwd *user_pwd); + typedef struct SRP_VBASE_st { STACK_OF(SRP_user_pwd) *users_pwd; STACK_OF(SRP_gN_cache) *gN_cache; @@ -115,7 +120,12 @@ DECLARE_STACK_OF(SRP_gN) SRP_VBASE *SRP_VBASE_new(char *seed_key); int SRP_VBASE_free(SRP_VBASE *vb); int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file); + +/* This method ignores the configured seed and fails for an unknown user. */ SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +/* NOTE: unlike in SRP_VBASE_get_by_user, caller owns the returned pointer.*/ +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username); + char *SRP_create_verifier(const char *user, const char *pass, char **salt, char **verifier, const char *N, const char *g); int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, diff --git a/crypto/srp/srp_vfy.c b/crypto/srp/srp_vfy.c index a3f1a8a..26ad3e0 100644 --- a/crypto/srp/srp_vfy.c +++ b/crypto/srp/srp_vfy.c @@ -185,7 +185,7 @@ static char *t_tob64(char *dst, const unsigned char *src, int size) return olddst; } -static void SRP_user_pwd_free(SRP_user_pwd *user_pwd) +void SRP_user_pwd_free(SRP_user_pwd *user_pwd) { if (user_pwd == NULL) return; @@ -247,6 +247,24 @@ static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v) return (vinfo->s != NULL && vinfo->v != NULL); } +static SRP_user_pwd *srp_user_pwd_dup(SRP_user_pwd *src) +{ + SRP_user_pwd *ret; + + if (src == NULL) + return NULL; + if ((ret = SRP_user_pwd_new()) == NULL) + return NULL; + + SRP_user_pwd_set_gN(ret, src->g, src->N); + if (!SRP_user_pwd_set_ids(ret, src->id, src->info) + || !SRP_user_pwd_set_sv_BN(ret, BN_dup(src->s), BN_dup(src->v))) { + SRP_user_pwd_free(ret); + return NULL; + } + return ret; +} + SRP_VBASE *SRP_VBASE_new(char *seed_key) { SRP_VBASE *vb = (SRP_VBASE *)OPENSSL_malloc(sizeof(SRP_VBASE)); @@ -468,21 +486,50 @@ int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file) } -SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username) +static SRP_user_pwd *find_user(SRP_VBASE *vb, char *username) { int i; SRP_user_pwd *user; - unsigned char digv[SHA_DIGEST_LENGTH]; - unsigned char digs[SHA_DIGEST_LENGTH]; - EVP_MD_CTX ctxt; if (vb == NULL) return NULL; + for (i = 0; i < sk_SRP_user_pwd_num(vb->users_pwd); i++) { user = sk_SRP_user_pwd_value(vb->users_pwd, i); if (strcmp(user->id, username) == 0) return user; } + + return NULL; +} + +/* + * This method ignores the configured seed and fails for an unknown user. + * Ownership of the returned pointer is not released to the caller. + * In other words, caller must not free the result. + */ +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username) +{ + return find_user(vb, username); +} + +/* + * Ownership of the returned pointer is released to the caller. + * In other words, caller must free the result once done. + */ +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username) +{ + SRP_user_pwd *user; + unsigned char digv[SHA_DIGEST_LENGTH]; + unsigned char digs[SHA_DIGEST_LENGTH]; + EVP_MD_CTX ctxt; + + if (vb == NULL) + return NULL; + + if ((user = find_user(vb, username)) != NULL) + return srp_user_pwd_dup(user); + if ((vb->seed_key == NULL) || (vb->default_g == NULL) || (vb->default_N == NULL)) return NULL; diff --git a/crypto/stack/stack.c b/crypto/stack/stack.c index de437ac..fa50083 100644 --- a/crypto/stack/stack.c +++ b/crypto/stack/stack.c @@ -360,7 +360,7 @@ void *sk_set(_STACK *st, int i, void *value) void sk_sort(_STACK *st) { - if (st && !st->sorted) { + if (st && !st->sorted && st->comp != NULL) { int (*comp_func) (const void *, const void *); /* diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c index 0429767..4d34dba 100644 --- a/crypto/x509/x509_vfy.c +++ b/crypto/x509/x509_vfy.c @@ -194,6 +194,9 @@ int X509_verify_cert(X509_STORE_CTX *ctx) int num, j, retry; int (*cb) (int xok, X509_STORE_CTX *xctx); STACK_OF(X509) *sktmp = NULL; + int trust = X509_TRUST_UNTRUSTED; + int err; + if (ctx->cert == NULL) { X509err(X509_F_X509_VERIFY_CERT, X509_R_NO_CERT_SET_FOR_US_TO_VERIFY); return -1; @@ -216,7 +219,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (((ctx->chain = sk_X509_new_null()) == NULL) || (!sk_X509_push(ctx->chain, ctx->cert))) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - goto end; + ok = -1; + goto err; } CRYPTO_add(&ctx->cert->references, 1, CRYPTO_LOCK_X509); ctx->last_untrusted = 1; @@ -225,7 +229,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (ctx->untrusted != NULL && (sktmp = sk_X509_dup(ctx->untrusted)) == NULL) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - goto end; + ok = -1; + goto err; } num = sk_X509_num(ctx->chain); @@ -249,7 +254,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) { ok = ctx->get_issuer(&xtmp, ctx, x); if (ok < 0) - goto end; + goto err; /* * If successful for now free up cert so it will be picked up * again later. @@ -266,7 +271,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (xtmp != NULL) { if (!sk_X509_push(ctx->chain, xtmp)) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - goto end; + ok = -1; + goto err; } CRYPTO_add(&xtmp->references, 1, CRYPTO_LOCK_X509); (void)sk_X509_delete_ptr(sktmp, xtmp); @@ -314,7 +320,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) bad_chain = 1; ok = cb(0, ctx); if (!ok) - goto end; + goto err; } else { /* * We have a match: replace certificate with store @@ -347,25 +353,26 @@ int X509_verify_cert(X509_STORE_CTX *ctx) ok = ctx->get_issuer(&xtmp, ctx, x); if (ok < 0) - goto end; + goto err; if (ok == 0) break; x = xtmp; if (!sk_X509_push(ctx->chain, x)) { X509_free(xtmp); X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - ok = 0; - goto end; + ok = -1; + goto err; } num++; } /* we now have our chain, lets check it... */ - i = check_trust(ctx); + if ((trust = check_trust(ctx)) == X509_TRUST_REJECTED) { + /* Callback already issued */ + ok = 0; + goto err; + } - /* If explicitly rejected error */ - if (i == X509_TRUST_REJECTED) - goto end; /* * If it's not explicitly trusted then check if there is an alternative * chain that could be used. We only do this if we haven't already @@ -373,14 +380,14 @@ int X509_verify_cert(X509_STORE_CTX *ctx) * chain checking */ retry = 0; - if (i != X509_TRUST_TRUSTED + if (trust != X509_TRUST_TRUSTED && !(ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) && !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) { while (j-- > 1) { xtmp2 = sk_X509_value(ctx->chain, j - 1); ok = ctx->get_issuer(&xtmp, ctx, xtmp2); if (ok < 0) - goto end; + goto err; /* Check if we found an alternate chain */ if (ok > 0) { /* @@ -410,7 +417,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) * self signed certificate in which case we've indicated an error already * and set bad_chain == 1 */ - if (i != X509_TRUST_TRUSTED && !bad_chain) { + if (trust != X509_TRUST_TRUSTED && !bad_chain) { if ((chain_ss == NULL) || !ctx->check_issued(ctx, x, chain_ss)) { if (ctx->last_untrusted >= num) ctx->error = X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY; @@ -431,26 +438,26 @@ int X509_verify_cert(X509_STORE_CTX *ctx) bad_chain = 1; ok = cb(0, ctx); if (!ok) - goto end; + goto err; } /* We have the chain complete: now we need to check its purpose */ ok = check_chain_extensions(ctx); if (!ok) - goto end; + goto err; /* Check name constraints */ ok = check_name_constraints(ctx); if (!ok) - goto end; + goto err; ok = check_id(ctx); if (!ok) - goto end; + goto err; /* We may as well copy down any DSA parameters that are required */ X509_get_pubkey_parameters(NULL, ctx->chain); @@ -462,16 +469,16 @@ int X509_verify_cert(X509_STORE_CTX *ctx) ok = ctx->check_revocation(ctx); if (!ok) - goto end; + goto err; - i = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain, - ctx->param->flags); - if (i != X509_V_OK) { - ctx->error = i; + err = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain, + ctx->param->flags); + if (err != X509_V_OK) { + ctx->error = err; ctx->current_cert = sk_X509_value(ctx->chain, ctx->error_depth); ok = cb(0, ctx); if (!ok) - goto end; + goto err; } /* At this point, we have a chain and need to verify it */ @@ -480,25 +487,28 @@ int X509_verify_cert(X509_STORE_CTX *ctx) else ok = internal_verify(ctx); if (!ok) - goto end; + goto err; #ifndef OPENSSL_NO_RFC3779 /* RFC 3779 path validation, now that CRL check has been done */ ok = v3_asid_validate_path(ctx); if (!ok) - goto end; + goto err; ok = v3_addr_validate_path(ctx); if (!ok) - goto end; + goto err; #endif /* If we get this far evaluate policies */ if (!bad_chain && (ctx->param->flags & X509_V_FLAG_POLICY_CHECK)) ok = ctx->check_policy(ctx); if (!ok) - goto end; + goto err; if (0) { - end: + err: + /* Ensure we return an error */ + if (ok > 0) + ok = 0; X509_get_pubkey_parameters(NULL, ctx->chain); } if (sktmp != NULL) diff --git a/doc/apps/ciphers.pod b/doc/apps/ciphers.pod index 1c26e3b..9643b4d 100644 --- a/doc/apps/ciphers.pod +++ b/doc/apps/ciphers.pod @@ -38,25 +38,21 @@ SSL v2 and for SSL v3/TLS v1. Like B<-v>, but include cipher suite codes in output (hex format). -=item B<-ssl3> +=item B<-ssl3>, B<-tls1> -only include SSL v3 ciphers. +This lists ciphers compatible with any of SSLv3, TLSv1, TLSv1.1 or TLSv1.2. =item B<-ssl2> -only include SSL v2 ciphers. - -=item B<-tls1> - -only include TLS v1 ciphers. +Only include SSLv2 ciphers. =item B<-h>, B<-?> -print a brief usage message. +Print a brief usage message. =item B -a cipher list to convert to a cipher preference list. If it is not included +A cipher list to convert to a cipher preference list. If it is not included then the default cipher list will be used. The format is described below. =back @@ -109,9 +105,10 @@ The following is a list of all permitted cipher strings and their meanings. =item B -the default cipher list. This is determined at compile time and -is normally B. This must be the firstcipher string -specified. +The default cipher list. +This is determined at compile time and is normally +B. +When used, this must be the first cipherstring specified. =item B @@ -139,34 +136,46 @@ than 128 bits, and some cipher suites with 128-bit keys. =item B -"low" encryption cipher suites, currently those using 64 or 56 bit encryption algorithms -but excluding export cipher suites. +Low strength encryption cipher suites, currently those using 64 or 56 bit +encryption algorithms but excluding export cipher suites. +As of OpenSSL 1.0.2g, these are disabled in default builds. =item B, B -export encryption algorithms. Including 40 and 56 bits algorithms. +Export strength encryption algorithms. Including 40 and 56 bits algorithms. +As of OpenSSL 1.0.2g, these are disabled in default builds. =item B -40 bit export encryption algorithms +40-bit export encryption algorithms +As of OpenSSL 1.0.2g, these are disabled in default builds. =item B -56 bit export encryption algorithms. In OpenSSL 0.9.8c and later the set of +56-bit export encryption algorithms. In OpenSSL 0.9.8c and later the set of 56 bit export ciphers is empty unless OpenSSL has been explicitly configured with support for experimental ciphers. +As of OpenSSL 1.0.2g, these are disabled in default builds. =item B, B -the "NULL" ciphers that is those offering no encryption. Because these offer no -encryption at all and are a security risk they are disabled unless explicitly -included. +The "NULL" ciphers that is those offering no encryption. Because these offer no +encryption at all and are a security risk they are not enabled via either the +B or B cipher strings. +Be careful when building cipherlists out of lower-level primitives such as +B or B as these do overlap with the B ciphers. +When in doubt, include B in your cipherlist. =item B -the cipher suites offering no authentication. This is currently the anonymous +The cipher suites offering no authentication. This is currently the anonymous DH algorithms and anonymous ECDH algorithms. These cipher suites are vulnerable to a "man in the middle" attack and so their use is normally discouraged. +These are excluded from the B ciphers, but included in the B +ciphers. +Be careful when building cipherlists out of lower-level primitives such as +B or B as these do overlap with the B ciphers. +When in doubt, include B in your cipherlist. =item B, B @@ -582,11 +591,11 @@ Note: these ciphers can also be used in SSL v3. =head2 Deprecated SSL v2.0 cipher suites. SSL_CK_RC4_128_WITH_MD5 RC4-MD5 - SSL_CK_RC4_128_EXPORT40_WITH_MD5 EXP-RC4-MD5 - SSL_CK_RC2_128_CBC_WITH_MD5 RC2-MD5 - SSL_CK_RC2_128_CBC_EXPORT40_WITH_MD5 EXP-RC2-MD5 + SSL_CK_RC4_128_EXPORT40_WITH_MD5 Not implemented. + SSL_CK_RC2_128_CBC_WITH_MD5 RC2-CBC-MD5 + SSL_CK_RC2_128_CBC_EXPORT40_WITH_MD5 Not implemented. SSL_CK_IDEA_128_CBC_WITH_MD5 IDEA-CBC-MD5 - SSL_CK_DES_64_CBC_WITH_MD5 DES-CBC-MD5 + SSL_CK_DES_64_CBC_WITH_MD5 Not implemented. SSL_CK_DES_192_EDE3_CBC_WITH_MD5 DES-CBC3-MD5 =head1 NOTES diff --git a/doc/apps/pkeyutl.pod b/doc/apps/pkeyutl.pod index 27be9a9..5da347c 100644 --- a/doc/apps/pkeyutl.pod +++ b/doc/apps/pkeyutl.pod @@ -137,6 +137,19 @@ Unless otherwise mentioned all algorithms support the B option which specifies the digest in use for sign, verify and verifyrecover operations. The value B should represent a digest name as used in the EVP_get_digestbyname() function for example B. +This value is used only for sanity-checking the lengths of data passed in to +the B and for creating the structures that make up the signature +(e.g. B in RSASSA PKCS#1 v1.5 signatures). +In case of RSA, ECDSA and DSA signatures, this utility +will not perform hashing on input data but rather use the data directly as +input of signature algorithm. Depending on key type, signature type and mode +of padding, the maximum acceptable lengths of input data differ. In general, +with RSA the signed data can't be longer than the key modulus, in case of ECDSA +and DSA the data shouldn't be longer than field size, otherwise it will be +silently truncated to field size. + +In other words, if the value of digest is B the input should be 20 bytes +long binary encoding of SHA-1 hash function output. =head1 RSA ALGORITHM diff --git a/doc/apps/req.pod b/doc/apps/req.pod index 54a4d39..30653e5 100644 --- a/doc/apps/req.pod +++ b/doc/apps/req.pod @@ -347,9 +347,12 @@ configuration file values. =item B -This specifies the default key size in bits. If not specified then -512 is used. It is used if the B<-new> option is used. It can be -overridden by using the B<-newkey> option. +Specifies the default key size in bits. + +This option is used in conjunction with the B<-new> option to generate +a new key. It can be overridden by specifying an explicit key size in +the B<-newkey> option. The smallest accepted key size is 512 bits. If +no key size is specified then 2048 bits is used. =item B diff --git a/doc/apps/s_client.pod b/doc/apps/s_client.pod index 84d0527..618df96 100644 --- a/doc/apps/s_client.pod +++ b/doc/apps/s_client.pod @@ -201,15 +201,11 @@ Use the PSK key B when using a PSK cipher suite. The key is given as a hexadecimal number without leading 0x, for example -psk 1a2b3c4d. -=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> +=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> -these options disable the use of certain SSL or TLS protocols. By default -the initial handshake uses a method which should be compatible with all -servers and permit them to use SSL v3, SSL v2 or TLS as appropriate. - -Unfortunately there are still ancient and broken servers in use which -cannot handle this technique and will fail to connect. Some servers only -work if TLS is turned off. +These options require or disable the use of the specified SSL or TLS protocols. +By default the initial handshake uses a I method which will +negotiate the highest mutually supported protocol version. =item B<-fallback_scsv> diff --git a/doc/apps/s_server.pod b/doc/apps/s_server.pod index baca779..6f4acb7 100644 --- a/doc/apps/s_server.pod +++ b/doc/apps/s_server.pod @@ -217,11 +217,11 @@ Use the PSK key B when using a PSK cipher suite. The key is given as a hexadecimal number without leading 0x, for example -psk 1a2b3c4d. -=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1> +=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> -these options disable the use of certain SSL or TLS protocols. By default -the initial handshake uses a method which should be compatible with all -servers and permit them to use SSL v3, SSL v2 or TLS as appropriate. +These options require or disable the use of the specified SSL or TLS protocols. +By default the initial handshake uses a I method which will +negotiate the highest mutually supported protocol version. =item B<-bugs> diff --git a/doc/crypto/BIO_s_mem.pod b/doc/crypto/BIO_s_mem.pod index 8f85e0d..9f23964 100644 --- a/doc/crypto/BIO_s_mem.pod +++ b/doc/crypto/BIO_s_mem.pod @@ -16,7 +16,7 @@ BIO_get_mem_ptr, BIO_new_mem_buf - memory BIO BIO_set_mem_buf(BIO *b,BUF_MEM *bm,int c) BIO_get_mem_ptr(BIO *b,BUF_MEM **pp) - BIO *BIO_new_mem_buf(void *buf, int len); + BIO *BIO_new_mem_buf(const void *buf, int len); =head1 DESCRIPTION @@ -61,7 +61,7 @@ BIO_get_mem_ptr() places the underlying BUF_MEM structure in B. It is a macro. BIO_new_mem_buf() creates a memory BIO using B bytes of data at B, -if B is -1 then the B is assumed to be null terminated and its +if B is -1 then the B is assumed to be nul terminated and its length is determined by B. The BIO is set to a read only state and as a result cannot be written to. This is useful when some data needs to be made available from a static area of memory in the form of a BIO. The diff --git a/doc/ssl/SSL_CONF_cmd.pod b/doc/ssl/SSL_CONF_cmd.pod index 2bf1a60..e81d76a 100644 --- a/doc/ssl/SSL_CONF_cmd.pod +++ b/doc/ssl/SSL_CONF_cmd.pod @@ -74,7 +74,7 @@ B). Curve names are case sensitive. =item B<-named_curve> -This sets the temporary curve used for ephemeral ECDH modes. Only used by +This sets the temporary curve used for ephemeral ECDH modes. Only used by servers The B argument is a curve name or the special value B which @@ -85,7 +85,7 @@ can be either the B name (e.g. B) or an OpenSSL OID name =item B<-cipher> Sets the cipher suite list to B. Note: syntax checking of B is -currently not performed unless a B or B structure is +currently not performed unless a B or B structure is associated with B. =item B<-cert> @@ -111,9 +111,9 @@ operations are permitted. =item B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> -Disables protocol support for SSLv2, SSLv3, TLS 1.0, TLS 1.1 or TLS 1.2 -by setting the corresponding options B, B, -B, B and B respectively. +Disables protocol support for SSLv2, SSLv3, TLSv1.0, TLSv1.1 or TLSv1.2 +by setting the corresponding options B, B, +B, B and B respectively. =item B<-bugs> @@ -177,7 +177,7 @@ Note: the command prefix (if set) alters the recognised B values. =item B Sets the cipher suite list to B. Note: syntax checking of B is -currently not performed unless an B or B structure is +currently not performed unless an B or B structure is associated with B. =item B @@ -244,7 +244,7 @@ B). Curve names are case sensitive. =item B -This sets the temporary curve used for ephemeral ECDH modes. Only used by +This sets the temporary curve used for ephemeral ECDH modes. Only used by servers The B argument is a curve name or the special value B which @@ -258,10 +258,11 @@ The supported versions of the SSL or TLS protocol. The B argument is a comma separated list of supported protocols to enable or disable. If an protocol is preceded by B<-> that version is disabled. -All versions are enabled by default, though applications may choose to -explicitly disable some. Currently supported protocol values are B, -B, B, B and B. The special value B refers -to all supported versions. +Currently supported protocol values are B, B, B, +B and B. +All protocol versions other than B are enabled by default. +To avoid inadvertent enabling of B, when SSLv2 is disabled, it is not +possible to enable it via the B command. =item B @@ -339,16 +340,16 @@ The value is a directory name. The order of operations is significant. This can be used to set either defaults or values which cannot be overridden. For example if an application calls: - SSL_CONF_cmd(ctx, "Protocol", "-SSLv2"); + SSL_CONF_cmd(ctx, "Protocol", "-SSLv3"); SSL_CONF_cmd(ctx, userparam, uservalue); -it will disable SSLv2 support by default but the user can override it. If +it will disable SSLv3 support by default but the user can override it. If however the call sequence is: SSL_CONF_cmd(ctx, userparam, uservalue); - SSL_CONF_cmd(ctx, "Protocol", "-SSLv2"); + SSL_CONF_cmd(ctx, "Protocol", "-SSLv3"); -SSLv2 is B disabled and attempt to override this by the user are +then SSLv3 is B disabled and attempt to override this by the user are ignored. By checking the return code of SSL_CTX_cmd() it is possible to query if a @@ -372,7 +373,7 @@ can be checked instead. If -3 is returned a required argument is missing and an error is indicated. If 0 is returned some other error occurred and this can be reported back to the user. -The function SSL_CONF_cmd_value_type() can be used by applications to +The function SSL_CONF_cmd_value_type() can be used by applications to check for the existence of a command or to perform additional syntax checking or translation of the command value. For example if the return value is B an application could translate a relative diff --git a/doc/ssl/SSL_CTX_new.pod b/doc/ssl/SSL_CTX_new.pod index 491ac8c..b8cc879 100644 --- a/doc/ssl/SSL_CTX_new.pod +++ b/doc/ssl/SSL_CTX_new.pod @@ -2,13 +2,55 @@ =head1 NAME -SSL_CTX_new - create a new SSL_CTX object as framework for TLS/SSL enabled functions +SSL_CTX_new, +SSLv23_method, SSLv23_server_method, SSLv23_client_method, +TLSv1_2_method, TLSv1_2_server_method, TLSv1_2_client_method, +TLSv1_1_method, TLSv1_1_server_method, TLSv1_1_client_method, +TLSv1_method, TLSv1_server_method, TLSv1_client_method, +SSLv3_method, SSLv3_server_method, SSLv3_client_method, +SSLv2_method, SSLv2_server_method, SSLv2_client_method, +DTLS_method, DTLS_server_method, DTLS_client_method, +DTLSv1_2_method, DTLSv1_2_server_method, DTLSv1_2_client_method, +DTLSv1_method, DTLSv1_server_method, DTLSv1_client_method - +create a new SSL_CTX object as framework for TLS/SSL enabled functions =head1 SYNOPSIS #include SSL_CTX *SSL_CTX_new(const SSL_METHOD *method); + const SSL_METHOD *SSLv23_method(void); + const SSL_METHOD *SSLv23_server_method(void); + const SSL_METHOD *SSLv23_client_method(void); + const SSL_METHOD *TLSv1_2_method(void); + const SSL_METHOD *TLSv1_2_server_method(void); + const SSL_METHOD *TLSv1_2_client_method(void); + const SSL_METHOD *TLSv1_1_method(void); + const SSL_METHOD *TLSv1_1_server_method(void); + const SSL_METHOD *TLSv1_1_client_method(void); + const SSL_METHOD *TLSv1_method(void); + const SSL_METHOD *TLSv1_server_method(void); + const SSL_METHOD *TLSv1_client_method(void); + #ifndef OPENSSL_NO_SSL3_METHOD + const SSL_METHOD *SSLv3_method(void); + const SSL_METHOD *SSLv3_server_method(void); + const SSL_METHOD *SSLv3_client_method(void); + #endif + #ifndef OPENSSL_NO_SSL2 + const SSL_METHOD *SSLv2_method(void); + const SSL_METHOD *SSLv2_server_method(void); + const SSL_METHOD *SSLv2_client_method(void); + #endif + + const SSL_METHOD *DTLS_method(void); + const SSL_METHOD *DTLS_server_method(void); + const SSL_METHOD *DTLS_client_method(void); + const SSL_METHOD *DTLSv1_2_method(void); + const SSL_METHOD *DTLSv1_2_server_method(void); + const SSL_METHOD *DTLSv1_2_client_method(void); + const SSL_METHOD *DTLSv1_method(void); + const SSL_METHOD *DTLSv1_server_method(void); + const SSL_METHOD *DTLSv1_client_method(void); =head1 DESCRIPTION @@ -23,65 +65,88 @@ client only type. B can be of the following types: =over 4 -=item SSLv2_method(void), SSLv2_server_method(void), SSLv2_client_method(void) +=item SSLv23_method(), SSLv23_server_method(), SSLv23_client_method() + +These are the general-purpose I SSL/TLS methods. +The actual protocol version used will be negotiated to the highest version +mutually supported by the client and the server. +The supported protocols are SSLv2, SSLv3, TLSv1, TLSv1.1 and TLSv1.2. +Most applications should use these method, and avoid the version specific +methods described below. + +The list of protocols available can be further limited using the +B, B, B, +B and B options of the +L or L functions. +Clients should avoid creating "holes" in the set of protocols they support, +when disabling a protocol, make sure that you also disable either all previous +or all subsequent protocol versions. +In clients, when a protocol version is disabled without disabling I +previous protocol versions, the effect is to also disable all subsequent +protocol versions. + +The SSLv2 and SSLv3 protocols are deprecated and should generally not be used. +Applications should typically use L in combination with +the B flag to disable negotiation of SSLv3 via the above +I SSL/TLS methods. +The B option is set by default, and would need to be cleared +via L in order to enable negotiation of SSLv2. + +=item TLSv1_2_method(), TLSv1_2_server_method(), TLSv1_2_client_method() -A TLS/SSL connection established with these methods will only understand -the SSLv2 protocol. A client will send out SSLv2 client hello messages -and will also indicate that it only understand SSLv2. A server will only -understand SSLv2 client hello messages. +A TLS/SSL connection established with these methods will only understand the +TLSv1.2 protocol. A client will send out TLSv1.2 client hello messages and +will also indicate that it only understand TLSv1.2. A server will only +understand TLSv1.2 client hello messages. -=item SSLv3_method(void), SSLv3_server_method(void), SSLv3_client_method(void) +=item TLSv1_1_method(), TLSv1_1_server_method(), TLSv1_1_client_method() A TLS/SSL connection established with these methods will only understand the -SSLv3 protocol. A client will send out SSLv3 client hello messages -and will indicate that it only understands SSLv3. A server will only understand -SSLv3 client hello messages. This especially means, that it will -not understand SSLv2 client hello messages which are widely used for -compatibility reasons, see SSLv23_*_method(). +TLSv1.1 protocol. A client will send out TLSv1.1 client hello messages and +will also indicate that it only understand TLSv1.1. A server will only +understand TLSv1.1 client hello messages. -=item TLSv1_method(void), TLSv1_server_method(void), TLSv1_client_method(void) +=item TLSv1_method(), TLSv1_server_method(), TLSv1_client_method() A TLS/SSL connection established with these methods will only understand the -TLSv1 protocol. A client will send out TLSv1 client hello messages -and will indicate that it only understands TLSv1. A server will only understand -TLSv1 client hello messages. This especially means, that it will -not understand SSLv2 client hello messages which are widely used for -compatibility reasons, see SSLv23_*_method(). It will also not understand -SSLv3 client hello messages. - -=item SSLv23_method(void), SSLv23_server_method(void), SSLv23_client_method(void) - -A TLS/SSL connection established with these methods may understand the SSLv2, -SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols. - -If the cipher list does not contain any SSLv2 ciphersuites (the default -cipher list does not) or extensions are required (for example server name) -a client will send out TLSv1 client hello messages including extensions and -will indicate that it also understands TLSv1.1, TLSv1.2 and permits a -fallback to SSLv3. A server will support SSLv3, TLSv1, TLSv1.1 and TLSv1.2 -protocols. This is the best choice when compatibility is a concern. - -If any SSLv2 ciphersuites are included in the cipher list and no extensions -are required then SSLv2 compatible client hellos will be used by clients and -SSLv2 will be accepted by servers. This is B recommended due to the -insecurity of SSLv2 and the limited nature of the SSLv2 client hello -prohibiting the use of extensions. +TLSv1 protocol. A client will send out TLSv1 client hello messages and will +indicate that it only understands TLSv1. A server will only understand TLSv1 +client hello messages. -=back +=item SSLv3_method(), SSLv3_server_method(), SSLv3_client_method() + +A TLS/SSL connection established with these methods will only understand the +SSLv3 protocol. A client will send out SSLv3 client hello messages and will +indicate that it only understands SSLv3. A server will only understand SSLv3 +client hello messages. The SSLv3 protocol is deprecated and should not be +used. + +=item SSLv2_method(), SSLv2_server_method(), SSLv2_client_method() + +A TLS/SSL connection established with these methods will only understand the +SSLv2 protocol. A client will send out SSLv2 client hello messages and will +also indicate that it only understand SSLv2. A server will only understand +SSLv2 client hello messages. The SSLv2 protocol offers little to no security +and should not be used. +As of OpenSSL 1.0.2g, EXPORT ciphers and 56-bit DES are no longer available +with SSLv2. -The list of protocols available can later be limited using the SSL_OP_NO_SSLv2, -SSL_OP_NO_SSLv3, SSL_OP_NO_TLSv1, SSL_OP_NO_TLSv1_1 and SSL_OP_NO_TLSv1_2 -options of the SSL_CTX_set_options() or SSL_set_options() functions. -Using these options it is possible to choose e.g. SSLv23_server_method() and -be able to negotiate with all possible clients, but to only allow newer -protocols like TLSv1, TLSv1.1 or TLS v1.2. +=item DTLS_method(), DTLS_server_method(), DTLS_client_method() -Applications which never want to support SSLv2 (even is the cipher string -is configured to use SSLv2 ciphersuites) can set SSL_OP_NO_SSLv2. +These are the version-flexible DTLS methods. + +=item DTLSv1_2_method(), DTLSv1_2_server_method(), DTLSv1_2_client_method() + +These are the version-specific methods for DTLSv1.2. + +=item DTLSv1_method(), DTLSv1_server_method(), DTLSv1_client_method() + +These are the version-specific methods for DTLSv1. + +=back -SSL_CTX_new() initializes the list of ciphers, the session cache setting, -the callbacks, the keys and certificates and the options to its default -values. +SSL_CTX_new() initializes the list of ciphers, the session cache setting, the +callbacks, the keys and certificates and the options to its default values. =head1 RETURN VALUES @@ -91,8 +156,8 @@ The following return values can occur: =item NULL -The creation of a new SSL_CTX object failed. Check the error stack to -find out the reason. +The creation of a new SSL_CTX object failed. Check the error stack to find out +the reason. =item Pointer to an SSL_CTX object @@ -102,6 +167,7 @@ The return value points to an allocated SSL_CTX object. =head1 SEE ALSO +L, L, L, L, L, L, L diff --git a/doc/ssl/SSL_CTX_set_options.pod b/doc/ssl/SSL_CTX_set_options.pod index e80a72c..9a7e98c 100644 --- a/doc/ssl/SSL_CTX_set_options.pod +++ b/doc/ssl/SSL_CTX_set_options.pod @@ -189,15 +189,25 @@ browser has a cert, it will crash/hang. Works for 3.x and 4.xbeta =item SSL_OP_NO_SSLv2 Do not use the SSLv2 protocol. +As of OpenSSL 1.0.2g the B option is set by default. =item SSL_OP_NO_SSLv3 Do not use the SSLv3 protocol. +It is recommended that applications should set this option. =item SSL_OP_NO_TLSv1 Do not use the TLSv1 protocol. +=item SSL_OP_NO_TLSv1_1 + +Do not use the TLSv1.1 protocol. + +=item SSL_OP_NO_TLSv1_2 + +Do not use the TLSv1.2 protocol. + =item SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION When performing renegotiation as a server, always start a new session diff --git a/doc/ssl/ssl.pod b/doc/ssl/ssl.pod index 242087e..70cca17 100644 --- a/doc/ssl/ssl.pod +++ b/doc/ssl/ssl.pod @@ -130,41 +130,86 @@ protocol methods defined in B structures. =over 4 -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); -Constructor for the SSLv2 SSL_METHOD structure for a dedicated client. +Constructor for the I SSL_METHOD structure for +clients, servers or both. +See L for details. -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); -Constructor for the SSLv2 SSL_METHOD structure for a dedicated server. +Constructor for the I SSL_METHOD structure for +clients. -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); -Constructor for the SSLv2 SSL_METHOD structure for combined client and server. +Constructor for the I SSL_METHOD structure for +servers. -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); -Constructor for the SSLv3 SSL_METHOD structure for a dedicated client. +Constructor for the TLSv1.2 SSL_METHOD structure for clients, servers +or both. -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); -Constructor for the SSLv3 SSL_METHOD structure for a dedicated server. +Constructor for the TLSv1.2 SSL_METHOD structure for clients. -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); + +Constructor for the TLSv1.2 SSL_METHOD structure for servers. + +=item const SSL_METHOD *B(void); -Constructor for the SSLv3 SSL_METHOD structure for combined client and server. +Constructor for the TLSv1.1 SSL_METHOD structure for clients, servers +or both. + +=item const SSL_METHOD *B(void); + +Constructor for the TLSv1.1 SSL_METHOD structure for clients. + +=item const SSL_METHOD *B(void); + +Constructor for the TLSv1.1 SSL_METHOD structure for servers. + +=item const SSL_METHOD *B(void); + +Constructor for the TLSv1 SSL_METHOD structure for clients, servers +or both. =item const SSL_METHOD *B(void); -Constructor for the TLSv1 SSL_METHOD structure for a dedicated client. +Constructor for the TLSv1 SSL_METHOD structure for clients. =item const SSL_METHOD *B(void); -Constructor for the TLSv1 SSL_METHOD structure for a dedicated server. +Constructor for the TLSv1 SSL_METHOD structure for servers. -=item const SSL_METHOD *B(void); +=item const SSL_METHOD *B(void); + +Constructor for the SSLv3 SSL_METHOD structure for clients, servers +or both. + +=item const SSL_METHOD *B(void); + +Constructor for the SSLv3 SSL_METHOD structure for clients. + +=item const SSL_METHOD *B(void); + +Constructor for the SSLv3 SSL_METHOD structure for servers. + +=item const SSL_METHOD *B(void); + +Constructor for the SSLv2 SSL_METHOD structure for clients, servers +or both. + +=item const SSL_METHOD *B(void); + +Constructor for the SSLv2 SSL_METHOD structure for clients. + +=item const SSL_METHOD *B(void); -Constructor for the TLSv1 SSL_METHOD structure for combined client and server. +Constructor for the SSLv2 SSL_METHOD structure for servers. =back diff --git a/engines/e_capi.c b/engines/e_capi.c index f4cd2ff..6e52463 100644 --- a/engines/e_capi.c +++ b/engines/e_capi.c @@ -114,6 +114,26 @@ # define CERT_SYSTEM_STORE_CURRENT_USER 0x00010000 # endif +# ifndef ALG_SID_SHA_256 +# define ALG_SID_SHA_256 12 +# endif +# ifndef ALG_SID_SHA_384 +# define ALG_SID_SHA_384 13 +# endif +# ifndef ALG_SID_SHA_512 +# define ALG_SID_SHA_512 14 +# endif + +# ifndef CALG_SHA_256 +# define CALG_SHA_256 (ALG_CLASS_HASH | ALG_TYPE_ANY | ALG_SID_SHA_256) +# endif +# ifndef CALG_SHA_384 +# define CALG_SHA_384 (ALG_CLASS_HASH | ALG_TYPE_ANY | ALG_SID_SHA_384) +# endif +# ifndef CALG_SHA_512 +# define CALG_SHA_512 (ALG_CLASS_HASH | ALG_TYPE_ANY | ALG_SID_SHA_512) +# endif + # include # include # include @@ -800,6 +820,18 @@ int capi_rsa_sign(int dtype, const unsigned char *m, unsigned int m_len, } /* Convert the signature type to a CryptoAPI algorithm ID */ switch (dtype) { + case NID_sha256: + alg = CALG_SHA_256; + break; + + case NID_sha384: + alg = CALG_SHA_384; + break; + + case NID_sha512: + alg = CALG_SHA_512; + break; + case NID_sha1: alg = CALG_SHA1; break; diff --git a/ssl/Makefile b/ssl/Makefile index 7b90fb0..b6dee5b 100644 --- a/ssl/Makefile +++ b/ssl/Makefile @@ -15,7 +15,7 @@ KRB5_INCLUDES= CFLAGS= $(INCLUDES) $(CFLAG) GENERAL=Makefile README ssl-lib.com install.com -TEST=ssltest.c heartbeat_test.c clienthellotest.c +TEST=ssltest.c heartbeat_test.c clienthellotest.c sslv2conftest.c APPS= LIB=$(TOP)/libssl.a @@ -399,14 +399,14 @@ s2_clnt.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h s2_clnt.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h s2_clnt.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h s2_clnt.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h -s2_clnt.o: ../include/openssl/pqueue.h ../include/openssl/rand.h -s2_clnt.o: ../include/openssl/rsa.h ../include/openssl/safestack.h -s2_clnt.o: ../include/openssl/sha.h ../include/openssl/srtp.h -s2_clnt.o: ../include/openssl/ssl.h ../include/openssl/ssl2.h -s2_clnt.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h -s2_clnt.o: ../include/openssl/stack.h ../include/openssl/symhacks.h -s2_clnt.o: ../include/openssl/tls1.h ../include/openssl/x509.h -s2_clnt.o: ../include/openssl/x509_vfy.h s2_clnt.c ssl_locl.h +s2_clnt.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h +s2_clnt.o: ../include/openssl/safestack.h ../include/openssl/sha.h +s2_clnt.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +s2_clnt.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +s2_clnt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +s2_clnt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +s2_clnt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_clnt.c +s2_clnt.o: ssl_locl.h s2_enc.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h s2_enc.o: ../include/openssl/buffer.h ../include/openssl/comp.h s2_enc.o: ../include/openssl/crypto.h ../include/openssl/dsa.h @@ -435,18 +435,18 @@ s2_lib.o: ../include/openssl/ec.h ../include/openssl/ecdh.h s2_lib.o: ../include/openssl/ecdsa.h ../include/openssl/err.h s2_lib.o: ../include/openssl/evp.h ../include/openssl/hmac.h s2_lib.o: ../include/openssl/kssl.h ../include/openssl/lhash.h -s2_lib.o: ../include/openssl/md5.h ../include/openssl/obj_mac.h -s2_lib.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h -s2_lib.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h -s2_lib.o: ../include/openssl/pem.h ../include/openssl/pem2.h -s2_lib.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h -s2_lib.o: ../include/openssl/rsa.h ../include/openssl/safestack.h -s2_lib.o: ../include/openssl/sha.h ../include/openssl/srtp.h -s2_lib.o: ../include/openssl/ssl.h ../include/openssl/ssl2.h -s2_lib.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h -s2_lib.o: ../include/openssl/stack.h ../include/openssl/symhacks.h -s2_lib.o: ../include/openssl/tls1.h ../include/openssl/x509.h -s2_lib.o: ../include/openssl/x509_vfy.h s2_lib.c ssl_locl.h +s2_lib.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h +s2_lib.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h +s2_lib.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h +s2_lib.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h +s2_lib.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h +s2_lib.o: ../include/openssl/safestack.h ../include/openssl/sha.h +s2_lib.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +s2_lib.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +s2_lib.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +s2_lib.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +s2_lib.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_lib.c +s2_lib.o: ssl_locl.h s2_meth.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h s2_meth.o: ../include/openssl/buffer.h ../include/openssl/comp.h s2_meth.o: ../include/openssl/crypto.h ../include/openssl/dsa.h @@ -487,20 +487,19 @@ s2_pkt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h s2_pkt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h s2_pkt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_pkt.c s2_pkt.o: ssl_locl.h -s2_srvr.o: ../crypto/constant_time_locl.h ../e_os.h ../include/openssl/asn1.h -s2_srvr.o: ../include/openssl/bio.h ../include/openssl/buffer.h -s2_srvr.o: ../include/openssl/comp.h ../include/openssl/crypto.h -s2_srvr.o: ../include/openssl/dsa.h ../include/openssl/dtls1.h -s2_srvr.o: ../include/openssl/e_os2.h ../include/openssl/ec.h -s2_srvr.o: ../include/openssl/ecdh.h ../include/openssl/ecdsa.h -s2_srvr.o: ../include/openssl/err.h ../include/openssl/evp.h -s2_srvr.o: ../include/openssl/hmac.h ../include/openssl/kssl.h -s2_srvr.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h -s2_srvr.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h -s2_srvr.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h -s2_srvr.o: ../include/openssl/pem.h ../include/openssl/pem2.h -s2_srvr.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h -s2_srvr.o: ../include/openssl/rand.h ../include/openssl/rsa.h +s2_srvr.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h +s2_srvr.o: ../include/openssl/buffer.h ../include/openssl/comp.h +s2_srvr.o: ../include/openssl/crypto.h ../include/openssl/dsa.h +s2_srvr.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h +s2_srvr.o: ../include/openssl/ec.h ../include/openssl/ecdh.h +s2_srvr.o: ../include/openssl/ecdsa.h ../include/openssl/err.h +s2_srvr.o: ../include/openssl/evp.h ../include/openssl/hmac.h +s2_srvr.o: ../include/openssl/kssl.h ../include/openssl/lhash.h +s2_srvr.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h +s2_srvr.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h +s2_srvr.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h +s2_srvr.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h +s2_srvr.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h s2_srvr.o: ../include/openssl/safestack.h ../include/openssl/sha.h s2_srvr.o: ../include/openssl/srtp.h ../include/openssl/ssl.h s2_srvr.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h diff --git a/ssl/s2_lib.c b/ssl/s2_lib.c index d55b93f..a8036b3 100644 --- a/ssl/s2_lib.c +++ b/ssl/s2_lib.c @@ -156,6 +156,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 128, }, +# if 0 /* RC4_128_EXPORT40_WITH_MD5 */ { 1, @@ -171,6 +172,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 40, 128, }, +# endif /* RC2_128_CBC_WITH_MD5 */ { @@ -188,6 +190,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 128, }, +# if 0 /* RC2_128_CBC_EXPORT40_WITH_MD5 */ { 1, @@ -203,6 +206,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 40, 128, }, +# endif # ifndef OPENSSL_NO_IDEA /* IDEA_128_CBC_WITH_MD5 */ @@ -222,6 +226,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { }, # endif +# if 0 /* DES_64_CBC_WITH_MD5 */ { 1, @@ -237,6 +242,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { 56, 56, }, +# endif /* DES_192_EDE3_CBC_WITH_MD5 */ { diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c index f846cb5..4aac3b2 100644 --- a/ssl/s3_lib.c +++ b/ssl/s3_lib.c @@ -198,6 +198,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 03 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_RC4_40_MD5, @@ -212,6 +213,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 04 */ { @@ -246,6 +248,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 06 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_RC2_40_MD5, @@ -260,6 +263,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 07 */ #ifndef OPENSSL_NO_IDEA @@ -280,6 +284,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { #endif /* Cipher 08 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_DES_40_CBC_SHA, @@ -294,8 +299,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 09 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_RSA_DES_64_CBC_SHA, @@ -310,6 +317,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 0A */ { @@ -329,6 +337,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* The DH ciphers */ /* Cipher 0B */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 0, SSL3_TXT_DH_DSS_DES_40_CBC_SHA, @@ -343,8 +352,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 0C */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_DH_DSS_DES_64_CBC_SHA, @@ -359,6 +370,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 0D */ { @@ -377,6 +389,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 0E */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 0, SSL3_TXT_DH_RSA_DES_40_CBC_SHA, @@ -391,8 +404,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 0F */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_DH_RSA_DES_64_CBC_SHA, @@ -407,6 +422,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 10 */ { @@ -426,6 +442,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* The Ephemeral DH ciphers */ /* Cipher 11 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_DSS_DES_40_CBC_SHA, @@ -440,8 +457,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 12 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_DSS_DES_64_CBC_SHA, @@ -456,6 +475,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 13 */ { @@ -474,6 +494,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 14 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_RSA_DES_40_CBC_SHA, @@ -488,8 +509,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +#endif /* Cipher 15 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_EDH_RSA_DES_64_CBC_SHA, @@ -504,6 +527,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 16 */ { @@ -522,6 +546,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 17 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_ADH_RC4_40_MD5, @@ -536,6 +561,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 18 */ { @@ -554,6 +580,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 19 */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_ADH_DES_40_CBC_SHA, @@ -568,8 +595,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +#endif /* Cipher 1A */ +#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_ADH_DES_64_CBC_SHA, @@ -584,6 +613,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +#endif /* Cipher 1B */ { @@ -655,6 +685,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { #ifndef OPENSSL_NO_KRB5 /* The Kerberos ciphers*/ /* Cipher 1E */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_64_CBC_SHA, @@ -669,6 +700,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 1F */ { @@ -719,6 +751,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 22 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_64_CBC_MD5, @@ -733,6 +766,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 23 */ { @@ -783,6 +817,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 26 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_40_CBC_SHA, @@ -797,8 +832,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +# endif /* Cipher 27 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC2_40_CBC_SHA, @@ -813,8 +850,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif /* Cipher 28 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC4_40_SHA, @@ -829,8 +868,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif /* Cipher 29 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_DES_40_CBC_MD5, @@ -845,8 +886,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 56, }, +# endif /* Cipher 2A */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC2_40_CBC_MD5, @@ -861,8 +904,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif /* Cipher 2B */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, SSL3_TXT_KRB5_RC4_40_MD5, @@ -877,6 +922,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 40, 128, }, +# endif #endif /* OPENSSL_NO_KRB5 */ /* New AES ciphersuites */ @@ -1300,6 +1346,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { # endif /* Cipher 62 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_RSA_EXPORT1024_WITH_DES_CBC_SHA, @@ -1314,8 +1361,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 63 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_DHE_DSS_EXPORT1024_WITH_DES_CBC_SHA, @@ -1330,8 +1379,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 56, }, +# endif /* Cipher 64 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_RSA_EXPORT1024_WITH_RC4_56_SHA, @@ -1346,8 +1397,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 128, }, +# endif /* Cipher 65 */ +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS { 1, TLS1_TXT_DHE_DSS_EXPORT1024_WITH_RC4_56_SHA, @@ -1362,6 +1415,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 56, 128, }, +# endif /* Cipher 66 */ { @@ -4326,21 +4380,6 @@ int ssl3_shutdown(SSL *s) } #endif } else if (!(s->shutdown & SSL_RECEIVED_SHUTDOWN)) { - if (SSL_in_init(s)) { - /* - * We can't shutdown properly if we are in the middle of a - * handshake. Doing so is problematic because the peer may send a - * CCS before it acts on our close_notify. However we should not - * continue to process received handshake messages or CCS once our - * close_notify has been sent. Therefore any close_notify from - * the peer will be unreadable because we have not moved to the next - * cipher state. Its best just to avoid this can-of-worms. Return - * an error if we are wanting to wait for a close_notify from the - * peer and we are in init. - */ - SSLerr(SSL_F_SSL3_SHUTDOWN, SSL_R_SHUTDOWN_WHILE_IN_INIT); - return -1; - } /* * If we are waiting for a close from our peer, we are closed */ diff --git a/ssl/ssl.h b/ssl/ssl.h index ae8c925..04d4007 100644 --- a/ssl/ssl.h +++ b/ssl/ssl.h @@ -2713,7 +2713,6 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL3_SETUP_KEY_BLOCK 157 # define SSL_F_SSL3_SETUP_READ_BUFFER 156 # define SSL_F_SSL3_SETUP_WRITE_BUFFER 291 -# define SSL_F_SSL3_SHUTDOWN 396 # define SSL_F_SSL3_WRITE_BYTES 158 # define SSL_F_SSL3_WRITE_PENDING 159 # define SSL_F_SSL_ADD_CERT_CHAIN 318 diff --git a/ssl/ssl_conf.c b/ssl/ssl_conf.c index 5478840..8d3709d 100644 --- a/ssl/ssl_conf.c +++ b/ssl/ssl_conf.c @@ -330,11 +330,19 @@ static int cmd_Protocol(SSL_CONF_CTX *cctx, const char *value) SSL_FLAG_TBL_INV("TLSv1.1", SSL_OP_NO_TLSv1_1), SSL_FLAG_TBL_INV("TLSv1.2", SSL_OP_NO_TLSv1_2) }; + int ret; + int sslv2off; + if (!(cctx->flags & SSL_CONF_FLAG_FILE)) return -2; cctx->tbl = ssl_protocol_list; cctx->ntbl = sizeof(ssl_protocol_list) / sizeof(ssl_flag_tbl); - return CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx); + + sslv2off = *cctx->poptions & SSL_OP_NO_SSLv2; + ret = CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx); + /* Never turn on SSLv2 through configuration */ + *cctx->poptions |= sslv2off; + return ret; } static int cmd_Options(SSL_CONF_CTX *cctx, const char *value) diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c index dd3b2af..704088d 100644 --- a/ssl/ssl_err.c +++ b/ssl/ssl_err.c @@ -206,7 +206,6 @@ static ERR_STRING_DATA SSL_str_functs[] = { {ERR_FUNC(SSL_F_SSL3_SETUP_KEY_BLOCK), "ssl3_setup_key_block"}, {ERR_FUNC(SSL_F_SSL3_SETUP_READ_BUFFER), "ssl3_setup_read_buffer"}, {ERR_FUNC(SSL_F_SSL3_SETUP_WRITE_BUFFER), "ssl3_setup_write_buffer"}, - {ERR_FUNC(SSL_F_SSL3_SHUTDOWN), "ssl3_shutdown"}, {ERR_FUNC(SSL_F_SSL3_WRITE_BYTES), "ssl3_write_bytes"}, {ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "ssl3_write_pending"}, {ERR_FUNC(SSL_F_SSL_ADD_CERT_CHAIN), "ssl_add_cert_chain"}, diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c index 2744be8..f1279bb 100644 --- a/ssl/ssl_lib.c +++ b/ssl/ssl_lib.c @@ -1060,7 +1060,12 @@ int SSL_shutdown(SSL *s) return -1; } - return s->method->ssl_shutdown(s); + if (!SSL_in_init(s)) { + return s->method->ssl_shutdown(s); + } else { + SSLerr(SSL_F_SSL_SHUTDOWN, SSL_R_SHUTDOWN_WHILE_IN_INIT); + return -1; + } } int SSL_renegotiate(SSL *s) @@ -2049,6 +2054,13 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) */ ret->options |= SSL_OP_LEGACY_SERVER_CONNECT; + /* + * Disable SSLv2 by default, callers that want to enable SSLv2 will have to + * explicitly clear this option via either of SSL_CTX_clear_options() or + * SSL_clear_options(). + */ + ret->options |= SSL_OP_NO_SSLv2; + return (ret); err: SSLerr(SSL_F_SSL_CTX_NEW, ERR_R_MALLOC_FAILURE); diff --git a/ssl/sslv2conftest.c b/ssl/sslv2conftest.c new file mode 100644 index 0000000..1fd748b --- /dev/null +++ b/ssl/sslv2conftest.c @@ -0,0 +1,231 @@ +/* Written by Matt Caswell for the OpenSSL Project */ +/* ==================================================================== + * Copyright (c) 2016 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include +#include +#include +#include + + +#define TOTAL_NUM_TESTS 2 +#define TEST_SSL_CTX 0 + +#define SSLV2ON 1 +#define SSLV2OFF 0 + +SSL_CONF_CTX *confctx; +SSL_CTX *ctx; +SSL *ssl; + +static int checksslv2(int test, int sslv2) +{ + int options; + if (test == TEST_SSL_CTX) { + options = SSL_CTX_get_options(ctx); + } else { + options = SSL_get_options(ssl); + } + return ((options & SSL_OP_NO_SSLv2) == 0) ^ (sslv2 == SSLV2OFF); +} + +int main(int argc, char *argv[]) +{ + BIO *err; + int testresult = 0; + int currtest; + + SSL_library_init(); + SSL_load_error_strings(); + + err = BIO_new_fp(stderr, BIO_NOCLOSE | BIO_FP_TEXT); + + CRYPTO_malloc_debug_init(); + CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL); + CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); + + + confctx = SSL_CONF_CTX_new(); + ctx = SSL_CTX_new(SSLv23_method()); + ssl = SSL_new(ctx); + if (confctx == NULL || ctx == NULL) + goto end; + + SSL_CONF_CTX_set_flags(confctx, SSL_CONF_FLAG_FILE + | SSL_CONF_FLAG_CLIENT + | SSL_CONF_FLAG_SERVER); + + /* + * For each test set up an SSL_CTX and SSL and see whether SSLv2 is enabled + * as expected after various SSL_CONF_cmd("Protocol", ...) calls. + */ + for (currtest = 0; currtest < TOTAL_NUM_TESTS; currtest++) { + BIO_printf(err, "SSLv2 CONF Test number %d\n", currtest); + if (currtest == TEST_SSL_CTX) + SSL_CONF_CTX_set_ssl_ctx(confctx, ctx); + else + SSL_CONF_CTX_set_ssl(confctx, ssl); + + /* SSLv2 should be off by default */ + if (!checksslv2(currtest, SSLV2OFF)) { + BIO_printf(err, "SSLv2 CONF Test: Off by default test FAIL\n"); + goto end; + } + + if (SSL_CONF_cmd(confctx, "Protocol", "ALL") != 2 + || !SSL_CONF_CTX_finish(confctx)) { + BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n"); + goto end; + } + + /* Should still be off even after ALL Protocols on */ + if (!checksslv2(currtest, SSLV2OFF)) { + BIO_printf(err, "SSLv2 CONF Test: Off after config #1 FAIL\n"); + goto end; + } + + if (SSL_CONF_cmd(confctx, "Protocol", "SSLv2") != 2 + || !SSL_CONF_CTX_finish(confctx)) { + BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n"); + goto end; + } + + /* Should still be off even if explicitly asked for */ + if (!checksslv2(currtest, SSLV2OFF)) { + BIO_printf(err, "SSLv2 CONF Test: Off after config #2 FAIL\n"); + goto end; + } + + if (SSL_CONF_cmd(confctx, "Protocol", "-SSLv2") != 2 + || !SSL_CONF_CTX_finish(confctx)) { + BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");; + goto end; + } + + if (!checksslv2(currtest, SSLV2OFF)) { + BIO_printf(err, "SSLv2 CONF Test: Off after config #3 FAIL\n"); + goto end; + } + + if (currtest == TEST_SSL_CTX) + SSL_CTX_clear_options(ctx, SSL_OP_NO_SSLv2); + else + SSL_clear_options(ssl, SSL_OP_NO_SSLv2); + + if (!checksslv2(currtest, SSLV2ON)) { + BIO_printf(err, "SSLv2 CONF Test: On after clear FAIL\n"); + goto end; + } + + if (SSL_CONF_cmd(confctx, "Protocol", "ALL") != 2 + || !SSL_CONF_CTX_finish(confctx)) { + BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n"); + goto end; + } + + /* Option has been cleared and config says have SSLv2 so should be on */ + if (!checksslv2(currtest, SSLV2ON)) { + BIO_printf(err, "SSLv2 CONF Test: On after config #1 FAIL\n"); + goto end; + } + + if (SSL_CONF_cmd(confctx, "Protocol", "SSLv2") != 2 + || !SSL_CONF_CTX_finish(confctx)) { + BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n"); + goto end; + } + + /* Option has been cleared and config says have SSLv2 so should be on */ + if (!checksslv2(currtest, SSLV2ON)) { + BIO_printf(err, "SSLv2 CONF Test: On after config #2 FAIL\n"); + goto end; + } + + if (SSL_CONF_cmd(confctx, "Protocol", "-SSLv2") != 2 + || !SSL_CONF_CTX_finish(confctx)) { + BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n"); + goto end; + } + + /* Option has been cleared but config says no SSLv2 so should be off */ + if (!checksslv2(currtest, SSLV2OFF)) { + BIO_printf(err, "SSLv2 CONF Test: Off after config #4 FAIL\n"); + goto end; + } + + } + + testresult = 1; + + end: + SSL_free(ssl); + SSL_CTX_free(ctx); + SSL_CONF_CTX_free(confctx); + + if (!testresult) { + printf("SSLv2 CONF test: FAILED (Test %d)\n", currtest); + ERR_print_errors(err); + } else { + printf("SSLv2 CONF test: PASSED\n"); + } + + ERR_free_strings(); + ERR_remove_thread_state(NULL); + EVP_cleanup(); + CRYPTO_cleanup_all_ex_data(); + CRYPTO_mem_leaks(err); + BIO_free(err); + + return testresult ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/util/libeay.num b/util/libeay.num index 7f7487d..e5b3c6e 100755 --- a/util/libeay.num +++ b/util/libeay.num @@ -1807,6 +1807,8 @@ ASN1_UTCTIME_get 2350 NOEXIST::FUNCTION: X509_REQ_digest 2362 EXIST::FUNCTION:EVP X509_CRL_digest 2391 EXIST::FUNCTION:EVP ASN1_STRING_clear_free 2392 EXIST::FUNCTION: +SRP_VBASE_get1_by_user 2393 EXIST::FUNCTION:SRP +SRP_user_pwd_free 2394 EXIST::FUNCTION:SRP d2i_ASN1_SET_OF_PKCS7 2397 NOEXIST::FUNCTION: X509_ALGOR_cmp 2398 EXIST::FUNCTION: EVP_CIPHER_CTX_set_key_length 2399 EXIST::FUNCTION: diff --git a/util/mk1mf.pl b/util/mk1mf.pl index 99652af..2629a1c 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -290,6 +290,7 @@ $cflags.=" -DOPENSSL_NO_HW" if $no_hw; $cflags.=" -DOPENSSL_FIPS" if $fips; $cflags.=" -DOPENSSL_NO_JPAKE" if $no_jpake; $cflags.=" -DOPENSSL_NO_EC2M" if $no_ec2m; +$cflags.=" -DOPENSSL_NO_WEAK_SSL_CIPHERS" if $no_weak_ssl; $cflags.= " -DZLIB" if $zlib_opt; $cflags.= " -DZLIB_SHARED" if $zlib_opt == 2; @@ -482,7 +483,7 @@ EX_LIBS=$ex_libs # The OpenSSL directory SRC_D=$src_dir -LINK=$link +LINK_CMD=$link LFLAGS=$lflags RSC=$rsc @@ -1205,6 +1206,7 @@ sub read_options "no-jpake" => \$no_jpake, "no-ec2m" => \$no_ec2m, "no-ec_nistp_64_gcc_128" => 0, + "no-weak-ssl-ciphers" => \$no_weak_ssl, "no-err" => \$no_err, "no-sock" => \$no_sock, "no-krb5" => \$no_krb5, diff --git a/util/pl/BC-32.pl b/util/pl/BC-32.pl index f7161d7..375b0a7 100644 --- a/util/pl/BC-32.pl +++ b/util/pl/BC-32.pl @@ -118,7 +118,7 @@ ___ { local($ex)=($target =~ /O_SSL/)?' $(L_CRYPTO)':''; $ex.=' ws2_32.lib gdi32.lib'; - $ret.="\t\$(LINK) \$(MLFLAGS) $efile$target /def:ms/${Name}.def @<<\n \$(SHLIB_EX_OBJ) $objs $ex\n<<\n"; + $ret.="\t\$(LINK_CMD) \$(MLFLAGS) $efile$target /def:ms/${Name}.def @<<\n \$(SHLIB_EX_OBJ) $objs $ex\n<<\n"; } $ret.="\n"; return($ret); @@ -132,7 +132,7 @@ sub do_link_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; - $ret.="\t\$(LINK) \$(LFLAGS) $files \$(APP_EX_OBJ), $target,, $libs\n\n"; + $ret.="\t\$(LINK_CMD) \$(LFLAGS) $files \$(APP_EX_OBJ), $target,, $libs\n\n"; return($ret); } diff --git a/util/pl/Mingw32.pl b/util/pl/Mingw32.pl index fe3fb27..55c85f6 100644 --- a/util/pl/Mingw32.pl +++ b/util/pl/Mingw32.pl @@ -98,7 +98,7 @@ sub do_link_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; - $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n"; + $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n"; return($ret); } 1; diff --git a/util/pl/OS2-EMX.pl b/util/pl/OS2-EMX.pl index 28cd116..92a332e 100644 --- a/util/pl/OS2-EMX.pl +++ b/util/pl/OS2-EMX.pl @@ -99,7 +99,7 @@ sub do_lib_rule { local($ex)=($target =~ /O_SSL/)?' $(L_CRYPTO)':''; $ex.=' -lsocket'; - $ret.="\t\$(LINK) \$(SHLIB_CFLAGS) \$(MLFLAGS) $efile$target \$(SHLIB_EX_OBJ) \$(${Name}OBJ) $ex os2/${Name}.def\n"; + $ret.="\t\$(LINK_CMD) \$(SHLIB_CFLAGS) \$(MLFLAGS) $efile$target \$(SHLIB_EX_OBJ) \$(${Name}OBJ) $ex os2/${Name}.def\n"; $ret.="\temximp -o $out_def/$name.a os2/${Name}.def\n"; $ret.="\temximp -o $out_def/$name.lib os2/${Name}.def\n\n"; } @@ -113,7 +113,7 @@ sub do_link_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; - $ret.="\t\$(LINK) ${efile}$target \$(CFLAG) \$(LFLAGS) $files $libs\n\n"; + $ret.="\t\$(LINK_CMD) ${efile}$target \$(CFLAG) \$(LFLAGS) $files $libs\n\n"; return($ret); } diff --git a/util/pl/VC-32.pl b/util/pl/VC-32.pl index 0f5547f..dba96cb 100644 --- a/util/pl/VC-32.pl +++ b/util/pl/VC-32.pl @@ -330,7 +330,7 @@ sub do_lib_rule if ($fips && $target =~ /O_CRYPTO/) { $ret.="$target: $objs \$(PREMAIN_DSO_EXE)"; - $ret.="\n\tSET FIPS_LINK=\$(LINK)\n"; + $ret.="\n\tSET FIPS_LINK=\$(LINK_CMD)\n"; $ret.="\tSET FIPS_CC=\$(CC)\n"; $ret.="\tSET FIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\n"; $ret.="\tSET PREMAIN_DSO_EXE=\$(PREMAIN_DSO_EXE)\n"; @@ -344,7 +344,7 @@ sub do_lib_rule else { $ret.="$target: $objs"; - $ret.="\n\t\$(LINK) \$(MLFLAGS) $efile$target $name @<<\n \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n<<\n"; + $ret.="\n\t\$(LINK_CMD) \$(MLFLAGS) $efile$target $name @<<\n \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n<<\n"; } $ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;2\n\n"; } @@ -363,7 +363,7 @@ sub do_link_rule { $ret.=" \$(OBJ_D)${o}applink.obj" if $shlib; $ret.="\n"; - $ret.=" \$(LINK) \$(LFLAGS) $efile$target @<<\n\t"; + $ret.=" \$(LINK_CMD) \$(LFLAGS) $efile$target @<<\n\t"; if ($files =~ /O_FIPSCANISTER/ && !$fipscanisterbuild) { $ret.= "\$(EX_LIBS) "; $ret.= "\$(OBJ_D)${o}applink.obj " if $shlib; @@ -373,7 +373,7 @@ sub do_link_rule elsif ($standalone == 2) { $ret.="\n"; - $ret.="\tSET FIPS_LINK=\$(LINK)\n"; + $ret.="\tSET FIPS_LINK=\$(LINK_CMD)\n"; $ret.="\tSET FIPS_CC=\$(CC)\n"; $ret.="\tSET FIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\n"; $ret.="\tSET PREMAIN_DSO_EXE=\n"; @@ -386,7 +386,7 @@ sub do_link_rule else { $ret.="\n"; - $ret.="\t\$(LINK) \$(LFLAGS) $efile$target @<<\n"; + $ret.="\t\$(LINK_CMD) \$(LFLAGS) $efile$target @<<\n"; $ret.="\t\$(APP_EX_OBJ) $files $libs\n<<\n"; } $ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;1\n\n"; diff --git a/util/pl/linux.pl b/util/pl/linux.pl index d24f7b7..3362941 100644 --- a/util/pl/linux.pl +++ b/util/pl/linux.pl @@ -78,7 +78,7 @@ sub do_link_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; - $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n"; + $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n"; return($ret); } diff --git a/util/pl/netware.pl b/util/pl/netware.pl index fe80a9b..16f4f4e 100644 --- a/util/pl/netware.pl +++ b/util/pl/netware.pl @@ -506,22 +506,22 @@ sub do_link_rule if ($gnuc) { $ret.="\t\$(MKLIB) $lib_flags \$(TMP_D)${o}\$(E_EXE).a \$(filter-out \$(TMP_D)${o}\$(E_EXE)${obj},$files)\n"; - $ret.="\t\$(LINK) \$(LFLAGS) $def_file2\n"; + $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file2\n"; $ret.="\t\@$mv \$(E_EXE)2.nlm \$(TEST_D)\n"; } else { - $ret.="\t\$(LINK) \$(LFLAGS) $def_file2 $files \"$prelude\" $libs -o $target2\n"; + $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file2 $files \"$prelude\" $libs -o $target2\n"; } } if ($gnuc) { - $ret.="\t\$(LINK) \$(LFLAGS) $def_file\n"; + $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file\n"; $ret.="\t\@$mv \$(\@F) \$(TEST_D)\n"; } else { - $ret.="\t\$(LINK) \$(LFLAGS) $def_file $files \"$prelude\" $libs -o $target\n"; + $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file $files \"$prelude\" $libs -o $target\n"; } $ret.="\n"; diff --git a/util/pl/ultrix.pl b/util/pl/ultrix.pl index ea370c7..0c76c83 100644 --- a/util/pl/ultrix.pl +++ b/util/pl/ultrix.pl @@ -31,7 +31,7 @@ sub do_link_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; - $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n"; + $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n"; return($ret); } diff --git a/util/pl/unix.pl b/util/pl/unix.pl index 1d4e9dc..8818c5b 100644 --- a/util/pl/unix.pl +++ b/util/pl/unix.pl @@ -164,7 +164,7 @@ sub do_link_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; - $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n"; + $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n"; return($ret); } -- cgit v1.1