diff options
author | simon <simon@FreeBSD.org> | 2010-03-13 19:22:41 +0000 |
---|---|---|
committer | simon <simon@FreeBSD.org> | 2010-03-13 19:22:41 +0000 |
commit | 0d816bbd980d8201a2ad23ccd05f7bde16565282 (patch) | |
tree | 909a7c21b7df72ae8c08b80b468a4dd75b3820be /crypto/openssl/crypto | |
parent | 7fd3bd147ec574621124307eca10ead5353e34ba (diff) | |
parent | cdb6eef1f013e22a10ab5f5829dcdc3b5e32d385 (diff) | |
download | FreeBSD-src-0d816bbd980d8201a2ad23ccd05f7bde16565282.zip FreeBSD-src-0d816bbd980d8201a2ad23ccd05f7bde16565282.tar.gz |
Merge OpenSSL 0.9.8m into head.
This also "reverts" some FreeBSD local changes so we should now
be back to using entirely stock OpenSSL. The local changes were
simple $FreeBSD$ lines additions, which were required in the CVS
days, and the patch for FreeBSD-SA-09:15.ssl which has been
superseded with OpenSSL 0.9.8m's RFC5746 'TLS renegotiation
extension' support.
MFC after: 3 weeks
Diffstat (limited to 'crypto/openssl/crypto')
115 files changed, 1265 insertions, 16068 deletions
diff --git a/crypto/openssl/crypto/aes/aes_cfb.c b/crypto/openssl/crypto/aes/aes_cfb.c index 49f0411..9384ba6 100644 --- a/crypto/openssl/crypto/aes/aes_cfb.c +++ b/crypto/openssl/crypto/aes/aes_cfb.c @@ -201,7 +201,6 @@ void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, assert(in && out && key && ivec && num); assert(*num == 0); - memset(out,0,(length+7)/8); for(n=0 ; n < length ; ++n) { c[0]=(in[n/8]&(1 << (7-n%8))) ? 0x80 : 0; diff --git a/crypto/openssl/crypto/aes/aes_x86core.c b/crypto/openssl/crypto/aes/aes_x86core.c deleted file mode 100644 index d323e26..0000000 --- a/crypto/openssl/crypto/aes/aes_x86core.c +++ /dev/null @@ -1,1063 +0,0 @@ -/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */ -/** - * rijndael-alg-fst.c - * - * @version 3.0 (December 2000) - * - * Optimised ANSI C code for the Rijndael cipher (now AES) - * - * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be> - * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be> - * @author Paulo Barreto <paulo.barreto@terra.com.br> - * - * This code is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * This is experimental x86[_64] derivative. It assumes little-endian - * byte order and expects CPU to sustain unaligned memory references. - * It is used as playground for cache-time attack mitigations and - * serves as reference C implementation for x86[_64] assembler. - * - * <appro@fy.chalmers.se> - */ - - -#ifndef AES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - -#include <stdlib.h> -#include <openssl/aes.h> -#include "aes_locl.h" - -/* - * These two parameters control which table, 256-byte or 2KB, is - * referenced in outer and respectively inner rounds. - */ -#define AES_COMPACT_IN_OUTER_ROUNDS -#ifdef AES_COMPACT_IN_OUTER_ROUNDS -/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while - * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further* - * by factor of ~2. */ -# undef AES_COMPACT_IN_INNER_ROUNDS -#endif - -#if 1 -static void prefetch256(const void *table) -{ - volatile unsigned long *t=(void *)table,ret; - unsigned long sum; - int i; - - /* 32 is common least cache-line size */ - for (sum=0,i=0;i<256/sizeof(t[0]);i+=32/sizeof(t[0])) sum ^= t[i]; - - ret = sum; -} -#else -# define prefetch256(t) -#endif - -#undef GETU32 -#define GETU32(p) (*((u32*)(p))) - -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -typedef unsigned __int64 u64; -#define U64(C) C##UI64 -#elif defined(__arch64__) -typedef unsigned long u64; -#define U64(C) C##UL -#else -typedef unsigned long long u64; -#define U64(C) C##ULL -#endif - -#undef ROTATE -#if defined(_MSC_VER) || defined(__ICC) -# define ROTATE(a,n) _lrotl(a,n) -#elif defined(__GNUC__) && __GNUC__>=2 -# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) -# define ROTATE(a,n) ({ register unsigned int ret; \ - asm ( \ - "roll %1,%0" \ - : "=r"(ret) \ - : "I"(n), "0"(a) \ - : "cc"); \ - ret; \ - }) -# endif -#endif -/* -Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03]; -Te0[x] = S [x].[02, 01, 01, 03]; -Te1[x] = S [x].[03, 02, 01, 01]; -Te2[x] = S [x].[01, 03, 02, 01]; -Te3[x] = S [x].[01, 01, 03, 02]; -*/ -#define Te0 (u32)((u64*)((u8*)Te+0)) -#define Te1 (u32)((u64*)((u8*)Te+3)) -#define Te2 (u32)((u64*)((u8*)Te+2)) -#define Te3 (u32)((u64*)((u8*)Te+1)) -/* -Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b]; -Td0[x] = Si[x].[0e, 09, 0d, 0b]; -Td1[x] = Si[x].[0b, 0e, 09, 0d]; -Td2[x] = Si[x].[0d, 0b, 0e, 09]; -Td3[x] = Si[x].[09, 0d, 0b, 0e]; -Td4[x] = Si[x].[01]; -*/ -#define Td0 (u32)((u64*)((u8*)Td+0)) -#define Td1 (u32)((u64*)((u8*)Td+3)) -#define Td2 (u32)((u64*)((u8*)Td+2)) -#define Td3 (u32)((u64*)((u8*)Td+1)) - -static const u64 Te[256] = { - U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8), - U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6), - U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6), - U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591), - U64(0x5030306050303060), U64(0x0301010203010102), - U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56), - U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5), - U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec), - U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f), - U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa), - U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2), - U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb), - U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3), - U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45), - U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453), - U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b), - U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1), - U64(0xae93933dae93933d), U64(0x6a26264c6a26264c), - U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e), - U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83), - U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551), - U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9), - U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab), - U64(0x5331316253313162), U64(0x3f15152a3f15152a), - U64(0x0c0404080c040408), U64(0x52c7c79552c7c795), - U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d), - U64(0x2818183028181830), U64(0xa1969637a1969637), - U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f), - U64(0x0907070e0907070e), U64(0x3612122436121224), - U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df), - U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e), - U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea), - U64(0x1b0909121b090912), U64(0x9e83831d9e83831d), - U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34), - U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc), - U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b), - U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76), - U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d), - U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd), - U64(0x712f2f5e712f2f5e), U64(0x9784841397848413), - U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9), - U64(0x0000000000000000), U64(0x2cededc12cededc1), - U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3), - U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6), - U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d), - U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972), - U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98), - U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85), - U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5), - U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed), - U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a), - U64(0x5533336655333366), U64(0x9485851194858511), - U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9), - U64(0x0602020406020204), U64(0x817f7ffe817f7ffe), - U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78), - U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b), - U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d), - U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05), - U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21), - U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1), - U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677), - U64(0x75dadaaf75dadaaf), U64(0x6321214263212142), - U64(0x3010102030101020), U64(0x1affffe51affffe5), - U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf), - U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18), - U64(0x3513132635131326), U64(0x2fececc32fececc3), - U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735), - U64(0xcc444488cc444488), U64(0x3917172e3917172e), - U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755), - U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a), - U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba), - U64(0x2b1919322b191932), U64(0x957373e6957373e6), - U64(0xa06060c0a06060c0), U64(0x9881811998818119), - U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3), - U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54), - U64(0xab90903bab90903b), U64(0x8388880b8388880b), - U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7), - U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428), - U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc), - U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad), - U64(0x3be0e0db3be0e0db), U64(0x5632326456323264), - U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14), - U64(0xdb494992db494992), U64(0x0a06060c0a06060c), - U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8), - U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd), - U64(0xefacac43efacac43), U64(0xa66262c4a66262c4), - U64(0xa8919139a8919139), U64(0xa4959531a4959531), - U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2), - U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b), - U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda), - U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1), - U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949), - U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac), - U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf), - U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4), - U64(0xe9aeae47e9aeae47), U64(0x1808081018080810), - U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0), - U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c), - U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657), - U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697), - U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1), - U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e), - U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61), - U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f), - U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c), - U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc), - U64(0xd8484890d8484890), U64(0x0503030605030306), - U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c), - U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a), - U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969), - U64(0x9186861791868617), U64(0x58c1c19958c1c199), - U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27), - U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb), - U64(0xb398982bb398982b), U64(0x3311112233111122), - U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9), - U64(0x898e8e07898e8e07), U64(0xa7949433a7949433), - U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c), - U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9), - U64(0x49cece8749cece87), U64(0xff5555aaff5555aa), - U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5), - U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159), - U64(0x8089890980898909), U64(0x170d0d1a170d0d1a), - U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7), - U64(0xc6424284c6424284), U64(0xb86868d0b86868d0), - U64(0xc3414182c3414182), U64(0xb0999929b0999929), - U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e), - U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8), - U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c) -}; - -static const u8 Te4[256] = { - 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, - 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, - 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, - 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, - 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, - 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, - 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, - 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, - 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, - 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, - 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, - 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, - 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U, - 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, - 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, - 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, - 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, - 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, - 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, - 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, - 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, - 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, - 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, - 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, - 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, - 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, - 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, - 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, - 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, - 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, - 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, - 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U -}; - -static const u64 Td[256] = { - U64(0x50a7f45150a7f451), U64(0x5365417e5365417e), - U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a), - U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f), - U64(0xab58faacab58faac), U64(0x9303e34b9303e34b), - U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad), - U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5), - U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5), - U64(0x8044352680443526), U64(0x8fa362b58fa362b5), - U64(0x495ab1de495ab1de), U64(0x671bba25671bba25), - U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d), - U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81), - U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b), - U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215), - U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295), - U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458), - U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e), - U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4), - U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927), - U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0), - U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d), - U64(0x184adf63184adf63), U64(0x82311ae582311ae5), - U64(0x6033519760335197), U64(0x457f5362457f5362), - U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb), - U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9), - U64(0x5868487058684870), U64(0x19fd458f19fd458f), - U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52), - U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72), - U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566), - U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f), - U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3), - U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23), - U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed), - U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7), - U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e), - U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506), - U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4), - U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2), - U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4), - U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040), - U64(0x069f715e069f715e), U64(0x51106ebd51106ebd), - U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96), - U64(0xae053eddae053edd), U64(0x46bde64d46bde64d), - U64(0xb58d5491b58d5491), U64(0x055dc471055dc471), - U64(0x6fd406046fd40604), U64(0xff155060ff155060), - U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6), - U64(0xcc434089cc434089), U64(0x779ed967779ed967), - U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907), - U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879), - U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c), - U64(0xc91e84f8c91e84f8), U64(0x0000000000000000), - U64(0x8386800983868009), U64(0x48ed2b3248ed2b32), - U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c), - U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f), - U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36), - U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68), - U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624), - U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793), - U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b), - U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61), - U64(0x694b775a694b775a), U64(0x161a121c161a121c), - U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0), - U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12), - U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2), - U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14), - U64(0x8519f1578519f157), U64(0x4c0775af4c0775af), - U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3), - U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c), - U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b), - U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb), - U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8), - U64(0xcadc31d7cadc31d7), U64(0x1085634210856342), - U64(0x4022971340229713), U64(0x2011c6842011c684), - U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2), - U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7), - U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc), - U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177), - U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9), - U64(0xfa489411fa489411), U64(0x2264e9472264e947), - U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0), - U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322), - U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9), - U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498), - U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5), - U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f), - U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850), - U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54), - U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890), - U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382), - U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069), - U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf), - U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810), - U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb), - U64(0x097826cd097826cd), U64(0xf418596ef418596e), - U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83), - U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa), - U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef), - U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a), - U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029), - U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a), - U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235), - U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc), - U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733), - U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41), - U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117), - U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43), - U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4), - U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c), - U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546), - U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01), - U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb), - U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92), - U64(0x335610e9335610e9), U64(0x1347d66d1347d66d), - U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137), - U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb), - U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7), - U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a), - U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255), - U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773), - U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f), - U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478), - U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9), - U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2), - U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc), - U64(0x8b493c288b493c28), U64(0x41950dff41950dff), - U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08), - U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664), - U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5), - U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0) -}; -static const u8 Td4[256] = { - 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, - 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, - 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, - 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, - 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, - 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, - 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, - 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, - 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, - 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, - 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, - 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, - 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, - 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, - 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, - 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, - 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, - 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, - 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, - 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, - 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, - 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, - 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, - 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, - 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, - 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, - 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, - 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, - 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, - 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, - 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, - 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU -}; - -static const u32 rcon[] = { - 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U, - 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U, - 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ -}; - -/** - * Expand the cipher key into the encryption key schedule. - */ -int AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) { - - u32 *rk; - int i = 0; - u32 temp; - - if (!userKey || !key) - return -1; - if (bits != 128 && bits != 192 && bits != 256) - return -2; - - rk = key->rd_key; - - if (bits==128) - key->rounds = 10; - else if (bits==192) - key->rounds = 12; - else - key->rounds = 14; - - rk[0] = GETU32(userKey ); - rk[1] = GETU32(userKey + 4); - rk[2] = GETU32(userKey + 8); - rk[3] = GETU32(userKey + 12); - if (bits == 128) { - while (1) { - temp = rk[3]; - rk[4] = rk[0] ^ - (Te4[(temp >> 8) & 0xff] ) ^ - (Te4[(temp >> 16) & 0xff] << 8) ^ - (Te4[(temp >> 24) ] << 16) ^ - (Te4[(temp ) & 0xff] << 24) ^ - rcon[i]; - rk[5] = rk[1] ^ rk[4]; - rk[6] = rk[2] ^ rk[5]; - rk[7] = rk[3] ^ rk[6]; - if (++i == 10) { - return 0; - } - rk += 4; - } - } - rk[4] = GETU32(userKey + 16); - rk[5] = GETU32(userKey + 20); - if (bits == 192) { - while (1) { - temp = rk[ 5]; - rk[ 6] = rk[ 0] ^ - (Te4[(temp >> 8) & 0xff] ) ^ - (Te4[(temp >> 16) & 0xff] << 8) ^ - (Te4[(temp >> 24) ] << 16) ^ - (Te4[(temp ) & 0xff] << 24) ^ - rcon[i]; - rk[ 7] = rk[ 1] ^ rk[ 6]; - rk[ 8] = rk[ 2] ^ rk[ 7]; - rk[ 9] = rk[ 3] ^ rk[ 8]; - if (++i == 8) { - return 0; - } - rk[10] = rk[ 4] ^ rk[ 9]; - rk[11] = rk[ 5] ^ rk[10]; - rk += 6; - } - } - rk[6] = GETU32(userKey + 24); - rk[7] = GETU32(userKey + 28); - if (bits == 256) { - while (1) { - temp = rk[ 7]; - rk[ 8] = rk[ 0] ^ - (Te4[(temp >> 8) & 0xff] ) ^ - (Te4[(temp >> 16) & 0xff] << 8) ^ - (Te4[(temp >> 24) ] << 16) ^ - (Te4[(temp ) & 0xff] << 24) ^ - rcon[i]; - rk[ 9] = rk[ 1] ^ rk[ 8]; - rk[10] = rk[ 2] ^ rk[ 9]; - rk[11] = rk[ 3] ^ rk[10]; - if (++i == 7) { - return 0; - } - temp = rk[11]; - rk[12] = rk[ 4] ^ - (Te4[(temp ) & 0xff] ) ^ - (Te4[(temp >> 8) & 0xff] << 8) ^ - (Te4[(temp >> 16) & 0xff] << 16) ^ - (Te4[(temp >> 24) ] << 24); - rk[13] = rk[ 5] ^ rk[12]; - rk[14] = rk[ 6] ^ rk[13]; - rk[15] = rk[ 7] ^ rk[14]; - - rk += 8; - } - } - return 0; -} - -/** - * Expand the cipher key into the decryption key schedule. - */ -int AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) { - - u32 *rk; - int i, j, status; - u32 temp; - - /* first, start with an encryption schedule */ - status = AES_set_encrypt_key(userKey, bits, key); - if (status < 0) - return status; - - rk = key->rd_key; - - /* invert the order of the round keys: */ - for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { - temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; - temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; - temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; - temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; - } - /* apply the inverse MixColumn transform to all round keys but the first and the last: */ - for (i = 1; i < (key->rounds); i++) { - rk += 4; -#if 1 - for (j = 0; j < 4; j++) { - u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; - - tp1 = rk[j]; - m = tp1 & 0x80808080; - tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp2 & 0x80808080; - tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp4 & 0x80808080; - tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - tp9 = tp8 ^ tp1; - tpb = tp9 ^ tp2; - tpd = tp9 ^ tp4; - tpe = tp8 ^ tp4 ^ tp2; -#if defined(ROTATE) - rk[j] = tpe ^ ROTATE(tpd,16) ^ - ROTATE(tp9,8) ^ ROTATE(tpb,24); -#else - rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ - (tp9 >> 24) ^ (tp9 << 8) ^ - (tpb >> 8) ^ (tpb << 24); -#endif - } -#else - rk[0] = - Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[0] >> 24) ] & 0xff]; - rk[1] = - Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[1] >> 24) ] & 0xff]; - rk[2] = - Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[2] >> 24) ] & 0xff]; - rk[3] = - Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[3] >> 24) ] & 0xff]; -#endif - } - return 0; -} - -/* - * Encrypt a single block - * in and out can overlap - */ -void AES_encrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key) { - - const u32 *rk; - u32 s0, s1, s2, s3, t[4]; - int r; - - assert(in && out && key); - rk = key->rd_key; - - /* - * map byte array block to cipher state - * and add initial round key: - */ - s0 = GETU32(in ) ^ rk[0]; - s1 = GETU32(in + 4) ^ rk[1]; - s2 = GETU32(in + 8) ^ rk[2]; - s3 = GETU32(in + 12) ^ rk[3]; - -#if defined(AES_COMPACT_IN_OUTER_ROUNDS) - prefetch256(Te4); - - t[0] = Te4[(s0 ) & 0xff] ^ - Te4[(s1 >> 8) & 0xff] << 8 ^ - Te4[(s2 >> 16) & 0xff] << 16 ^ - Te4[(s3 >> 24) ] << 24; - t[1] = Te4[(s1 ) & 0xff] ^ - Te4[(s2 >> 8) & 0xff] << 8 ^ - Te4[(s3 >> 16) & 0xff] << 16 ^ - Te4[(s0 >> 24) ] << 24; - t[2] = Te4[(s2 ) & 0xff] ^ - Te4[(s3 >> 8) & 0xff] << 8 ^ - Te4[(s0 >> 16) & 0xff] << 16 ^ - Te4[(s1 >> 24) ] << 24; - t[3] = Te4[(s3 ) & 0xff] ^ - Te4[(s0 >> 8) & 0xff] << 8 ^ - Te4[(s1 >> 16) & 0xff] << 16 ^ - Te4[(s2 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 r0, r1, r2; - - for (i = 0; i < 4; i++) { - r0 = t[i]; - r1 = r0 & 0x80808080; - r2 = ((r0 & 0x7f7f7f7f) << 1) ^ - ((r1 - (r1 >> 7)) & 0x1b1b1b1b); -#if defined(ROTATE) - t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^ - ROTATE(r0,16) ^ ROTATE(r0,8); -#else - t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^ - (r0 << 16) ^ (r0 >> 16) ^ - (r0 << 8) ^ (r0 >> 24); -#endif - t[i] ^= rk[4+i]; - } - } -#else - t[0] = Te0[(s0 ) & 0xff] ^ - Te1[(s1 >> 8) & 0xff] ^ - Te2[(s2 >> 16) & 0xff] ^ - Te3[(s3 >> 24) ] ^ - rk[4]; - t[1] = Te0[(s1 ) & 0xff] ^ - Te1[(s2 >> 8) & 0xff] ^ - Te2[(s3 >> 16) & 0xff] ^ - Te3[(s0 >> 24) ] ^ - rk[5]; - t[2] = Te0[(s2 ) & 0xff] ^ - Te1[(s3 >> 8) & 0xff] ^ - Te2[(s0 >> 16) & 0xff] ^ - Te3[(s1 >> 24) ] ^ - rk[6]; - t[3] = Te0[(s3 ) & 0xff] ^ - Te1[(s0 >> 8) & 0xff] ^ - Te2[(s1 >> 16) & 0xff] ^ - Te3[(s2 >> 24) ] ^ - rk[7]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - - /* - * Nr - 2 full rounds: - */ - for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) { -#if defined(AES_COMPACT_IN_INNER_ROUNDS) - t[0] = Te4[(s0 ) & 0xff] ^ - Te4[(s1 >> 8) & 0xff] << 8 ^ - Te4[(s2 >> 16) & 0xff] << 16 ^ - Te4[(s3 >> 24) ] << 24; - t[1] = Te4[(s1 ) & 0xff] ^ - Te4[(s2 >> 8) & 0xff] << 8 ^ - Te4[(s3 >> 16) & 0xff] << 16 ^ - Te4[(s0 >> 24) ] << 24; - t[2] = Te4[(s2 ) & 0xff] ^ - Te4[(s3 >> 8) & 0xff] << 8 ^ - Te4[(s0 >> 16) & 0xff] << 16 ^ - Te4[(s1 >> 24) ] << 24; - t[3] = Te4[(s3 ) & 0xff] ^ - Te4[(s0 >> 8) & 0xff] << 8 ^ - Te4[(s1 >> 16) & 0xff] << 16 ^ - Te4[(s2 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 r0, r1, r2; - - for (i = 0; i < 4; i++) { - r0 = t[i]; - r1 = r0 & 0x80808080; - r2 = ((r0 & 0x7f7f7f7f) << 1) ^ - ((r1 - (r1 >> 7)) & 0x1b1b1b1b); -#if defined(ROTATE) - t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^ - ROTATE(r0,16) ^ ROTATE(r0,8); -#else - t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^ - (r0 << 16) ^ (r0 >> 16) ^ - (r0 << 8) ^ (r0 >> 24); -#endif - t[i] ^= rk[i]; - } - } -#else - t[0] = Te0[(s0 ) & 0xff] ^ - Te1[(s1 >> 8) & 0xff] ^ - Te2[(s2 >> 16) & 0xff] ^ - Te3[(s3 >> 24) ] ^ - rk[0]; - t[1] = Te0[(s1 ) & 0xff] ^ - Te1[(s2 >> 8) & 0xff] ^ - Te2[(s3 >> 16) & 0xff] ^ - Te3[(s0 >> 24) ] ^ - rk[1]; - t[2] = Te0[(s2 ) & 0xff] ^ - Te1[(s3 >> 8) & 0xff] ^ - Te2[(s0 >> 16) & 0xff] ^ - Te3[(s1 >> 24) ] ^ - rk[2]; - t[3] = Te0[(s3 ) & 0xff] ^ - Te1[(s0 >> 8) & 0xff] ^ - Te2[(s1 >> 16) & 0xff] ^ - Te3[(s2 >> 24) ] ^ - rk[3]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - } - /* - * apply last round and - * map cipher state to byte array block: - */ -#if defined(AES_COMPACT_IN_OUTER_ROUNDS) - prefetch256(Te4); - - *(u32*)(out+0) = - Te4[(s0 ) & 0xff] ^ - Te4[(s1 >> 8) & 0xff] << 8 ^ - Te4[(s2 >> 16) & 0xff] << 16 ^ - Te4[(s3 >> 24) ] << 24 ^ - rk[0]; - *(u32*)(out+4) = - Te4[(s1 ) & 0xff] ^ - Te4[(s2 >> 8) & 0xff] << 8 ^ - Te4[(s3 >> 16) & 0xff] << 16 ^ - Te4[(s0 >> 24) ] << 24 ^ - rk[1]; - *(u32*)(out+8) = - Te4[(s2 ) & 0xff] ^ - Te4[(s3 >> 8) & 0xff] << 8 ^ - Te4[(s0 >> 16) & 0xff] << 16 ^ - Te4[(s1 >> 24) ] << 24 ^ - rk[2]; - *(u32*)(out+12) = - Te4[(s3 ) & 0xff] ^ - Te4[(s0 >> 8) & 0xff] << 8 ^ - Te4[(s1 >> 16) & 0xff] << 16 ^ - Te4[(s2 >> 24) ] << 24 ^ - rk[3]; -#else - *(u32*)(out+0) = - (Te2[(s0 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s3 >> 24) ] & 0xff000000U) ^ - rk[0]; - *(u32*)(out+4) = - (Te2[(s1 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s0 >> 24) ] & 0xff000000U) ^ - rk[1]; - *(u32*)(out+8) = - (Te2[(s2 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s1 >> 24) ] & 0xff000000U) ^ - rk[2]; - *(u32*)(out+12) = - (Te2[(s3 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s2 >> 24) ] & 0xff000000U) ^ - rk[3]; -#endif -} - -/* - * Decrypt a single block - * in and out can overlap - */ -void AES_decrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key) { - - const u32 *rk; - u32 s0, s1, s2, s3, t[4]; - int r; - - assert(in && out && key); - rk = key->rd_key; - - /* - * map byte array block to cipher state - * and add initial round key: - */ - s0 = GETU32(in ) ^ rk[0]; - s1 = GETU32(in + 4) ^ rk[1]; - s2 = GETU32(in + 8) ^ rk[2]; - s3 = GETU32(in + 12) ^ rk[3]; - -#if defined(AES_COMPACT_IN_OUTER_ROUNDS) - prefetch256(Td4); - - t[0] = Td4[(s0 ) & 0xff] ^ - Td4[(s3 >> 8) & 0xff] << 8 ^ - Td4[(s2 >> 16) & 0xff] << 16 ^ - Td4[(s1 >> 24) ] << 24; - t[1] = Td4[(s1 ) & 0xff] ^ - Td4[(s0 >> 8) & 0xff] << 8 ^ - Td4[(s3 >> 16) & 0xff] << 16 ^ - Td4[(s2 >> 24) ] << 24; - t[2] = Td4[(s2 ) & 0xff] ^ - Td4[(s1 >> 8) & 0xff] << 8 ^ - Td4[(s0 >> 16) & 0xff] << 16 ^ - Td4[(s3 >> 24) ] << 24; - t[3] = Td4[(s3 ) & 0xff] ^ - Td4[(s2 >> 8) & 0xff] << 8 ^ - Td4[(s1 >> 16) & 0xff] << 16 ^ - Td4[(s0 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; - - for (i = 0; i < 4; i++) { - tp1 = t[i]; - m = tp1 & 0x80808080; - tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp2 & 0x80808080; - tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp4 & 0x80808080; - tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - tp9 = tp8 ^ tp1; - tpb = tp9 ^ tp2; - tpd = tp9 ^ tp4; - tpe = tp8 ^ tp4 ^ tp2; -#if defined(ROTATE) - t[i] = tpe ^ ROTATE(tpd,16) ^ - ROTATE(tp9,8) ^ ROTATE(tpb,24); -#else - t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ - (tp9 >> 24) ^ (tp9 << 8) ^ - (tpb >> 8) ^ (tpb << 24); -#endif - t[i] ^= rk[4+i]; - } - } -#else - t[0] = Td0[(s0 ) & 0xff] ^ - Td1[(s3 >> 8) & 0xff] ^ - Td2[(s2 >> 16) & 0xff] ^ - Td3[(s1 >> 24) ] ^ - rk[4]; - t[1] = Td0[(s1 ) & 0xff] ^ - Td1[(s0 >> 8) & 0xff] ^ - Td2[(s3 >> 16) & 0xff] ^ - Td3[(s2 >> 24) ] ^ - rk[5]; - t[2] = Td0[(s2 ) & 0xff] ^ - Td1[(s1 >> 8) & 0xff] ^ - Td2[(s0 >> 16) & 0xff] ^ - Td3[(s3 >> 24) ] ^ - rk[6]; - t[3] = Td0[(s3 ) & 0xff] ^ - Td1[(s2 >> 8) & 0xff] ^ - Td2[(s1 >> 16) & 0xff] ^ - Td3[(s0 >> 24) ] ^ - rk[7]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - - /* - * Nr - 2 full rounds: - */ - for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) { -#if defined(AES_COMPACT_IN_INNER_ROUNDS) - t[0] = Td4[(s0 ) & 0xff] ^ - Td4[(s3 >> 8) & 0xff] << 8 ^ - Td4[(s2 >> 16) & 0xff] << 16 ^ - Td4[(s1 >> 24) ] << 24; - t[1] = Td4[(s1 ) & 0xff] ^ - Td4[(s0 >> 8) & 0xff] << 8 ^ - Td4[(s3 >> 16) & 0xff] << 16 ^ - Td4[(s2 >> 24) ] << 24; - t[2] = Td4[(s2 ) & 0xff] ^ - Td4[(s1 >> 8) & 0xff] << 8 ^ - Td4[(s0 >> 16) & 0xff] << 16 ^ - Td4[(s3 >> 24) ] << 24; - t[3] = Td4[(s3 ) & 0xff] ^ - Td4[(s2 >> 8) & 0xff] << 8 ^ - Td4[(s1 >> 16) & 0xff] << 16 ^ - Td4[(s0 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; - - for (i = 0; i < 4; i++) { - tp1 = t[i]; - m = tp1 & 0x80808080; - tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp2 & 0x80808080; - tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp4 & 0x80808080; - tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - tp9 = tp8 ^ tp1; - tpb = tp9 ^ tp2; - tpd = tp9 ^ tp4; - tpe = tp8 ^ tp4 ^ tp2; -#if defined(ROTATE) - t[i] = tpe ^ ROTATE(tpd,16) ^ - ROTATE(tp9,8) ^ ROTATE(tpb,24); -#else - t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ - (tp9 >> 24) ^ (tp9 << 8) ^ - (tpb >> 8) ^ (tpb << 24); -#endif - t[i] ^= rk[i]; - } - } -#else - t[0] = Td0[(s0 ) & 0xff] ^ - Td1[(s3 >> 8) & 0xff] ^ - Td2[(s2 >> 16) & 0xff] ^ - Td3[(s1 >> 24) ] ^ - rk[0]; - t[1] = Td0[(s1 ) & 0xff] ^ - Td1[(s0 >> 8) & 0xff] ^ - Td2[(s3 >> 16) & 0xff] ^ - Td3[(s2 >> 24) ] ^ - rk[1]; - t[2] = Td0[(s2 ) & 0xff] ^ - Td1[(s1 >> 8) & 0xff] ^ - Td2[(s0 >> 16) & 0xff] ^ - Td3[(s3 >> 24) ] ^ - rk[2]; - t[3] = Td0[(s3 ) & 0xff] ^ - Td1[(s2 >> 8) & 0xff] ^ - Td2[(s1 >> 16) & 0xff] ^ - Td3[(s0 >> 24) ] ^ - rk[3]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - } - /* - * apply last round and - * map cipher state to byte array block: - */ - prefetch256(Td4); - - *(u32*)(out+0) = - (Td4[(s0 ) & 0xff]) ^ - (Td4[(s3 >> 8) & 0xff] << 8) ^ - (Td4[(s2 >> 16) & 0xff] << 16) ^ - (Td4[(s1 >> 24) ] << 24) ^ - rk[0]; - *(u32*)(out+4) = - (Td4[(s1 ) & 0xff]) ^ - (Td4[(s0 >> 8) & 0xff] << 8) ^ - (Td4[(s3 >> 16) & 0xff] << 16) ^ - (Td4[(s2 >> 24) ] << 24) ^ - rk[1]; - *(u32*)(out+8) = - (Td4[(s2 ) & 0xff]) ^ - (Td4[(s1 >> 8) & 0xff] << 8) ^ - (Td4[(s0 >> 16) & 0xff] << 16) ^ - (Td4[(s3 >> 24) ] << 24) ^ - rk[2]; - *(u32*)(out+12) = - (Td4[(s3 ) & 0xff]) ^ - (Td4[(s2 >> 8) & 0xff] << 8) ^ - (Td4[(s1 >> 16) & 0xff] << 16) ^ - (Td4[(s0 >> 24) ] << 24) ^ - rk[3]; -} diff --git a/crypto/openssl/crypto/aes/asm/aes-armv4.pl b/crypto/openssl/crypto/aes/asm/aes-armv4.pl deleted file mode 100755 index 15742c1..0000000 --- a/crypto/openssl/crypto/aes/asm/aes-armv4.pl +++ /dev/null @@ -1,1030 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for ARMv4 - -# January 2007. -# -# Code uses single 1K S-box and is >2 times faster than code generated -# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -# allows to merge logical or arithmetic operation with shift or rotate -# in one instruction and emit combined result every cycle. The module -# is endian-neutral. The performance is ~42 cycles/byte for 128-bit -# key. - -# May 2007. -# -# AES_set_[en|de]crypt_key is added. - -$s0="r0"; -$s1="r1"; -$s2="r2"; -$s3="r3"; -$t1="r4"; -$t2="r5"; -$t3="r6"; -$i1="r7"; -$i2="r8"; -$i3="r9"; - -$tbl="r10"; -$key="r11"; -$rounds="r12"; - -$code=<<___; -.text -.code 32 - -.type AES_Te,%object -.align 5 -AES_Te: -.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d -.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 -.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d -.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a -.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 -.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b -.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea -.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b -.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a -.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f -.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 -.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f -.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e -.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 -.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d -.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f -.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e -.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb -.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce -.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 -.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c -.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed -.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b -.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a -.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 -.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 -.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 -.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 -.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a -.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 -.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 -.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d -.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f -.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 -.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 -.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 -.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f -.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 -.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c -.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 -.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e -.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 -.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 -.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b -.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 -.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 -.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 -.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 -.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 -.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 -.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 -.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 -.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa -.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 -.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 -.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 -.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 -.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 -.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 -.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a -.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 -.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 -.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 -.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a -@ Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -@ rcon[] -.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.size AES_Te,.-AES_Te - -@ void AES_encrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.global AES_encrypt -.type AES_encrypt,%function -.align 5 -AES_encrypt: - sub r3,pc,#8 @ AES_encrypt - stmdb sp!,{r1,r4-r12,lr} - mov $rounds,r0 @ inp - mov $key,r2 - sub $tbl,r3,#AES_encrypt-AES_Te @ Te - - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - orr $s0,$s0,$t2,lsl#16 - orr $s0,$s0,$t3,lsl#24 - ldrb $s1,[$rounds,#7] - ldrb $t1,[$rounds,#6] - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - orr $s1,$s1,$t2,lsl#16 - orr $s1,$s1,$t3,lsl#24 - ldrb $s2,[$rounds,#11] - ldrb $t1,[$rounds,#10] - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - orr $s2,$s2,$t2,lsl#16 - orr $s2,$s2,$t3,lsl#24 - ldrb $s3,[$rounds,#15] - ldrb $t1,[$rounds,#14] - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 - - bl _armv4_AES_encrypt - - ldr $rounds,[sp],#4 @ pop out - mov $t1,$s0,lsr#24 @ write output in endian-neutral - mov $t2,$s0,lsr#16 @ manner... - mov $t3,$s0,lsr#8 - strb $t1,[$rounds,#0] - strb $t2,[$rounds,#1] - strb $t3,[$rounds,#2] - strb $s0,[$rounds,#3] - mov $t1,$s1,lsr#24 - mov $t2,$s1,lsr#16 - mov $t3,$s1,lsr#8 - strb $t1,[$rounds,#4] - strb $t2,[$rounds,#5] - strb $t3,[$rounds,#6] - strb $s1,[$rounds,#7] - mov $t1,$s2,lsr#24 - mov $t2,$s2,lsr#16 - mov $t3,$s2,lsr#8 - strb $t1,[$rounds,#8] - strb $t2,[$rounds,#9] - strb $t3,[$rounds,#10] - strb $s2,[$rounds,#11] - mov $t1,$s3,lsr#24 - mov $t2,$s3,lsr#16 - mov $t3,$s3,lsr#8 - strb $t1,[$rounds,#12] - strb $t2,[$rounds,#13] - strb $t3,[$rounds,#14] - strb $s3,[$rounds,#15] - - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_encrypt,.-AES_encrypt - -.type _armv4_AES_encrypt,%function -.align 2 -_armv4_AES_encrypt: - str lr,[sp,#-4]! @ push lr - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - ldr $rounds,[$key,#240-16] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - sub $rounds,$rounds,#1 - mov lr,#255 - -.Lenc_loop: - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0,lsr#16 - and $i1,lr,$s0 - mov $s0,$s0,lsr#24 - ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0] - ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24] - ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8] - ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16] - - and $i1,lr,$s1,lsr#16 @ i0 - and $i2,lr,$s1 - and $i3,lr,$s1,lsr#8 - mov $s1,$s1,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16] - ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24] - ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0] - ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8] - eor $s0,$s0,$i1,ror#8 - eor $s1,$s1,$t1,ror#24 - eor $t2,$t2,$i2,ror#8 - eor $t3,$t3,$i3,ror#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2,lsr#16 @ i1 - and $i3,lr,$s2 - mov $s2,$s2,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] - ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] - ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] - ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] - eor $s0,$s0,$i1,ror#16 - eor $s1,$s1,$i2,ror#8 - eor $s2,$s2,$t2,ror#16 - eor $t3,$t3,$i3,ror#16 - - and $i1,lr,$s3 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3,lsr#16 @ i2 - mov $s3,$s3,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] - ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] - ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] - ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] - eor $s0,$s0,$i1,ror#24 - eor $s1,$s1,$i2,ror#16 - eor $s2,$s2,$i3,ror#8 - eor $s3,$s3,$t3,ror#8 - - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - subs $rounds,$rounds,#1 - bne .Lenc_loop - - add $tbl,$tbl,#2 - - and $i1,lr,$s0 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0,lsr#16 - mov $s0,$s0,lsr#24 - ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0] - ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24] - ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8] - ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16] - - and $i1,lr,$s1,lsr#16 @ i0 - and $i2,lr,$s1 - and $i3,lr,$s1,lsr#8 - mov $s1,$s1,lsr#24 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16] - ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8] - eor $s0,$i1,$s0,lsl#8 - eor $s1,$t1,$s1,lsl#24 - eor $t2,$i2,$t2,lsl#8 - eor $t3,$i3,$t3,lsl#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2,lsr#16 @ i1 - and $i3,lr,$s2 - mov $s2,$s2,lsr#24 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] - ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] - eor $s0,$i1,$s0,lsl#8 - eor $s1,$s1,$i2,lsl#16 - eor $s2,$t2,$s2,lsl#24 - eor $t3,$i3,$t3,lsl#8 - - and $i1,lr,$s3 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3,lsr#16 @ i2 - mov $s3,$s3,lsr#24 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] - ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] - eor $s0,$i1,$s0,lsl#8 - eor $s1,$s1,$i2,lsl#8 - eor $s2,$s2,$i3,lsl#16 - eor $s3,$t3,$s3,lsl#24 - - ldr lr,[sp],#4 @ pop lr - ldr $t1,[$key,#0] - ldr $t2,[$key,#4] - ldr $t3,[$key,#8] - ldr $i1,[$key,#12] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - sub $tbl,$tbl,#2 - mov pc,lr @ return -.size _armv4_AES_encrypt,.-_armv4_AES_encrypt - -.global AES_set_encrypt_key -.type AES_set_encrypt_key,%function -.align 5 -AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key - teq r0,#0 - moveq r0,#-1 - beq .Labrt - teq r2,#0 - moveq r0,#-1 - beq .Labrt - - teq r1,#128 - beq .Lok - teq r1,#192 - beq .Lok - teq r1,#256 - movne r0,#-1 - bne .Labrt - -.Lok: stmdb sp!,{r4-r12,lr} - sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 - - mov $rounds,r0 @ inp - mov lr,r1 @ bits - mov $key,r2 @ key - - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - orr $s0,$s0,$t2,lsl#16 - orr $s0,$s0,$t3,lsl#24 - ldrb $s1,[$rounds,#7] - ldrb $t1,[$rounds,#6] - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - orr $s1,$s1,$t2,lsl#16 - orr $s1,$s1,$t3,lsl#24 - ldrb $s2,[$rounds,#11] - ldrb $t1,[$rounds,#10] - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - orr $s2,$s2,$t2,lsl#16 - orr $s2,$s2,$t3,lsl#24 - ldrb $s3,[$rounds,#15] - ldrb $t1,[$rounds,#14] - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 - str $s0,[$key],#16 - str $s1,[$key,#-12] - str $s2,[$key,#-8] - str $s3,[$key,#-4] - - teq lr,#128 - bne .Lnot128 - mov $rounds,#10 - str $rounds,[$key,#240-16] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - -.L128_loop: - and $t2,lr,$s3,lsr#24 - and $i1,lr,$s3,lsr#16 - and $i2,lr,$s3,lsr#8 - and $i3,lr,$s3 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i1,lsl#24 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#8 - eor $t2,$t2,$t1 - eor $s0,$s0,$t2 @ rk[4]=rk[0]^... - eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4] - eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5] - eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6] - str $s0,[$key],#16 - str $s1,[$key,#-12] - str $s2,[$key,#-8] - str $s3,[$key,#-4] - - subs $rounds,$rounds,#1 - bne .L128_loop - sub r2,$key,#176 - b .Ldone - -.Lnot128: - ldrb $i2,[$rounds,#19] - ldrb $t1,[$rounds,#18] - ldrb $t2,[$rounds,#17] - ldrb $t3,[$rounds,#16] - orr $i2,$i2,$t1,lsl#8 - orr $i2,$i2,$t2,lsl#16 - orr $i2,$i2,$t3,lsl#24 - ldrb $i3,[$rounds,#23] - ldrb $t1,[$rounds,#22] - ldrb $t2,[$rounds,#21] - ldrb $t3,[$rounds,#20] - orr $i3,$i3,$t1,lsl#8 - orr $i3,$i3,$t2,lsl#16 - orr $i3,$i3,$t3,lsl#24 - str $i2,[$key],#8 - str $i3,[$key,#-4] - - teq lr,#192 - bne .Lnot192 - mov $rounds,#12 - str $rounds,[$key,#240-24] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - mov $rounds,#8 - -.L192_loop: - and $t2,lr,$i3,lsr#24 - and $i1,lr,$i3,lsr#16 - and $i2,lr,$i3,lsr#8 - and $i3,lr,$i3 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i1,lsl#24 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#8 - eor $i3,$t2,$t1 - eor $s0,$s0,$i3 @ rk[6]=rk[0]^... - eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6] - eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7] - eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8] - str $s0,[$key],#24 - str $s1,[$key,#-20] - str $s2,[$key,#-16] - str $s3,[$key,#-12] - - subs $rounds,$rounds,#1 - subeq r2,$key,#216 - beq .Ldone - - ldr $i1,[$key,#-32] - ldr $i2,[$key,#-28] - eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9] - eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10] - str $i1,[$key,#-8] - str $i3,[$key,#-4] - b .L192_loop - -.Lnot192: - ldrb $i2,[$rounds,#27] - ldrb $t1,[$rounds,#26] - ldrb $t2,[$rounds,#25] - ldrb $t3,[$rounds,#24] - orr $i2,$i2,$t1,lsl#8 - orr $i2,$i2,$t2,lsl#16 - orr $i2,$i2,$t3,lsl#24 - ldrb $i3,[$rounds,#31] - ldrb $t1,[$rounds,#30] - ldrb $t2,[$rounds,#29] - ldrb $t3,[$rounds,#28] - orr $i3,$i3,$t1,lsl#8 - orr $i3,$i3,$t2,lsl#16 - orr $i3,$i3,$t3,lsl#24 - str $i2,[$key],#8 - str $i3,[$key,#-4] - - mov $rounds,#14 - str $rounds,[$key,#240-32] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - mov $rounds,#7 - -.L256_loop: - and $t2,lr,$i3,lsr#24 - and $i1,lr,$i3,lsr#16 - and $i2,lr,$i3,lsr#8 - and $i3,lr,$i3 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i1,lsl#24 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#8 - eor $i3,$t2,$t1 - eor $s0,$s0,$i3 @ rk[8]=rk[0]^... - eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8] - eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9] - eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10] - str $s0,[$key],#32 - str $s1,[$key,#-28] - str $s2,[$key,#-24] - str $s3,[$key,#-20] - - subs $rounds,$rounds,#1 - subeq r2,$key,#256 - beq .Ldone - - and $t2,lr,$s3 - and $i1,lr,$s3,lsr#8 - and $i2,lr,$s3,lsr#16 - and $i3,lr,$s3,lsr#24 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - orr $t2,$t2,$i1,lsl#8 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#24 - - ldr $t1,[$key,#-48] - ldr $i1,[$key,#-44] - ldr $i2,[$key,#-40] - ldr $i3,[$key,#-36] - eor $t1,$t1,$t2 @ rk[12]=rk[4]^... - eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12] - eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13] - eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14] - str $t1,[$key,#-16] - str $i1,[$key,#-12] - str $i2,[$key,#-8] - str $i3,[$key,#-4] - b .L256_loop - -.Ldone: mov r0,#0 - ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_set_encrypt_key,.-AES_set_encrypt_key - -.global AES_set_decrypt_key -.type AES_set_decrypt_key,%function -.align 5 -AES_set_decrypt_key: - str lr,[sp,#-4]! @ push lr - bl AES_set_encrypt_key - teq r0,#0 - ldrne lr,[sp],#4 @ pop lr - bne .Labrt - - stmdb sp!,{r4-r12} - - ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov $key,r2 @ which is AES_KEY *key - mov $i1,r2 - add $i2,r2,$rounds,lsl#4 - -.Linv: ldr $s0,[$i1] - ldr $s1,[$i1,#4] - ldr $s2,[$i1,#8] - ldr $s3,[$i1,#12] - ldr $t1,[$i2] - ldr $t2,[$i2,#4] - ldr $t3,[$i2,#8] - ldr $i3,[$i2,#12] - str $s0,[$i2],#-16 - str $s1,[$i2,#16+4] - str $s2,[$i2,#16+8] - str $s3,[$i2,#16+12] - str $t1,[$i1],#16 - str $t2,[$i1,#-12] - str $t3,[$i1,#-8] - str $i3,[$i1,#-4] - teq $i1,$i2 - bne .Linv -___ -$mask80=$i1; -$mask1b=$i2; -$mask7f=$i3; -$code.=<<___; - ldr $s0,[$key,#16]! @ prefetch tp1 - mov $mask80,#0x80 - mov $mask1b,#0x1b - orr $mask80,$mask80,#0x8000 - orr $mask1b,$mask1b,#0x1b00 - orr $mask80,$mask80,$mask80,lsl#16 - orr $mask1b,$mask1b,$mask1b,lsl#16 - sub $rounds,$rounds,#1 - mvn $mask7f,$mask80 - mov $rounds,$rounds,lsl#2 @ (rounds-1)*4 - -.Lmix: and $t1,$s0,$mask80 - and $s1,$s0,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s1,$t1,$s1,lsl#1 @ tp2 - - and $t1,$s1,$mask80 - and $s2,$s1,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s2,$t1,$s2,lsl#1 @ tp4 - - and $t1,$s2,$mask80 - and $s3,$s2,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s3,$t1,$s3,lsl#1 @ tp8 - - eor $t1,$s1,$s2 - eor $t2,$s0,$s3 @ tp9 - eor $t1,$t1,$s3 @ tpe - eor $t1,$t1,$s1,ror#24 - eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) - eor $t1,$t1,$s2,ror#16 - eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) - eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24) - - ldr $s0,[$key,#4] @ prefetch tp1 - str $t1,[$key],#4 - subs $rounds,$rounds,#1 - bne .Lmix - - mov r0,#0 - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_set_decrypt_key,.-AES_set_decrypt_key - -.type AES_Td,%object -.align 5 -AES_Td: -.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 -.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 -.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 -.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f -.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 -.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 -.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da -.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 -.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd -.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 -.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 -.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 -.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 -.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a -.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 -.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c -.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 -.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a -.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 -.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 -.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 -.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff -.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 -.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb -.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 -.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e -.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 -.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a -.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e -.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 -.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d -.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 -.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd -.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 -.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 -.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 -.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d -.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 -.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 -.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef -.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 -.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 -.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 -.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 -.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 -.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b -.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 -.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 -.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 -.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 -.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 -.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f -.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df -.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f -.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e -.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 -.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 -.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c -.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf -.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 -.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f -.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 -.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 -.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 -@ Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -@ void AES_decrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.global AES_decrypt -.type AES_decrypt,%function -.align 5 -AES_decrypt: - sub r3,pc,#8 @ AES_decrypt - stmdb sp!,{r1,r4-r12,lr} - mov $rounds,r0 @ inp - mov $key,r2 - sub $tbl,r3,#AES_decrypt-AES_Td @ Td - - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - orr $s0,$s0,$t2,lsl#16 - orr $s0,$s0,$t3,lsl#24 - ldrb $s1,[$rounds,#7] - ldrb $t1,[$rounds,#6] - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - orr $s1,$s1,$t2,lsl#16 - orr $s1,$s1,$t3,lsl#24 - ldrb $s2,[$rounds,#11] - ldrb $t1,[$rounds,#10] - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - orr $s2,$s2,$t2,lsl#16 - orr $s2,$s2,$t3,lsl#24 - ldrb $s3,[$rounds,#15] - ldrb $t1,[$rounds,#14] - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 - - bl _armv4_AES_decrypt - - ldr $rounds,[sp],#4 @ pop out - mov $t1,$s0,lsr#24 @ write output in endian-neutral - mov $t2,$s0,lsr#16 @ manner... - mov $t3,$s0,lsr#8 - strb $t1,[$rounds,#0] - strb $t2,[$rounds,#1] - strb $t3,[$rounds,#2] - strb $s0,[$rounds,#3] - mov $t1,$s1,lsr#24 - mov $t2,$s1,lsr#16 - mov $t3,$s1,lsr#8 - strb $t1,[$rounds,#4] - strb $t2,[$rounds,#5] - strb $t3,[$rounds,#6] - strb $s1,[$rounds,#7] - mov $t1,$s2,lsr#24 - mov $t2,$s2,lsr#16 - mov $t3,$s2,lsr#8 - strb $t1,[$rounds,#8] - strb $t2,[$rounds,#9] - strb $t3,[$rounds,#10] - strb $s2,[$rounds,#11] - mov $t1,$s3,lsr#24 - mov $t2,$s3,lsr#16 - mov $t3,$s3,lsr#8 - strb $t1,[$rounds,#12] - strb $t2,[$rounds,#13] - strb $t3,[$rounds,#14] - strb $s3,[$rounds,#15] - - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_decrypt,.-AES_decrypt - -.type _armv4_AES_decrypt,%function -.align 2 -_armv4_AES_decrypt: - str lr,[sp,#-4]! @ push lr - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - ldr $rounds,[$key,#240-16] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - sub $rounds,$rounds,#1 - mov lr,#255 - -.Ldec_loop: - and $i1,lr,$s0,lsr#16 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0 - mov $s0,$s0,lsr#24 - ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16] - ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24] - ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8] - ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0] - - and $i1,lr,$s1 @ i0 - and $i2,lr,$s1,lsr#16 - and $i3,lr,$s1,lsr#8 - mov $s1,$s1,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0] - ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24] - ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16] - ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8] - eor $s0,$s0,$i1,ror#24 - eor $s1,$s1,$t1,ror#8 - eor $t2,$i2,$t2,ror#8 - eor $t3,$i3,$t3,ror#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2 @ i1 - and $i3,lr,$s2,lsr#16 - mov $s2,$s2,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] - ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] - ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] - ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] - eor $s0,$s0,$i1,ror#16 - eor $s1,$s1,$i2,ror#24 - eor $s2,$s2,$t2,ror#8 - eor $t3,$i3,$t3,ror#8 - - and $i1,lr,$s3,lsr#16 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3 @ i2 - mov $s3,$s3,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] - ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] - ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] - ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] - eor $s0,$s0,$i1,ror#8 - eor $s1,$s1,$i2,ror#16 - eor $s2,$s2,$i3,ror#24 - eor $s3,$s3,$t3,ror#8 - - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - subs $rounds,$rounds,#1 - bne .Ldec_loop - - add $tbl,$tbl,#1024 - - ldr $t1,[$tbl,#0] @ prefetch Td4 - ldr $t2,[$tbl,#32] - ldr $t3,[$tbl,#64] - ldr $i1,[$tbl,#96] - ldr $i2,[$tbl,#128] - ldr $i3,[$tbl,#160] - ldr $t1,[$tbl,#192] - ldr $t2,[$tbl,#224] - - and $i1,lr,$s0,lsr#16 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0 - ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24] - ldrb $t1,[$tbl,$i1] @ Td4[s0>>16] - ldrb $t2,[$tbl,$i2] @ Td4[s0>>8] - ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] - - and $i1,lr,$s1 @ i0 - and $i2,lr,$s1,lsr#16 - and $i3,lr,$s1,lsr#8 - ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] - ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] - ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] - ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] - eor $s0,$i1,$s0,lsl#24 - eor $s1,$t1,$s1,lsl#8 - eor $t2,$t2,$i2,lsl#8 - eor $t3,$t3,$i3,lsl#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2 @ i1 - and $i3,lr,$s2,lsr#16 - ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] - ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] - ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] - ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] - eor $s0,$s0,$i1,lsl#8 - eor $s1,$i2,$s1,lsl#16 - eor $s2,$t2,$s2,lsl#16 - eor $t3,$t3,$i3,lsl#16 - - and $i1,lr,$s3,lsr#16 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3 @ i2 - ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] - ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] - ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] - ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] - eor $s0,$s0,$i1,lsl#16 - eor $s1,$s1,$i2,lsl#8 - eor $s2,$i3,$s2,lsl#8 - eor $s3,$t3,$s3,lsl#24 - - ldr lr,[sp],#4 @ pop lr - ldr $t1,[$key,#0] - ldr $t2,[$key,#4] - ldr $t3,[$key,#8] - ldr $i1,[$key,#12] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - sub $tbl,$tbl,#1024 - mov pc,lr @ return -.size _armv4_AES_decrypt,.-_armv4_AES_decrypt -.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" -___ - -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; diff --git a/crypto/openssl/crypto/aes/asm/aes-ppc.pl b/crypto/openssl/crypto/aes/asm/aes-ppc.pl deleted file mode 100755 index ce42765..0000000 --- a/crypto/openssl/crypto/aes/asm/aes-ppc.pl +++ /dev/null @@ -1,1176 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# Needs more work: key setup, page boundaries, CBC routine... -# -# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with -# 128-bit key, which is ~40% better than 64-bit code generated by gcc -# 4.0. But these are not the ones currently used! Their "compact" -# counterparts are, for security reason. ppc_AES_encrypt_compact runs -# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - -# at 1/3 of ppc_AES_decrypt. - -$flavour = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=32*$SIZE_T; - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } -} - -$sp="r1"; -$toc="r2"; -$inp="r3"; -$out="r4"; -$key="r5"; - -$Tbl0="r3"; -$Tbl1="r6"; -$Tbl2="r7"; -$Tbl3="r2"; - -$s0="r8"; -$s1="r9"; -$s2="r10"; -$s3="r11"; - -$t0="r12"; -$t1="r13"; -$t2="r14"; -$t3="r15"; - -$acc00="r16"; -$acc01="r17"; -$acc02="r18"; -$acc03="r19"; - -$acc04="r20"; -$acc05="r21"; -$acc06="r22"; -$acc07="r23"; - -$acc08="r24"; -$acc09="r25"; -$acc10="r26"; -$acc11="r27"; - -$acc12="r28"; -$acc13="r29"; -$acc14="r30"; -$acc15="r31"; - -# stay away from TLS pointer -if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } -else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } -$mask80=$Tbl2; -$mask1b=$Tbl3; - -$code.=<<___; -.machine "any" -.text - -.align 7 -LAES_Te: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry - addi $Tbl0,$Tbl0,`128-8` - mtlr r0 - blr - .space `32-24` -LAES_Td: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry - addi $Tbl0,$Tbl0,`128-8-32+2048+256` - mtlr r0 - blr - .space `128-32-24` -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - - -.globl .AES_encrypt -.align 7 -.AES_encrypt: - mflr r0 - $STU $sp,-$FRAME($sp) - - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - - lwz $s0,0($inp) - lwz $s1,4($inp) - lwz $s2,8($inp) - lwz $s3,12($inp) - bl LAES_Te - bl Lppc_AES_encrypt_compact - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - - $POP r0,`$FRAME-$SIZE_T*21`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - -.align 4 -Lppc_AES_encrypt: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,3 - addi $Tbl2,$Tbl0,2 - addi $Tbl3,$Tbl0,1 - addi $acc00,$acc00,-1 - addi $key,$key,16 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - mtctr $acc00 -.align 4 -Lenc_loop: - rlwinm $acc00,$s0,`32-24+3`,21,28 - rlwinm $acc01,$s1,`32-24+3`,21,28 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24+3`,21,28 - rlwinm $acc03,$s3,`32-24+3`,21,28 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s1,`32-16+3`,21,28 - rlwinm $acc05,$s2,`32-16+3`,21,28 - lwzx $acc00,$Tbl0,$acc00 - lwzx $acc01,$Tbl0,$acc01 - rlwinm $acc06,$s3,`32-16+3`,21,28 - rlwinm $acc07,$s0,`32-16+3`,21,28 - lwzx $acc02,$Tbl0,$acc02 - lwzx $acc03,$Tbl0,$acc03 - rlwinm $acc08,$s2,`32-8+3`,21,28 - rlwinm $acc09,$s3,`32-8+3`,21,28 - lwzx $acc04,$Tbl1,$acc04 - lwzx $acc05,$Tbl1,$acc05 - rlwinm $acc10,$s0,`32-8+3`,21,28 - rlwinm $acc11,$s1,`32-8+3`,21,28 - lwzx $acc06,$Tbl1,$acc06 - lwzx $acc07,$Tbl1,$acc07 - rlwinm $acc12,$s3,`0+3`,21,28 - rlwinm $acc13,$s0,`0+3`,21,28 - lwzx $acc08,$Tbl2,$acc08 - lwzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s1,`0+3`,21,28 - rlwinm $acc15,$s2,`0+3`,21,28 - lwzx $acc10,$Tbl2,$acc10 - lwzx $acc11,$Tbl2,$acc11 - xor $t0,$t0,$acc00 - xor $t1,$t1,$acc01 - lwzx $acc12,$Tbl3,$acc12 - lwzx $acc13,$Tbl3,$acc13 - xor $t2,$t2,$acc02 - xor $t3,$t3,$acc03 - lwzx $acc14,$Tbl3,$acc14 - lwzx $acc15,$Tbl3,$acc15 - xor $t0,$t0,$acc04 - xor $t1,$t1,$acc05 - xor $t2,$t2,$acc06 - xor $t3,$t3,$acc07 - xor $t0,$t0,$acc08 - xor $t1,$t1,$acc09 - xor $t2,$t2,$acc10 - xor $t3,$t3,$acc11 - xor $s0,$t0,$acc12 - xor $s1,$t1,$acc13 - xor $s2,$t2,$acc14 - xor $s3,$t3,$acc15 - addi $key,$key,16 - bdnz- Lenc_loop - - addi $Tbl2,$Tbl0,2048 - nop - lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 - lwz $acc09,`2048+32`($Tbl0) - lwz $acc10,`2048+64`($Tbl0) - lwz $acc11,`2048+96`($Tbl0) - lwz $acc08,`2048+128`($Tbl0) - lwz $acc09,`2048+160`($Tbl0) - lwz $acc10,`2048+192`($Tbl0) - lwz $acc11,`2048+224`($Tbl0) - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s1,`32-16`,24,31 - rlwinm $acc05,$s2,`32-16`,24,31 - lbzx $acc00,$Tbl2,$acc00 - lbzx $acc01,$Tbl2,$acc01 - rlwinm $acc06,$s3,`32-16`,24,31 - rlwinm $acc07,$s0,`32-16`,24,31 - lbzx $acc02,$Tbl2,$acc02 - lbzx $acc03,$Tbl2,$acc03 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc04,$Tbl2,$acc04 - lbzx $acc05,$Tbl2,$acc05 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc06,$Tbl2,$acc06 - lbzx $acc07,$Tbl2,$acc07 - rlwinm $acc12,$s3,`0`,24,31 - rlwinm $acc13,$s0,`0`,24,31 - lbzx $acc08,$Tbl2,$acc08 - lbzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s1,`0`,24,31 - rlwinm $acc15,$s2,`0`,24,31 - lbzx $acc10,$Tbl2,$acc10 - lbzx $acc11,$Tbl2,$acc11 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc12,$Tbl2,$acc12 - lbzx $acc13,$Tbl2,$acc13 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc14,$Tbl2,$acc14 - lbzx $acc15,$Tbl2,$acc15 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - -.align 4 -Lppc_AES_encrypt_compact: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,2048 - lis $mask80,0x8080 - lis $mask1b,0x1b1b - addi $key,$key,16 - ori $mask80,$mask80,0x8080 - ori $mask1b,$mask1b,0x1b1b - mtctr $acc00 -.align 4 -Lenc_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lbzx $acc00,$Tbl1,$acc00 - lbzx $acc01,$Tbl1,$acc01 - rlwinm $acc04,$s1,`32-16`,24,31 - rlwinm $acc05,$s2,`32-16`,24,31 - lbzx $acc02,$Tbl1,$acc02 - lbzx $acc03,$Tbl1,$acc03 - rlwinm $acc06,$s3,`32-16`,24,31 - rlwinm $acc07,$s0,`32-16`,24,31 - lbzx $acc04,$Tbl1,$acc04 - lbzx $acc05,$Tbl1,$acc05 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc06,$Tbl1,$acc06 - lbzx $acc07,$Tbl1,$acc07 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc08,$Tbl1,$acc08 - lbzx $acc09,$Tbl1,$acc09 - rlwinm $acc12,$s3,`0`,24,31 - rlwinm $acc13,$s0,`0`,24,31 - lbzx $acc10,$Tbl1,$acc10 - lbzx $acc11,$Tbl1,$acc11 - rlwinm $acc14,$s1,`0`,24,31 - rlwinm $acc15,$s2,`0`,24,31 - lbzx $acc12,$Tbl1,$acc12 - lbzx $acc13,$Tbl1,$acc13 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc14,$Tbl1,$acc14 - lbzx $acc15,$Tbl1,$acc15 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - lwz $t0,0($key) - lwz $t1,4($key) - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - lwz $t2,8($key) - lwz $t3,12($key) - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - - addi $key,$key,16 - bdz Lenc_compact_done - - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc01,$s1,$mask80 - and $acc02,$s2,$mask80 - and $acc03,$s3,$mask80 - srwi $acc04,$acc00,7 # r1>>7 - srwi $acc05,$acc01,7 - srwi $acc06,$acc02,7 - srwi $acc07,$acc03,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc09,$s1,$mask80 - andc $acc10,$s2,$mask80 - andc $acc11,$s3,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc01,$acc01,$acc05 - sub $acc02,$acc02,$acc06 - sub $acc03,$acc03,$acc07 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc09,$acc09,$acc09 - add $acc10,$acc10,$acc10 - add $acc11,$acc11,$acc11 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc01,$acc01,$mask1b - and $acc02,$acc02,$mask1b - and $acc03,$acc03,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - - rotlwi $acc12,$s0,16 # ROTATE(r0,16) - rotlwi $acc13,$s1,16 - rotlwi $acc14,$s2,16 - rotlwi $acc15,$s3,16 - xor $s0,$s0,$acc00 # r0^r2 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) - rotrwi $s1,$s1,24 - rotrwi $s2,$s2,24 - rotrwi $s3,$s3,24 - xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotlwi $acc08,$acc12,8 # ROTATE(r0,24) - rotlwi $acc09,$acc13,8 - rotlwi $acc10,$acc14,8 - rotlwi $acc11,$acc15,8 - xor $s0,$s0,$acc12 # - xor $s1,$s1,$acc13 - xor $s2,$s2,$acc14 - xor $s3,$s3,$acc15 - xor $s0,$s0,$acc08 # - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - - b Lenc_compact_loop -.align 4 -Lenc_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - -.globl .AES_decrypt -.align 7 -.AES_decrypt: - mflr r0 - $STU $sp,-$FRAME($sp) - - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - - lwz $s0,0($inp) - lwz $s1,4($inp) - lwz $s2,8($inp) - lwz $s3,12($inp) - bl LAES_Td - bl Lppc_AES_decrypt_compact - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - - $POP r0,`$FRAME-$SIZE_T*21`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - -.align 4 -Lppc_AES_decrypt: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,3 - addi $Tbl2,$Tbl0,2 - addi $Tbl3,$Tbl0,1 - addi $acc00,$acc00,-1 - addi $key,$key,16 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - mtctr $acc00 -.align 4 -Ldec_loop: - rlwinm $acc00,$s0,`32-24+3`,21,28 - rlwinm $acc01,$s1,`32-24+3`,21,28 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24+3`,21,28 - rlwinm $acc03,$s3,`32-24+3`,21,28 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s3,`32-16+3`,21,28 - rlwinm $acc05,$s0,`32-16+3`,21,28 - lwzx $acc00,$Tbl0,$acc00 - lwzx $acc01,$Tbl0,$acc01 - rlwinm $acc06,$s1,`32-16+3`,21,28 - rlwinm $acc07,$s2,`32-16+3`,21,28 - lwzx $acc02,$Tbl0,$acc02 - lwzx $acc03,$Tbl0,$acc03 - rlwinm $acc08,$s2,`32-8+3`,21,28 - rlwinm $acc09,$s3,`32-8+3`,21,28 - lwzx $acc04,$Tbl1,$acc04 - lwzx $acc05,$Tbl1,$acc05 - rlwinm $acc10,$s0,`32-8+3`,21,28 - rlwinm $acc11,$s1,`32-8+3`,21,28 - lwzx $acc06,$Tbl1,$acc06 - lwzx $acc07,$Tbl1,$acc07 - rlwinm $acc12,$s1,`0+3`,21,28 - rlwinm $acc13,$s2,`0+3`,21,28 - lwzx $acc08,$Tbl2,$acc08 - lwzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s3,`0+3`,21,28 - rlwinm $acc15,$s0,`0+3`,21,28 - lwzx $acc10,$Tbl2,$acc10 - lwzx $acc11,$Tbl2,$acc11 - xor $t0,$t0,$acc00 - xor $t1,$t1,$acc01 - lwzx $acc12,$Tbl3,$acc12 - lwzx $acc13,$Tbl3,$acc13 - xor $t2,$t2,$acc02 - xor $t3,$t3,$acc03 - lwzx $acc14,$Tbl3,$acc14 - lwzx $acc15,$Tbl3,$acc15 - xor $t0,$t0,$acc04 - xor $t1,$t1,$acc05 - xor $t2,$t2,$acc06 - xor $t3,$t3,$acc07 - xor $t0,$t0,$acc08 - xor $t1,$t1,$acc09 - xor $t2,$t2,$acc10 - xor $t3,$t3,$acc11 - xor $s0,$t0,$acc12 - xor $s1,$t1,$acc13 - xor $s2,$t2,$acc14 - xor $s3,$t3,$acc15 - addi $key,$key,16 - bdnz- Ldec_loop - - addi $Tbl2,$Tbl0,2048 - nop - lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 - lwz $acc09,`2048+32`($Tbl0) - lwz $acc10,`2048+64`($Tbl0) - lwz $acc11,`2048+96`($Tbl0) - lwz $acc08,`2048+128`($Tbl0) - lwz $acc09,`2048+160`($Tbl0) - lwz $acc10,`2048+192`($Tbl0) - lwz $acc11,`2048+224`($Tbl0) - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s3,`32-16`,24,31 - rlwinm $acc05,$s0,`32-16`,24,31 - lbzx $acc00,$Tbl2,$acc00 - lbzx $acc01,$Tbl2,$acc01 - rlwinm $acc06,$s1,`32-16`,24,31 - rlwinm $acc07,$s2,`32-16`,24,31 - lbzx $acc02,$Tbl2,$acc02 - lbzx $acc03,$Tbl2,$acc03 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc04,$Tbl2,$acc04 - lbzx $acc05,$Tbl2,$acc05 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc06,$Tbl2,$acc06 - lbzx $acc07,$Tbl2,$acc07 - rlwinm $acc12,$s1,`0`,24,31 - rlwinm $acc13,$s2,`0`,24,31 - lbzx $acc08,$Tbl2,$acc08 - lbzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s3,`0`,24,31 - rlwinm $acc15,$s0,`0`,24,31 - lbzx $acc10,$Tbl2,$acc10 - lbzx $acc11,$Tbl2,$acc11 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc12,$Tbl2,$acc12 - lbzx $acc13,$Tbl2,$acc13 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc14,$Tbl2,$acc14 - lbzx $acc15,$Tbl2,$acc15 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - -.align 4 -Lppc_AES_decrypt_compact: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,2048 - lis $mask80,0x8080 - lis $mask1b,0x1b1b - addi $key,$key,16 - ori $mask80,$mask80,0x8080 - ori $mask1b,$mask1b,0x1b1b -___ -$code.=<<___ if ($SIZE_T==8); - insrdi $mask80,$mask80,32,0 - insrdi $mask1b,$mask1b,32,0 -___ -$code.=<<___; - mtctr $acc00 -.align 4 -Ldec_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lbzx $acc00,$Tbl1,$acc00 - lbzx $acc01,$Tbl1,$acc01 - rlwinm $acc04,$s3,`32-16`,24,31 - rlwinm $acc05,$s0,`32-16`,24,31 - lbzx $acc02,$Tbl1,$acc02 - lbzx $acc03,$Tbl1,$acc03 - rlwinm $acc06,$s1,`32-16`,24,31 - rlwinm $acc07,$s2,`32-16`,24,31 - lbzx $acc04,$Tbl1,$acc04 - lbzx $acc05,$Tbl1,$acc05 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc06,$Tbl1,$acc06 - lbzx $acc07,$Tbl1,$acc07 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc08,$Tbl1,$acc08 - lbzx $acc09,$Tbl1,$acc09 - rlwinm $acc12,$s1,`0`,24,31 - rlwinm $acc13,$s2,`0`,24,31 - lbzx $acc10,$Tbl1,$acc10 - lbzx $acc11,$Tbl1,$acc11 - rlwinm $acc14,$s3,`0`,24,31 - rlwinm $acc15,$s0,`0`,24,31 - lbzx $acc12,$Tbl1,$acc12 - lbzx $acc13,$Tbl1,$acc13 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc14,$Tbl1,$acc14 - lbzx $acc15,$Tbl1,$acc15 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - lwz $t0,0($key) - lwz $t1,4($key) - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - lwz $t2,8($key) - lwz $t3,12($key) - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - - addi $key,$key,16 - bdz Ldec_compact_done -___ -$code.=<<___ if ($SIZE_T==8); - # vectorized permutation improves decrypt performance by 10% - insrdi $s0,$s1,32,0 - insrdi $s2,$s3,32,0 - - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc02,$s2,$mask80 - srdi $acc04,$acc00,7 # r1>>7 - srdi $acc06,$acc02,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc10,$s2,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc02,$acc02,$acc06 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc10,$acc10,$acc10 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc02,$acc02,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc02,$acc02,$acc10 - - and $acc04,$acc00,$mask80 # r1=r2&0x80808080 - and $acc06,$acc02,$mask80 - srdi $acc08,$acc04,7 # r1>>7 - srdi $acc10,$acc06,7 - andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f - andc $acc14,$acc02,$mask80 - sub $acc04,$acc04,$acc08 # r1-(r1>>7) - sub $acc06,$acc06,$acc10 - add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 - add $acc14,$acc14,$acc14 - and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc06,$acc06,$mask1b - xor $acc04,$acc04,$acc12 # r4 - xor $acc06,$acc06,$acc14 - - and $acc08,$acc04,$mask80 # r1=r4&0x80808080 - and $acc10,$acc06,$mask80 - srdi $acc12,$acc08,7 # r1>>7 - srdi $acc14,$acc10,7 - sub $acc08,$acc08,$acc12 # r1-(r1>>7) - sub $acc10,$acc10,$acc14 - andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f - andc $acc14,$acc06,$mask80 - add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 - add $acc14,$acc14,$acc14 - and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc10,$acc10,$mask1b - xor $acc08,$acc08,$acc12 # r8 - xor $acc10,$acc10,$acc14 - - xor $acc00,$acc00,$s0 # r2^r0 - xor $acc02,$acc02,$s2 - xor $acc04,$acc04,$s0 # r4^r0 - xor $acc06,$acc06,$s2 - - extrdi $acc01,$acc00,32,0 - extrdi $acc03,$acc02,32,0 - extrdi $acc05,$acc04,32,0 - extrdi $acc07,$acc06,32,0 - extrdi $acc09,$acc08,32,0 - extrdi $acc11,$acc10,32,0 -___ -$code.=<<___ if ($SIZE_T==4); - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc01,$s1,$mask80 - and $acc02,$s2,$mask80 - and $acc03,$s3,$mask80 - srwi $acc04,$acc00,7 # r1>>7 - srwi $acc05,$acc01,7 - srwi $acc06,$acc02,7 - srwi $acc07,$acc03,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc09,$s1,$mask80 - andc $acc10,$s2,$mask80 - andc $acc11,$s3,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc01,$acc01,$acc05 - sub $acc02,$acc02,$acc06 - sub $acc03,$acc03,$acc07 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc09,$acc09,$acc09 - add $acc10,$acc10,$acc10 - add $acc11,$acc11,$acc11 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc01,$acc01,$mask1b - and $acc02,$acc02,$mask1b - and $acc03,$acc03,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - - and $acc04,$acc00,$mask80 # r1=r2&0x80808080 - and $acc05,$acc01,$mask80 - and $acc06,$acc02,$mask80 - and $acc07,$acc03,$mask80 - srwi $acc08,$acc04,7 # r1>>7 - srwi $acc09,$acc05,7 - srwi $acc10,$acc06,7 - srwi $acc11,$acc07,7 - andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f - andc $acc13,$acc01,$mask80 - andc $acc14,$acc02,$mask80 - andc $acc15,$acc03,$mask80 - sub $acc04,$acc04,$acc08 # r1-(r1>>7) - sub $acc05,$acc05,$acc09 - sub $acc06,$acc06,$acc10 - sub $acc07,$acc07,$acc11 - add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 - add $acc13,$acc13,$acc13 - add $acc14,$acc14,$acc14 - add $acc15,$acc15,$acc15 - and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc05,$acc05,$mask1b - and $acc06,$acc06,$mask1b - and $acc07,$acc07,$mask1b - xor $acc04,$acc04,$acc12 # r4 - xor $acc05,$acc05,$acc13 - xor $acc06,$acc06,$acc14 - xor $acc07,$acc07,$acc15 - - and $acc08,$acc04,$mask80 # r1=r4&0x80808080 - and $acc09,$acc05,$mask80 - and $acc10,$acc06,$mask80 - and $acc11,$acc07,$mask80 - srwi $acc12,$acc08,7 # r1>>7 - srwi $acc13,$acc09,7 - srwi $acc14,$acc10,7 - srwi $acc15,$acc11,7 - sub $acc08,$acc08,$acc12 # r1-(r1>>7) - sub $acc09,$acc09,$acc13 - sub $acc10,$acc10,$acc14 - sub $acc11,$acc11,$acc15 - andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f - andc $acc13,$acc05,$mask80 - andc $acc14,$acc06,$mask80 - andc $acc15,$acc07,$mask80 - add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 - add $acc13,$acc13,$acc13 - add $acc14,$acc14,$acc14 - add $acc15,$acc15,$acc15 - and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc09,$acc09,$mask1b - and $acc10,$acc10,$mask1b - and $acc11,$acc11,$mask1b - xor $acc08,$acc08,$acc12 # r8 - xor $acc09,$acc09,$acc13 - xor $acc10,$acc10,$acc14 - xor $acc11,$acc11,$acc15 - - xor $acc00,$acc00,$s0 # r2^r0 - xor $acc01,$acc01,$s1 - xor $acc02,$acc02,$s2 - xor $acc03,$acc03,$s3 - xor $acc04,$acc04,$s0 # r4^r0 - xor $acc05,$acc05,$s1 - xor $acc06,$acc06,$s2 - xor $acc07,$acc07,$s3 -___ -$code.=<<___; - rotrwi $s0,$s0,8 # = ROTATE(r0,8) - rotrwi $s1,$s1,8 - rotrwi $s2,$s2,8 - rotrwi $s3,$s3,8 - xor $s0,$s0,$acc00 # ^= r2^r0 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - xor $acc00,$acc00,$acc08 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - xor $s0,$s0,$acc04 # ^= r4^r0 - xor $s1,$s1,$acc05 - xor $s2,$s2,$acc06 - xor $s3,$s3,$acc07 - rotrwi $acc00,$acc00,24 - rotrwi $acc01,$acc01,24 - rotrwi $acc02,$acc02,24 - rotrwi $acc03,$acc03,24 - xor $acc04,$acc04,$acc08 - xor $acc05,$acc05,$acc09 - xor $acc06,$acc06,$acc10 - xor $acc07,$acc07,$acc11 - xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - rotrwi $acc04,$acc04,16 - rotrwi $acc05,$acc05,16 - rotrwi $acc06,$acc06,16 - rotrwi $acc07,$acc07,16 - xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotrwi $acc08,$acc08,8 - rotrwi $acc09,$acc09,8 - rotrwi $acc10,$acc10,8 - rotrwi $acc11,$acc11,8 - xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) - xor $s1,$s1,$acc05 - xor $s2,$s2,$acc06 - xor $s3,$s3,$acc07 - xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - - b Ldec_compact_loop -.align 4 -Ldec_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr -.long 0 -.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>" -.align 7 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/aes/asm/aes-s390x.pl b/crypto/openssl/crypto/aes/asm/aes-s390x.pl deleted file mode 100755 index 4b27afd..0000000 --- a/crypto/openssl/crypto/aes/asm/aes-s390x.pl +++ /dev/null @@ -1,1333 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for s390x. - -# April 2007. -# -# Software performance improvement over gcc-generated code is ~70% and -# in absolute terms is ~73 cycles per byte processed with 128-bit key. -# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are -# *strictly* in-order execution and issued instruction [in this case -# load value from memory is critical] has to complete before execution -# flow proceeds. S-boxes are compressed to 2KB[+256B]. -# -# As for hardware acceleration support. It's basically a "teaser," as -# it can and should be improved in several ways. Most notably support -# for CBC is not utilized, nor multiple blocks are ever processed. -# Then software key schedule can be postponed till hardware support -# detection... Performance improvement over assembler is reportedly -# ~2.5x, but can reach >8x [naturally on larger chunks] if proper -# support is implemented. - -# May 2007. -# -# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided -# for 128-bit keys, if hardware support is detected. - -# Januray 2009. -# -# Add support for hardware AES192/256 and reschedule instructions to -# minimize/avoid Address Generation Interlock hazard and to favour -# dual-issue z10 pipeline. This gave ~25% improvement on z10 and -# almost 50% on z9. The gain is smaller on z10, because being dual- -# issue z10 makes it improssible to eliminate the interlock condition: -# critial path is not long enough. Yet it spends ~24 cycles per byte -# processed with 128-bit key. -# -# Unlike previous version hardware support detection takes place only -# at the moment of key schedule setup, which is denoted in key->rounds. -# This is done, because deferred key setup can't be made MT-safe, not -# for key lengthes longer than 128 bits. -# -# Add AES_cbc_encrypt, which gives incredible performance improvement, -# it was measured to be ~6.6x. It's less than previously mentioned 8x, -# because software implementation was optimized. - -$softonly=0; # allow hardware support - -$t0="%r0"; $mask="%r0"; -$t1="%r1"; -$t2="%r2"; $inp="%r2"; -$t3="%r3"; $out="%r3"; $bits="%r3"; -$key="%r4"; -$i1="%r5"; -$i2="%r6"; -$i3="%r7"; -$s0="%r8"; -$s1="%r9"; -$s2="%r10"; -$s3="%r11"; -$tbl="%r12"; -$rounds="%r13"; -$ra="%r14"; -$sp="%r15"; - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } -} - -$code=<<___; -.text - -.type AES_Te,\@object -.align 256 -AES_Te: -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; -# Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -# rcon[] -.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.align 256 -.size AES_Te,.-AES_Te - -# void AES_encrypt(const unsigned char *inp, unsigned char *out, -# const AES_KEY *key) { -.globl AES_encrypt -.type AES_encrypt,\@function -AES_encrypt: -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Lesoft - - la %r1,0($key) - #la %r2,0($inp) - la %r4,0($out) - lghi %r3,16 # single block length - .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # can this happen? - br %r14 -.align 64 -.Lesoft: -___ -$code.=<<___; - stmg %r3,$ra,24($sp) - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - - larl $tbl,AES_Te - bras $ra,_s390x_AES_encrypt - - lg $out,24($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - lmg %r6,$ra,48($sp) - br $ra -.size AES_encrypt,.-AES_encrypt - -.type _s390x_AES_encrypt,\@function -.align 16 -_s390x_AES_encrypt: - stg $ra,152($sp) - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $s3,12($key) - l $rounds,240($key) - llill $mask,`0xff<<3` - aghi $rounds,-1 - j .Lenc_loop -.align 16 -.Lenc_loop: - sllg $t1,$s0,`0+3` - srlg $t2,$s0,`8-3` - srlg $t3,$s0,`16-3` - srl $s0,`24-3` - nr $s0,$mask - ngr $t1,$mask - nr $t2,$mask - nr $t3,$mask - - srlg $i1,$s1,`16-3` # i0 - sllg $i2,$s1,`0+3` - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - nr $i1,$mask - nr $s1,$mask - ngr $i2,$mask - nr $i3,$mask - - l $s0,0($s0,$tbl) # Te0[s0>>24] - l $t1,1($t1,$tbl) # Te3[s0>>0] - l $t2,2($t2,$tbl) # Te2[s0>>8] - l $t3,3($t3,$tbl) # Te1[s0>>16] - - x $s0,3($i1,$tbl) # Te1[s1>>16] - l $s1,0($s1,$tbl) # Te0[s1>>24] - x $t2,1($i2,$tbl) # Te3[s1>>0] - x $t3,2($i3,$tbl) # Te2[s1>>8] - - srlg $i1,$s2,`8-3` # i0 - srlg $i2,$s2,`16-3` # i1 - nr $i1,$mask - nr $i2,$mask - sllg $i3,$s2,`0+3` - srl $s2,`24-3` - nr $s2,$mask - ngr $i3,$mask - - xr $s1,$t1 - srlg $ra,$s3,`8-3` # i1 - sllg $t1,$s3,`0+3` # i0 - nr $ra,$mask - la $key,16($key) - ngr $t1,$mask - - x $s0,2($i1,$tbl) # Te2[s2>>8] - x $s1,3($i2,$tbl) # Te1[s2>>16] - l $s2,0($s2,$tbl) # Te0[s2>>24] - x $t3,1($i3,$tbl) # Te3[s2>>0] - - srlg $i3,$s3,`16-3` # i2 - xr $s2,$t2 - srl $s3,`24-3` - nr $i3,$mask - nr $s3,$mask - - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $t3,12($key) - - x $s0,1($t1,$tbl) # Te3[s3>>0] - x $s1,2($ra,$tbl) # Te2[s3>>8] - x $s2,3($i3,$tbl) # Te1[s3>>16] - l $s3,0($s3,$tbl) # Te0[s3>>24] - xr $s3,$t3 - - brct $rounds,.Lenc_loop - .align 16 - - sllg $t1,$s0,`0+3` - srlg $t2,$s0,`8-3` - ngr $t1,$mask - srlg $t3,$s0,`16-3` - srl $s0,`24-3` - nr $s0,$mask - nr $t2,$mask - nr $t3,$mask - - srlg $i1,$s1,`16-3` # i0 - sllg $i2,$s1,`0+3` - ngr $i2,$mask - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - nr $i1,$mask - nr $s1,$mask - nr $i3,$mask - - llgc $s0,2($s0,$tbl) # Te4[s0>>24] - llgc $t1,2($t1,$tbl) # Te4[s0>>0] - sll $s0,24 - llgc $t2,2($t2,$tbl) # Te4[s0>>8] - llgc $t3,2($t3,$tbl) # Te4[s0>>16] - sll $t2,8 - sll $t3,16 - - llgc $i1,2($i1,$tbl) # Te4[s1>>16] - llgc $s1,2($s1,$tbl) # Te4[s1>>24] - llgc $i2,2($i2,$tbl) # Te4[s1>>0] - llgc $i3,2($i3,$tbl) # Te4[s1>>8] - sll $i1,16 - sll $s1,24 - sll $i3,8 - or $s0,$i1 - or $s1,$t1 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s2,`8-3` # i0 - srlg $i2,$s2,`16-3` # i1 - nr $i1,$mask - nr $i2,$mask - sllg $i3,$s2,`0+3` - srl $s2,`24-3` - ngr $i3,$mask - nr $s2,$mask - - sllg $t1,$s3,`0+3` # i0 - srlg $ra,$s3,`8-3` # i1 - ngr $t1,$mask - - llgc $i1,2($i1,$tbl) # Te4[s2>>8] - llgc $i2,2($i2,$tbl) # Te4[s2>>16] - sll $i1,8 - llgc $s2,2($s2,$tbl) # Te4[s2>>24] - llgc $i3,2($i3,$tbl) # Te4[s2>>0] - sll $i2,16 - nr $ra,$mask - sll $s2,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$t2 - or $t3,$i3 - - srlg $i3,$s3,`16-3` # i2 - srl $s3,`24-3` - nr $i3,$mask - nr $s3,$mask - - l $t0,16($key) - l $t2,20($key) - - llgc $i1,2($t1,$tbl) # Te4[s3>>0] - llgc $i2,2($ra,$tbl) # Te4[s3>>8] - llgc $i3,2($i3,$tbl) # Te4[s3>>16] - llgc $s3,2($s3,$tbl) # Te4[s3>>24] - sll $i2,8 - sll $i3,16 - sll $s3,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$i3 - or $s3,$t3 - - lg $ra,152($sp) - xr $s0,$t0 - xr $s1,$t2 - x $s2,24($key) - x $s3,28($key) - - br $ra -.size _s390x_AES_encrypt,.-_s390x_AES_encrypt -___ - -$code.=<<___; -.type AES_Td,\@object -.align 256 -AES_Td: -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; -# Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -# void AES_decrypt(const unsigned char *inp, unsigned char *out, -# const AES_KEY *key) { -.globl AES_decrypt -.type AES_decrypt,\@function -AES_decrypt: -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Ldsoft - - la %r1,0($key) - #la %r2,0($inp) - la %r4,0($out) - lghi %r3,16 # single block length - .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # can this happen? - br %r14 -.align 64 -.Ldsoft: -___ -$code.=<<___; - stmg %r3,$ra,24($sp) - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - - larl $tbl,AES_Td - bras $ra,_s390x_AES_decrypt - - lg $out,24($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - lmg %r6,$ra,48($sp) - br $ra -.size AES_decrypt,.-AES_decrypt - -.type _s390x_AES_decrypt,\@function -.align 16 -_s390x_AES_decrypt: - stg $ra,152($sp) - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $s3,12($key) - l $rounds,240($key) - llill $mask,`0xff<<3` - aghi $rounds,-1 - j .Ldec_loop -.align 16 -.Ldec_loop: - srlg $t1,$s0,`16-3` - srlg $t2,$s0,`8-3` - sllg $t3,$s0,`0+3` - srl $s0,`24-3` - nr $s0,$mask - nr $t1,$mask - nr $t2,$mask - ngr $t3,$mask - - sllg $i1,$s1,`0+3` # i0 - srlg $i2,$s1,`16-3` - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - ngr $i1,$mask - nr $s1,$mask - nr $i2,$mask - nr $i3,$mask - - l $s0,0($s0,$tbl) # Td0[s0>>24] - l $t1,3($t1,$tbl) # Td1[s0>>16] - l $t2,2($t2,$tbl) # Td2[s0>>8] - l $t3,1($t3,$tbl) # Td3[s0>>0] - - x $s0,1($i1,$tbl) # Td3[s1>>0] - l $s1,0($s1,$tbl) # Td0[s1>>24] - x $t2,3($i2,$tbl) # Td1[s1>>16] - x $t3,2($i3,$tbl) # Td2[s1>>8] - - srlg $i1,$s2,`8-3` # i0 - sllg $i2,$s2,`0+3` # i1 - srlg $i3,$s2,`16-3` - srl $s2,`24-3` - nr $i1,$mask - ngr $i2,$mask - nr $s2,$mask - nr $i3,$mask - - xr $s1,$t1 - srlg $ra,$s3,`8-3` # i1 - srlg $t1,$s3,`16-3` # i0 - nr $ra,$mask - la $key,16($key) - nr $t1,$mask - - x $s0,2($i1,$tbl) # Td2[s2>>8] - x $s1,1($i2,$tbl) # Td3[s2>>0] - l $s2,0($s2,$tbl) # Td0[s2>>24] - x $t3,3($i3,$tbl) # Td1[s2>>16] - - sllg $i3,$s3,`0+3` # i2 - srl $s3,`24-3` - ngr $i3,$mask - nr $s3,$mask - - xr $s2,$t2 - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $t3,12($key) - - x $s0,3($t1,$tbl) # Td1[s3>>16] - x $s1,2($ra,$tbl) # Td2[s3>>8] - x $s2,1($i3,$tbl) # Td3[s3>>0] - l $s3,0($s3,$tbl) # Td0[s3>>24] - xr $s3,$t3 - - brct $rounds,.Ldec_loop - .align 16 - - l $t1,`2048+0`($tbl) # prefetch Td4 - l $t2,`2048+64`($tbl) - l $t3,`2048+128`($tbl) - l $i1,`2048+192`($tbl) - llill $mask,0xff - - srlg $i3,$s0,24 # i0 - srlg $t1,$s0,16 - srlg $t2,$s0,8 - nr $s0,$mask # i3 - nr $t1,$mask - - srlg $i1,$s1,24 - nr $t2,$mask - srlg $i2,$s1,16 - srlg $ra,$s1,8 - nr $s1,$mask # i0 - nr $i2,$mask - nr $ra,$mask - - llgc $i3,2048($i3,$tbl) # Td4[s0>>24] - llgc $t1,2048($t1,$tbl) # Td4[s0>>16] - llgc $t2,2048($t2,$tbl) # Td4[s0>>8] - sll $t1,16 - llgc $t3,2048($s0,$tbl) # Td4[s0>>0] - sllg $s0,$i3,24 - sll $t2,8 - - llgc $s1,2048($s1,$tbl) # Td4[s1>>0] - llgc $i1,2048($i1,$tbl) # Td4[s1>>24] - llgc $i2,2048($i2,$tbl) # Td4[s1>>16] - sll $i1,24 - llgc $i3,2048($ra,$tbl) # Td4[s1>>8] - sll $i2,16 - sll $i3,8 - or $s0,$s1 - or $t1,$i1 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s2,8 # i0 - srlg $i2,$s2,24 - srlg $i3,$s2,16 - nr $s2,$mask # i1 - nr $i1,$mask - nr $i3,$mask - llgc $i1,2048($i1,$tbl) # Td4[s2>>8] - llgc $s1,2048($s2,$tbl) # Td4[s2>>0] - llgc $i2,2048($i2,$tbl) # Td4[s2>>24] - llgc $i3,2048($i3,$tbl) # Td4[s2>>16] - sll $i1,8 - sll $i2,24 - or $s0,$i1 - sll $i3,16 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s3,16 # i0 - srlg $i2,$s3,8 # i1 - srlg $i3,$s3,24 - nr $s3,$mask # i2 - nr $i1,$mask - nr $i2,$mask - - lg $ra,152($sp) - or $s1,$t1 - l $t0,16($key) - l $t1,20($key) - - llgc $i1,2048($i1,$tbl) # Td4[s3>>16] - llgc $i2,2048($i2,$tbl) # Td4[s3>>8] - sll $i1,16 - llgc $s2,2048($s3,$tbl) # Td4[s3>>0] - llgc $s3,2048($i3,$tbl) # Td4[s3>>24] - sll $i2,8 - sll $s3,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$t2 - or $s3,$t3 - - xr $s0,$t0 - xr $s1,$t1 - x $s2,24($key) - x $s3,28($key) - - br $ra -.size _s390x_AES_decrypt,.-_s390x_AES_decrypt -___ - -$code.=<<___; -# void AES_set_encrypt_key(const unsigned char *in, int bits, -# AES_KEY *key) { -.globl AES_set_encrypt_key -.type AES_set_encrypt_key,\@function -.align 16 -AES_set_encrypt_key: - lghi $t0,0 - clgr $inp,$t0 - je .Lminus1 - clgr $key,$t0 - je .Lminus1 - - lghi $t0,128 - clr $bits,$t0 - je .Lproceed - lghi $t0,192 - clr $bits,$t0 - je .Lproceed - lghi $t0,256 - clr $bits,$t0 - je .Lproceed - lghi %r2,-2 - br %r14 - -.align 16 -.Lproceed: -___ -$code.=<<___ if (!$softonly); - # convert bits to km code, [128,192,256]->[18,19,20] - lhi %r5,-128 - lhi %r0,18 - ar %r5,$bits - srl %r5,6 - ar %r5,%r0 - - lghi %r0,0 # query capability vector - la %r1,16($sp) - .long 0xb92f0042 # kmc %r4,%r2 - - llihh %r1,0x8000 - srlg %r1,%r1,0(%r5) - ng %r1,16($sp) - jz .Lekey_internal - - lmg %r0,%r1,0($inp) # just copy 128 bits... - stmg %r0,%r1,0($key) - lhi %r0,192 - cr $bits,%r0 - jl 1f - lg %r1,16($inp) - stg %r1,16($key) - je 1f - lg %r1,24($inp) - stg %r1,24($key) -1: st $bits,236($key) # save bits - st %r5,240($key) # save km code - lghi %r2,0 - br %r14 -___ -$code.=<<___; -.align 16 -.Lekey_internal: - stmg %r6,%r13,48($sp) # all non-volatile regs - - larl $tbl,AES_Te+2048 - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - st $s0,0($key) - st $s1,4($key) - st $s2,8($key) - st $s3,12($key) - lghi $t0,128 - cr $bits,$t0 - jne .Lnot128 - - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,10 - st $rounds,240($key) - - llgfr $t2,$s3 # temp=rk[3] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - srlg $i3,$s3,24 - nr $t2,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L128_loop: - la $t2,0($t2,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8 - icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16 - icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24 - icm $t2,1,0($i3) # Te4[rk[3]>>24] - x $t2,256($t3,$tbl) # rcon[i] - xr $s0,$t2 # rk[4]=rk[0]^... - xr $s1,$s0 # rk[5]=rk[1]^rk[4] - xr $s2,$s1 # rk[6]=rk[2]^rk[5] - xr $s3,$s2 # rk[7]=rk[3]^rk[6] - - llgfr $t2,$s3 # temp=rk[3] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - nr $t2,$mask - nr $i1,$mask - srlg $i3,$s3,24 - nr $i2,$mask - - st $s0,16($key) - st $s1,20($key) - st $s2,24($key) - st $s3,28($key) - la $key,16($key) # key+=4 - la $t3,4($t3) # i++ - brct $rounds,.L128_loop - lghi %r2,0 - lmg %r6,%r13,48($sp) - br $ra - -.align 16 -.Lnot128: - llgf $t0,16($inp) - llgf $t1,20($inp) - st $t0,16($key) - st $t1,20($key) - lghi $t0,192 - cr $bits,$t0 - jne .Lnot192 - - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,12 - st $rounds,240($key) - lghi $rounds,8 - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L192_loop: - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8 - icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16 - icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24 - icm $t1,1,0($i3) # Te4[rk[5]>>24] - x $t1,256($t3,$tbl) # rcon[i] - xr $s0,$t1 # rk[6]=rk[0]^... - xr $s1,$s0 # rk[7]=rk[1]^rk[6] - xr $s2,$s1 # rk[8]=rk[2]^rk[7] - xr $s3,$s2 # rk[9]=rk[3]^rk[8] - - st $s0,24($key) - st $s1,28($key) - st $s2,32($key) - st $s3,36($key) - brct $rounds,.L192_continue - lghi %r2,0 - lmg %r6,%r13,48($sp) - br $ra - -.align 16 -.L192_continue: - lgr $t1,$s3 - x $t1,16($key) # rk[10]=rk[4]^rk[9] - st $t1,40($key) - x $t1,20($key) # rk[11]=rk[5]^rk[10] - st $t1,44($key) - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - - la $key,24($key) # key+=6 - la $t3,4($t3) # i++ - j .L192_loop - -.align 16 -.Lnot192: - llgf $t0,24($inp) - llgf $t1,28($inp) - st $t0,24($key) - st $t1,28($key) - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,14 - st $rounds,240($key) - lghi $rounds,7 - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L256_loop: - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8 - icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16 - icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24 - icm $t1,1,0($i3) # Te4[rk[7]>>24] - x $t1,256($t3,$tbl) # rcon[i] - xr $s0,$t1 # rk[8]=rk[0]^... - xr $s1,$s0 # rk[9]=rk[1]^rk[8] - xr $s2,$s1 # rk[10]=rk[2]^rk[9] - xr $s3,$s2 # rk[11]=rk[3]^rk[10] - st $s0,32($key) - st $s1,36($key) - st $s2,40($key) - st $s3,44($key) - brct $rounds,.L256_continue - lghi %r2,0 - lmg %r6,%r13,48($sp) - br $ra - -.align 16 -.L256_continue: - lgr $t1,$s3 # temp=rk[11] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - srlg $i3,$s3,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - llgc $t1,0($t1) # Te4[rk[11]>>0] - icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8 - icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16 - icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24 - x $t1,16($key) # rk[12]=rk[4]^... - st $t1,48($key) - x $t1,20($key) # rk[13]=rk[5]^rk[12] - st $t1,52($key) - x $t1,24($key) # rk[14]=rk[6]^rk[13] - st $t1,56($key) - x $t1,28($key) # rk[15]=rk[7]^rk[14] - st $t1,60($key) - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - - la $key,32($key) # key+=8 - la $t3,4($t3) # i++ - j .L256_loop - -.Lminus1: - lghi %r2,-1 - br $ra -.size AES_set_encrypt_key,.-AES_set_encrypt_key - -# void AES_set_decrypt_key(const unsigned char *in, int bits, -# AES_KEY *key) { -.globl AES_set_decrypt_key -.type AES_set_decrypt_key,\@function -.align 16 -AES_set_decrypt_key: - stg $key,32($sp) # I rely on AES_set_encrypt_key to - stg $ra,112($sp) # save non-volatile registers! - bras $ra,AES_set_encrypt_key - lg $key,32($sp) - lg $ra,112($sp) - ltgr %r2,%r2 - bnzr $ra -___ -$code.=<<___ if (!$softonly); - l $t0,240($key) - lhi $t1,16 - cr $t0,$t1 - jl .Lgo - oill $t0,0x80 # set "decrypt" bit - st $t0,240($key) - br $ra - -.align 16 -.Ldkey_internal: - stg $key,32($sp) - stg $ra,40($sp) - bras $ra,.Lekey_internal - lg $key,32($sp) - lg $ra,40($sp) -___ -$code.=<<___; - -.Lgo: llgf $rounds,240($key) - la $i1,0($key) - sllg $i2,$rounds,4 - la $i2,0($i2,$key) - srl $rounds,1 - lghi $t1,-16 - -.align 16 -.Linv: lmg $s0,$s1,0($i1) - lmg $s2,$s3,0($i2) - stmg $s0,$s1,0($i2) - stmg $s2,$s3,0($i1) - la $i1,16($i1) - la $i2,0($t1,$i2) - brct $rounds,.Linv -___ -$mask80=$i1; -$mask1b=$i2; -$maskfe=$i3; -$code.=<<___; - llgf $rounds,240($key) - aghi $rounds,-1 - sll $rounds,2 # (rounds-1)*4 - llilh $mask80,0x8080 - llilh $mask1b,0x1b1b - llilh $maskfe,0xfefe - oill $mask80,0x8080 - oill $mask1b,0x1b1b - oill $maskfe,0xfefe - -.align 16 -.Lmix: l $s0,16($key) # tp1 - lr $s1,$s0 - ngr $s1,$mask80 - srlg $t1,$s1,7 - slr $s1,$t1 - nr $s1,$mask1b - sllg $t1,$s0,1 - nr $t1,$maskfe - xr $s1,$t1 # tp2 - - lr $s2,$s1 - ngr $s2,$mask80 - srlg $t1,$s2,7 - slr $s2,$t1 - nr $s2,$mask1b - sllg $t1,$s1,1 - nr $t1,$maskfe - xr $s2,$t1 # tp4 - - lr $s3,$s2 - ngr $s3,$mask80 - srlg $t1,$s3,7 - slr $s3,$t1 - nr $s3,$mask1b - sllg $t1,$s2,1 - nr $t1,$maskfe - xr $s3,$t1 # tp8 - - xr $s1,$s0 # tp2^tp1 - xr $s2,$s0 # tp4^tp1 - rll $s0,$s0,24 # = ROTATE(tp1,8) - xr $s2,$s3 # ^=tp8 - xr $s0,$s1 # ^=tp2^tp1 - xr $s1,$s3 # tp2^tp1^tp8 - xr $s0,$s2 # ^=tp4^tp1^tp8 - rll $s1,$s1,8 - rll $s2,$s2,16 - xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24) - rll $s3,$s3,24 - xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16) - xr $s0,$s3 # ^= ROTATE(tp8,8) - - st $s0,16($key) - la $key,4($key) - brct $rounds,.Lmix - - lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key! - lghi %r2,0 - br $ra -.size AES_set_decrypt_key,.-AES_set_decrypt_key -___ - -#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivec, const int enc) -{ -my $inp="%r2"; -my $out="%r4"; # length and out are swapped -my $len="%r3"; -my $key="%r5"; -my $ivp="%r6"; - -$code.=<<___; -.globl AES_cbc_encrypt -.type AES_cbc_encrypt,\@function -.align 16 -AES_cbc_encrypt: - xgr %r3,%r4 # flip %r3 and %r4, out and len - xgr %r4,%r3 - xgr %r3,%r4 -___ -$code.=<<___ if (!$softonly); - lhi %r0,16 - cl %r0,240($key) - jh .Lcbc_software - - lg %r0,0($ivp) # copy ivec - lg %r1,8($ivp) - stmg %r0,%r1,16($sp) - lmg %r0,%r1,0($key) # copy key, cover 256 bit - stmg %r0,%r1,32($sp) - lmg %r0,%r1,16($key) - stmg %r0,%r1,48($sp) - l %r0,240($key) # load kmc code - lghi $key,15 # res=len%16, len-=res; - ngr $key,$len - slgr $len,$key - la %r1,16($sp) # parameter block - ivec || key - jz .Lkmc_truncated - .long 0xb92f0042 # kmc %r4,%r2 - brc 1,.-4 # pay attention to "partial completion" - ltr $key,$key - jnz .Lkmc_truncated -.Lkmc_done: - lmg %r0,%r1,16($sp) # copy ivec to caller - stg %r0,0($ivp) - stg %r1,8($ivp) - br $ra -.align 16 -.Lkmc_truncated: - ahi $key,-1 # it's the way it's encoded in mvc - tmll %r0,0x80 - jnz .Lkmc_truncated_dec - lghi %r1,0 - stg %r1,128($sp) - stg %r1,136($sp) - bras %r1,1f - mvc 128(1,$sp),0($inp) -1: ex $key,0(%r1) - la %r1,16($sp) # restore parameter block - la $inp,128($sp) - lghi $len,16 - .long 0xb92f0042 # kmc %r4,%r2 - j .Lkmc_done -.align 16 -.Lkmc_truncated_dec: - stg $out,64($sp) - la $out,128($sp) - lghi $len,16 - .long 0xb92f0042 # kmc %r4,%r2 - lg $out,64($sp) - bras %r1,2f - mvc 0(1,$out),128($sp) -2: ex $key,0(%r1) - j .Lkmc_done -.align 16 -.Lcbc_software: -___ -$code.=<<___; - stmg $key,$ra,40($sp) - lhi %r0,0 - cl %r0,164($sp) - je .Lcbc_decrypt - - larl $tbl,AES_Te - - llgf $s0,0($ivp) - llgf $s1,4($ivp) - llgf $s2,8($ivp) - llgf $s3,12($ivp) - - lghi $t0,16 - slgr $len,$t0 - brc 4,.Lcbc_enc_tail # if borrow -.Lcbc_enc_loop: - stmg $inp,$out,16($sp) - x $s0,0($inp) - x $s1,4($inp) - x $s2,8($inp) - x $s3,12($inp) - lgr %r4,$key - - bras $ra,_s390x_AES_encrypt - - lmg $inp,$key,16($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - la $inp,16($inp) - la $out,16($out) - lghi $t0,16 - ltgr $len,$len - jz .Lcbc_enc_done - slgr $len,$t0 - brc 4,.Lcbc_enc_tail # if borrow - j .Lcbc_enc_loop -.align 16 -.Lcbc_enc_done: - lg $ivp,48($sp) - st $s0,0($ivp) - st $s1,4($ivp) - st $s2,8($ivp) - st $s3,12($ivp) - - lmg %r7,$ra,56($sp) - br $ra - -.align 16 -.Lcbc_enc_tail: - aghi $len,15 - lghi $t0,0 - stg $t0,128($sp) - stg $t0,136($sp) - bras $t1,3f - mvc 128(1,$sp),0($inp) -3: ex $len,0($t1) - lghi $len,0 - la $inp,128($sp) - j .Lcbc_enc_loop - -.align 16 -.Lcbc_decrypt: - larl $tbl,AES_Td - - lg $t0,0($ivp) - lg $t1,8($ivp) - stmg $t0,$t1,128($sp) - -.Lcbc_dec_loop: - stmg $inp,$out,16($sp) - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - lgr %r4,$key - - bras $ra,_s390x_AES_decrypt - - lmg $inp,$key,16($sp) - sllg $s0,$s0,32 - sllg $s2,$s2,32 - lr $s0,$s1 - lr $s2,$s3 - - lg $t0,0($inp) - lg $t1,8($inp) - xg $s0,128($sp) - xg $s2,136($sp) - lghi $s1,16 - slgr $len,$s1 - brc 4,.Lcbc_dec_tail # if borrow - brc 2,.Lcbc_dec_done # if zero - stg $s0,0($out) - stg $s2,8($out) - stmg $t0,$t1,128($sp) - - la $inp,16($inp) - la $out,16($out) - j .Lcbc_dec_loop - -.Lcbc_dec_done: - stg $s0,0($out) - stg $s2,8($out) -.Lcbc_dec_exit: - lmg $ivp,$ra,48($sp) - stmg $t0,$t1,0($ivp) - - br $ra - -.align 16 -.Lcbc_dec_tail: - aghi $len,15 - stg $s0,128($sp) - stg $s2,136($sp) - bras $s1,4f - mvc 0(1,$out),128($sp) -4: ex $len,0($s1) - j .Lcbc_dec_exit -.size AES_cbc_encrypt,.-AES_cbc_encrypt -___ -} -$code.=<<___; -.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; diff --git a/crypto/openssl/crypto/aes/asm/aes-sparcv9.pl b/crypto/openssl/crypto/aes/asm/aes-sparcv9.pl deleted file mode 100755 index c57b3a2..0000000 --- a/crypto/openssl/crypto/aes/asm/aes-sparcv9.pl +++ /dev/null @@ -1,1181 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# Version 1.1 -# -# The major reason for undertaken effort was to mitigate the hazard of -# cache-timing attack. This is [currently and initially!] addressed in -# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each. -# 2. References to them are scheduled for L2 cache latency, meaning -# that the tables don't have to reside in L1 cache. Once again, this -# is an initial draft and one should expect more countermeasures to -# be implemented... -# -# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last -# round. -# -# Even though performance was not the primary goal [on the contrary, -# extra shifts "induced" by compressed S-box and longer loop epilogue -# "induced" by scheduling for L2 have negative effect on performance], -# the code turned out to run in ~23 cycles per processed byte en-/ -# decrypted with 128-bit key. This is pretty good result for code -# with mentioned qualities and UltraSPARC core. Compared to Sun C -# generated code my encrypt procedure runs just few percents faster, -# while decrypt one - whole 50% faster [yes, Sun C failed to generate -# optimal decrypt procedure]. Compared to GNU C generated code both -# procedures are more than 60% faster:-) - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } -$locals=16; - -$acc0="%l0"; -$acc1="%o0"; -$acc2="%o1"; -$acc3="%o2"; - -$acc4="%l1"; -$acc5="%o3"; -$acc6="%o4"; -$acc7="%o5"; - -$acc8="%l2"; -$acc9="%o7"; -$acc10="%g1"; -$acc11="%g2"; - -$acc12="%l3"; -$acc13="%g3"; -$acc14="%g4"; -$acc15="%g5"; - -$t0="%l4"; -$t1="%l5"; -$t2="%l6"; -$t3="%l7"; - -$s0="%i0"; -$s1="%i1"; -$s2="%i2"; -$s3="%i3"; -$tbl="%i4"; -$key="%i5"; -$rounds="%i7"; # aliases with return address, which is off-loaded to stack - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } -} - -$code.=<<___ if ($bits==64); -.register %g2,#scratch -.register %g3,#scratch -___ -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 256 -AES_Te: -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; - .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e - .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -.type AES_Te,#object -.size AES_Te,(.-AES_Te) - -.align 64 -.skip 16 -_sparcv9_AES_encrypt: - save %sp,-$frame-$locals,%sp - stx %i7,[%sp+$bias+$frame+0] ! off-load return address - ld [$key+240],$rounds - ld [$key+0],$t0 - ld [$key+4],$t1 ! - ld [$key+8],$t2 - srl $rounds,1,$rounds - xor $t0,$s0,$s0 - ld [$key+12],$t3 - srl $s0,21,$acc0 - xor $t1,$s1,$s1 - ld [$key+16],$t0 - srl $s1,13,$acc1 ! - xor $t2,$s2,$s2 - ld [$key+20],$t1 - xor $t3,$s3,$s3 - ld [$key+24],$t2 - and $acc0,2040,$acc0 - ld [$key+28],$t3 - nop -.Lenc_loop: - srl $s2,5,$acc2 ! - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $s3,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - srl $s1,21,$acc4 - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 ! - srl $s2,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $s3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 - fmovs %f0,%f0 - sll $s0,3,$acc7 ! - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $s2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - srl $s3,13,$acc9 - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 ! - srl $s0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $s1,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 - fmovs %f0,%f0 - srl $s3,21,$acc12 ! - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $s0,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - srl $s1,5,$acc14 - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 ! - sll $s2,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - and $acc15,2040,$acc15 - add $key,32,$key - ldx [$tbl+$acc14],$acc14 - fmovs %f0,%f0 - subcc $rounds,1,$rounds ! - ldx [$tbl+$acc15],$acc15 - bz,a,pn %icc,.Lenc_last - add $tbl,2048,$rounds - - srlx $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - fmovs %f0,%f0 - srlx $acc2,16,$acc2 ! - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 ! - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - fmovs %f0,%f0 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 ! - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 ! - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,21,$acc0 - xor $acc14,$t3,$t3 - srl $t1,13,$acc1 - xor $acc15,$t3,$t3 - - and $acc0,2040,$acc0 ! - srl $t2,5,$acc2 - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $t3,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - fmovs %f0,%f0 - srl $t1,21,$acc4 ! - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 - srl $t2,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $t3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 ! - sll $t0,3,$acc7 - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $t2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - fmovs %f0,%f0 - srl $t3,13,$acc9 ! - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 - srl $t0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $t1,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 ! - srl $t3,21,$acc12 - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $t0,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - fmovs %f0,%f0 - srl $t1,5,$acc14 ! - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 - sll $t2,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - srlx $acc1,8,$acc1 - and $acc15,2040,$acc15 - ldx [$tbl+$acc14],$acc14 ! - - srlx $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldx [$tbl+$acc15],$acc15 - srlx $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ld [$key+16],$t0 - fmovs %f0,%f0 - srlx $acc5,8,$acc5 ! - xor $acc2,$s0,$s0 - ld [$key+20],$t1 - srlx $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ld [$key+24],$t2 - srlx $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ld [$key+28],$t3 ! - srlx $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldx [$tbl+2048+0],%g0 ! prefetch te4 - srlx $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldx [$tbl+2048+32],%g0 ! prefetch te4 - srlx $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldx [$tbl+2048+64],%g0 ! prefetch te4 - srlx $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldx [$tbl+2048+96],%g0 ! prefetch te4 - srlx $acc14,16,$acc14 ! - xor $acc9,$s2,$s2 - ldx [$tbl+2048+128],%g0 ! prefetch te4 - srlx $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldx [$tbl+2048+160],%g0 ! prefetch te4 - srl $s0,21,$acc0 - xor $acc11,$s2,$s2 - ldx [$tbl+2048+192],%g0 ! prefetch te4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldx [$tbl+2048+224],%g0 ! prefetch te4 - srl $s1,13,$acc1 ! - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - ba .Lenc_loop - and $acc0,2040,$acc0 - -.align 32 -.Lenc_last: - srlx $acc1,8,$acc1 ! - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - srlx $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 ! - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 ! - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 ! - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,24,$acc0 - xor $acc14,$t3,$t3 - srl $t1,16,$acc1 ! - xor $acc15,$t3,$t3 - - srl $t2,8,$acc2 - and $acc1,255,$acc1 - ldub [$rounds+$acc0],$acc0 - srl $t1,24,$acc4 - and $acc2,255,$acc2 - ldub [$rounds+$acc1],$acc1 - srl $t2,16,$acc5 ! - and $t3,255,$acc3 - ldub [$rounds+$acc2],$acc2 - ldub [$rounds+$acc3],$acc3 - srl $t3,8,$acc6 - and $acc5,255,$acc5 - ldub [$rounds+$acc4],$acc4 - fmovs %f0,%f0 - srl $t2,24,$acc8 ! - and $acc6,255,$acc6 - ldub [$rounds+$acc5],$acc5 - srl $t3,16,$acc9 - and $t0,255,$acc7 - ldub [$rounds+$acc6],$acc6 - ldub [$rounds+$acc7],$acc7 - fmovs %f0,%f0 - srl $t0,8,$acc10 ! - and $acc9,255,$acc9 - ldub [$rounds+$acc8],$acc8 - srl $t3,24,$acc12 - and $acc10,255,$acc10 - ldub [$rounds+$acc9],$acc9 - srl $t0,16,$acc13 - and $t1,255,$acc11 - ldub [$rounds+$acc10],$acc10 ! - srl $t1,8,$acc14 - and $acc13,255,$acc13 - ldub [$rounds+$acc11],$acc11 - ldub [$rounds+$acc12],$acc12 - and $acc14,255,$acc14 - ldub [$rounds+$acc13],$acc13 - and $t2,255,$acc15 - ldub [$rounds+$acc14],$acc14 ! - - sll $acc0,24,$acc0 - xor $acc3,$s0,$s0 - ldub [$rounds+$acc15],$acc15 - sll $acc1,16,$acc1 - xor $acc0,$s0,$s0 - ldx [%sp+$bias+$frame+0],%i7 ! restore return address - fmovs %f0,%f0 - sll $acc2,8,$acc2 ! - xor $acc1,$s0,$s0 - sll $acc4,24,$acc4 - xor $acc2,$s0,$s0 - sll $acc5,16,$acc5 - xor $acc7,$s1,$s1 - sll $acc6,8,$acc6 - xor $acc4,$s1,$s1 - sll $acc8,24,$acc8 ! - xor $acc5,$s1,$s1 - sll $acc9,16,$acc9 - xor $acc11,$s2,$s2 - sll $acc10,8,$acc10 - xor $acc6,$s1,$s1 - sll $acc12,24,$acc12 - xor $acc8,$s2,$s2 - sll $acc13,16,$acc13 ! - xor $acc9,$s2,$s2 - sll $acc14,8,$acc14 - xor $acc10,$s2,$s2 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - - ret - restore -.type _sparcv9_AES_encrypt,#function -.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt) - -.align 32 -.globl AES_encrypt -AES_encrypt: - or %o0,%o1,%g1 - andcc %g1,3,%g0 - bnz,pn %xcc,.Lunaligned_enc - save %sp,-$frame,%sp - - ld [%i0+0],%o0 - ld [%i0+4],%o1 - ld [%i0+8],%o2 - ld [%i0+12],%o3 - -1: call .+8 - add %o7,AES_Te-1b,%o4 - call _sparcv9_AES_encrypt - mov %i2,%o5 - - st %o0,[%i1+0] - st %o1,[%i1+4] - st %o2,[%i1+8] - st %o3,[%i1+12] - - ret - restore - -.align 32 -.Lunaligned_enc: - ldub [%i0+0],%l0 - ldub [%i0+1],%l1 - ldub [%i0+2],%l2 - - sll %l0,24,%l0 - ldub [%i0+3],%l3 - sll %l1,16,%l1 - ldub [%i0+4],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+5],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+6],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o0 - ldub [%i0+7],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - ldub [%i0+8],%l0 - or %l7,%l6,%l6 - ldub [%i0+9],%l1 - or %l4,%l6,%o1 - ldub [%i0+10],%l2 - - sll %l0,24,%l0 - ldub [%i0+11],%l3 - sll %l1,16,%l1 - ldub [%i0+12],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+13],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+14],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o2 - ldub [%i0+15],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - or %l7,%l6,%l6 - or %l4,%l6,%o3 - -1: call .+8 - add %o7,AES_Te-1b,%o4 - call _sparcv9_AES_encrypt - mov %i2,%o5 - - srl %o0,24,%l0 - srl %o0,16,%l1 - stb %l0,[%i1+0] - srl %o0,8,%l2 - stb %l1,[%i1+1] - stb %l2,[%i1+2] - srl %o1,24,%l4 - stb %o0,[%i1+3] - - srl %o1,16,%l5 - stb %l4,[%i1+4] - srl %o1,8,%l6 - stb %l5,[%i1+5] - stb %l6,[%i1+6] - srl %o2,24,%l0 - stb %o1,[%i1+7] - - srl %o2,16,%l1 - stb %l0,[%i1+8] - srl %o2,8,%l2 - stb %l1,[%i1+9] - stb %l2,[%i1+10] - srl %o3,24,%l4 - stb %o2,[%i1+11] - - srl %o3,16,%l5 - stb %l4,[%i1+12] - srl %o3,8,%l6 - stb %l5,[%i1+13] - stb %l6,[%i1+14] - stb %o3,[%i1+15] - - ret - restore -.type AES_encrypt,#function -.size AES_encrypt,(.-AES_encrypt) - -___ - -$code.=<<___; -.align 256 -AES_Td: -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.type AES_Td,#object -.size AES_Td,(.-AES_Td) - -.align 64 -.skip 16 -_sparcv9_AES_decrypt: - save %sp,-$frame-$locals,%sp - stx %i7,[%sp+$bias+$frame+0] ! off-load return address - ld [$key+240],$rounds - ld [$key+0],$t0 - ld [$key+4],$t1 ! - ld [$key+8],$t2 - ld [$key+12],$t3 - srl $rounds,1,$rounds - xor $t0,$s0,$s0 - ld [$key+16],$t0 - xor $t1,$s1,$s1 - ld [$key+20],$t1 - srl $s0,21,$acc0 ! - xor $t2,$s2,$s2 - ld [$key+24],$t2 - xor $t3,$s3,$s3 - and $acc0,2040,$acc0 - ld [$key+28],$t3 - srl $s3,13,$acc1 - nop -.Ldec_loop: - srl $s2,5,$acc2 ! - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $s1,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - srl $s1,21,$acc4 - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 ! - srl $s0,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $s3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 - fmovs %f0,%f0 - sll $s2,3,$acc7 ! - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $s2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - srl $s1,13,$acc9 - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 ! - srl $s0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $s3,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 - fmovs %f0,%f0 - srl $s3,21,$acc12 ! - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $s2,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - srl $s1,5,$acc14 - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 ! - sll $s0,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - and $acc15,2040,$acc15 - add $key,32,$key - ldx [$tbl+$acc14],$acc14 - fmovs %f0,%f0 - subcc $rounds,1,$rounds ! - ldx [$tbl+$acc15],$acc15 - bz,a,pn %icc,.Ldec_last - add $tbl,2048,$rounds - - srlx $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - fmovs %f0,%f0 - srlx $acc2,16,$acc2 ! - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 ! - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - fmovs %f0,%f0 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 ! - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 ! - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,21,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 - srl $t3,13,$acc1 - - and $acc0,2040,$acc0 ! - srl $t2,5,$acc2 - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $t1,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - fmovs %f0,%f0 - srl $t1,21,$acc4 ! - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 - srl $t0,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $t3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 ! - sll $t2,3,$acc7 - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $t2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - fmovs %f0,%f0 - srl $t1,13,$acc9 ! - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 - srl $t0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $t3,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 ! - srl $t3,21,$acc12 - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $t2,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - fmovs %f0,%f0 - srl $t1,5,$acc14 ! - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 - sll $t0,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - srlx $acc1,8,$acc1 - and $acc15,2040,$acc15 - ldx [$tbl+$acc14],$acc14 ! - - srlx $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldx [$tbl+$acc15],$acc15 - srlx $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ld [$key+16],$t0 - fmovs %f0,%f0 - srlx $acc5,8,$acc5 ! - xor $acc2,$s0,$s0 - ld [$key+20],$t1 - srlx $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ld [$key+24],$t2 - srlx $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ld [$key+28],$t3 ! - srlx $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldx [$tbl+2048+0],%g0 ! prefetch td4 - srlx $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldx [$tbl+2048+32],%g0 ! prefetch td4 - srlx $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldx [$tbl+2048+64],%g0 ! prefetch td4 - srlx $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldx [$tbl+2048+96],%g0 ! prefetch td4 - srlx $acc14,16,$acc14 ! - xor $acc9,$s2,$s2 - ldx [$tbl+2048+128],%g0 ! prefetch td4 - srlx $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldx [$tbl+2048+160],%g0 ! prefetch td4 - srl $s0,21,$acc0 - xor $acc11,$s2,$s2 - ldx [$tbl+2048+192],%g0 ! prefetch td4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldx [$tbl+2048+224],%g0 ! prefetch td4 - and $acc0,2040,$acc0 ! - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - ba .Ldec_loop - srl $s3,13,$acc1 - -.align 32 -.Ldec_last: - srlx $acc1,8,$acc1 ! - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - srlx $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 ! - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 ! - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 ! - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,24,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 ! - srl $t3,16,$acc1 - - srl $t2,8,$acc2 - and $acc1,255,$acc1 - ldub [$rounds+$acc0],$acc0 - srl $t1,24,$acc4 - and $acc2,255,$acc2 - ldub [$rounds+$acc1],$acc1 - srl $t0,16,$acc5 ! - and $t1,255,$acc3 - ldub [$rounds+$acc2],$acc2 - ldub [$rounds+$acc3],$acc3 - srl $t3,8,$acc6 - and $acc5,255,$acc5 - ldub [$rounds+$acc4],$acc4 - fmovs %f0,%f0 - srl $t2,24,$acc8 ! - and $acc6,255,$acc6 - ldub [$rounds+$acc5],$acc5 - srl $t1,16,$acc9 - and $t2,255,$acc7 - ldub [$rounds+$acc6],$acc6 - ldub [$rounds+$acc7],$acc7 - fmovs %f0,%f0 - srl $t0,8,$acc10 ! - and $acc9,255,$acc9 - ldub [$rounds+$acc8],$acc8 - srl $t3,24,$acc12 - and $acc10,255,$acc10 - ldub [$rounds+$acc9],$acc9 - srl $t2,16,$acc13 - and $t3,255,$acc11 - ldub [$rounds+$acc10],$acc10 ! - srl $t1,8,$acc14 - and $acc13,255,$acc13 - ldub [$rounds+$acc11],$acc11 - ldub [$rounds+$acc12],$acc12 - and $acc14,255,$acc14 - ldub [$rounds+$acc13],$acc13 - and $t0,255,$acc15 - ldub [$rounds+$acc14],$acc14 ! - - sll $acc0,24,$acc0 - xor $acc3,$s0,$s0 - ldub [$rounds+$acc15],$acc15 - sll $acc1,16,$acc1 - xor $acc0,$s0,$s0 - ldx [%sp+$bias+$frame+0],%i7 ! restore return address - fmovs %f0,%f0 - sll $acc2,8,$acc2 ! - xor $acc1,$s0,$s0 - sll $acc4,24,$acc4 - xor $acc2,$s0,$s0 - sll $acc5,16,$acc5 - xor $acc7,$s1,$s1 - sll $acc6,8,$acc6 - xor $acc4,$s1,$s1 - sll $acc8,24,$acc8 ! - xor $acc5,$s1,$s1 - sll $acc9,16,$acc9 - xor $acc11,$s2,$s2 - sll $acc10,8,$acc10 - xor $acc6,$s1,$s1 - sll $acc12,24,$acc12 - xor $acc8,$s2,$s2 - sll $acc13,16,$acc13 ! - xor $acc9,$s2,$s2 - sll $acc14,8,$acc14 - xor $acc10,$s2,$s2 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - - ret - restore -.type _sparcv9_AES_decrypt,#function -.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt) - -.align 32 -.globl AES_decrypt -AES_decrypt: - or %o0,%o1,%g1 - andcc %g1,3,%g0 - bnz,pn %xcc,.Lunaligned_dec - save %sp,-$frame,%sp - - ld [%i0+0],%o0 - ld [%i0+4],%o1 - ld [%i0+8],%o2 - ld [%i0+12],%o3 - -1: call .+8 - add %o7,AES_Td-1b,%o4 - call _sparcv9_AES_decrypt - mov %i2,%o5 - - st %o0,[%i1+0] - st %o1,[%i1+4] - st %o2,[%i1+8] - st %o3,[%i1+12] - - ret - restore - -.align 32 -.Lunaligned_dec: - ldub [%i0+0],%l0 - ldub [%i0+1],%l1 - ldub [%i0+2],%l2 - - sll %l0,24,%l0 - ldub [%i0+3],%l3 - sll %l1,16,%l1 - ldub [%i0+4],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+5],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+6],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o0 - ldub [%i0+7],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - ldub [%i0+8],%l0 - or %l7,%l6,%l6 - ldub [%i0+9],%l1 - or %l4,%l6,%o1 - ldub [%i0+10],%l2 - - sll %l0,24,%l0 - ldub [%i0+11],%l3 - sll %l1,16,%l1 - ldub [%i0+12],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+13],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+14],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o2 - ldub [%i0+15],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - or %l7,%l6,%l6 - or %l4,%l6,%o3 - -1: call .+8 - add %o7,AES_Td-1b,%o4 - call _sparcv9_AES_decrypt - mov %i2,%o5 - - srl %o0,24,%l0 - srl %o0,16,%l1 - stb %l0,[%i1+0] - srl %o0,8,%l2 - stb %l1,[%i1+1] - stb %l2,[%i1+2] - srl %o1,24,%l4 - stb %o0,[%i1+3] - - srl %o1,16,%l5 - stb %l4,[%i1+4] - srl %o1,8,%l6 - stb %l5,[%i1+5] - stb %l6,[%i1+6] - srl %o2,24,%l0 - stb %o1,[%i1+7] - - srl %o2,16,%l1 - stb %l0,[%i1+8] - srl %o2,8,%l2 - stb %l1,[%i1+9] - stb %l2,[%i1+10] - srl %o3,24,%l4 - stb %o2,[%i1+11] - - srl %o3,16,%l5 - stb %l4,[%i1+12] - srl %o3,8,%l6 - stb %l5,[%i1+13] - stb %l6,[%i1+14] - stb %o3,[%i1+15] - - ret - restore -.type AES_decrypt,#function -.size AES_decrypt,(.-AES_decrypt) -___ - -# fmovs instructions substituting for FP nops were originally added -# to meet specific instruction alignment requirements to maximize ILP. -# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have -# undesired effect, so just omit them and sacrifice some portion of -# percent in performance... -$code =~ s/fmovs.*$//gem; - -print $code; diff --git a/crypto/openssl/crypto/aes/asm/aes-x86_64.pl b/crypto/openssl/crypto/aes/asm/aes-x86_64.pl index f616f17..b008ab5 100755 --- a/crypto/openssl/crypto/aes/asm/aes-x86_64.pl +++ b/crypto/openssl/crypto/aes/asm/aes-x86_64.pl @@ -1181,12 +1181,12 @@ AES_cbc_encrypt: .Lcbc_cleanup: cmpl \$0,$mark # was the key schedule copied? lea $aes_key,%rdi - mov $_rsp,%rsp je .Lcbc_exit mov \$240/8,%ecx xor %rax,%rax .long 0x90AB48F3 # rep stosq .Lcbc_exit: + mov $_rsp,%rsp popfq pop %r15 pop %r14 diff --git a/crypto/openssl/crypto/asn1/a_mbstr.c b/crypto/openssl/crypto/asn1/a_mbstr.c index 1bcd046..1538e0a 100644 --- a/crypto/openssl/crypto/asn1/a_mbstr.c +++ b/crypto/openssl/crypto/asn1/a_mbstr.c @@ -93,7 +93,7 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, int str_type; int ret; char free_out; - int outform, outlen; + int outform, outlen = 0; ASN1_STRING *dest; unsigned char *p; int nchar; diff --git a/crypto/openssl/crypto/asn1/a_object.c b/crypto/openssl/crypto/asn1/a_object.c index dc98042..d169f8c 100644 --- a/crypto/openssl/crypto/asn1/a_object.c +++ b/crypto/openssl/crypto/asn1/a_object.c @@ -291,6 +291,17 @@ ASN1_OBJECT *c2i_ASN1_OBJECT(ASN1_OBJECT **a, const unsigned char **pp, ASN1_OBJECT *ret=NULL; const unsigned char *p; int i; + /* Sanity check OID encoding: can't have 0x80 in subidentifiers, see: + * X.690 8.19.2 + */ + for (i = 0, p = *pp + 1; i < len - 1; i++, p++) + { + if (*p == 0x80) + { + ASN1err(ASN1_F_C2I_ASN1_OBJECT,ASN1_R_INVALID_OBJECT_ENCODING); + return NULL; + } + } /* only the ASN1_OBJECTs from the 'table' will have values * for ->sn or ->ln */ diff --git a/crypto/openssl/crypto/asn1/ameth_lib.c b/crypto/openssl/crypto/asn1/ameth_lib.c deleted file mode 100644 index 18957c6..0000000 --- a/crypto/openssl/crypto/asn1/ameth_lib.c +++ /dev/null @@ -1,446 +0,0 @@ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project 2006. - */ -/* ==================================================================== - * Copyright (c) 2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include <stdio.h> -#include "cryptlib.h" -#include <openssl/asn1t.h> -#include <openssl/x509.h> -#ifndef OPENSSL_NO_ENGINE -#include <openssl/engine.h> -#endif -#include "asn1_locl.h" - -extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[]; -extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[]; -extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth; -extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth; -extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth; - -/* Keep this sorted in type order !! */ -static const EVP_PKEY_ASN1_METHOD *standard_methods[] = - { -#ifndef OPENSSL_NO_RSA - &rsa_asn1_meths[0], - &rsa_asn1_meths[1], -#endif -#ifndef OPENSSL_NO_DH - &dh_asn1_meth, -#endif -#ifndef OPENSSL_NO_DSA - &dsa_asn1_meths[0], - &dsa_asn1_meths[1], - &dsa_asn1_meths[2], - &dsa_asn1_meths[3], - &dsa_asn1_meths[4], -#endif -#ifndef OPENSSL_NO_EC - &eckey_asn1_meth, -#endif - &hmac_asn1_meth - }; - -typedef int sk_cmp_fn_type(const char * const *a, const char * const *b); -DECLARE_STACK_OF(EVP_PKEY_ASN1_METHOD) -static STACK_OF(EVP_PKEY_ASN1_METHOD) *app_methods = NULL; - - - -#ifdef TEST -void main() - { - int i; - for (i = 0; - i < sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *); - i++) - fprintf(stderr, "Number %d id=%d (%s)\n", i, - standard_methods[i]->pkey_id, - OBJ_nid2sn(standard_methods[i]->pkey_id)); - } -#endif - -DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_ASN1_METHOD *, - const EVP_PKEY_ASN1_METHOD *, ameth); - -static int ameth_cmp(const EVP_PKEY_ASN1_METHOD * const *a, - const EVP_PKEY_ASN1_METHOD * const *b) - { - return ((*a)->pkey_id - (*b)->pkey_id); - } - -IMPLEMENT_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_ASN1_METHOD *, - const EVP_PKEY_ASN1_METHOD *, ameth); - -int EVP_PKEY_asn1_get_count(void) - { - int num = sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *); - if (app_methods) - num += sk_EVP_PKEY_ASN1_METHOD_num(app_methods); - return num; - } - -const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_get0(int idx) - { - int num = sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *); - if (idx < 0) - return NULL; - if (idx < num) - return standard_methods[idx]; - idx -= num; - return sk_EVP_PKEY_ASN1_METHOD_value(app_methods, idx); - } - -static const EVP_PKEY_ASN1_METHOD *pkey_asn1_find(int type) - { - EVP_PKEY_ASN1_METHOD tmp; - const EVP_PKEY_ASN1_METHOD *t = &tmp, **ret; - tmp.pkey_id = type; - if (app_methods) - { - int idx; - idx = sk_EVP_PKEY_ASN1_METHOD_find(app_methods, &tmp); - if (idx >= 0) - return sk_EVP_PKEY_ASN1_METHOD_value(app_methods, idx); - } - ret = OBJ_bsearch_ameth(&t, standard_methods, - sizeof(standard_methods) - /sizeof(EVP_PKEY_ASN1_METHOD *)); - if (!ret || !*ret) - return NULL; - return *ret; - } - -/* Find an implementation of an ASN1 algorithm. If 'pe' is not NULL - * also search through engines and set *pe to a functional reference - * to the engine implementing 'type' or NULL if no engine implements - * it. - */ - -const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_find(ENGINE **pe, int type) - { - const EVP_PKEY_ASN1_METHOD *t; - ENGINE *e; - - for (;;) - { - t = pkey_asn1_find(type); - if (!t || !(t->pkey_flags & ASN1_PKEY_ALIAS)) - break; - type = t->pkey_base_id; - } - if (pe) - { -#ifndef OPENSSL_NO_ENGINE - /* type will contain the final unaliased type */ - e = ENGINE_get_pkey_asn1_meth_engine(type); - if (e) - { - *pe = e; - return ENGINE_get_pkey_asn1_meth(e, type); - } -#endif - *pe = NULL; - } - return t; - } - -const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_find_str(ENGINE **pe, - const char *str, int len) - { - int i; - const EVP_PKEY_ASN1_METHOD *ameth; - if (len == -1) - len = strlen(str); - if (pe) - { -#ifndef OPENSSL_NO_ENGINE - ENGINE *e; - ameth = ENGINE_pkey_asn1_find_str(&e, str, len); - if (ameth) - { - /* Convert structural into - * functional reference - */ - if (!ENGINE_init(e)) - ameth = NULL; - ENGINE_free(e); - *pe = e; - return ameth; - } -#endif - *pe = NULL; - } - for (i = 0; i < EVP_PKEY_asn1_get_count(); i++) - { - ameth = EVP_PKEY_asn1_get0(i); - if (ameth->pkey_flags & ASN1_PKEY_ALIAS) - continue; - if (((int)strlen(ameth->pem_str) == len) && - !strncasecmp(ameth->pem_str, str, len)) - return ameth; - } - return NULL; - } - -int EVP_PKEY_asn1_add0(const EVP_PKEY_ASN1_METHOD *ameth) - { - if (app_methods == NULL) - { - app_methods = sk_EVP_PKEY_ASN1_METHOD_new(ameth_cmp); - if (!app_methods) - return 0; - } - if (!sk_EVP_PKEY_ASN1_METHOD_push(app_methods, ameth)) - return 0; - sk_EVP_PKEY_ASN1_METHOD_sort(app_methods); - return 1; - } - -int EVP_PKEY_asn1_add_alias(int to, int from) - { - EVP_PKEY_ASN1_METHOD *ameth; - ameth = EVP_PKEY_asn1_new(from, ASN1_PKEY_ALIAS, NULL, NULL); - if (!ameth) - return 0; - ameth->pkey_base_id = to; - return EVP_PKEY_asn1_add0(ameth); - } - -int EVP_PKEY_asn1_get0_info(int *ppkey_id, int *ppkey_base_id, int *ppkey_flags, - const char **pinfo, const char **ppem_str, - const EVP_PKEY_ASN1_METHOD *ameth) - { - if (!ameth) - return 0; - if (ppkey_id) - *ppkey_id = ameth->pkey_id; - if (ppkey_base_id) - *ppkey_base_id = ameth->pkey_base_id; - if (ppkey_flags) - *ppkey_flags = ameth->pkey_flags; - if (pinfo) - *pinfo = ameth->info; - if (ppem_str) - *ppem_str = ameth->pem_str; - return 1; - } - -const EVP_PKEY_ASN1_METHOD* EVP_PKEY_get0_asn1(EVP_PKEY *pkey) - { - return pkey->ameth; - } - -EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags, - const char *pem_str, const char *info) - { - EVP_PKEY_ASN1_METHOD *ameth; - ameth = OPENSSL_malloc(sizeof(EVP_PKEY_ASN1_METHOD)); - if (!ameth) - return NULL; - - ameth->pkey_id = id; - ameth->pkey_base_id = id; - ameth->pkey_flags = flags | ASN1_PKEY_DYNAMIC; - - if (info) - { - ameth->info = BUF_strdup(info); - if (!ameth->info) - goto err; - } - - if (pem_str) - { - ameth->pem_str = BUF_strdup(pem_str); - if (!ameth->pem_str) - goto err; - } - - ameth->pub_decode = 0; - ameth->pub_encode = 0; - ameth->pub_cmp = 0; - ameth->pub_print = 0; - - ameth->priv_decode = 0; - ameth->priv_encode = 0; - ameth->priv_print = 0; - - ameth->old_priv_encode = 0; - ameth->old_priv_decode = 0; - - ameth->pkey_size = 0; - ameth->pkey_bits = 0; - - ameth->param_decode = 0; - ameth->param_encode = 0; - ameth->param_missing = 0; - ameth->param_copy = 0; - ameth->param_cmp = 0; - ameth->param_print = 0; - - ameth->pkey_free = 0; - ameth->pkey_ctrl = 0; - - return ameth; - - err: - - EVP_PKEY_asn1_free(ameth); - return NULL; - - } - -void EVP_PKEY_asn1_copy(EVP_PKEY_ASN1_METHOD *dst, - const EVP_PKEY_ASN1_METHOD *src) - { - - dst->pub_decode = src->pub_decode; - dst->pub_encode = src->pub_encode; - dst->pub_cmp = src->pub_cmp; - dst->pub_print = src->pub_print; - - dst->priv_decode = src->priv_decode; - dst->priv_encode = src->priv_encode; - dst->priv_print = src->priv_print; - - dst->old_priv_encode = src->old_priv_encode; - dst->old_priv_decode = src->old_priv_decode; - - dst->pkey_size = src->pkey_size; - dst->pkey_bits = src->pkey_bits; - - dst->param_decode = src->param_decode; - dst->param_encode = src->param_encode; - dst->param_missing = src->param_missing; - dst->param_copy = src->param_copy; - dst->param_cmp = src->param_cmp; - dst->param_print = src->param_print; - - dst->pkey_free = src->pkey_free; - dst->pkey_ctrl = src->pkey_ctrl; - - } - -void EVP_PKEY_asn1_free(EVP_PKEY_ASN1_METHOD *ameth) - { - if (ameth && (ameth->pkey_flags & ASN1_PKEY_DYNAMIC)) - { - if (ameth->pem_str) - OPENSSL_free(ameth->pem_str); - if (ameth->info) - OPENSSL_free(ameth->info); - OPENSSL_free(ameth); - } - } - -void EVP_PKEY_asn1_set_public(EVP_PKEY_ASN1_METHOD *ameth, - int (*pub_decode)(EVP_PKEY *pk, X509_PUBKEY *pub), - int (*pub_encode)(X509_PUBKEY *pub, const EVP_PKEY *pk), - int (*pub_cmp)(const EVP_PKEY *a, const EVP_PKEY *b), - int (*pub_print)(BIO *out, const EVP_PKEY *pkey, int indent, - ASN1_PCTX *pctx), - int (*pkey_size)(const EVP_PKEY *pk), - int (*pkey_bits)(const EVP_PKEY *pk)) - { - ameth->pub_decode = pub_decode; - ameth->pub_encode = pub_encode; - ameth->pub_cmp = pub_cmp; - ameth->pub_print = pub_print; - ameth->pkey_size = pkey_size; - ameth->pkey_bits = pkey_bits; - } - -void EVP_PKEY_asn1_set_private(EVP_PKEY_ASN1_METHOD *ameth, - int (*priv_decode)(EVP_PKEY *pk, PKCS8_PRIV_KEY_INFO *p8inf), - int (*priv_encode)(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pk), - int (*priv_print)(BIO *out, const EVP_PKEY *pkey, int indent, - ASN1_PCTX *pctx)) - { - ameth->priv_decode = priv_decode; - ameth->priv_encode = priv_encode; - ameth->priv_print = priv_print; - } - -void EVP_PKEY_asn1_set_param(EVP_PKEY_ASN1_METHOD *ameth, - int (*param_decode)(EVP_PKEY *pkey, - const unsigned char **pder, int derlen), - int (*param_encode)(const EVP_PKEY *pkey, unsigned char **pder), - int (*param_missing)(const EVP_PKEY *pk), - int (*param_copy)(EVP_PKEY *to, const EVP_PKEY *from), - int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b), - int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent, - ASN1_PCTX *pctx)) - { - ameth->param_decode = param_decode; - ameth->param_encode = param_encode; - ameth->param_missing = param_missing; - ameth->param_copy = param_copy; - ameth->param_cmp = param_cmp; - ameth->param_print = param_print; - } - -void EVP_PKEY_asn1_set_free(EVP_PKEY_ASN1_METHOD *ameth, - void (*pkey_free)(EVP_PKEY *pkey)) - { - ameth->pkey_free = pkey_free; - } - -void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, - int (*pkey_ctrl)(EVP_PKEY *pkey, int op, - long arg1, void *arg2)) - { - ameth->pkey_ctrl = pkey_ctrl; - } diff --git a/crypto/openssl/crypto/asn1/asn1.h b/crypto/openssl/crypto/asn1/asn1.h index e338522..1958298 100644 --- a/crypto/openssl/crypto/asn1/asn1.h +++ b/crypto/openssl/crypto/asn1/asn1.h @@ -344,6 +344,8 @@ typedef struct ASN1_VALUE_st ASN1_VALUE; ((void*) (1 ? p : (type*)0)) #define CHECKED_PPTR_OF(type, p) \ ((void**) (1 ? p : (type**)0)) +#define CHECKED_PTR_OF_TO_CHAR(type, p) \ + ((char*) (1 ? p : (type*)0)) #define TYPEDEF_D2I_OF(type) typedef type *d2i_of_##type(type **,const unsigned char **,long) #define TYPEDEF_I2D_OF(type) typedef int i2d_of_##type(type *,unsigned char **) @@ -933,12 +935,12 @@ void *ASN1_dup(i2d_of_void *i2d, d2i_of_void *d2i, char *x); #define ASN1_dup_of(type,i2d,d2i,x) \ ((type*)ASN1_dup(CHECKED_I2D_OF(type, i2d), \ CHECKED_D2I_OF(type, d2i), \ - CHECKED_PTR_OF(type, x))) + CHECKED_PTR_OF_TO_CHAR(type, x))) #define ASN1_dup_of_const(type,i2d,d2i,x) \ ((type*)ASN1_dup(CHECKED_I2D_OF(const type, i2d), \ CHECKED_D2I_OF(type, d2i), \ - CHECKED_PTR_OF(const type, x))) + CHECKED_PTR_OF_TO_CHAR(const type, x))) void *ASN1_item_dup(const ASN1_ITEM *it, void *x); @@ -1263,6 +1265,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_R_INVALID_MIME_TYPE 200 #define ASN1_R_INVALID_MODIFIER 186 #define ASN1_R_INVALID_NUMBER 187 +#define ASN1_R_INVALID_OBJECT_ENCODING 212 #define ASN1_R_INVALID_SEPARATOR 131 #define ASN1_R_INVALID_TIME_FORMAT 132 #define ASN1_R_INVALID_UNIVERSALSTRING_LENGTH 133 diff --git a/crypto/openssl/crypto/asn1/asn1_err.c b/crypto/openssl/crypto/asn1/asn1_err.c index 5f5de98..ba88eb3 100644 --- a/crypto/openssl/crypto/asn1/asn1_err.c +++ b/crypto/openssl/crypto/asn1/asn1_err.c @@ -240,6 +240,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]= {ERR_REASON(ASN1_R_INVALID_MIME_TYPE) ,"invalid mime type"}, {ERR_REASON(ASN1_R_INVALID_MODIFIER) ,"invalid modifier"}, {ERR_REASON(ASN1_R_INVALID_NUMBER) ,"invalid number"}, +{ERR_REASON(ASN1_R_INVALID_OBJECT_ENCODING),"invalid object encoding"}, {ERR_REASON(ASN1_R_INVALID_SEPARATOR) ,"invalid separator"}, {ERR_REASON(ASN1_R_INVALID_TIME_FORMAT) ,"invalid time format"}, {ERR_REASON(ASN1_R_INVALID_UNIVERSALSTRING_LENGTH),"invalid universalstring length"}, diff --git a/crypto/openssl/crypto/asn1/asn1_gen.c b/crypto/openssl/crypto/asn1/asn1_gen.c index 2da3829..213a8e9 100644 --- a/crypto/openssl/crypto/asn1/asn1_gen.c +++ b/crypto/openssl/crypto/asn1/asn1_gen.c @@ -227,6 +227,8 @@ ASN1_TYPE *ASN1_generate_v3(char *str, X509V3_CTX *cnf) /* Allocate buffer for new encoding */ new_der = OPENSSL_malloc(len); + if (!new_der) + goto err; /* Generate tagged encoding */ @@ -446,6 +448,8 @@ static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf) int derlen; int i, is_set; sk = sk_ASN1_TYPE_new_null(); + if (!sk) + goto bad; if (section) { if (!cnf) @@ -458,7 +462,8 @@ static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf) typ = ASN1_generate_v3(sk_CONF_VALUE_value(sect, i)->value, cnf); if (!typ) goto bad; - sk_ASN1_TYPE_push(sk, typ); + if (!sk_ASN1_TYPE_push(sk, typ)) + goto bad; typ = NULL; } } @@ -474,6 +479,8 @@ static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf) derlen = i2d_ASN1_SET_OF_ASN1_TYPE(sk, NULL, i2d_ASN1_TYPE, utype, V_ASN1_UNIVERSAL, is_set); der = OPENSSL_malloc(derlen); + if (!der) + goto bad; p = der; i2d_ASN1_SET_OF_ASN1_TYPE(sk, &p, i2d_ASN1_TYPE, utype, V_ASN1_UNIVERSAL, is_set); diff --git a/crypto/openssl/crypto/asn1/asn1_locl.h b/crypto/openssl/crypto/asn1/asn1_locl.h deleted file mode 100644 index 5aa65e2..0000000 --- a/crypto/openssl/crypto/asn1/asn1_locl.h +++ /dev/null @@ -1,134 +0,0 @@ -/* asn1t.h */ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project 2006. - */ -/* ==================================================================== - * Copyright (c) 2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -/* Internal ASN1 structures and functions: not for application use */ - -/* ASN1 print context structure */ - -struct asn1_pctx_st - { - unsigned long flags; - unsigned long nm_flags; - unsigned long cert_flags; - unsigned long oid_flags; - unsigned long str_flags; - } /* ASN1_PCTX */; - -/* ASN1 public key method structure */ - -struct evp_pkey_asn1_method_st - { - int pkey_id; - int pkey_base_id; - unsigned long pkey_flags; - - char *pem_str; - char *info; - - int (*pub_decode)(EVP_PKEY *pk, X509_PUBKEY *pub); - int (*pub_encode)(X509_PUBKEY *pub, const EVP_PKEY *pk); - int (*pub_cmp)(const EVP_PKEY *a, const EVP_PKEY *b); - int (*pub_print)(BIO *out, const EVP_PKEY *pkey, int indent, - ASN1_PCTX *pctx); - - int (*priv_decode)(EVP_PKEY *pk, PKCS8_PRIV_KEY_INFO *p8inf); - int (*priv_encode)(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pk); - int (*priv_print)(BIO *out, const EVP_PKEY *pkey, int indent, - ASN1_PCTX *pctx); - - int (*pkey_size)(const EVP_PKEY *pk); - int (*pkey_bits)(const EVP_PKEY *pk); - - int (*param_decode)(EVP_PKEY *pkey, - const unsigned char **pder, int derlen); - int (*param_encode)(const EVP_PKEY *pkey, unsigned char **pder); - int (*param_missing)(const EVP_PKEY *pk); - int (*param_copy)(EVP_PKEY *to, const EVP_PKEY *from); - int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b); - int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent, - ASN1_PCTX *pctx); - - void (*pkey_free)(EVP_PKEY *pkey); - int (*pkey_ctrl)(EVP_PKEY *pkey, int op, long arg1, void *arg2); - - /* Legacy functions for old PEM */ - - int (*old_priv_decode)(EVP_PKEY *pkey, - const unsigned char **pder, int derlen); - int (*old_priv_encode)(const EVP_PKEY *pkey, unsigned char **pder); - - } /* EVP_PKEY_ASN1_METHOD */; - -/* Method to handle CRL access. - * In general a CRL could be very large (several Mb) and can consume large - * amounts of resources if stored in memory by multiple processes. - * This method allows general CRL operations to be redirected to more - * efficient callbacks: for example a CRL entry database. - */ - -#define X509_CRL_METHOD_DYNAMIC 1 - -struct x509_crl_method_st - { - int flags; - int (*crl_init)(X509_CRL *crl); - int (*crl_free)(X509_CRL *crl); - int (*crl_lookup)(X509_CRL *crl, X509_REVOKED **ret, - ASN1_INTEGER *ser, X509_NAME *issuer); - int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk); - }; diff --git a/crypto/openssl/crypto/asn1/asn1_par.c b/crypto/openssl/crypto/asn1/asn1_par.c index 8657f73..cb08e15 100644 --- a/crypto/openssl/crypto/asn1/asn1_par.c +++ b/crypto/openssl/crypto/asn1/asn1_par.c @@ -246,7 +246,7 @@ static int asn1_parse2(BIO *bp, const unsigned char **pp, long length, int offse ii=d2i_ASN1_BOOLEAN(NULL,&opp,len+hl); if (ii < 0) { - if (BIO_write(bp,"Bad boolean\n",12)) + if (BIO_write(bp,"Bad boolean\n",12) <= 0) goto end; } BIO_printf(bp,":%d",ii); diff --git a/crypto/openssl/crypto/asn1/bio_asn1.c b/crypto/openssl/crypto/asn1/bio_asn1.c deleted file mode 100644 index dc7efd5..0000000 --- a/crypto/openssl/crypto/asn1/bio_asn1.c +++ /dev/null @@ -1,495 +0,0 @@ -/* bio_asn1.c */ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project. - */ -/* ==================================================================== - * Copyright (c) 2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -/* Experimental ASN1 BIO. When written through the data is converted - * to an ASN1 string type: default is OCTET STRING. Additional functions - * can be provided to add prefix and suffix data. - */ - -#include <string.h> -#include <openssl/bio.h> -#include <openssl/asn1.h> - -/* Must be large enough for biggest tag+length */ -#define DEFAULT_ASN1_BUF_SIZE 20 - -typedef enum - { - ASN1_STATE_START, - ASN1_STATE_PRE_COPY, - ASN1_STATE_HEADER, - ASN1_STATE_HEADER_COPY, - ASN1_STATE_DATA_COPY, - ASN1_STATE_POST_COPY, - ASN1_STATE_DONE - } asn1_bio_state_t; - -typedef struct BIO_ASN1_EX_FUNCS_st - { - asn1_ps_func *ex_func; - asn1_ps_func *ex_free_func; - } BIO_ASN1_EX_FUNCS; - -typedef struct BIO_ASN1_BUF_CTX_t - { - /* Internal state */ - asn1_bio_state_t state; - /* Internal buffer */ - unsigned char *buf; - /* Size of buffer */ - int bufsize; - /* Current position in buffer */ - int bufpos; - /* Current buffer length */ - int buflen; - /* Amount of data to copy */ - int copylen; - /* Class and tag to use */ - int asn1_class, asn1_tag; - asn1_ps_func *prefix, *prefix_free, *suffix, *suffix_free; - /* Extra buffer for prefix and suffix data */ - unsigned char *ex_buf; - int ex_len; - int ex_pos; - void *ex_arg; - } BIO_ASN1_BUF_CTX; - - -static int asn1_bio_write(BIO *h, const char *buf,int num); -static int asn1_bio_read(BIO *h, char *buf, int size); -static int asn1_bio_puts(BIO *h, const char *str); -static int asn1_bio_gets(BIO *h, char *str, int size); -static long asn1_bio_ctrl(BIO *h, int cmd, long arg1, void *arg2); -static int asn1_bio_new(BIO *h); -static int asn1_bio_free(BIO *data); -static long asn1_bio_callback_ctrl(BIO *h, int cmd, bio_info_cb *fp); - -static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size); -static int asn1_bio_flush_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx, - asn1_ps_func *cleanup, asn1_bio_state_t next); -static int asn1_bio_setup_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx, - asn1_ps_func *setup, - asn1_bio_state_t ex_state, - asn1_bio_state_t other_state); - -static BIO_METHOD methods_asn1= - { - BIO_TYPE_ASN1, - "asn1", - asn1_bio_write, - asn1_bio_read, - asn1_bio_puts, - asn1_bio_gets, - asn1_bio_ctrl, - asn1_bio_new, - asn1_bio_free, - asn1_bio_callback_ctrl, - }; - -BIO_METHOD *BIO_f_asn1(void) - { - return(&methods_asn1); - } - - -static int asn1_bio_new(BIO *b) - { - BIO_ASN1_BUF_CTX *ctx; - ctx = OPENSSL_malloc(sizeof(BIO_ASN1_BUF_CTX)); - if (!ctx) - return 0; - if (!asn1_bio_init(ctx, DEFAULT_ASN1_BUF_SIZE)) - return 0; - b->init = 1; - b->ptr = (char *)ctx; - b->flags = 0; - return 1; - } - -static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size) - { - ctx->buf = OPENSSL_malloc(size); - if (!ctx->buf) - return 0; - ctx->bufsize = size; - ctx->bufpos = 0; - ctx->buflen = 0; - ctx->copylen = 0; - ctx->asn1_class = V_ASN1_UNIVERSAL; - ctx->asn1_tag = V_ASN1_OCTET_STRING; - ctx->ex_buf = 0; - ctx->ex_pos = 0; - ctx->ex_len = 0; - ctx->state = ASN1_STATE_START; - return 1; - } - -static int asn1_bio_free(BIO *b) - { - BIO_ASN1_BUF_CTX *ctx; - ctx = (BIO_ASN1_BUF_CTX *) b->ptr; - if (ctx == NULL) - return 0; - if (ctx->buf) - OPENSSL_free(ctx->buf); - OPENSSL_free(ctx); - b->init = 0; - b->ptr = NULL; - b->flags = 0; - return 1; - } - -static int asn1_bio_write(BIO *b, const char *in , int inl) - { - BIO_ASN1_BUF_CTX *ctx; - int wrmax, wrlen, ret; - unsigned char *p; - if (!in || (inl < 0) || (b->next_bio == NULL)) - return 0; - ctx = (BIO_ASN1_BUF_CTX *) b->ptr; - if (ctx == NULL) - return 0; - - wrlen = 0; - ret = -1; - - for(;;) - { - switch (ctx->state) - { - - /* Setup prefix data, call it */ - case ASN1_STATE_START: - if (!asn1_bio_setup_ex(b, ctx, ctx->prefix, - ASN1_STATE_PRE_COPY, ASN1_STATE_HEADER)) - return 0; - break; - - /* Copy any pre data first */ - case ASN1_STATE_PRE_COPY: - - ret = asn1_bio_flush_ex(b, ctx, ctx->prefix_free, - ASN1_STATE_HEADER); - - if (ret <= 0) - goto done; - - break; - - case ASN1_STATE_HEADER: - ctx->buflen = - ASN1_object_size(0, inl, ctx->asn1_tag) - inl; - OPENSSL_assert(ctx->buflen <= ctx->bufsize); - p = ctx->buf; - ASN1_put_object(&p, 0, inl, - ctx->asn1_tag, ctx->asn1_class); - ctx->copylen = inl; - ctx->state = ASN1_STATE_HEADER_COPY; - - break; - - case ASN1_STATE_HEADER_COPY: - ret = BIO_write(b->next_bio, - ctx->buf + ctx->bufpos, ctx->buflen); - if (ret <= 0) - goto done; - - ctx->buflen -= ret; - if (ctx->buflen) - ctx->bufpos += ret; - else - { - ctx->bufpos = 0; - ctx->state = ASN1_STATE_DATA_COPY; - } - - break; - - case ASN1_STATE_DATA_COPY: - - if (inl > ctx->copylen) - wrmax = ctx->copylen; - else - wrmax = inl; - ret = BIO_write(b->next_bio, in, wrmax); - if (ret <= 0) - break; - wrlen += ret; - ctx->copylen -= ret; - in += ret; - inl -= ret; - - if (ctx->copylen == 0) - ctx->state = ASN1_STATE_HEADER; - - if (inl == 0) - goto done; - - break; - - default: - BIO_clear_retry_flags(b); - return 0; - - } - - } - - done: - BIO_clear_retry_flags(b); - BIO_copy_next_retry(b); - - return (wrlen > 0) ? wrlen : ret; - - } - -static int asn1_bio_flush_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx, - asn1_ps_func *cleanup, asn1_bio_state_t next) - { - int ret; - if (ctx->ex_len <= 0) - return 1; - for(;;) - { - ret = BIO_write(b->next_bio, ctx->ex_buf + ctx->ex_pos, - ctx->ex_len); - if (ret <= 0) - break; - ctx->ex_len -= ret; - if (ctx->ex_len > 0) - ctx->ex_pos += ret; - else - { - if(cleanup) - cleanup(b, &ctx->ex_buf, &ctx->ex_len, - &ctx->ex_arg); - ctx->state = next; - ctx->ex_pos = 0; - break; - } - } - return ret; - } - -static int asn1_bio_setup_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx, - asn1_ps_func *setup, - asn1_bio_state_t ex_state, - asn1_bio_state_t other_state) - { - if (setup && !setup(b, &ctx->ex_buf, &ctx->ex_len, &ctx->ex_arg)) - { - BIO_clear_retry_flags(b); - return 0; - } - if (ctx->ex_len > 0) - ctx->state = ex_state; - else - ctx->state = other_state; - return 1; - } - -static int asn1_bio_read(BIO *b, char *in , int inl) - { - if (!b->next_bio) - return 0; - return BIO_read(b->next_bio, in , inl); - } - -static int asn1_bio_puts(BIO *b, const char *str) - { - return asn1_bio_write(b, str, strlen(str)); - } - -static int asn1_bio_gets(BIO *b, char *str, int size) - { - if (!b->next_bio) - return 0; - return BIO_gets(b->next_bio, str , size); - } - -static long asn1_bio_callback_ctrl(BIO *b, int cmd, bio_info_cb *fp) - { - if (b->next_bio == NULL) return(0); - return BIO_callback_ctrl(b->next_bio,cmd,fp); - } - -static long asn1_bio_ctrl(BIO *b, int cmd, long arg1, void *arg2) - { - BIO_ASN1_BUF_CTX *ctx; - BIO_ASN1_EX_FUNCS *ex_func; - long ret = 1; - ctx = (BIO_ASN1_BUF_CTX *) b->ptr; - if (ctx == NULL) - return 0; - switch(cmd) - { - - case BIO_C_SET_PREFIX: - ex_func = arg2; - ctx->prefix = ex_func->ex_func; - ctx->prefix_free = ex_func->ex_free_func; - break; - - case BIO_C_GET_PREFIX: - ex_func = arg2; - ex_func->ex_func = ctx->prefix; - ex_func->ex_free_func = ctx->prefix_free; - break; - - case BIO_C_SET_SUFFIX: - ex_func = arg2; - ctx->suffix = ex_func->ex_func; - ctx->suffix_free = ex_func->ex_free_func; - break; - - case BIO_C_GET_SUFFIX: - ex_func = arg2; - ex_func->ex_func = ctx->suffix; - ex_func->ex_free_func = ctx->suffix_free; - break; - - case BIO_C_SET_EX_ARG: - ctx->ex_arg = arg2; - break; - - case BIO_C_GET_EX_ARG: - *(void **)arg2 = ctx->ex_arg; - break; - - case BIO_CTRL_FLUSH: - if (!b->next_bio) - return 0; - - /* Call post function if possible */ - if (ctx->state == ASN1_STATE_HEADER) - { - if (!asn1_bio_setup_ex(b, ctx, ctx->suffix, - ASN1_STATE_POST_COPY, ASN1_STATE_DONE)) - return 0; - } - - if (ctx->state == ASN1_STATE_POST_COPY) - { - ret = asn1_bio_flush_ex(b, ctx, ctx->suffix_free, - ASN1_STATE_DONE); - if (ret <= 0) - return ret; - } - - if (ctx->state == ASN1_STATE_DONE) - return BIO_ctrl(b->next_bio, cmd, arg1, arg2); - else - { - BIO_clear_retry_flags(b); - return 0; - } - break; - - - default: - if (!b->next_bio) - return 0; - return BIO_ctrl(b->next_bio, cmd, arg1, arg2); - - } - - return ret; - } - -static int asn1_bio_set_ex(BIO *b, int cmd, - asn1_ps_func *ex_func, asn1_ps_func *ex_free_func) - { - BIO_ASN1_EX_FUNCS extmp; - extmp.ex_func = ex_func; - extmp.ex_free_func = ex_free_func; - return BIO_ctrl(b, cmd, 0, &extmp); - } - -static int asn1_bio_get_ex(BIO *b, int cmd, - asn1_ps_func **ex_func, asn1_ps_func **ex_free_func) - { - BIO_ASN1_EX_FUNCS extmp; - int ret; - ret = BIO_ctrl(b, cmd, 0, &extmp); - if (ret > 0) - { - *ex_func = extmp.ex_func; - *ex_free_func = extmp.ex_free_func; - } - return ret; - } - -int BIO_asn1_set_prefix(BIO *b, asn1_ps_func *prefix, asn1_ps_func *prefix_free) - { - return asn1_bio_set_ex(b, BIO_C_SET_PREFIX, prefix, prefix_free); - } - -int BIO_asn1_get_prefix(BIO *b, asn1_ps_func **pprefix, asn1_ps_func **pprefix_free) - { - return asn1_bio_get_ex(b, BIO_C_GET_PREFIX, pprefix, pprefix_free); - } - -int BIO_asn1_set_suffix(BIO *b, asn1_ps_func *suffix, asn1_ps_func *suffix_free) - { - return asn1_bio_set_ex(b, BIO_C_SET_SUFFIX, suffix, suffix_free); - } - -int BIO_asn1_get_suffix(BIO *b, asn1_ps_func **psuffix, asn1_ps_func **psuffix_free) - { - return asn1_bio_get_ex(b, BIO_C_GET_SUFFIX, psuffix, psuffix_free); - } diff --git a/crypto/openssl/crypto/asn1/bio_ndef.c b/crypto/openssl/crypto/asn1/bio_ndef.c deleted file mode 100644 index 370389b..0000000 --- a/crypto/openssl/crypto/asn1/bio_ndef.c +++ /dev/null @@ -1,246 +0,0 @@ -/* bio_ndef.c */ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project. - */ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - */ - -#include <openssl/asn1.h> -#include <openssl/asn1t.h> -#include <openssl/bio.h> -#include <openssl/err.h> - -#ifndef OPENSSL_SYSNAME_NETWARE -#include <memory.h> -#endif -#include <stdio.h> - -/* Experimental NDEF ASN1 BIO support routines */ - -/* The usage is quite simple, initialize an ASN1 structure, - * get a BIO from it then any data written through the BIO - * will end up translated to approptiate format on the fly. - * The data is streamed out and does *not* need to be - * all held in memory at once. - * - * When the BIO is flushed the output is finalized and any - * signatures etc written out. - * - * The BIO is a 'proper' BIO and can handle non blocking I/O - * correctly. - * - * The usage is simple. The implementation is *not*... - */ - -/* BIO support data stored in the ASN1 BIO ex_arg */ - -typedef struct ndef_aux_st - { - /* ASN1 structure this BIO refers to */ - ASN1_VALUE *val; - const ASN1_ITEM *it; - /* Top of the BIO chain */ - BIO *ndef_bio; - /* Output BIO */ - BIO *out; - /* Boundary where content is inserted */ - unsigned char **boundary; - /* DER buffer start */ - unsigned char *derbuf; - } NDEF_SUPPORT; - -static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg); -static int ndef_prefix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg); -static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg); -static int ndef_suffix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg); - -BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it) - { - NDEF_SUPPORT *ndef_aux = NULL; - BIO *asn_bio = NULL; - const ASN1_AUX *aux = it->funcs; - ASN1_STREAM_ARG sarg; - - if (!aux || !aux->asn1_cb) - { - ASN1err(ASN1_F_BIO_NEW_NDEF, ASN1_R_STREAMING_NOT_SUPPORTED); - return NULL; - } - ndef_aux = OPENSSL_malloc(sizeof(NDEF_SUPPORT)); - asn_bio = BIO_new(BIO_f_asn1()); - - /* ASN1 bio needs to be next to output BIO */ - - out = BIO_push(asn_bio, out); - - if (!ndef_aux || !asn_bio || !out) - goto err; - - BIO_asn1_set_prefix(asn_bio, ndef_prefix, ndef_prefix_free); - BIO_asn1_set_suffix(asn_bio, ndef_suffix, ndef_suffix_free); - - /* Now let callback prepend any digest, cipher etc BIOs - * ASN1 structure needs. - */ - - sarg.out = out; - sarg.ndef_bio = NULL; - sarg.boundary = NULL; - - if (aux->asn1_cb(ASN1_OP_STREAM_PRE, &val, it, &sarg) <= 0) - goto err; - - ndef_aux->val = val; - ndef_aux->it = it; - ndef_aux->ndef_bio = sarg.ndef_bio; - ndef_aux->boundary = sarg.boundary; - ndef_aux->out = out; - - BIO_ctrl(asn_bio, BIO_C_SET_EX_ARG, 0, ndef_aux); - - return sarg.ndef_bio; - - err: - if (asn_bio) - BIO_free(asn_bio); - if (ndef_aux) - OPENSSL_free(ndef_aux); - return NULL; - } - -static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg) - { - NDEF_SUPPORT *ndef_aux; - unsigned char *p; - int derlen; - - if (!parg) - return 0; - - ndef_aux = *(NDEF_SUPPORT **)parg; - - derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it); - p = OPENSSL_malloc(derlen); - ndef_aux->derbuf = p; - *pbuf = p; - derlen = ASN1_item_ndef_i2d(ndef_aux->val, &p, ndef_aux->it); - - if (!*ndef_aux->boundary) - return 0; - - *plen = *ndef_aux->boundary - *pbuf; - - return 1; - } - -static int ndef_prefix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg) - { - NDEF_SUPPORT *ndef_aux; - - if (!parg) - return 0; - - ndef_aux = *(NDEF_SUPPORT **)parg; - - if (ndef_aux->derbuf) - OPENSSL_free(ndef_aux->derbuf); - - ndef_aux->derbuf = NULL; - *pbuf = NULL; - *plen = 0; - return 1; - } - -static int ndef_suffix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg) - { - NDEF_SUPPORT **pndef_aux = (NDEF_SUPPORT **)parg; - if (!ndef_prefix_free(b, pbuf, plen, parg)) - return 0; - OPENSSL_free(*pndef_aux); - *pndef_aux = NULL; - return 1; - } - -static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg) - { - NDEF_SUPPORT *ndef_aux; - unsigned char *p; - int derlen; - const ASN1_AUX *aux; - ASN1_STREAM_ARG sarg; - - if (!parg) - return 0; - - ndef_aux = *(NDEF_SUPPORT **)parg; - - aux = ndef_aux->it->funcs; - - /* Finalize structures */ - sarg.ndef_bio = ndef_aux->ndef_bio; - sarg.out = ndef_aux->out; - sarg.boundary = ndef_aux->boundary; - if (aux->asn1_cb(ASN1_OP_STREAM_POST, - &ndef_aux->val, ndef_aux->it, &sarg) <= 0) - return 0; - - derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it); - p = OPENSSL_malloc(derlen); - ndef_aux->derbuf = p; - *pbuf = p; - derlen = ASN1_item_ndef_i2d(ndef_aux->val, &p, ndef_aux->it); - - if (!*ndef_aux->boundary) - return 0; - *pbuf = *ndef_aux->boundary; - *plen = derlen - (*ndef_aux->boundary - ndef_aux->derbuf); - - return 1; - } diff --git a/crypto/openssl/crypto/asn1/t_x509.c b/crypto/openssl/crypto/asn1/t_x509.c index 8f746f9..6f295b4 100644 --- a/crypto/openssl/crypto/asn1/t_x509.c +++ b/crypto/openssl/crypto/asn1/t_x509.c @@ -379,6 +379,8 @@ int ASN1_GENERALIZEDTIME_print(BIO *bp, ASN1_GENERALIZEDTIME *tm) int gmt=0; int i; int y=0,M=0,d=0,h=0,m=0,s=0; + char *f = NULL; + int f_len = 0; i=tm->length; v=(char *)tm->data; @@ -396,10 +398,21 @@ int ASN1_GENERALIZEDTIME_print(BIO *bp, ASN1_GENERALIZEDTIME *tm) if (tm->length >= 14 && (v[12] >= '0') && (v[12] <= '9') && (v[13] >= '0') && (v[13] <= '9')) + { s= (v[12]-'0')*10+(v[13]-'0'); + /* Check for fractions of seconds. */ + if (tm->length >= 15 && v[14] == '.') + { + int l = tm->length; + f = &v[14]; /* The decimal point. */ + f_len = 1; + while (14 + f_len < l && f[f_len] >= '0' && f[f_len] <= '9') + ++f_len; + } + } - if (BIO_printf(bp,"%s %2d %02d:%02d:%02d %d%s", - mon[M-1],d,h,m,s,y,(gmt)?" GMT":"") <= 0) + if (BIO_printf(bp,"%s %2d %02d:%02d:%02d%.*s %d%s", + mon[M-1],d,h,m,s,f_len,f,y,(gmt)?" GMT":"") <= 0) return(0); else return(1); diff --git a/crypto/openssl/crypto/asn1/x_nx509.c b/crypto/openssl/crypto/asn1/x_nx509.c deleted file mode 100644 index fbd9a22..0000000 --- a/crypto/openssl/crypto/asn1/x_nx509.c +++ /dev/null @@ -1,72 +0,0 @@ -/* x_nx509.c */ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project 2005. - */ -/* ==================================================================== - * Copyright (c) 2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include <stddef.h> -#include <openssl/x509.h> -#include <openssl/asn1.h> -#include <openssl/asn1t.h> - -/* Old netscape certificate wrapper format */ - -ASN1_SEQUENCE(NETSCAPE_X509) = { - ASN1_SIMPLE(NETSCAPE_X509, header, ASN1_OCTET_STRING), - ASN1_OPT(NETSCAPE_X509, cert, X509) -} ASN1_SEQUENCE_END(NETSCAPE_X509) - -IMPLEMENT_ASN1_FUNCTIONS(NETSCAPE_X509) - diff --git a/crypto/openssl/crypto/bio/bio.h b/crypto/openssl/crypto/bio/bio.h index cecb6a7..ebb4278 100644 --- a/crypto/openssl/crypto/bio/bio.h +++ b/crypto/openssl/crypto/bio/bio.h @@ -156,8 +156,11 @@ extern "C" { * previous write * operation */ +#define BIO_CTRL_DGRAM_GET_PEER 46 #define BIO_CTRL_DGRAM_SET_PEER 44 /* Destination for the data */ +#define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45 /* Next DTLS handshake timeout to + * adjust socket timeouts */ /* modifiers */ #define BIO_FP_READ 0x02 @@ -405,7 +408,7 @@ typedef struct bio_f_buffer_ctx_struct #define BIO_get_conn_hostname(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,0) #define BIO_get_conn_port(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,1) #define BIO_get_conn_ip(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,2) -#define BIO_get_conn_int_port(b) BIO_int_ctrl(b,BIO_C_GET_CONNECT,3) +#define BIO_get_conn_int_port(b) BIO_int_ctrl(b,BIO_C_GET_CONNECT,3,0) #define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) @@ -414,7 +417,7 @@ typedef struct bio_f_buffer_ctx_struct #define BIO_set_accept_port(b,name) BIO_ctrl(b,BIO_C_SET_ACCEPT,0,(char *)name) #define BIO_get_accept_port(b) BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,0) /* #define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) */ -#define BIO_set_nbio_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,1,(n)?"a":NULL) +#define BIO_set_nbio_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,1,(n)?(void *)"a":NULL) #define BIO_set_accept_bios(b,bio) BIO_ctrl(b,BIO_C_SET_ACCEPT,2,(char *)bio) #define BIO_BIND_NORMAL 0 @@ -541,6 +544,8 @@ int BIO_ctrl_reset_read_request(BIO *b); (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP, 0, NULL) #define BIO_dgram_send_timedout(b) \ (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP, 0, NULL) +#define BIO_dgram_get_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_PEER, 0, (char *)peer) #define BIO_dgram_set_peer(b,peer) \ (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_PEER, 0, (char *)peer) diff --git a/crypto/openssl/crypto/bio/bss_dgram.c b/crypto/openssl/crypto/bio/bss_dgram.c index c3da6dc..14ca854 100644 --- a/crypto/openssl/crypto/bio/bss_dgram.c +++ b/crypto/openssl/crypto/bio/bss_dgram.c @@ -66,7 +66,13 @@ #include <openssl/bio.h> +#if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VMS) +#include <sys/timeb.h> +#endif + +#ifdef OPENSSL_SYS_LINUX #define IP_MTU 14 /* linux is lame */ +#endif #ifdef WATT32 #define sock_write SockWrite /* Watt-32 uses same names */ @@ -84,6 +90,8 @@ static int dgram_clear(BIO *bio); static int BIO_dgram_should_retry(int s); +static void get_current_time(struct timeval *t); + static BIO_METHOD methods_dgramp= { BIO_TYPE_DGRAM, @@ -104,6 +112,8 @@ typedef struct bio_dgram_data_st unsigned int connected; unsigned int _errno; unsigned int mtu; + struct timeval next_timeout; + struct timeval socket_timeout; } bio_dgram_data; BIO_METHOD *BIO_s_datagram(void) @@ -165,7 +175,100 @@ static int dgram_clear(BIO *a) } return(1); } - + +static void dgram_adjust_rcv_timeout(BIO *b) + { +#if defined(SO_RCVTIMEO) + bio_dgram_data *data = (bio_dgram_data *)b->ptr; + int sz = sizeof(int); + + /* Is a timer active? */ + if (data->next_timeout.tv_sec > 0 || data->next_timeout.tv_usec > 0) + { + struct timeval timenow, timeleft; + + /* Read current socket timeout */ +#ifdef OPENSSL_SYS_WINDOWS + int timeout; + if (getsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, + (void*)&timeout, &sz) < 0) + { perror("getsockopt"); } + else + { + data->socket_timeout.tv_sec = timeout / 1000; + data->socket_timeout.tv_usec = (timeout % 1000) * 1000; + } +#else + if ( getsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, + &(data->socket_timeout), (void *)&sz) < 0) + { perror("getsockopt"); } +#endif + + /* Get current time */ + get_current_time(&timenow); + + /* Calculate time left until timer expires */ + memcpy(&timeleft, &(data->next_timeout), sizeof(struct timeval)); + timeleft.tv_sec -= timenow.tv_sec; + timeleft.tv_usec -= timenow.tv_usec; + if (timeleft.tv_usec < 0) + { + timeleft.tv_sec--; + timeleft.tv_usec += 1000000; + } + + if (timeleft.tv_sec < 0) + { + timeleft.tv_sec = 0; + timeleft.tv_usec = 1; + } + + /* Adjust socket timeout if next handhake message timer + * will expire earlier. + */ + if ((data->socket_timeout.tv_sec == 0 && data->socket_timeout.tv_usec == 0) || + (data->socket_timeout.tv_sec > timeleft.tv_sec) || + (data->socket_timeout.tv_sec == timeleft.tv_sec && + data->socket_timeout.tv_usec >= timeleft.tv_usec)) + { +#ifdef OPENSSL_SYS_WINDOWS + timeout = timeleft.tv_sec * 1000 + timeleft.tv_usec / 1000; + if (setsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, + (void*)&timeout, sizeof(timeout)) < 0) + { perror("setsockopt"); } +#else + if ( setsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, &timeleft, + sizeof(struct timeval)) < 0) + { perror("setsockopt"); } +#endif + } + } +#endif + } + +static void dgram_reset_rcv_timeout(BIO *b) + { +#if defined(SO_RCVTIMEO) + bio_dgram_data *data = (bio_dgram_data *)b->ptr; + + /* Is a timer active? */ + if (data->next_timeout.tv_sec > 0 || data->next_timeout.tv_usec > 0) + { +#ifdef OPENSSL_SYS_WINDOWS + int timeout = data->socket_timeout.tv_sec * 1000 + + data->socket_timeout.tv_usec / 1000; + if (setsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, + (void*)&timeout, sizeof(timeout)) < 0) + { perror("setsockopt"); } +#else + if ( setsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, &(data->socket_timeout), + sizeof(struct timeval)) < 0) + { perror("setsockopt"); } +#endif + } +#endif + } + static int dgram_read(BIO *b, char *out, int outl) { int ret=0; @@ -183,13 +286,15 @@ static int dgram_read(BIO *b, char *out, int outl) * but this is not universal. Cast to (void *) to avoid * compiler warnings. */ + dgram_adjust_rcv_timeout(b); ret=recvfrom(b->num,out,outl,0,&peer,(void *)&peerlen); + dgram_reset_rcv_timeout(b); - if ( ! data->connected && ret > 0) - BIO_ctrl(b, BIO_CTRL_DGRAM_CONNECT, 0, &peer); + if ( ! data->connected && ret >= 0) + BIO_ctrl(b, BIO_CTRL_DGRAM_SET_PEER, 0, &peer); BIO_clear_retry_flags(b); - if (ret <= 0) + if (ret < 0) { if (BIO_dgram_should_retry(ret)) { @@ -219,7 +324,7 @@ static int dgram_write(BIO *b, const char *in, int inl) BIO_clear_retry_flags(b); if (ret <= 0) { - if (BIO_sock_should_retry(ret)) + if (BIO_dgram_should_retry(ret)) { BIO_set_retry_write(b); data->_errno = get_last_socket_error(); @@ -240,8 +345,14 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) int *ip; struct sockaddr *to = NULL; bio_dgram_data *data = NULL; +#if defined(IP_MTU_DISCOVER) || defined(IP_MTU) long sockopt_val = 0; unsigned int sockopt_len = 0; +#endif +#ifdef OPENSSL_SYS_LINUX + socklen_t addr_len; + struct sockaddr_storage addr; +#endif data = (bio_dgram_data *)b->ptr; @@ -300,24 +411,87 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) #endif break; /* (Linux)kernel sets DF bit on outgoing IP packets */ -#ifdef IP_MTU_DISCOVER case BIO_CTRL_DGRAM_MTU_DISCOVER: - sockopt_val = IP_PMTUDISC_DO; - if ((ret = setsockopt(b->num, IPPROTO_IP, IP_MTU_DISCOVER, - &sockopt_val, sizeof(sockopt_val))) < 0) - perror("setsockopt"); +#ifdef OPENSSL_SYS_LINUX + addr_len = (socklen_t)sizeof(struct sockaddr_storage); + memset((void *)&addr, 0, sizeof(struct sockaddr_storage)); + if (getsockname(b->num, (void *)&addr, &addr_len) < 0) + { + ret = 0; + break; + } + sockopt_len = sizeof(sockopt_val); + switch (addr.ss_family) + { + case AF_INET: + sockopt_val = IP_PMTUDISC_DO; + if ((ret = setsockopt(b->num, IPPROTO_IP, IP_MTU_DISCOVER, + &sockopt_val, sizeof(sockopt_val))) < 0) + perror("setsockopt"); + break; + case AF_INET6: + sockopt_val = IPV6_PMTUDISC_DO; + if ((ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &sockopt_val, sizeof(sockopt_val))) < 0) + perror("setsockopt"); + break; + default: + ret = -1; + break; + } + ret = -1; +#else break; #endif case BIO_CTRL_DGRAM_QUERY_MTU: - sockopt_len = sizeof(sockopt_val); - if ((ret = getsockopt(b->num, IPPROTO_IP, IP_MTU, (void *)&sockopt_val, - &sockopt_len)) < 0 || sockopt_val < 0) - { ret = 0; } - else +#ifdef OPENSSL_SYS_LINUX + addr_len = (socklen_t)sizeof(struct sockaddr_storage); + memset((void *)&addr, 0, sizeof(struct sockaddr_storage)); + if (getsockname(b->num, (void *)&addr, &addr_len) < 0) { - data->mtu = sockopt_val; - ret = data->mtu; + ret = 0; + break; } + sockopt_len = sizeof(sockopt_val); + switch (addr.ss_family) + { + case AF_INET: + if ((ret = getsockopt(b->num, IPPROTO_IP, IP_MTU, (void *)&sockopt_val, + &sockopt_len)) < 0 || sockopt_val < 0) + { + ret = 0; + } + else + { + /* we assume that the transport protocol is UDP and no + * IP options are used. + */ + data->mtu = sockopt_val - 8 - 20; + ret = data->mtu; + } + break; + case AF_INET6: + if ((ret = getsockopt(b->num, IPPROTO_IPV6, IPV6_MTU, (void *)&sockopt_val, + &sockopt_len)) < 0 || sockopt_val < 0) + { + ret = 0; + } + else + { + /* we assume that the transport protocol is UDP and no + * IPV6 options are used. + */ + data->mtu = sockopt_val - 8 - 40; + ret = data->mtu; + } + break; + default: + ret = 0; + break; + } +#else + ret = 0; +#endif break; case BIO_CTRL_DGRAM_GET_MTU: return data->mtu; @@ -340,11 +514,20 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) memset(&(data->peer), 0x00, sizeof(struct sockaddr)); } break; + case BIO_CTRL_DGRAM_GET_PEER: + to = (struct sockaddr *) ptr; + + memcpy(to, &(data->peer), sizeof(struct sockaddr)); + ret = sizeof(struct sockaddr); + break; case BIO_CTRL_DGRAM_SET_PEER: to = (struct sockaddr *) ptr; memcpy(&(data->peer), to, sizeof(struct sockaddr)); break; + case BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT: + memcpy(&(data->next_timeout), ptr, sizeof(struct timeval)); + break; #if defined(SO_RCVTIMEO) case BIO_CTRL_DGRAM_SET_RECV_TIMEOUT: #ifdef OPENSSL_SYS_WINDOWS @@ -507,10 +690,6 @@ int BIO_dgram_non_fatal_error(int err) # endif #endif -#if defined(ENOTCONN) - case ENOTCONN: -#endif - #ifdef EINTR case EINTR: #endif @@ -533,11 +712,6 @@ int BIO_dgram_non_fatal_error(int err) case EALREADY: #endif -/* DF bit set, and packet larger than MTU */ -#ifdef EMSGSIZE - case EMSGSIZE: -#endif - return(1); /* break; */ default: @@ -546,3 +720,20 @@ int BIO_dgram_non_fatal_error(int err) return(0); } #endif + +static void get_current_time(struct timeval *t) + { +#ifdef OPENSSL_SYS_WIN32 + struct _timeb tb; + _ftime(&tb); + t->tv_sec = (long)tb.time; + t->tv_usec = (long)tb.millitm * 1000; +#elif defined(OPENSSL_SYS_VMS) + struct timeb tb; + ftime(&tb); + t->tv_sec = (long)tb.time; + t->tv_usec = (long)tb.millitm * 1000; +#else + gettimeofday(t, NULL); +#endif + } diff --git a/crypto/openssl/crypto/bio/bss_file.c b/crypto/openssl/crypto/bio/bss_file.c index 9ad46fa..62c1073 100644 --- a/crypto/openssl/crypto/bio/bss_file.c +++ b/crypto/openssl/crypto/bio/bss_file.c @@ -404,11 +404,18 @@ static int MS_CALLBACK file_gets(BIO *bp, char *buf, int size) buf[0]='\0'; if (bp->flags&BIO_FLAGS_UPLINK) - UP_fgets(buf,size,bp->ptr); + { + if (!UP_fgets(buf,size,bp->ptr)) + goto err; + } else - fgets(buf,size,(FILE *)bp->ptr); + { + if (!fgets(buf,size,(FILE *)bp->ptr)) + goto err; + } if (buf[0] != '\0') ret=strlen(buf); + err: return(ret); } diff --git a/crypto/openssl/crypto/bn/asm/alpha-mont.pl b/crypto/openssl/crypto/bn/asm/alpha-mont.pl deleted file mode 100755 index 7a2cc31..0000000 --- a/crypto/openssl/crypto/bn/asm/alpha-mont.pl +++ /dev/null @@ -1,317 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# On 21264 RSA sign performance improves by 70/35/20/15 percent for -# 512/1024/2048/4096 bit key lengths. This is against vendor compiler -# instructed to '-tune host' code with in-line assembler. Other -# benchmarks improve by 15-20%. To anchor it to something else, the -# code provides approximately the same performance per GHz as AMD64. -# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x -# difference. - -# int bn_mul_mont( -$rp="a0"; # BN_ULONG *rp, -$ap="a1"; # const BN_ULONG *ap, -$bp="a2"; # const BN_ULONG *bp, -$np="a3"; # const BN_ULONG *np, -$n0="a4"; # const BN_ULONG *n0, -$num="a5"; # int num); - -$lo0="t0"; -$hi0="t1"; -$lo1="t2"; -$hi1="t3"; -$aj="t4"; -$bi="t5"; -$nj="t6"; -$tp="t7"; -$alo="t8"; -$ahi="t9"; -$nlo="t10"; -$nhi="t11"; -$tj="t12"; -$i="s3"; -$j="s4"; -$m1="s5"; - -$code=<<___; -#include <asm.h> -#include <regdef.h> - -.text - -.set noat -.set noreorder - -.globl bn_mul_mont -.align 5 -.ent bn_mul_mont -bn_mul_mont: - lda sp,-40(sp) - stq ra,0(sp) - stq s3,8(sp) - stq s4,16(sp) - stq s5,24(sp) - stq fp,32(sp) - mov sp,fp - .mask 0x0400f000,-40 - .frame fp,40,ra - .prologue 0 - - .align 4 - .set reorder - sextl $num,$num - mov 0,v0 - cmplt $num,4,AT - bne AT,.Lexit - - ldq $hi0,0($ap) # ap[0] - s8addq $num,16,AT - ldq $aj,8($ap) - subq sp,AT,sp - ldq $bi,0($bp) # bp[0] - mov -4096,AT - ldq $n0,0($n0) - and sp,AT,sp - - mulq $hi0,$bi,$lo0 - ldq $hi1,0($np) # np[0] - umulh $hi0,$bi,$hi0 - ldq $nj,8($np) - - mulq $lo0,$n0,$m1 - - mulq $hi1,$m1,$lo1 - umulh $hi1,$m1,$hi1 - - addq $lo1,$lo0,$lo1 - cmpult $lo1,$lo0,AT - addq $hi1,AT,$hi1 - - mulq $aj,$bi,$alo - mov 2,$j - umulh $aj,$bi,$ahi - mov sp,$tp - - mulq $nj,$m1,$nlo - s8addq $j,$ap,$aj - umulh $nj,$m1,$nhi - s8addq $j,$np,$nj -.align 4 -.L1st: - .set noreorder - ldq $aj,($aj) - addl $j,1,$j - ldq $nj,($nj) - lda $tp,8($tp) - - addq $alo,$hi0,$lo0 - mulq $aj,$bi,$alo - cmpult $lo0,$hi0,AT - addq $nlo,$hi1,$lo1 - - mulq $nj,$m1,$nlo - addq $ahi,AT,$hi0 - cmpult $lo1,$hi1,v0 - cmplt $j,$num,$tj - - umulh $aj,$bi,$ahi - addq $nhi,v0,$hi1 - addq $lo1,$lo0,$lo1 - s8addq $j,$ap,$aj - - umulh $nj,$m1,$nhi - cmpult $lo1,$lo0,v0 - addq $hi1,v0,$hi1 - s8addq $j,$np,$nj - - stq $lo1,-8($tp) - nop - unop - bne $tj,.L1st - .set reorder - - addq $alo,$hi0,$lo0 - addq $nlo,$hi1,$lo1 - cmpult $lo0,$hi0,AT - cmpult $lo1,$hi1,v0 - addq $ahi,AT,$hi0 - addq $nhi,v0,$hi1 - - addq $lo1,$lo0,$lo1 - cmpult $lo1,$lo0,v0 - addq $hi1,v0,$hi1 - - stq $lo1,0($tp) - - addq $hi1,$hi0,$hi1 - cmpult $hi1,$hi0,AT - stq $hi1,8($tp) - stq AT,16($tp) - - mov 1,$i -.align 4 -.Louter: - s8addq $i,$bp,$bi - ldq $hi0,($ap) - ldq $aj,8($ap) - ldq $bi,($bi) - ldq $hi1,($np) - ldq $nj,8($np) - ldq $tj,(sp) - - mulq $hi0,$bi,$lo0 - umulh $hi0,$bi,$hi0 - - addq $lo0,$tj,$lo0 - cmpult $lo0,$tj,AT - addq $hi0,AT,$hi0 - - mulq $lo0,$n0,$m1 - - mulq $hi1,$m1,$lo1 - umulh $hi1,$m1,$hi1 - - addq $lo1,$lo0,$lo1 - cmpult $lo1,$lo0,AT - mov 2,$j - addq $hi1,AT,$hi1 - - mulq $aj,$bi,$alo - mov sp,$tp - umulh $aj,$bi,$ahi - - mulq $nj,$m1,$nlo - s8addq $j,$ap,$aj - umulh $nj,$m1,$nhi -.align 4 -.Linner: - .set noreorder - ldq $tj,8($tp) #L0 - nop #U1 - ldq $aj,($aj) #L1 - s8addq $j,$np,$nj #U0 - - ldq $nj,($nj) #L0 - nop #U1 - addq $alo,$hi0,$lo0 #L1 - lda $tp,8($tp) - - mulq $aj,$bi,$alo #U1 - cmpult $lo0,$hi0,AT #L0 - addq $nlo,$hi1,$lo1 #L1 - addl $j,1,$j - - mulq $nj,$m1,$nlo #U1 - addq $ahi,AT,$hi0 #L0 - addq $lo0,$tj,$lo0 #L1 - cmpult $lo1,$hi1,v0 #U0 - - umulh $aj,$bi,$ahi #U1 - cmpult $lo0,$tj,AT #L0 - addq $lo1,$lo0,$lo1 #L1 - addq $nhi,v0,$hi1 #U0 - - umulh $nj,$m1,$nhi #U1 - s8addq $j,$ap,$aj #L0 - cmpult $lo1,$lo0,v0 #L1 - cmplt $j,$num,$tj #U0 # borrow $tj - - addq $hi0,AT,$hi0 #L0 - addq $hi1,v0,$hi1 #U1 - stq $lo1,-8($tp) #L1 - bne $tj,.Linner #U0 - .set reorder - - ldq $tj,8($tp) - addq $alo,$hi0,$lo0 - addq $nlo,$hi1,$lo1 - cmpult $lo0,$hi0,AT - cmpult $lo1,$hi1,v0 - addq $ahi,AT,$hi0 - addq $nhi,v0,$hi1 - - addq $lo0,$tj,$lo0 - cmpult $lo0,$tj,AT - addq $hi0,AT,$hi0 - - ldq $tj,16($tp) - addq $lo1,$lo0,$j - cmpult $j,$lo0,v0 - addq $hi1,v0,$hi1 - - addq $hi1,$hi0,$lo1 - stq $j,($tp) - cmpult $lo1,$hi0,$hi1 - addq $lo1,$tj,$lo1 - cmpult $lo1,$tj,AT - addl $i,1,$i - addq $hi1,AT,$hi1 - stq $lo1,8($tp) - cmplt $i,$num,$tj # borrow $tj - stq $hi1,16($tp) - bne $tj,.Louter - - s8addq $num,sp,$tj # &tp[num] - mov $rp,$bp # put rp aside - mov sp,$tp - mov sp,$ap - mov 0,$hi0 # clear borrow bit - -.align 4 -.Lsub: ldq $lo0,($tp) - ldq $lo1,($np) - lda $tp,8($tp) - lda $np,8($np) - subq $lo0,$lo1,$lo1 # tp[i]-np[i] - cmpult $lo0,$lo1,AT - subq $lo1,$hi0,$lo0 - cmpult $lo1,$lo0,$hi0 - or $hi0,AT,$hi0 - stq $lo0,($rp) - cmpult $tp,$tj,v0 - lda $rp,8($rp) - bne v0,.Lsub - - subq $hi1,$hi0,$hi0 # handle upmost overflow bit - mov sp,$tp - mov $bp,$rp # restore rp - - and sp,$hi0,$ap - bic $bp,$hi0,$bp - bis $bp,$ap,$ap # ap=borrow?tp:rp - -.align 4 -.Lcopy: ldq $aj,($ap) # copy or in-place refresh - lda $tp,8($tp) - lda $rp,8($rp) - lda $ap,8($ap) - stq zero,-8($tp) # zap tp - cmpult $tp,$tj,AT - stq $aj,-8($rp) - bne AT,.Lcopy - mov 1,v0 - -.Lexit: - .set noreorder - mov fp,sp - /*ldq ra,0(sp)*/ - ldq s3,8(sp) - ldq s4,16(sp) - ldq s5,24(sp) - ldq fp,32(sp) - lda sp,40(sp) - ret (ra) -.end bn_mul_mont -.rdata -.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>" -___ - -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/armv4-mont.pl b/crypto/openssl/crypto/bn/asm/armv4-mont.pl deleted file mode 100755 index 05d5dc1..0000000 --- a/crypto/openssl/crypto/bn/asm/armv4-mont.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# January 2007. - -# Montgomery multiplication for ARMv4. -# -# Performance improvement naturally varies among CPU implementations -# and compilers. The code was observed to provide +65-35% improvement -# [depending on key length, less for longer keys] on ARM920T, and -# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code -# base and compiler generated code with in-lined umull and even umlal -# instructions. The latter means that this code didn't really have an -# "advantage" of utilizing some "secret" instruction. -# -# The code is interoperable with Thumb ISA and is rather compact, less -# than 1/2KB. Windows CE port would be trivial, as it's exclusively -# about decorations, ABI and instruction syntax are identical. - -$num="r0"; # starts as num argument, but holds &tp[num-1] -$ap="r1"; -$bp="r2"; $bi="r2"; $rp="r2"; -$np="r3"; -$tp="r4"; -$aj="r5"; -$nj="r6"; -$tj="r7"; -$n0="r8"; -########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer -$alo="r10"; # sl, gcc uses it to keep @GOT -$ahi="r11"; # fp -$nlo="r12"; # ip -########### # r13 is stack pointer -$nhi="r14"; # lr -########### # r15 is program counter - -#### argument block layout relative to &tp[num-1], a.k.a. $num -$_rp="$num,#12*4"; -# ap permanently resides in r1 -$_bp="$num,#13*4"; -# np permanently resides in r3 -$_n0="$num,#14*4"; -$_num="$num,#15*4"; $_bpend=$_num; - -$code=<<___; -.text - -.global bn_mul_mont -.type bn_mul_mont,%function - -.align 2 -bn_mul_mont: - stmdb sp!,{r0,r2} @ sp points at argument block - ldr $num,[sp,#3*4] @ load num - cmp $num,#2 - movlt r0,#0 - addlt sp,sp,#2*4 - blt .Labrt - - stmdb sp!,{r4-r12,lr} @ save 10 registers - - mov $num,$num,lsl#2 @ rescale $num for byte count - sub sp,sp,$num @ alloca(4*num) - sub sp,sp,#4 @ +extra dword - sub $num,$num,#4 @ "num=num-1" - add $tp,$bp,$num @ &bp[num-1] - - add $num,sp,$num @ $num to point at &tp[num-1] - ldr $n0,[$_n0] @ &n0 - ldr $bi,[$bp] @ bp[0] - ldr $aj,[$ap],#4 @ ap[0],ap++ - ldr $nj,[$np],#4 @ np[0],np++ - ldr $n0,[$n0] @ *n0 - str $tp,[$_bpend] @ save &bp[num] - - umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0] - str $n0,[$_n0] @ save n0 value - mul $n0,$alo,$n0 @ "tp[0]"*n0 - mov $nlo,#0 - umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]" - mov $tp,sp - -.L1st: - ldr $aj,[$ap],#4 @ ap[j],ap++ - mov $alo,$ahi - mov $ahi,#0 - umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0] - ldr $nj,[$np],#4 @ np[j],np++ - mov $nhi,#0 - umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 - adds $nlo,$nlo,$alo - str $nlo,[$tp],#4 @ tp[j-1]=,tp++ - adc $nlo,$nhi,#0 - cmp $tp,$num - bne .L1st - - adds $nlo,$nlo,$ahi - mov $nhi,#0 - adc $nhi,$nhi,#0 - ldr $tp,[$_bp] @ restore bp - str $nlo,[$num] @ tp[num-1]= - ldr $n0,[$_n0] @ restore n0 - str $nhi,[$num,#4] @ tp[num]= - -.Louter: - sub $tj,$num,sp @ "original" $num-1 value - sub $ap,$ap,$tj @ "rewind" ap to &ap[1] - sub $np,$np,$tj @ "rewind" np to &np[1] - ldr $bi,[$tp,#4]! @ *(++bp) - ldr $aj,[$ap,#-4] @ ap[0] - ldr $nj,[$np,#-4] @ np[0] - ldr $alo,[sp] @ tp[0] - ldr $tj,[sp,#4] @ tp[1] - - mov $ahi,#0 - umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0] - str $tp,[$_bp] @ save bp - mul $n0,$alo,$n0 - mov $nlo,#0 - umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]" - mov $tp,sp - -.Linner: - ldr $aj,[$ap],#4 @ ap[j],ap++ - adds $alo,$ahi,$tj @ +=tp[j] - mov $ahi,#0 - umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i] - ldr $nj,[$np],#4 @ np[j],np++ - mov $nhi,#0 - umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 - ldr $tj,[$tp,#8] @ tp[j+1] - adc $ahi,$ahi,#0 - adds $nlo,$nlo,$alo - str $nlo,[$tp],#4 @ tp[j-1]=,tp++ - adc $nlo,$nhi,#0 - cmp $tp,$num - bne .Linner - - adds $nlo,$nlo,$ahi - mov $nhi,#0 - adc $nhi,$nhi,#0 - adds $nlo,$nlo,$tj - adc $nhi,$nhi,#0 - ldr $tp,[$_bp] @ restore bp - ldr $tj,[$_bpend] @ restore &bp[num] - str $nlo,[$num] @ tp[num-1]= - ldr $n0,[$_n0] @ restore n0 - str $nhi,[$num,#4] @ tp[num]= - - cmp $tp,$tj - bne .Louter - - ldr $rp,[$_rp] @ pull rp - add $num,$num,#4 @ $num to point at &tp[num] - sub $aj,$num,sp @ "original" num value - mov $tp,sp @ "rewind" $tp - mov $ap,$tp @ "borrow" $ap - sub $np,$np,$aj @ "rewind" $np to &np[0] - - subs $tj,$tj,$tj @ "clear" carry flag -.Lsub: ldr $tj,[$tp],#4 - ldr $nj,[$np],#4 - sbcs $tj,$tj,$nj @ tp[j]-np[j] - str $tj,[$rp],#4 @ rp[j]= - teq $tp,$num @ preserve carry - bne .Lsub - sbcs $nhi,$nhi,#0 @ upmost carry - mov $tp,sp @ "rewind" $tp - sub $rp,$rp,$aj @ "rewind" $rp - - and $ap,$tp,$nhi - bic $np,$rp,$nhi - orr $ap,$ap,$np @ ap=borrow?tp:rp - -.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh - str sp,[$tp],#4 @ zap tp - str $tj,[$rp],#4 - cmp $tp,$num - bne .Lcopy - - add sp,$num,#4 @ skip over tp[num+1] - ldmia sp!,{r4-r12,lr} @ restore registers - add sp,sp,#2*4 @ skip over {r0,r2} - mov r0,#1 -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" -___ - -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/mips3-mont.pl b/crypto/openssl/crypto/bn/asm/mips3-mont.pl deleted file mode 100755 index 8f9156e..0000000 --- a/crypto/openssl/crypto/bn/asm/mips3-mont.pl +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# This module doesn't present direct interest for OpenSSL, because it -# doesn't provide better performance for longer keys. While 512-bit -# RSA private key operations are 40% faster, 1024-bit ones are hardly -# faster at all, while longer key operations are slower by up to 20%. -# It might be of interest to embedded system developers though, as -# it's smaller than 1KB, yet offers ~3x improvement over compiler -# generated code. -# -# The module targets N32 and N64 MIPS ABIs and currently is a bit -# IRIX-centric, i.e. is likely to require adaptation for other OSes. - -# int bn_mul_mont( -$rp="a0"; # BN_ULONG *rp, -$ap="a1"; # const BN_ULONG *ap, -$bp="a2"; # const BN_ULONG *bp, -$np="a3"; # const BN_ULONG *np, -$n0="a4"; # const BN_ULONG *n0, -$num="a5"; # int num); - -$lo0="a6"; -$hi0="a7"; -$lo1="v0"; -$hi1="v1"; -$aj="t0"; -$bi="t1"; -$nj="t2"; -$tp="t3"; -$alo="s0"; -$ahi="s1"; -$nlo="s2"; -$nhi="s3"; -$tj="s4"; -$i="s5"; -$j="s6"; -$fp="t8"; -$m1="t9"; - -$FRAME=8*(2+8); - -$code=<<___; -#include <asm.h> -#include <regdef.h> - -.text - -.set noat -.set reorder - -.align 5 -.globl bn_mul_mont -.ent bn_mul_mont -bn_mul_mont: - .set noreorder - PTR_SUB sp,64 - move $fp,sp - .frame $fp,64,ra - slt AT,$num,4 - li v0,0 - beqzl AT,.Lproceed - nop - jr ra - PTR_ADD sp,$fp,64 - .set reorder -.align 5 -.Lproceed: - ld $n0,0($n0) - ld $bi,0($bp) # bp[0] - ld $aj,0($ap) # ap[0] - ld $nj,0($np) # np[0] - PTR_SUB sp,16 # place for two extra words - sll $num,3 - li AT,-4096 - PTR_SUB sp,$num - and sp,AT - - sd s0,0($fp) - sd s1,8($fp) - sd s2,16($fp) - sd s3,24($fp) - sd s4,32($fp) - sd s5,40($fp) - sd s6,48($fp) - sd s7,56($fp) - - dmultu $aj,$bi - ld $alo,8($ap) - ld $nlo,8($np) - mflo $lo0 - mfhi $hi0 - dmultu $lo0,$n0 - mflo $m1 - - dmultu $alo,$bi - mflo $alo - mfhi $ahi - - dmultu $nj,$m1 - mflo $lo1 - mfhi $hi1 - dmultu $nlo,$m1 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - mflo $nlo - mfhi $nhi - - move $tp,sp - li $j,16 -.align 4 -.L1st: - .set noreorder - PTR_ADD $aj,$ap,$j - ld $aj,($aj) - PTR_ADD $nj,$np,$j - ld $nj,($nj) - - dmultu $aj,$bi - daddu $lo0,$alo,$hi0 - daddu $lo1,$nlo,$hi1 - sltu AT,$lo0,$hi0 - sltu s7,$lo1,$hi1 - daddu $hi0,$ahi,AT - daddu $hi1,$nhi,s7 - mflo $alo - mfhi $ahi - - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - dmultu $nj,$m1 - daddu $hi1,AT - addu $j,8 - sd $lo1,($tp) - sltu s7,$j,$num - mflo $nlo - mfhi $nhi - - bnez s7,.L1st - PTR_ADD $tp,8 - .set reorder - - daddu $lo0,$alo,$hi0 - sltu AT,$lo0,$hi0 - daddu $hi0,$ahi,AT - - daddu $lo1,$nlo,$hi1 - sltu s7,$lo1,$hi1 - daddu $hi1,$nhi,s7 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - - sd $lo1,($tp) - - daddu $hi1,$hi0 - sltu AT,$hi1,$hi0 - sd $hi1,8($tp) - sd AT,16($tp) - - li $i,8 -.align 4 -.Louter: - PTR_ADD $bi,$bp,$i - ld $bi,($bi) - ld $aj,($ap) - ld $alo,8($ap) - ld $tj,(sp) - - dmultu $aj,$bi - ld $nj,($np) - ld $nlo,8($np) - mflo $lo0 - mfhi $hi0 - daddu $lo0,$tj - dmultu $lo0,$n0 - sltu AT,$lo0,$tj - daddu $hi0,AT - mflo $m1 - - dmultu $alo,$bi - mflo $alo - mfhi $ahi - - dmultu $nj,$m1 - mflo $lo1 - mfhi $hi1 - - dmultu $nlo,$m1 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - mflo $nlo - mfhi $nhi - - move $tp,sp - li $j,16 - ld $tj,8($tp) -.align 4 -.Linner: - .set noreorder - PTR_ADD $aj,$ap,$j - ld $aj,($aj) - PTR_ADD $nj,$np,$j - ld $nj,($nj) - - dmultu $aj,$bi - daddu $lo0,$alo,$hi0 - daddu $lo1,$nlo,$hi1 - sltu AT,$lo0,$hi0 - sltu s7,$lo1,$hi1 - daddu $hi0,$ahi,AT - daddu $hi1,$nhi,s7 - mflo $alo - mfhi $ahi - - daddu $lo0,$tj - addu $j,8 - dmultu $nj,$m1 - sltu AT,$lo0,$tj - daddu $lo1,$lo0 - daddu $hi0,AT - sltu s7,$lo1,$lo0 - ld $tj,16($tp) - daddu $hi1,s7 - sltu AT,$j,$num - mflo $nlo - mfhi $nhi - sd $lo1,($tp) - bnez AT,.Linner - PTR_ADD $tp,8 - .set reorder - - daddu $lo0,$alo,$hi0 - sltu AT,$lo0,$hi0 - daddu $hi0,$ahi,AT - daddu $lo0,$tj - sltu s7,$lo0,$tj - daddu $hi0,s7 - - ld $tj,16($tp) - daddu $lo1,$nlo,$hi1 - sltu AT,$lo1,$hi1 - daddu $hi1,$nhi,AT - daddu $lo1,$lo0 - sltu s7,$lo1,$lo0 - daddu $hi1,s7 - sd $lo1,($tp) - - daddu $lo1,$hi1,$hi0 - sltu $hi1,$lo1,$hi0 - daddu $lo1,$tj - sltu AT,$lo1,$tj - daddu $hi1,AT - sd $lo1,8($tp) - sd $hi1,16($tp) - - addu $i,8 - sltu s7,$i,$num - bnez s7,.Louter - - .set noreorder - PTR_ADD $tj,sp,$num # &tp[num] - move $tp,sp - move $ap,sp - li $hi0,0 # clear borrow bit - -.align 4 -.Lsub: ld $lo0,($tp) - ld $lo1,($np) - PTR_ADD $tp,8 - PTR_ADD $np,8 - dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] - sgtu AT,$lo1,$lo0 - dsubu $lo0,$lo1,$hi0 - sgtu $hi0,$lo0,$lo1 - sd $lo0,($rp) - or $hi0,AT - sltu AT,$tp,$tj - bnez AT,.Lsub - PTR_ADD $rp,8 - - dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit - move $tp,sp - PTR_SUB $rp,$num # restore rp - not $hi1,$hi0 - - and $ap,$hi0,sp - and $bp,$hi1,$rp - or $ap,$ap,$bp # ap=borrow?tp:rp - -.align 4 -.Lcopy: ld $aj,($ap) - PTR_ADD $ap,8 - PTR_ADD $tp,8 - sd zero,-8($tp) - sltu AT,$tp,$tj - sd $aj,($rp) - bnez AT,.Lcopy - PTR_ADD $rp,8 - - ld s0,0($fp) - ld s1,8($fp) - ld s2,16($fp) - ld s3,24($fp) - ld s4,32($fp) - ld s5,40($fp) - ld s6,48($fp) - ld s7,56($fp) - li v0,1 - jr ra - PTR_ADD sp,$fp,64 - .set reorder -END(bn_mul_mont) -.rdata -.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>" -___ - -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/ppc-mont.pl b/crypto/openssl/crypto/bn/asm/ppc-mont.pl deleted file mode 100755 index 7849eae..0000000 --- a/crypto/openssl/crypto/bn/asm/ppc-mont.pl +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# April 2006 - -# "Teaser" Montgomery multiplication module for PowerPC. It's possible -# to gain a bit more by modulo-scheduling outer loop, then dedicated -# squaring procedure should give further 20% and code can be adapted -# for 32-bit application running on 64-bit CPU. As for the latter. -# It won't be able to achieve "native" 64-bit performance, because in -# 32-bit application context every addc instruction will have to be -# expanded as addc, twice right shift by 32 and finally adde, etc. -# So far RSA *sign* performance improvement over pre-bn_mul_mont asm -# for 64-bit application running on PPC970/G5 is: -# -# 512-bit +65% -# 1024-bit +35% -# 2048-bit +18% -# 4096-bit +4% - -$flavour = shift; - -if ($flavour =~ /32/) { - $BITS= 32; - $BNSZ= $BITS/8; - $SIZE_T=4; - $RZONE= 224; - $FRAME= $SIZE_T*16; - - $LD= "lwz"; # load - $LDU= "lwzu"; # load and update - $LDX= "lwzx"; # load indexed - $ST= "stw"; # store - $STU= "stwu"; # store and update - $STX= "stwx"; # store indexed - $STUX= "stwux"; # store indexed and update - $UMULL= "mullw"; # unsigned multiply low - $UMULH= "mulhwu"; # unsigned multiply high - $UCMP= "cmplw"; # unsigned compare - $SHRI= "srwi"; # unsigned shift right by immediate - $PUSH= $ST; - $POP= $LD; -} elsif ($flavour =~ /64/) { - $BITS= 64; - $BNSZ= $BITS/8; - $SIZE_T=8; - $RZONE= 288; - $FRAME= $SIZE_T*16; - - # same as above, but 64-bit mnemonics... - $LD= "ld"; # load - $LDU= "ldu"; # load and update - $LDX= "ldx"; # load indexed - $ST= "std"; # store - $STU= "stdu"; # store and update - $STX= "stdx"; # store indexed - $STUX= "stdux"; # store indexed and update - $UMULL= "mulld"; # unsigned multiply low - $UMULH= "mulhdu"; # unsigned multiply high - $UCMP= "cmpld"; # unsigned compare - $SHRI= "srdi"; # unsigned shift right by immediate - $PUSH= $ST; - $POP= $LD; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$sp="r1"; -$toc="r2"; -$rp="r3"; $ovf="r3"; -$ap="r4"; -$bp="r5"; -$np="r6"; -$n0="r7"; -$num="r8"; -$rp="r9"; # $rp is reassigned -$aj="r10"; -$nj="r11"; -$tj="r12"; -# non-volatile registers -$i="r14"; -$j="r15"; -$tp="r16"; -$m0="r17"; -$m1="r18"; -$lo0="r19"; -$hi0="r20"; -$lo1="r21"; -$hi1="r22"; -$alo="r23"; -$ahi="r24"; -$nlo="r25"; -# -$nhi="r0"; - -$code=<<___; -.machine "any" -.text - -.globl .bn_mul_mont -.align 4 -.bn_mul_mont: - cmpwi $num,4 - mr $rp,r3 ; $rp is reassigned - li r3,0 - bltlr - - slwi $num,$num,`log($BNSZ)/log(2)` - li $tj,-4096 - addi $ovf,$num,`$FRAME+$RZONE` - subf $ovf,$ovf,$sp ; $sp-$ovf - and $ovf,$ovf,$tj ; minimize TLB usage - subf $ovf,$sp,$ovf ; $ovf-$sp - srwi $num,$num,`log($BNSZ)/log(2)` - $STUX $sp,$sp,$ovf - - $PUSH r14,`4*$SIZE_T`($sp) - $PUSH r15,`5*$SIZE_T`($sp) - $PUSH r16,`6*$SIZE_T`($sp) - $PUSH r17,`7*$SIZE_T`($sp) - $PUSH r18,`8*$SIZE_T`($sp) - $PUSH r19,`9*$SIZE_T`($sp) - $PUSH r20,`10*$SIZE_T`($sp) - $PUSH r21,`11*$SIZE_T`($sp) - $PUSH r22,`12*$SIZE_T`($sp) - $PUSH r23,`13*$SIZE_T`($sp) - $PUSH r24,`14*$SIZE_T`($sp) - $PUSH r25,`15*$SIZE_T`($sp) - - $LD $n0,0($n0) ; pull n0[0] value - addi $num,$num,-2 ; adjust $num for counter register - - $LD $m0,0($bp) ; m0=bp[0] - $LD $aj,0($ap) ; ap[0] - addi $tp,$sp,$FRAME - $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0] - $UMULH $hi0,$aj,$m0 - - $LD $aj,$BNSZ($ap) ; ap[1] - $LD $nj,0($np) ; np[0] - - $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0 - - $UMULL $alo,$aj,$m0 ; ap[1]*bp[0] - $UMULH $ahi,$aj,$m0 - - $UMULL $lo1,$nj,$m1 ; np[0]*m1 - $UMULH $hi1,$nj,$m1 - $LD $nj,$BNSZ($np) ; np[1] - addc $lo1,$lo1,$lo0 - addze $hi1,$hi1 - - $UMULL $nlo,$nj,$m1 ; np[1]*m1 - $UMULH $nhi,$nj,$m1 - - mtctr $num - li $j,`2*$BNSZ` -.align 4 -L1st: - $LDX $aj,$ap,$j ; ap[j] - addc $lo0,$alo,$hi0 - $LDX $nj,$np,$j ; np[j] - addze $hi0,$ahi - $UMULL $alo,$aj,$m0 ; ap[j]*bp[0] - addc $lo1,$nlo,$hi1 - $UMULH $ahi,$aj,$m0 - addze $hi1,$nhi - $UMULL $nlo,$nj,$m1 ; np[j]*m1 - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0] - $UMULH $nhi,$nj,$m1 - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - - addi $j,$j,$BNSZ ; j++ - addi $tp,$tp,$BNSZ ; tp++ - bdnz- L1st -;L1st - addc $lo0,$alo,$hi0 - addze $hi0,$ahi - - addc $lo1,$nlo,$hi1 - addze $hi1,$nhi - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0] - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - - li $ovf,0 - addc $hi1,$hi1,$hi0 - addze $ovf,$ovf ; upmost overflow bit - $ST $hi1,$BNSZ($tp) - - li $i,$BNSZ -.align 4 -Louter: - $LDX $m0,$bp,$i ; m0=bp[i] - $LD $aj,0($ap) ; ap[0] - addi $tp,$sp,$FRAME - $LD $tj,$FRAME($sp) ; tp[0] - $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i] - $UMULH $hi0,$aj,$m0 - $LD $aj,$BNSZ($ap) ; ap[1] - $LD $nj,0($np) ; np[0] - addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0] - $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] - addze $hi0,$hi0 - $UMULL $m1,$lo0,$n0 ; tp[0]*n0 - $UMULH $ahi,$aj,$m0 - $UMULL $lo1,$nj,$m1 ; np[0]*m1 - $UMULH $hi1,$nj,$m1 - $LD $nj,$BNSZ($np) ; np[1] - addc $lo1,$lo1,$lo0 - $UMULL $nlo,$nj,$m1 ; np[1]*m1 - addze $hi1,$hi1 - $UMULH $nhi,$nj,$m1 - - mtctr $num - li $j,`2*$BNSZ` -.align 4 -Linner: - $LDX $aj,$ap,$j ; ap[j] - addc $lo0,$alo,$hi0 - $LD $tj,$BNSZ($tp) ; tp[j] - addze $hi0,$ahi - $LDX $nj,$np,$j ; np[j] - addc $lo1,$nlo,$hi1 - $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] - addze $hi1,$nhi - $UMULH $ahi,$aj,$m0 - addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j] - $UMULL $nlo,$nj,$m1 ; np[j]*m1 - addze $hi0,$hi0 - $UMULH $nhi,$nj,$m1 - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j] - addi $j,$j,$BNSZ ; j++ - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - addi $tp,$tp,$BNSZ ; tp++ - bdnz- Linner -;Linner - $LD $tj,$BNSZ($tp) ; tp[j] - addc $lo0,$alo,$hi0 - addze $hi0,$ahi - addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j] - addze $hi0,$hi0 - - addc $lo1,$nlo,$hi1 - addze $hi1,$nhi - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j] - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - - addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA] - li $ovf,0 - adde $hi1,$hi1,$hi0 - addze $ovf,$ovf - $ST $hi1,$BNSZ($tp) -; - slwi $tj,$num,`log($BNSZ)/log(2)` - $UCMP $i,$tj - addi $i,$i,$BNSZ - ble- Louter - - addi $num,$num,2 ; restore $num - subfc $j,$j,$j ; j=0 and "clear" XER[CA] - addi $tp,$sp,$FRAME - mtctr $num - -.align 4 -Lsub: $LDX $tj,$tp,$j - $LDX $nj,$np,$j - subfe $aj,$nj,$tj ; tp[j]-np[j] - $STX $aj,$rp,$j - addi $j,$j,$BNSZ - bdnz- Lsub - - li $j,0 - mtctr $num - subfe $ovf,$j,$ovf ; handle upmost overflow bit - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp - -.align 4 -Lcopy: ; copy or in-place refresh - $LDX $tj,$ap,$j - $STX $tj,$rp,$j - $STX $j,$tp,$j ; zap at once - addi $j,$j,$BNSZ - bdnz- Lcopy - - $POP r14,`4*$SIZE_T`($sp) - $POP r15,`5*$SIZE_T`($sp) - $POP r16,`6*$SIZE_T`($sp) - $POP r17,`7*$SIZE_T`($sp) - $POP r18,`8*$SIZE_T`($sp) - $POP r19,`9*$SIZE_T`($sp) - $POP r20,`10*$SIZE_T`($sp) - $POP r21,`11*$SIZE_T`($sp) - $POP r22,`12*$SIZE_T`($sp) - $POP r23,`13*$SIZE_T`($sp) - $POP r24,`14*$SIZE_T`($sp) - $POP r25,`15*$SIZE_T`($sp) - $POP $sp,0($sp) - li r3,1 - blr - .long 0 -.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/ppc64-mont.pl b/crypto/openssl/crypto/bn/asm/ppc64-mont.pl deleted file mode 100755 index 3449b35..0000000 --- a/crypto/openssl/crypto/bn/asm/ppc64-mont.pl +++ /dev/null @@ -1,918 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# December 2007 - -# The reason for undertaken effort is basically following. Even though -# Power 6 CPU operates at incredible 4.7GHz clock frequency, its PKI -# performance was observed to be less than impressive, essentially as -# fast as 1.8GHz PPC970, or 2.6 times(!) slower than one would hope. -# Well, it's not surprising that IBM had to make some sacrifices to -# boost the clock frequency that much, but no overall improvement? -# Having observed how much difference did switching to FPU make on -# UltraSPARC, playing same stunt on Power 6 appeared appropriate... -# Unfortunately the resulting performance improvement is not as -# impressive, ~30%, and in absolute terms is still very far from what -# one would expect from 4.7GHz CPU. There is a chance that I'm doing -# something wrong, but in the lack of assembler level micro-profiling -# data or at least decent platform guide I can't tell... Or better -# results might be achieved with VMX... Anyway, this module provides -# *worse* performance on other PowerPC implementations, ~40-15% slower -# on PPC970 depending on key length and ~40% slower on Power 5 for all -# key lengths. As it's obviously inappropriate as "best all-round" -# alternative, it has to be complemented with run-time CPU family -# detection. Oh! It should also be noted that unlike other PowerPC -# implementation IALU ppc-mont.pl module performs *suboptimaly* on -# >=1024-bit key lengths on Power 6. It should also be noted that -# *everything* said so far applies to 64-bit builds! As far as 32-bit -# application executed on 64-bit CPU goes, this module is likely to -# become preferred choice, because it's easy to adapt it for such -# case and *is* faster than 32-bit ppc-mont.pl on *all* processors. - -# February 2008 - -# Micro-profiling assisted optimization results in ~15% improvement -# over original ppc64-mont.pl version, or overall ~50% improvement -# over ppc.pl module on Power 6. If compared to ppc-mont.pl on same -# Power 6 CPU, this module is 5-150% faster depending on key length, -# [hereafter] more for longer keys. But if compared to ppc-mont.pl -# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive -# in absolute terms, but it's apparently the way Power 6 is... - -$flavour = shift; - -if ($flavour =~ /32/) { - $SIZE_T=4; - $RZONE= 224; - $FRAME= $SIZE_T*12+8*12; - $fname= "bn_mul_mont_ppc64"; - - $STUX= "stwux"; # store indexed and update - $PUSH= "stw"; - $POP= "lwz"; - die "not implemented yet"; -} elsif ($flavour =~ /64/) { - $SIZE_T=8; - $RZONE= 288; - $FRAME= $SIZE_T*12+8*12; - $fname= "bn_mul_mont"; - - # same as above, but 64-bit mnemonics... - $STUX= "stdux"; # store indexed and update - $PUSH= "std"; - $POP= "ld"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=($FRAME+63)&~63; -$TRANSFER=16*8; - -$carry="r0"; -$sp="r1"; -$toc="r2"; -$rp="r3"; $ovf="r3"; -$ap="r4"; -$bp="r5"; -$np="r6"; -$n0="r7"; -$num="r8"; -$rp="r9"; # $rp is reassigned -$tp="r10"; -$j="r11"; -$i="r12"; -# non-volatile registers -$nap_d="r14"; # interleaved ap and np in double format -$a0="r15"; # ap[0] -$t0="r16"; # temporary registers -$t1="r17"; -$t2="r18"; -$t3="r19"; -$t4="r20"; -$t5="r21"; -$t6="r22"; -$t7="r23"; - -# PPC offers enough register bank capacity to unroll inner loops twice -# -# ..A3A2A1A0 -# dcba -# ----------- -# A0a -# A0b -# A0c -# A0d -# A1a -# A1b -# A1c -# A1d -# A2a -# A2b -# A2c -# A2d -# A3a -# A3b -# A3c -# A3d -# ..a -# ..b -# -$ba="f0"; $bb="f1"; $bc="f2"; $bd="f3"; -$na="f4"; $nb="f5"; $nc="f6"; $nd="f7"; -$dota="f8"; $dotb="f9"; -$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13"; -$N0="f14"; $N1="f15"; $N2="f16"; $N3="f17"; -$T0a="f18"; $T0b="f19"; -$T1a="f20"; $T1b="f21"; -$T2a="f22"; $T2b="f23"; -$T3a="f24"; $T3b="f25"; - -# sp----------->+-------------------------------+ -# | saved sp | -# +-------------------------------+ -# | | -# +-------------------------------+ -# | 10 saved gpr, r14-r23 | -# . . -# . . -# +12*size_t +-------------------------------+ -# | 12 saved fpr, f14-f25 | -# . . -# . . -# +12*8 +-------------------------------+ -# | padding to 64 byte boundary | -# . . -# +X +-------------------------------+ -# | 16 gpr<->fpr transfer zone | -# . . -# . . -# +16*8 +-------------------------------+ -# | __int64 tmp[-1] | -# +-------------------------------+ -# | __int64 tmp[num] | -# . . -# . . -# . . -# +(num+1)*8 +-------------------------------+ -# | padding to 64 byte boundary | -# . . -# +X +-------------------------------+ -# | double nap_d[4*num] | -# . . -# . . -# . . -# +-------------------------------+ - -$code=<<___; -.machine "any" -.text - -.globl .$fname -.align 5 -.$fname: - cmpwi $num,4 - mr $rp,r3 ; $rp is reassigned - li r3,0 ; possible "not handled" return code - bltlr- - andi. r0,$num,1 ; $num has to be even - bnelr- - - slwi $num,$num,3 ; num*=8 - li $i,-4096 - slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num - add $tp,$tp,$num ; place for tp[num+1] - addi $tp,$tp,`$FRAME+$TRANSFER+8+64+$RZONE` - subf $tp,$tp,$sp ; $sp-$tp - and $tp,$tp,$i ; minimize TLB usage - subf $tp,$sp,$tp ; $tp-$sp - $STUX $sp,$sp,$tp ; alloca - - $PUSH r14,`2*$SIZE_T`($sp) - $PUSH r15,`3*$SIZE_T`($sp) - $PUSH r16,`4*$SIZE_T`($sp) - $PUSH r17,`5*$SIZE_T`($sp) - $PUSH r18,`6*$SIZE_T`($sp) - $PUSH r19,`7*$SIZE_T`($sp) - $PUSH r20,`8*$SIZE_T`($sp) - $PUSH r21,`9*$SIZE_T`($sp) - $PUSH r22,`10*$SIZE_T`($sp) - $PUSH r23,`11*$SIZE_T`($sp) - stfd f14,`12*$SIZE_T+0`($sp) - stfd f15,`12*$SIZE_T+8`($sp) - stfd f16,`12*$SIZE_T+16`($sp) - stfd f17,`12*$SIZE_T+24`($sp) - stfd f18,`12*$SIZE_T+32`($sp) - stfd f19,`12*$SIZE_T+40`($sp) - stfd f20,`12*$SIZE_T+48`($sp) - stfd f21,`12*$SIZE_T+56`($sp) - stfd f22,`12*$SIZE_T+64`($sp) - stfd f23,`12*$SIZE_T+72`($sp) - stfd f24,`12*$SIZE_T+80`($sp) - stfd f25,`12*$SIZE_T+88`($sp) - - ld $a0,0($ap) ; pull ap[0] value - ld $n0,0($n0) ; pull n0[0] value - ld $t3,0($bp) ; bp[0] - - addi $tp,$sp,`$FRAME+$TRANSFER+8+64` - li $i,-64 - add $nap_d,$tp,$num - and $nap_d,$nap_d,$i ; align to 64 bytes - - mulld $t7,$a0,$t3 ; ap[0]*bp[0] - ; nap_d is off by 1, because it's used with stfdu/lfdu - addi $nap_d,$nap_d,-8 - srwi $j,$num,`3+1` ; counter register, num/2 - mulld $t7,$t7,$n0 ; tp[0]*n0 - addi $j,$j,-1 - addi $tp,$sp,`$FRAME+$TRANSFER-8` - li $carry,0 - mtctr $j - - ; transfer bp[0] to FPU as 4x16-bit values - extrdi $t0,$t3,16,48 - extrdi $t1,$t3,16,32 - extrdi $t2,$t3,16,16 - extrdi $t3,$t3,16,0 - std $t0,`$FRAME+0`($sp) - std $t1,`$FRAME+8`($sp) - std $t2,`$FRAME+16`($sp) - std $t3,`$FRAME+24`($sp) - ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values - extrdi $t4,$t7,16,48 - extrdi $t5,$t7,16,32 - extrdi $t6,$t7,16,16 - extrdi $t7,$t7,16,0 - std $t4,`$FRAME+32`($sp) - std $t5,`$FRAME+40`($sp) - std $t6,`$FRAME+48`($sp) - std $t7,`$FRAME+56`($sp) - lwz $t0,4($ap) ; load a[j] as 32-bit word pair - lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair - lwz $t3,8($ap) - lwz $t4,4($np) ; load n[j] as 32-bit word pair - lwz $t5,0($np) - lwz $t6,12($np) ; load n[j+1] as 32-bit word pair - lwz $t7,8($np) - lfd $ba,`$FRAME+0`($sp) - lfd $bb,`$FRAME+8`($sp) - lfd $bc,`$FRAME+16`($sp) - lfd $bd,`$FRAME+24`($sp) - lfd $na,`$FRAME+32`($sp) - lfd $nb,`$FRAME+40`($sp) - lfd $nc,`$FRAME+48`($sp) - lfd $nd,`$FRAME+56`($sp) - std $t0,`$FRAME+64`($sp) - std $t1,`$FRAME+72`($sp) - std $t2,`$FRAME+80`($sp) - std $t3,`$FRAME+88`($sp) - std $t4,`$FRAME+96`($sp) - std $t5,`$FRAME+104`($sp) - std $t6,`$FRAME+112`($sp) - std $t7,`$FRAME+120`($sp) - fcfid $ba,$ba - fcfid $bb,$bb - fcfid $bc,$bc - fcfid $bd,$bd - fcfid $na,$na - fcfid $nb,$nb - fcfid $nc,$nc - fcfid $nd,$nd - - lfd $A0,`$FRAME+64`($sp) - lfd $A1,`$FRAME+72`($sp) - lfd $A2,`$FRAME+80`($sp) - lfd $A3,`$FRAME+88`($sp) - lfd $N0,`$FRAME+96`($sp) - lfd $N1,`$FRAME+104`($sp) - lfd $N2,`$FRAME+112`($sp) - lfd $N3,`$FRAME+120`($sp) - fcfid $A0,$A0 - fcfid $A1,$A1 - fcfid $A2,$A2 - fcfid $A3,$A3 - fcfid $N0,$N0 - fcfid $N1,$N1 - fcfid $N2,$N2 - fcfid $N3,$N3 - addi $ap,$ap,16 - addi $np,$np,16 - - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - stfd $A0,8($nap_d) ; save a[j] in double format - stfd $A1,16($nap_d) - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - stfd $A2,24($nap_d) ; save a[j+1] in double format - stfd $A3,32($nap_d) - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - stfd $N0,40($nap_d) ; save n[j] in double format - stfd $N1,48($nap_d) - fmul $T0a,$A0,$ba - fmul $T0b,$A0,$bb - stfd $N2,56($nap_d) ; save n[j+1] in double format - stfdu $N3,64($nap_d) - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - - fctid $T0a,$T0a - fctid $T0b,$T0b - fctid $T1a,$T1a - fctid $T1b,$T1b - fctid $T2a,$T2a - fctid $T2b,$T2b - fctid $T3a,$T3a - fctid $T3b,$T3b - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - -.align 5 -L1st: - lwz $t0,4($ap) ; load a[j] as 32-bit word pair - lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair - lwz $t3,8($ap) - lwz $t4,4($np) ; load n[j] as 32-bit word pair - lwz $t5,0($np) - lwz $t6,12($np) ; load n[j+1] as 32-bit word pair - lwz $t7,8($np) - std $t0,`$FRAME+64`($sp) - std $t1,`$FRAME+72`($sp) - std $t2,`$FRAME+80`($sp) - std $t3,`$FRAME+88`($sp) - std $t4,`$FRAME+96`($sp) - std $t5,`$FRAME+104`($sp) - std $t6,`$FRAME+112`($sp) - std $t7,`$FRAME+120`($sp) - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - lfd $A0,`$FRAME+64`($sp) - lfd $A1,`$FRAME+72`($sp) - lfd $A2,`$FRAME+80`($sp) - lfd $A3,`$FRAME+88`($sp) - lfd $N0,`$FRAME+96`($sp) - lfd $N1,`$FRAME+104`($sp) - lfd $N2,`$FRAME+112`($sp) - lfd $N3,`$FRAME+120`($sp) - fcfid $A0,$A0 - fcfid $A1,$A1 - fcfid $A2,$A2 - fcfid $A3,$A3 - fcfid $N0,$N0 - fcfid $N1,$N1 - fcfid $N2,$N2 - fcfid $N3,$N3 - addi $ap,$ap,16 - addi $np,$np,16 - - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - stfd $A0,8($nap_d) ; save a[j] in double format - stfd $A1,16($nap_d) - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - fmadd $T0a,$A0,$ba,$dota - fmadd $T0b,$A0,$bb,$dotb - stfd $A2,24($nap_d) ; save a[j+1] in double format - stfd $A3,32($nap_d) - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - stfd $N0,40($nap_d) ; save n[j] in double format - stfd $N1,48($nap_d) - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - add $t0,$t0,$carry ; can not overflow - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - stfd $N2,56($nap_d) ; save n[j+1] in double format - stfdu $N3,64($nap_d) - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - insrdi $t0,$t1,16,32 - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - add $t2,$t2,$carry - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - srdi $carry,$t2,16 - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - insrdi $t0,$t2,16,16 - add $t3,$t3,$carry - srdi $carry,$t3,16 - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - insrdi $t0,$t3,16,0 ; 0..63 bits - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - add $t4,$t4,$carry - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - srdi $carry,$t4,16 - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - add $t5,$t5,$carry - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - - fctid $T0a,$T0a - fctid $T0b,$T0b - add $t6,$t6,$carry - fctid $T1a,$T1a - fctid $T1b,$T1b - srdi $carry,$t6,16 - fctid $T2a,$T2a - fctid $T2b,$T2b - insrdi $t4,$t6,16,16 - fctid $T3a,$T3a - fctid $T3b,$T3b - add $t7,$t7,$carry - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - std $t0,8($tp) ; tp[j-1] - stdu $t4,16($tp) ; tp[j] - bdnz- L1st - - fctid $dota,$dota - fctid $dotb,$dotb - - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - stfd $dota,`$FRAME+64`($sp) - stfd $dotb,`$FRAME+72`($sp) - - add $t0,$t0,$carry ; can not overflow - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - insrdi $t0,$t1,16,32 - add $t2,$t2,$carry - srdi $carry,$t2,16 - insrdi $t0,$t2,16,16 - add $t3,$t3,$carry - srdi $carry,$t3,16 - insrdi $t0,$t3,16,0 ; 0..63 bits - add $t4,$t4,$carry - srdi $carry,$t4,16 - add $t5,$t5,$carry - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - add $t6,$t6,$carry - srdi $carry,$t6,16 - insrdi $t4,$t6,16,16 - add $t7,$t7,$carry - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - ld $t6,`$FRAME+64`($sp) - ld $t7,`$FRAME+72`($sp) - - std $t0,8($tp) ; tp[j-1] - stdu $t4,16($tp) ; tp[j] - - add $t6,$t6,$carry ; can not overflow - srdi $carry,$t6,16 - add $t7,$t7,$carry - insrdi $t6,$t7,48,0 - srdi $ovf,$t7,48 - std $t6,8($tp) ; tp[num-1] - - slwi $t7,$num,2 - subf $nap_d,$t7,$nap_d ; rewind pointer - - li $i,8 ; i=1 -.align 5 -Louter: - ldx $t3,$bp,$i ; bp[i] - ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] - mulld $t7,$a0,$t3 ; ap[0]*bp[i] - - addi $tp,$sp,`$FRAME+$TRANSFER` - add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] - li $carry,0 - mulld $t7,$t7,$n0 ; tp[0]*n0 - mtctr $j - - ; transfer bp[i] to FPU as 4x16-bit values - extrdi $t0,$t3,16,48 - extrdi $t1,$t3,16,32 - extrdi $t2,$t3,16,16 - extrdi $t3,$t3,16,0 - std $t0,`$FRAME+0`($sp) - std $t1,`$FRAME+8`($sp) - std $t2,`$FRAME+16`($sp) - std $t3,`$FRAME+24`($sp) - ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values - extrdi $t4,$t7,16,48 - extrdi $t5,$t7,16,32 - extrdi $t6,$t7,16,16 - extrdi $t7,$t7,16,0 - std $t4,`$FRAME+32`($sp) - std $t5,`$FRAME+40`($sp) - std $t6,`$FRAME+48`($sp) - std $t7,`$FRAME+56`($sp) - - lfd $A0,8($nap_d) ; load a[j] in double format - lfd $A1,16($nap_d) - lfd $A2,24($nap_d) ; load a[j+1] in double format - lfd $A3,32($nap_d) - lfd $N0,40($nap_d) ; load n[j] in double format - lfd $N1,48($nap_d) - lfd $N2,56($nap_d) ; load n[j+1] in double format - lfdu $N3,64($nap_d) - - lfd $ba,`$FRAME+0`($sp) - lfd $bb,`$FRAME+8`($sp) - lfd $bc,`$FRAME+16`($sp) - lfd $bd,`$FRAME+24`($sp) - lfd $na,`$FRAME+32`($sp) - lfd $nb,`$FRAME+40`($sp) - lfd $nc,`$FRAME+48`($sp) - lfd $nd,`$FRAME+56`($sp) - - fcfid $ba,$ba - fcfid $bb,$bb - fcfid $bc,$bc - fcfid $bd,$bd - fcfid $na,$na - fcfid $nb,$nb - fcfid $nc,$nc - fcfid $nd,$nd - - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - fmul $T0a,$A0,$ba - fmul $T0b,$A0,$bb - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - lfd $A0,8($nap_d) ; load a[j] in double format - lfd $A1,16($nap_d) - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - lfd $A2,24($nap_d) ; load a[j+1] in double format - lfd $A3,32($nap_d) - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - - fctid $T0a,$T0a - fctid $T0b,$T0b - fctid $T1a,$T1a - fctid $T1b,$T1b - fctid $T2a,$T2a - fctid $T2b,$T2b - fctid $T3a,$T3a - fctid $T3b,$T3b - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - -.align 5 -Linner: - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - lfd $N0,40($nap_d) ; load n[j] in double format - lfd $N1,48($nap_d) - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - fmadd $T0a,$A0,$ba,$dota - fmadd $T0b,$A0,$bb,$dotb - lfd $N2,56($nap_d) ; load n[j+1] in double format - lfdu $N3,64($nap_d) - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - lfd $A0,8($nap_d) ; load a[j] in double format - lfd $A1,16($nap_d) - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - lfd $A2,24($nap_d) ; load a[j+1] in double format - lfd $A3,32($nap_d) - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - add $t0,$t0,$carry ; can not overflow - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - insrdi $t0,$t1,16,32 - ld $t1,8($tp) ; tp[j] - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - add $t2,$t2,$carry - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - srdi $carry,$t2,16 - insrdi $t0,$t2,16,16 - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - add $t3,$t3,$carry - ldu $t2,16($tp) ; tp[j+1] - srdi $carry,$t3,16 - insrdi $t0,$t3,16,0 ; 0..63 bits - add $t4,$t4,$carry - - fctid $T0a,$T0a - fctid $T0b,$T0b - srdi $carry,$t4,16 - fctid $T1a,$T1a - fctid $T1b,$T1b - add $t5,$t5,$carry - fctid $T2a,$T2a - fctid $T2b,$T2b - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - fctid $T3a,$T3a - fctid $T3b,$T3b - add $t6,$t6,$carry - srdi $carry,$t6,16 - insrdi $t4,$t6,16,16 - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - add $t7,$t7,$carry - addc $t3,$t0,$t1 - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - adde $t5,$t4,$t2 - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - addze $carry,$carry - std $t3,-16($tp) ; tp[j-1] - std $t5,-8($tp) ; tp[j] - bdnz- Linner - - fctid $dota,$dota - fctid $dotb,$dotb - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - stfd $dota,`$FRAME+64`($sp) - stfd $dotb,`$FRAME+72`($sp) - - add $t0,$t0,$carry ; can not overflow - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - insrdi $t0,$t1,16,32 - add $t2,$t2,$carry - ld $t1,8($tp) ; tp[j] - srdi $carry,$t2,16 - insrdi $t0,$t2,16,16 - add $t3,$t3,$carry - ldu $t2,16($tp) ; tp[j+1] - srdi $carry,$t3,16 - insrdi $t0,$t3,16,0 ; 0..63 bits - add $t4,$t4,$carry - srdi $carry,$t4,16 - add $t5,$t5,$carry - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - add $t6,$t6,$carry - srdi $carry,$t6,16 - insrdi $t4,$t6,16,16 - add $t7,$t7,$carry - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - ld $t6,`$FRAME+64`($sp) - ld $t7,`$FRAME+72`($sp) - - addc $t3,$t0,$t1 - adde $t5,$t4,$t2 - addze $carry,$carry - - std $t3,-16($tp) ; tp[j-1] - std $t5,-8($tp) ; tp[j] - - add $carry,$carry,$ovf ; comsume upmost overflow - add $t6,$t6,$carry ; can not overflow - srdi $carry,$t6,16 - add $t7,$t7,$carry - insrdi $t6,$t7,48,0 - srdi $ovf,$t7,48 - std $t6,0($tp) ; tp[num-1] - - slwi $t7,$num,2 - addi $i,$i,8 - subf $nap_d,$t7,$nap_d ; rewind pointer - cmpw $i,$num - blt- Louter - - subf $np,$num,$np ; rewind np - addi $j,$j,1 ; restore counter - subfc $i,$i,$i ; j=0 and "clear" XER[CA] - addi $tp,$sp,`$FRAME+$TRANSFER+8` - addi $t4,$sp,`$FRAME+$TRANSFER+16` - addi $t5,$np,8 - addi $t6,$rp,8 - mtctr $j - -.align 4 -Lsub: ldx $t0,$tp,$i - ldx $t1,$np,$i - ldx $t2,$t4,$i - ldx $t3,$t5,$i - subfe $t0,$t1,$t0 ; tp[j]-np[j] - subfe $t2,$t3,$t2 ; tp[j+1]-np[j+1] - stdx $t0,$rp,$i - stdx $t2,$t6,$i - addi $i,$i,16 - bdnz- Lsub - - li $i,0 - subfe $ovf,$i,$ovf ; handle upmost overflow bit - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp - addi $t7,$ap,8 - mtctr $j - -.align 4 -Lcopy: ; copy or in-place refresh - ldx $t0,$ap,$i - ldx $t1,$t7,$i - std $i,8($nap_d) ; zap nap_d - std $i,16($nap_d) - std $i,24($nap_d) - std $i,32($nap_d) - std $i,40($nap_d) - std $i,48($nap_d) - std $i,56($nap_d) - stdu $i,64($nap_d) - stdx $t0,$rp,$i - stdx $t1,$t6,$i - stdx $i,$tp,$i ; zap tp at once - stdx $i,$t4,$i - addi $i,$i,16 - bdnz- Lcopy - - $POP r14,`2*$SIZE_T`($sp) - $POP r15,`3*$SIZE_T`($sp) - $POP r16,`4*$SIZE_T`($sp) - $POP r17,`5*$SIZE_T`($sp) - $POP r18,`6*$SIZE_T`($sp) - $POP r19,`7*$SIZE_T`($sp) - $POP r20,`8*$SIZE_T`($sp) - $POP r21,`9*$SIZE_T`($sp) - $POP r22,`10*$SIZE_T`($sp) - $POP r23,`11*$SIZE_T`($sp) - lfd f14,`12*$SIZE_T+0`($sp) - lfd f15,`12*$SIZE_T+8`($sp) - lfd f16,`12*$SIZE_T+16`($sp) - lfd f17,`12*$SIZE_T+24`($sp) - lfd f18,`12*$SIZE_T+32`($sp) - lfd f19,`12*$SIZE_T+40`($sp) - lfd f20,`12*$SIZE_T+48`($sp) - lfd f21,`12*$SIZE_T+56`($sp) - lfd f22,`12*$SIZE_T+64`($sp) - lfd f23,`12*$SIZE_T+72`($sp) - lfd f24,`12*$SIZE_T+80`($sp) - lfd f25,`12*$SIZE_T+88`($sp) - $POP $sp,0($sp) - li r3,1 ; signal "handled" - blr - .long 0 -.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>" -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/s390x-mont.pl b/crypto/openssl/crypto/bn/asm/s390x-mont.pl deleted file mode 100755 index d232510..0000000 --- a/crypto/openssl/crypto/bn/asm/s390x-mont.pl +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# April 2007. -# -# Performance improvement over vanilla C code varies from 85% to 45% -# depending on key length and benchmark. Unfortunately in this context -# these are not very impressive results [for code that utilizes "wide" -# 64x64=128-bit multiplication, which is not commonly available to C -# programmers], at least hand-coded bn_asm.c replacement is known to -# provide 30-40% better results for longest keys. Well, on a second -# thought it's not very surprising, because z-CPUs are single-issue -# and _strictly_ in-order execution, while bn_mul_mont is more or less -# dependent on CPU ability to pipe-line instructions and have several -# of them "in-flight" at the same time. I mean while other methods, -# for example Karatsuba, aim to minimize amount of multiplications at -# the cost of other operations increase, bn_mul_mont aim to neatly -# "overlap" multiplications and the other operations [and on most -# platforms even minimize the amount of the other operations, in -# particular references to memory]. But it's possible to improve this -# module performance by implementing dedicated squaring code-path and -# possibly by unrolling loops... - -# January 2009. -# -# Reschedule to minimize/avoid Address Generation Interlock hazard, -# make inner loops counter-based. - -$mn0="%r0"; -$num="%r1"; - -# int bn_mul_mont( -$rp="%r2"; # BN_ULONG *rp, -$ap="%r3"; # const BN_ULONG *ap, -$bp="%r4"; # const BN_ULONG *bp, -$np="%r5"; # const BN_ULONG *np, -$n0="%r6"; # const BN_ULONG *n0, -#$num="160(%r15)" # int num); - -$bi="%r2"; # zaps rp -$j="%r7"; - -$ahi="%r8"; -$alo="%r9"; -$nhi="%r10"; -$nlo="%r11"; -$AHI="%r12"; -$NHI="%r13"; -$count="%r14"; -$sp="%r15"; - -$code.=<<___; -.text -.globl bn_mul_mont -.type bn_mul_mont,\@function -bn_mul_mont: - lgf $num,164($sp) # pull $num - sla $num,3 # $num to enumerate bytes - la $bp,0($num,$bp) - - stg %r2,16($sp) - - cghi $num,16 # - lghi %r2,0 # - blr %r14 # if($num<16) return 0; - cghi $num,128 # - bhr %r14 # if($num>128) return 0; - - stmg %r3,%r15,24($sp) - - lghi $rp,-160-8 # leave room for carry bit - lcgr $j,$num # -$num - lgr %r0,$sp - la $rp,0($rp,$sp) - la $sp,0($j,$rp) # alloca - stg %r0,0($sp) # back chain - - sra $num,3 # restore $num - la $bp,0($j,$bp) # restore $bp - ahi $num,-1 # adjust $num for inner loop - lg $n0,0($n0) # pull n0 - - lg $bi,0($bp) - lg $alo,0($ap) - mlgr $ahi,$bi # ap[0]*bp[0] - lgr $AHI,$ahi - - lgr $mn0,$alo # "tp[0]"*n0 - msgr $mn0,$n0 - - lg $nlo,0($np) # - mlgr $nhi,$mn0 # np[0]*m1 - algr $nlo,$alo # +="tp[0]" - lghi $NHI,0 - alcgr $NHI,$nhi - - la $j,8(%r0) # j=1 - lr $count,$num - -.align 16 -.L1st: - lg $alo,0($j,$ap) - mlgr $ahi,$bi # ap[j]*bp[0] - algr $alo,$AHI - lghi $AHI,0 - alcgr $AHI,$ahi - - lg $nlo,0($j,$np) - mlgr $nhi,$mn0 # np[j]*m1 - algr $nlo,$NHI - lghi $NHI,0 - alcgr $nhi,$NHI # +="tp[j]" - algr $nlo,$alo - alcgr $NHI,$nhi - - stg $nlo,160-8($j,$sp) # tp[j-1]= - la $j,8($j) # j++ - brct $count,.L1st - - algr $NHI,$AHI - lghi $AHI,0 - alcgr $AHI,$AHI # upmost overflow bit - stg $NHI,160-8($j,$sp) - stg $AHI,160($j,$sp) - la $bp,8($bp) # bp++ - -.Louter: - lg $bi,0($bp) # bp[i] - lg $alo,0($ap) - mlgr $ahi,$bi # ap[0]*bp[i] - alg $alo,160($sp) # +=tp[0] - lghi $AHI,0 - alcgr $AHI,$ahi - - lgr $mn0,$alo - msgr $mn0,$n0 # tp[0]*n0 - - lg $nlo,0($np) # np[0] - mlgr $nhi,$mn0 # np[0]*m1 - algr $nlo,$alo # +="tp[0]" - lghi $NHI,0 - alcgr $NHI,$nhi - - la $j,8(%r0) # j=1 - lr $count,$num - -.align 16 -.Linner: - lg $alo,0($j,$ap) - mlgr $ahi,$bi # ap[j]*bp[i] - algr $alo,$AHI - lghi $AHI,0 - alcgr $ahi,$AHI - alg $alo,160($j,$sp)# +=tp[j] - alcgr $AHI,$ahi - - lg $nlo,0($j,$np) - mlgr $nhi,$mn0 # np[j]*m1 - algr $nlo,$NHI - lghi $NHI,0 - alcgr $nhi,$NHI - algr $nlo,$alo # +="tp[j]" - alcgr $NHI,$nhi - - stg $nlo,160-8($j,$sp) # tp[j-1]= - la $j,8($j) # j++ - brct $count,.Linner - - algr $NHI,$AHI - lghi $AHI,0 - alcgr $AHI,$AHI - alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit - lghi $ahi,0 - alcgr $AHI,$ahi # new upmost overflow bit - stg $NHI,160-8($j,$sp) - stg $AHI,160($j,$sp) - - la $bp,8($bp) # bp++ - clg $bp,160+8+32($j,$sp) # compare to &bp[num] - jne .Louter - - lg $rp,160+8+16($j,$sp) # reincarnate rp - la $ap,160($sp) - ahi $num,1 # restore $num, incidentally clears "borrow" - - la $j,0(%r0) - lr $count,$num -.Lsub: lg $alo,0($j,$ap) - slbg $alo,0($j,$np) - stg $alo,0($j,$rp) - la $j,8($j) - brct $count,.Lsub - lghi $ahi,0 - slbgr $AHI,$ahi # handle upmost carry - - ngr $ap,$AHI - lghi $np,-1 - xgr $np,$AHI - ngr $np,$rp - ogr $ap,$np # ap=borrow?tp:rp - - la $j,0(%r0) - lgr $count,$num -.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh - stg $j,160($j,$sp) # zap tp - stg $alo,0($j,$rp) - la $j,8($j) - brct $count,.Lcopy - - la %r1,160+8+48($j,$sp) - lmg %r6,%r15,0(%r1) - lghi %r2,1 # signal "processed" - br %r14 -.size bn_mul_mont,.-bn_mul_mont -.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>" -___ - -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/s390x.S b/crypto/openssl/crypto/bn/asm/s390x.S deleted file mode 100755 index 8f45f5d..0000000 --- a/crypto/openssl/crypto/bn/asm/s390x.S +++ /dev/null @@ -1,678 +0,0 @@ -.ident "s390x.S, version 1.0" -// ==================================================================== -// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -// project. -// -// Rights for redistribution and usage in source and binary forms are -// granted according to the OpenSSL license. Warranty of any kind is -// disclaimed. -// ==================================================================== - -.text - -#define zero %r0 - -// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); -.globl bn_mul_add_words -.type bn_mul_add_words,@function -.align 4 -bn_mul_add_words: - lghi zero,0 // zero = 0 - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0; - ltgfr %r4,%r4 - bler %r14 // if (len<=0) return 0; - - stmg %r6,%r10,48(%r15) - lghi %r8,0 // carry = 0 - srag %r10,%r4,2 // cnt=len/4 - jz .Loop1_madd - -.Loop4_madd: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - algr %r7,%r8 // +=carry - alcgr %r6,zero - alg %r7,0(%r2,%r1) // +=rp[i] - alcgr %r6,zero - stg %r7,0(%r2,%r1) // rp[i]= - - lg %r9,8(%r2,%r3) - mlgr %r8,%r5 - algr %r9,%r6 - alcgr %r8,zero - alg %r9,8(%r2,%r1) - alcgr %r8,zero - stg %r9,8(%r2,%r1) - - lg %r7,16(%r2,%r3) - mlgr %r6,%r5 - algr %r7,%r8 - alcgr %r6,zero - alg %r7,16(%r2,%r1) - alcgr %r6,zero - stg %r7,16(%r2,%r1) - - lg %r9,24(%r2,%r3) - mlgr %r8,%r5 - algr %r9,%r6 - alcgr %r8,zero - alg %r9,24(%r2,%r1) - alcgr %r8,zero - stg %r9,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r10,.Loop4_madd - - lghi %r10,3 - nr %r4,%r10 // cnt=len%4 - jz .Lend_madd - -.Loop1_madd: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - algr %r7,%r8 // +=carry - alcgr %r6,zero - alg %r7,0(%r2,%r1) // +=rp[i] - alcgr %r6,zero - stg %r7,0(%r2,%r1) // rp[i]= - - lgr %r8,%r6 - la %r2,8(%r2) // i++ - brct %r4,.Loop1_madd - -.Lend_madd: - lgr %r2,%r8 - lmg %r6,%r10,48(%r15) - br %r14 -.size bn_mul_add_words,.-bn_mul_add_words - -// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); -.globl bn_mul_words -.type bn_mul_words,@function -.align 4 -bn_mul_words: - lghi zero,0 // zero = 0 - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0; - ltgfr %r4,%r4 - bler %r14 // if (len<=0) return 0; - - stmg %r6,%r10,48(%r15) - lghi %r8,0 // carry = 0 - srag %r10,%r4,2 // cnt=len/4 - jz .Loop1_mul - -.Loop4_mul: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - algr %r7,%r8 // +=carry - alcgr %r6,zero - stg %r7,0(%r2,%r1) // rp[i]= - - lg %r9,8(%r2,%r3) - mlgr %r8,%r5 - algr %r9,%r6 - alcgr %r8,zero - stg %r9,8(%r2,%r1) - - lg %r7,16(%r2,%r3) - mlgr %r6,%r5 - algr %r7,%r8 - alcgr %r6,zero - stg %r7,16(%r2,%r1) - - lg %r9,24(%r2,%r3) - mlgr %r8,%r5 - algr %r9,%r6 - alcgr %r8,zero - stg %r9,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r10,.Loop4_mul - - lghi %r10,3 - nr %r4,%r10 // cnt=len%4 - jz .Lend_mul - -.Loop1_mul: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - algr %r7,%r8 // +=carry - alcgr %r6,zero - stg %r7,0(%r2,%r1) // rp[i]= - - lgr %r8,%r6 - la %r2,8(%r2) // i++ - brct %r4,.Loop1_mul - -.Lend_mul: - lgr %r2,%r8 - lmg %r6,%r10,48(%r15) - br %r14 -.size bn_mul_words,.-bn_mul_words - -// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4) -.globl bn_sqr_words -.type bn_sqr_words,@function -.align 4 -bn_sqr_words: - ltgfr %r4,%r4 - bler %r14 - - stmg %r6,%r7,48(%r15) - srag %r1,%r4,2 // cnt=len/4 - jz .Loop1_sqr - -.Loop4_sqr: - lg %r7,0(%r3) - mlgr %r6,%r7 - stg %r7,0(%r2) - stg %r6,8(%r2) - - lg %r7,8(%r3) - mlgr %r6,%r7 - stg %r7,16(%r2) - stg %r6,24(%r2) - - lg %r7,16(%r3) - mlgr %r6,%r7 - stg %r7,32(%r2) - stg %r6,40(%r2) - - lg %r7,24(%r3) - mlgr %r6,%r7 - stg %r7,48(%r2) - stg %r6,56(%r2) - - la %r3,32(%r3) - la %r2,64(%r2) - brct %r1,.Loop4_sqr - - lghi %r1,3 - nr %r4,%r1 // cnt=len%4 - jz .Lend_sqr - -.Loop1_sqr: - lg %r7,0(%r3) - mlgr %r6,%r7 - stg %r7,0(%r2) - stg %r6,8(%r2) - - la %r3,8(%r3) - la %r2,16(%r2) - brct %r4,.Loop1_sqr - -.Lend_sqr: - lmg %r6,%r7,48(%r15) - br %r14 -.size bn_sqr_words,.-bn_sqr_words - -// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d); -.globl bn_div_words -.type bn_div_words,@function -.align 4 -bn_div_words: - dlgr %r2,%r4 - lgr %r2,%r3 - br %r14 -.size bn_div_words,.-bn_div_words - -// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5); -.globl bn_add_words -.type bn_add_words,@function -.align 4 -bn_add_words: - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0 - ltgfr %r5,%r5 - bler %r14 // if (len<=0) return 0; - - stg %r6,48(%r15) - lghi %r6,3 - nr %r6,%r5 // len%4 - sra %r5,2 // len/4, use sra because it sets condition code - jz .Loop1_add // carry is incidentally cleared if branch taken - algr %r2,%r2 // clear carry - -.Loop4_add: - lg %r0,0(%r2,%r3) - alcg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - lg %r0,8(%r2,%r3) - alcg %r0,8(%r2,%r4) - stg %r0,8(%r2,%r1) - lg %r0,16(%r2,%r3) - alcg %r0,16(%r2,%r4) - stg %r0,16(%r2,%r1) - lg %r0,24(%r2,%r3) - alcg %r0,24(%r2,%r4) - stg %r0,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r5,.Loop4_add - - la %r6,1(%r6) // see if len%4 is zero ... - brct %r6,.Loop1_add // without touching condition code:-) - -.Lexit_add: - lghi %r2,0 - alcgr %r2,%r2 - lg %r6,48(%r15) - br %r14 - -.Loop1_add: - lg %r0,0(%r2,%r3) - alcg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - - la %r2,8(%r2) // i++ - brct %r6,.Loop1_add - - j .Lexit_add -.size bn_add_words,.-bn_add_words - -// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5); -.globl bn_sub_words -.type bn_sub_words,@function -.align 4 -bn_sub_words: - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0 - ltgfr %r5,%r5 - bler %r14 // if (len<=0) return 0; - - stg %r6,48(%r15) - lghi %r6,3 - nr %r6,%r5 // len%4 - sra %r5,2 // len/4, use sra because it sets condition code - jnz .Loop4_sub // borrow is incidentally cleared if branch taken - slgr %r2,%r2 // clear borrow - -.Loop1_sub: - lg %r0,0(%r2,%r3) - slbg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - - la %r2,8(%r2) // i++ - brct %r6,.Loop1_sub - j .Lexit_sub - -.Loop4_sub: - lg %r0,0(%r2,%r3) - slbg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - lg %r0,8(%r2,%r3) - slbg %r0,8(%r2,%r4) - stg %r0,8(%r2,%r1) - lg %r0,16(%r2,%r3) - slbg %r0,16(%r2,%r4) - stg %r0,16(%r2,%r1) - lg %r0,24(%r2,%r3) - slbg %r0,24(%r2,%r4) - stg %r0,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r5,.Loop4_sub - - la %r6,1(%r6) // see if len%4 is zero ... - brct %r6,.Loop1_sub // without touching condition code:-) - -.Lexit_sub: - lghi %r2,0 - slbgr %r2,%r2 - lcgr %r2,%r2 - lg %r6,48(%r15) - br %r14 -.size bn_sub_words,.-bn_sub_words - -#define c1 %r1 -#define c2 %r5 -#define c3 %r8 - -#define mul_add_c(ai,bi,c1,c2,c3) \ - lg %r7,ai*8(%r3); \ - mlg %r6,bi*8(%r4); \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero - -// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4); -.globl bn_mul_comba8 -.type bn_mul_comba8,@function -.align 4 -bn_mul_comba8: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - mul_add_c(0,0,c1,c2,c3); - stg c1,0*8(%r2) - lghi c1,0 - - mul_add_c(0,1,c2,c3,c1); - mul_add_c(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - mul_add_c(2,0,c3,c1,c2); - mul_add_c(1,1,c3,c1,c2); - mul_add_c(0,2,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - mul_add_c(0,3,c1,c2,c3); - mul_add_c(1,2,c1,c2,c3); - mul_add_c(2,1,c1,c2,c3); - mul_add_c(3,0,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - mul_add_c(4,0,c2,c3,c1); - mul_add_c(3,1,c2,c3,c1); - mul_add_c(2,2,c2,c3,c1); - mul_add_c(1,3,c2,c3,c1); - mul_add_c(0,4,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - mul_add_c(0,5,c3,c1,c2); - mul_add_c(1,4,c3,c1,c2); - mul_add_c(2,3,c3,c1,c2); - mul_add_c(3,2,c3,c1,c2); - mul_add_c(4,1,c3,c1,c2); - mul_add_c(5,0,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - mul_add_c(6,0,c1,c2,c3); - mul_add_c(5,1,c1,c2,c3); - mul_add_c(4,2,c1,c2,c3); - mul_add_c(3,3,c1,c2,c3); - mul_add_c(2,4,c1,c2,c3); - mul_add_c(1,5,c1,c2,c3); - mul_add_c(0,6,c1,c2,c3); - stg c1,6*8(%r2) - lghi c1,0 - - mul_add_c(0,7,c2,c3,c1); - mul_add_c(1,6,c2,c3,c1); - mul_add_c(2,5,c2,c3,c1); - mul_add_c(3,4,c2,c3,c1); - mul_add_c(4,3,c2,c3,c1); - mul_add_c(5,2,c2,c3,c1); - mul_add_c(6,1,c2,c3,c1); - mul_add_c(7,0,c2,c3,c1); - stg c2,7*8(%r2) - lghi c2,0 - - mul_add_c(7,1,c3,c1,c2); - mul_add_c(6,2,c3,c1,c2); - mul_add_c(5,3,c3,c1,c2); - mul_add_c(4,4,c3,c1,c2); - mul_add_c(3,5,c3,c1,c2); - mul_add_c(2,6,c3,c1,c2); - mul_add_c(1,7,c3,c1,c2); - stg c3,8*8(%r2) - lghi c3,0 - - mul_add_c(2,7,c1,c2,c3); - mul_add_c(3,6,c1,c2,c3); - mul_add_c(4,5,c1,c2,c3); - mul_add_c(5,4,c1,c2,c3); - mul_add_c(6,3,c1,c2,c3); - mul_add_c(7,2,c1,c2,c3); - stg c1,9*8(%r2) - lghi c1,0 - - mul_add_c(7,3,c2,c3,c1); - mul_add_c(6,4,c2,c3,c1); - mul_add_c(5,5,c2,c3,c1); - mul_add_c(4,6,c2,c3,c1); - mul_add_c(3,7,c2,c3,c1); - stg c2,10*8(%r2) - lghi c2,0 - - mul_add_c(4,7,c3,c1,c2); - mul_add_c(5,6,c3,c1,c2); - mul_add_c(6,5,c3,c1,c2); - mul_add_c(7,4,c3,c1,c2); - stg c3,11*8(%r2) - lghi c3,0 - - mul_add_c(7,5,c1,c2,c3); - mul_add_c(6,6,c1,c2,c3); - mul_add_c(5,7,c1,c2,c3); - stg c1,12*8(%r2) - lghi c1,0 - - - mul_add_c(6,7,c2,c3,c1); - mul_add_c(7,6,c2,c3,c1); - stg c2,13*8(%r2) - lghi c2,0 - - mul_add_c(7,7,c3,c1,c2); - stg c3,14*8(%r2) - stg c1,15*8(%r2) - - lmg %r6,%r8,48(%r15) - br %r14 -.size bn_mul_comba8,.-bn_mul_comba8 - -// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4); -.globl bn_mul_comba4 -.type bn_mul_comba4,@function -.align 4 -bn_mul_comba4: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - mul_add_c(0,0,c1,c2,c3); - stg c1,0*8(%r3) - lghi c1,0 - - mul_add_c(0,1,c2,c3,c1); - mul_add_c(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - mul_add_c(2,0,c3,c1,c2); - mul_add_c(1,1,c3,c1,c2); - mul_add_c(0,2,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - mul_add_c(0,3,c1,c2,c3); - mul_add_c(1,2,c1,c2,c3); - mul_add_c(2,1,c1,c2,c3); - mul_add_c(3,0,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - mul_add_c(3,1,c2,c3,c1); - mul_add_c(2,2,c2,c3,c1); - mul_add_c(1,3,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - mul_add_c(2,3,c3,c1,c2); - mul_add_c(3,2,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - mul_add_c(3,3,c1,c2,c3); - stg c1,6*8(%r2) - stg c2,7*8(%r2) - - stmg %r6,%r8,48(%r15) - br %r14 -.size bn_mul_comba4,.-bn_mul_comba4 - -#define sqr_add_c(ai,c1,c2,c3) \ - lg %r7,ai*8(%r3); \ - mlgr %r6,%r7; \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero - -#define sqr_add_c2(ai,aj,c1,c2,c3) \ - lg %r7,ai*8(%r3); \ - mlg %r6,aj*8(%r3); \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero; \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero - -// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3); -.globl bn_sqr_comba8 -.type bn_sqr_comba8,@function -.align 4 -bn_sqr_comba8: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - sqr_add_c(0,c1,c2,c3); - stg c1,0*8(%r2) - lghi c1,0 - - sqr_add_c2(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - sqr_add_c(1,c3,c1,c2); - sqr_add_c2(2,0,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - sqr_add_c2(3,0,c1,c2,c3); - sqr_add_c2(2,1,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - sqr_add_c(2,c2,c3,c1); - sqr_add_c2(3,1,c2,c3,c1); - sqr_add_c2(4,0,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - sqr_add_c2(5,0,c3,c1,c2); - sqr_add_c2(4,1,c3,c1,c2); - sqr_add_c2(3,2,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - sqr_add_c(3,c1,c2,c3); - sqr_add_c2(4,2,c1,c2,c3); - sqr_add_c2(5,1,c1,c2,c3); - sqr_add_c2(6,0,c1,c2,c3); - stg c1,6*8(%r2) - lghi c1,0 - - sqr_add_c2(7,0,c2,c3,c1); - sqr_add_c2(6,1,c2,c3,c1); - sqr_add_c2(5,2,c2,c3,c1); - sqr_add_c2(4,3,c2,c3,c1); - stg c2,7*8(%r2) - lghi c2,0 - - sqr_add_c(4,c3,c1,c2); - sqr_add_c2(5,3,c3,c1,c2); - sqr_add_c2(6,2,c3,c1,c2); - sqr_add_c2(7,1,c3,c1,c2); - stg c3,8*8(%r2) - lghi c3,0 - - sqr_add_c2(7,2,c1,c2,c3); - sqr_add_c2(6,3,c1,c2,c3); - sqr_add_c2(5,4,c1,c2,c3); - stg c1,9*8(%r2) - lghi c1,0 - - sqr_add_c(5,c2,c3,c1); - sqr_add_c2(6,4,c2,c3,c1); - sqr_add_c2(7,3,c2,c3,c1); - stg c2,10*8(%r2) - lghi c2,0 - - sqr_add_c2(7,4,c3,c1,c2); - sqr_add_c2(6,5,c3,c1,c2); - stg c3,11*8(%r2) - lghi c3,0 - - sqr_add_c(6,c1,c2,c3); - sqr_add_c2(7,5,c1,c2,c3); - stg c1,12*8(%r2) - lghi c1,0 - - sqr_add_c2(7,6,c2,c3,c1); - stg c2,13*8(%r2) - lghi c2,0 - - sqr_add_c(7,c3,c1,c2); - stg c3,14*8(%r2) - stg c1,15*8(%r2) - - lmg %r6,%r8,48(%r15) - br %r14 -.size bn_sqr_comba8,.-bn_sqr_comba8 - -// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3); -.globl bn_sqr_comba4 -.type bn_sqr_comba4,@function -.align 4 -bn_sqr_comba4: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - sqr_add_c(0,c1,c2,c3); - stg c1,0*8(%r2) - lghi c1,0 - - sqr_add_c2(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - sqr_add_c(1,c3,c1,c2); - sqr_add_c2(2,0,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - sqr_add_c2(3,0,c1,c2,c3); - sqr_add_c2(2,1,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - sqr_add_c(2,c2,c3,c1); - sqr_add_c2(3,1,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - sqr_add_c2(3,2,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - sqr_add_c(3,c1,c2,c3); - stg c1,6*8(%r2) - stg c2,7*8(%r2) - - lmg %r6,%r8,48(%r15) - br %r14 -.size bn_sqr_comba4,.-bn_sqr_comba4 diff --git a/crypto/openssl/crypto/bn/asm/sparcv9-mont.pl b/crypto/openssl/crypto/bn/asm/sparcv9-mont.pl deleted file mode 100755 index b8fb1e8..0000000 --- a/crypto/openssl/crypto/bn/asm/sparcv9-mont.pl +++ /dev/null @@ -1,606 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# December 2005 -# -# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons -# for undertaken effort are multiple. First of all, UltraSPARC is not -# the whole SPARCv9 universe and other VIS-free implementations deserve -# optimized code as much. Secondly, newly introduced UltraSPARC T1, -# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes, -# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with -# several integrated RSA/DSA accelerator circuits accessible through -# kernel driver [only(*)], but having decent user-land software -# implementation is important too. Finally, reasons like desire to -# experiment with dedicated squaring procedure. Yes, this module -# implements one, because it was easiest to draft it in SPARCv9 -# instructions... - -# (*) Engine accessing the driver in question is on my TODO list. -# For reference, acceleator is estimated to give 6 to 10 times -# improvement on single-threaded RSA sign. It should be noted -# that 6-10x improvement coefficient does not actually mean -# something extraordinary in terms of absolute [single-threaded] -# performance, as SPARCv9 instruction set is by all means least -# suitable for high performance crypto among other 64 bit -# platforms. 6-10x factor simply places T1 in same performance -# domain as say AMD64 and IA-64. Improvement of RSA verify don't -# appear impressive at all, but it's the sign operation which is -# far more critical/interesting. - -# You might notice that inner loops are modulo-scheduled:-) This has -# essentially negligible impact on UltraSPARC performance, it's -# Fujitsu SPARC64 V users who should notice and hopefully appreciate -# the advantage... Currently this module surpasses sparcv9a-mont.pl -# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a -# module still have hidden potential [see TODO list there], which is -# estimated to be larger than 20%... - -# int bn_mul_mont( -$rp="%i0"; # BN_ULONG *rp, -$ap="%i1"; # const BN_ULONG *ap, -$bp="%i2"; # const BN_ULONG *bp, -$np="%i3"; # const BN_ULONG *np, -$n0="%i4"; # const BN_ULONG *n0, -$num="%i5"; # int num); - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=128; } - -$car0="%o0"; -$car1="%o1"; -$car2="%o2"; # 1 bit -$acc0="%o3"; -$acc1="%o4"; -$mask="%g1"; # 32 bits, what a waste... -$tmp0="%g4"; -$tmp1="%g5"; - -$i="%l0"; -$j="%l1"; -$mul0="%l2"; -$mul1="%l3"; -$tp="%l4"; -$apj="%l5"; -$npj="%l6"; -$tpj="%l7"; - -$fname="bn_mul_mont_int"; - -$code=<<___; -.section ".text",#alloc,#execinstr - -.global $fname -.align 32 -$fname: - cmp %o5,4 ! 128 bits minimum - bge,pt %icc,.Lenter - sethi %hi(0xffffffff),$mask - retl - clr %o0 -.align 32 -.Lenter: - save %sp,-$frame,%sp - sll $num,2,$num ! num*=4 - or $mask,%lo(0xffffffff),$mask - ld [$n0],$n0 - cmp $ap,$bp - and $num,$mask,$num - ld [$bp],$mul0 ! bp[0] - nop - - add %sp,$bias,%o7 ! real top of stack - ld [$ap],$car0 ! ap[0] ! redundant in squaring context - sub %o7,$num,%o7 - ld [$ap+4],$apj ! ap[1] - and %o7,-1024,%o7 - ld [$np],$car1 ! np[0] - sub %o7,$bias,%sp ! alloca - ld [$np+4],$npj ! np[1] - be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont - mov 12,$j - - mulx $car0,$mul0,$car0 ! ap[0]*bp[0] - mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0] - and $car0,$mask,$acc0 - add %sp,$bias+$frame,$tp - ld [$ap+8],$apj !prologue! - - mulx $n0,$acc0,$mul1 ! "t[0]"*n0 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0 - mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - ld [$np+8],$npj !prologue! - srlx $car1,32,$car1 - mov $tmp0,$acc0 !prologue! - -.L1st: - mulx $apj,$mul0,$tmp0 - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - add $acc1,$car1,$car1 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - add $j,4,$j ! j++ - mov $tmp0,$acc0 - st $car1,[$tp] - cmp $j,$num - mov $tmp1,$acc1 - srlx $car1,32,$car1 - bl %icc,.L1st - add $tp,4,$tp ! tp++ -!.L1st - - mulx $apj,$mul0,$tmp0 !epilogue! - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 - and $car0,$mask,$acc0 - add $acc1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $tmp0,$car0,$car0 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car1 - - add $car0,$car1,$car1 - st $car1,[$tp+8] - srlx $car1,32,$car2 - - mov 4,$i ! i++ - ld [$bp+4],$mul0 ! bp[1] -.Louter: - add %sp,$bias+$frame,$tp - ld [$ap],$car0 ! ap[0] - ld [$ap+4],$apj ! ap[1] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - ld [$tp],$tmp1 ! tp[0] - ld [$tp+4],$tpj ! tp[1] - mov 12,$j - - mulx $car0,$mul0,$car0 - mulx $apj,$mul0,$tmp0 !prologue! - add $tmp1,$car0,$car0 - ld [$ap+8],$apj !prologue! - and $car0,$mask,$acc0 - - mulx $n0,$acc0,$mul1 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 - mulx $npj,$mul1,$acc1 !prologue! - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - ld [$np+8],$npj !prologue! - srlx $car1,32,$car1 - mov $tmp0,$acc0 !prologue! - -.Linner: - mulx $apj,$mul0,$tmp0 - mulx $npj,$mul1,$tmp1 - add $tpj,$car0,$car0 - ld [$ap+$j],$apj ! ap[j] - add $acc0,$car0,$car0 - add $acc1,$car1,$car1 - ld [$np+$j],$npj ! np[j] - and $car0,$mask,$acc0 - ld [$tp+8],$tpj ! tp[j] - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - add $j,4,$j ! j++ - mov $tmp0,$acc0 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - mov $tmp1,$acc1 - cmp $j,$num - bl %icc,.Linner - add $tp,4,$tp ! tp++ -!.Linner - - mulx $apj,$mul0,$tmp0 !epilogue! - mulx $npj,$mul1,$tmp1 - add $tpj,$car0,$car0 - add $acc0,$car0,$car0 - ld [$tp+8],$tpj ! tp[j] - and $car0,$mask,$acc0 - add $acc1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - - add $tpj,$car0,$car0 - add $tmp0,$car0,$car0 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - add $acc0,$car1,$car1 - st $car1,[$tp+4] ! tp[j-1] - srlx $car0,32,$car0 - add $i,4,$i ! i++ - srlx $car1,32,$car1 - - add $car0,$car1,$car1 - cmp $i,$num - add $car2,$car1,$car1 - st $car1,[$tp+8] - - srlx $car1,32,$car2 - bl,a %icc,.Louter - ld [$bp+$i],$mul0 ! bp[i] -!.Louter - - add $tp,12,$tp - -.Ltail: - add $np,$num,$np - add $rp,$num,$rp - mov $tp,$ap - sub %g0,$num,%o7 ! k=-num - ba .Lsub - subcc %g0,%g0,%g0 ! clear %icc.c -.align 16 -.Lsub: - ld [$tp+%o7],%o0 - ld [$np+%o7],%o1 - subccc %o0,%o1,%o1 ! tp[j]-np[j] - add $rp,%o7,$i - add %o7,4,%o7 - brnz %o7,.Lsub - st %o1,[$i] - subc $car2,0,$car2 ! handle upmost overflow bit - and $tp,$car2,$ap - andn $rp,$car2,$np - or $ap,$np,$ap - sub %g0,$num,%o7 - -.Lcopy: - ld [$ap+%o7],%o0 ! copy or in-place refresh - st %g0,[$tp+%o7] ! zap tp - st %o0,[$rp+%o7] - add %o7,4,%o7 - brnz %o7,.Lcopy - nop - mov 1,%i0 - ret - restore -___ - -######## -######## .Lbn_sqr_mont gives up to 20% *overall* improvement over -######## code without following dedicated squaring procedure. -######## -$sbit="%i2"; # re-use $bp! - -$code.=<<___; -.align 32 -.Lbn_sqr_mont: - mulx $mul0,$mul0,$car0 ! ap[0]*ap[0] - mulx $apj,$mul0,$tmp0 !prologue! - and $car0,$mask,$acc0 - add %sp,$bias+$frame,$tp - ld [$ap+8],$apj !prologue! - - mulx $n0,$acc0,$mul1 ! "t[0]"*n0 - srlx $car0,32,$car0 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0 - mulx $npj,$mul1,$acc1 !prologue! - and $car0,1,$sbit - ld [$np+8],$npj !prologue! - srlx $car0,1,$car0 - add $acc0,$car1,$car1 - srlx $car1,32,$car1 - mov $tmp0,$acc0 !prologue! - -.Lsqr_1st: - mulx $apj,$mul0,$tmp0 - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 ! ap[j]*a0+c0 - add $acc1,$car1,$car1 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - mov $tmp1,$acc1 - srlx $acc0,32,$sbit - add $j,4,$j ! j++ - and $acc0,$mask,$acc0 - cmp $j,$num - add $acc0,$car1,$car1 - st $car1,[$tp] - mov $tmp0,$acc0 - srlx $car1,32,$car1 - bl %icc,.Lsqr_1st - add $tp,4,$tp ! tp++ -!.Lsqr_1st - - mulx $apj,$mul0,$tmp0 ! epilogue - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 ! ap[j]*a0+c0 - add $acc1,$car1,$car1 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $tmp0,$car0,$car0 ! ap[j]*a0+c0 - add $tmp1,$car1,$car1 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - st $car1,[$tp+8] - srlx $car1,32,$car2 - - ld [%sp+$bias+$frame],$tmp0 ! tp[0] - ld [%sp+$bias+$frame+4],$tmp1 ! tp[1] - ld [%sp+$bias+$frame+8],$tpj ! tp[2] - ld [$ap+4],$mul0 ! ap[1] - ld [$ap+8],$apj ! ap[2] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - mulx $n0,$tmp0,$mul1 - - mulx $mul0,$mul0,$car0 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 - mulx $npj,$mul1,$acc1 - add $tmp0,$car1,$car1 - and $car0,$mask,$acc0 - ld [$np+8],$npj ! np[2] - srlx $car1,32,$car1 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - and $car0,1,$sbit - add $acc1,$car1,$car1 - srlx $car0,1,$car0 - mov 12,$j - st $car1,[%sp+$bias+$frame] ! tp[0]= - srlx $car1,32,$car1 - add %sp,$bias+$frame+4,$tp - -.Lsqr_2nd: - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $acc0,$car0,$car0 - add $tpj,$car1,$car1 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc1,$car1,$car1 - ld [$tp+8],$tpj ! tp[j] - add $acc0,$acc0,$acc0 - add $j,4,$j ! j++ - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - cmp $j,$num - add $acc0,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - bl %icc,.Lsqr_2nd - add $tp,4,$tp ! tp++ -!.Lsqr_2nd - - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $acc0,$car0,$car0 - add $tpj,$car1,$car1 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc1,$car1,$car1 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - add $car2,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car2 - - ld [%sp+$bias+$frame],$tmp1 ! tp[0] - ld [%sp+$bias+$frame+4],$tpj ! tp[1] - ld [$ap+8],$mul0 ! ap[2] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - mulx $n0,$tmp1,$mul1 - and $mul1,$mask,$mul1 - mov 8,$i - - mulx $mul0,$mul0,$car0 - mulx $car1,$mul1,$car1 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add %sp,$bias+$frame,$tp - srlx $car1,32,$car1 - and $car0,1,$sbit - srlx $car0,1,$car0 - mov 4,$j - -.Lsqr_outer: -.Lsqr_inner1: - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $j,4,$j - ld [$tp+8],$tpj - cmp $j,$i - add $acc1,$car1,$car1 - ld [$np+$j],$npj - st $car1,[$tp] - srlx $car1,32,$car1 - bl %icc,.Lsqr_inner1 - add $tp,4,$tp -!.Lsqr_inner1 - - add $j,4,$j - ld [$ap+$j],$apj ! ap[j] - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - ld [$np+$j],$npj ! np[j] - add $acc0,$car1,$car1 - ld [$tp+8],$tpj ! tp[j] - add $acc1,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $j,4,$j - cmp $j,$num - be,pn %icc,.Lsqr_no_inner2 - add $tp,4,$tp - -.Lsqr_inner2: - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $acc0,$car0,$car0 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - ld [$tp+8],$tpj ! tp[j] - or $sbit,$acc0,$acc0 - add $j,4,$j ! j++ - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - cmp $j,$num - add $acc0,$car1,$car1 - add $acc1,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - bl %icc,.Lsqr_inner2 - add $tp,4,$tp ! tp++ - -.Lsqr_no_inner2: - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $acc0,$car0,$car0 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - add $acc1,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - add $car2,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car2 - - add $i,4,$i ! i++ - ld [%sp+$bias+$frame],$tmp1 ! tp[0] - ld [%sp+$bias+$frame+4],$tpj ! tp[1] - ld [$ap+$i],$mul0 ! ap[j] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - mulx $n0,$tmp1,$mul1 - and $mul1,$mask,$mul1 - add $i,4,$tmp0 - - mulx $mul0,$mul0,$car0 - mulx $car1,$mul1,$car1 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add %sp,$bias+$frame,$tp - srlx $car1,32,$car1 - and $car0,1,$sbit - srlx $car0,1,$car0 - - cmp $tmp0,$num ! i<num-1 - bl %icc,.Lsqr_outer - mov 4,$j - -.Lsqr_last: - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $j,4,$j - ld [$tp+8],$tpj - cmp $j,$i - add $acc1,$car1,$car1 - ld [$np+$j],$npj - st $car1,[$tp] - srlx $car1,32,$car1 - bl %icc,.Lsqr_last - add $tp,4,$tp -!.Lsqr_last - - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $acc0,$car1,$car1 - add $acc1,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 ! recover $car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - add $car2,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car2 - - ba .Ltail - add $tp,8,$tp -.type $fname,#function -.size $fname,(.-$fname) -.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" -.align 32 -___ -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/sparcv9a-mont.pl b/crypto/openssl/crypto/bn/asm/sparcv9a-mont.pl deleted file mode 100755 index a14205f..0000000 --- a/crypto/openssl/crypto/bn/asm/sparcv9a-mont.pl +++ /dev/null @@ -1,882 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU? -# Because unlike integer multiplier, which simply stalls whole CPU, -# FPU is fully pipelined and can effectively emit 48 bit partial -# product every cycle. Why not blended SPARC v9? One can argue that -# making this module dependent on UltraSPARC VIS extension limits its -# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!) -# implementations from compatibility matrix. But the rest, whole Sun -# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support -# VIS extension instructions used in this module. This is considered -# good enough to not care about HAL SPARC64 users [if any] who have -# integer-only pure SPARCv9 module to "fall down" to. - -# USI&II cores currently exhibit uniform 2x improvement [over pre- -# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII -# performance improves few percents for shorter keys and worsens few -# percents for longer keys. This is because USIII integer multiplier -# is >3x faster than USI&II one, which is harder to match [but see -# TODO list below]. It should also be noted that SPARC64 V features -# out-of-order execution, which *might* mean that integer multiplier -# is pipelined, which in turn *might* be impossible to match... On -# additional note, SPARC64 V implements FP Multiply-Add instruction, -# which is perfectly usable in this context... In other words, as far -# as Fujitsu SPARC64 V goes, talk to the author:-) - -# The implementation implies following "non-natural" limitations on -# input arguments: -# - num may not be less than 4; -# - num has to be even; -# Failure to meet either condition has no fatal effects, simply -# doesn't give any performance gain. - -# TODO: -# - modulo-schedule inner loop for better performance (on in-order -# execution core such as UltraSPARC this shall result in further -# noticeable(!) improvement); -# - dedicated squaring procedure[?]; - -###################################################################### -# November 2006 -# -# Modulo-scheduled inner loops allow to interleave floating point and -# integer instructions and minimize Read-After-Write penalties. This -# results in *further* 20-50% perfromance improvement [depending on -# key length, more for longer keys] on USI&II cores and 30-80% - on -# USIII&IV. - -$fname="bn_mul_mont_fpu"; -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } - -if ($bits==64) { - $bias=2047; - $frame=192; -} else { - $bias=0; - $frame=128; # 96 rounded up to largest known cache-line -} -$locals=64; - -# In order to provide for 32-/64-bit ABI duality, I keep integers wider -# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used -# exclusively for pointers, indexes and other small values... -# int bn_mul_mont( -$rp="%i0"; # BN_ULONG *rp, -$ap="%i1"; # const BN_ULONG *ap, -$bp="%i2"; # const BN_ULONG *bp, -$np="%i3"; # const BN_ULONG *np, -$n0="%i4"; # const BN_ULONG *n0, -$num="%i5"; # int num); - -$tp="%l0"; # t[num] -$ap_l="%l1"; # a[num],n[num] are smashed to 32-bit words and saved -$ap_h="%l2"; # to these four vectors as double-precision FP values. -$np_l="%l3"; # This way a bunch of fxtods are eliminated in second -$np_h="%l4"; # loop and L1-cache aliasing is minimized... -$i="%l5"; -$j="%l6"; -$mask="%l7"; # 16-bit mask, 0xffff - -$n0="%g4"; # reassigned(!) to "64-bit" register -$carry="%i4"; # %i4 reused(!) for a carry bit - -# FP register naming chart -# -# ..HILO -# dcba -# -------- -# LOa -# LOb -# LOc -# LOd -# HIa -# HIb -# HIc -# HId -# ..a -# ..b -$ba="%f0"; $bb="%f2"; $bc="%f4"; $bd="%f6"; -$na="%f8"; $nb="%f10"; $nc="%f12"; $nd="%f14"; -$alo="%f16"; $alo_="%f17"; $ahi="%f18"; $ahi_="%f19"; -$nlo="%f20"; $nlo_="%f21"; $nhi="%f22"; $nhi_="%f23"; - -$dota="%f24"; $dotb="%f26"; - -$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38"; -$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46"; -$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54"; -$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62"; - -$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load - -$code=<<___; -.section ".text",#alloc,#execinstr - -.global $fname -.align 32 -$fname: - save %sp,-$frame-$locals,%sp - - cmp $num,4 - bl,a,pn %icc,.Lret - clr %i0 - andcc $num,1,%g0 ! $num has to be even... - bnz,a,pn %icc,.Lret - clr %i0 ! signal "unsupported input value" - - srl $num,1,$num - sethi %hi(0xffff),$mask - ld [%i4+0],$n0 ! $n0 reassigned, remember? - or $mask,%lo(0xffff),$mask - ld [%i4+4],%o0 - sllx %o0,32,%o0 - or %o0,$n0,$n0 ! $n0=n0[1].n0[0] - - sll $num,3,$num ! num*=8 - - add %sp,$bias,%o0 ! real top of stack - sll $num,2,%o1 - add %o1,$num,%o1 ! %o1=num*5 - sub %o0,%o1,%o0 - and %o0,-2048,%o0 ! optimize TLB utilization - sub %o0,$bias,%sp ! alloca(5*num*8) - - rd %asi,%o7 ! save %asi - add %sp,$bias+$frame+$locals,$tp - add $tp,$num,$ap_l - add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends ! - add $ap_l,$num,$ap_h - add $ap_h,$num,$np_l - add $np_l,$num,$np_h - - wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads - - add $rp,$num,$rp ! readjust input pointers to point - add $ap,$num,$ap ! at the ends too... - add $bp,$num,$bp - add $np,$num,$np - - stx %o7,[%sp+$bias+$frame+48] ! save %asi - - sub %g0,$num,$i ! i=-num - sub %g0,$num,$j ! j=-num - - add $ap,$j,%o3 - add $bp,$i,%o4 - - ld [%o3+4],%g1 ! bp[0] - ld [%o3+0],%o0 - ld [%o4+4],%g5 ! ap[0] - sllx %g1,32,%g1 - ld [%o4+0],%o1 - sllx %g5,32,%g5 - or %g1,%o0,%o0 - or %g5,%o1,%o1 - - add $np,$j,%o5 - - mulx %o1,%o0,%o0 ! ap[0]*bp[0] - mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0 - stx %o0,[%sp+$bias+$frame+0] - - ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words - fzeros $alo - ld [%o3+4],$ahi_ - fzeros $ahi - ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words - fzeros $nlo - ld [%o5+4],$nhi_ - fzeros $nhi - - ! transfer b[i] to FPU as 4x16-bit values - ldda [%o4+2]%asi,$ba - fxtod $alo,$alo - ldda [%o4+0]%asi,$bb - fxtod $ahi,$ahi - ldda [%o4+6]%asi,$bc - fxtod $nlo,$nlo - ldda [%o4+4]%asi,$bd - fxtod $nhi,$nhi - - ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values - ldda [%sp+$bias+$frame+6]%asi,$na - fxtod $ba,$ba - ldda [%sp+$bias+$frame+4]%asi,$nb - fxtod $bb,$bb - ldda [%sp+$bias+$frame+2]%asi,$nc - fxtod $bc,$bc - ldda [%sp+$bias+$frame+0]%asi,$nd - fxtod $bd,$bd - - std $alo,[$ap_l+$j] ! save smashed ap[j] in double format - fxtod $na,$na - std $ahi,[$ap_h+$j] - fxtod $nb,$nb - std $nlo,[$np_l+$j] ! save smashed np[j] in double format - fxtod $nc,$nc - std $nhi,[$np_h+$j] - fxtod $nd,$nd - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - fmuld $alo,$bd,$alod - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - fmuld $ahi,$ba,$ahia - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - fmuld $ahi,$bb,$ahib - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - fmuld $ahi,$bc,$ahic - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - fmuld $ahi,$bd,$ahid - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - add $j,8,$j - std $nlob,[%sp+$bias+$frame+8] - add $ap,$j,%o4 - std $nloc,[%sp+$bias+$frame+16] - add $np,$j,%o5 - std $nlod,[%sp+$bias+$frame+24] - - ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words - fzeros $alo - ld [%o4+4],$ahi_ - fzeros $ahi - ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words - fzeros $nlo - ld [%o5+4],$nhi_ - fzeros $nhi - - fxtod $alo,$alo - fxtod $ahi,$ahi - fxtod $nlo,$nlo - fxtod $nhi,$nhi - - ldx [%sp+$bias+$frame+0],%o0 - fmuld $alo,$ba,$aloa - ldx [%sp+$bias+$frame+8],%o1 - fmuld $nlo,$na,$nloa - ldx [%sp+$bias+$frame+16],%o2 - fmuld $alo,$bb,$alob - ldx [%sp+$bias+$frame+24],%o3 - fmuld $nlo,$nb,$nlob - - srlx %o0,16,%o7 - std $alo,[$ap_l+$j] ! save smashed ap[j] in double format - fmuld $alo,$bc,$aloc - add %o7,%o1,%o1 - std $ahi,[$ap_h+$j] - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - srlx %o1,16,%o7 - std $nlo,[$np_l+$j] ! save smashed np[j] in double format - fmuld $alo,$bd,$alod - add %o7,%o2,%o2 - std $nhi,[$np_h+$j] - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - srlx %o2,16,%o7 - fmuld $ahi,$ba,$ahia - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - !and %o0,$mask,%o0 - !and %o1,$mask,%o1 - !and %o2,$mask,%o2 - !sllx %o1,16,%o1 - !sllx %o2,32,%o2 - !sllx %o3,48,%o7 - !or %o1,%o0,%o0 - !or %o2,%o0,%o0 - !or %o7,%o0,%o0 ! 64-bit result - srlx %o3,16,%g1 ! 34-bit carry - fmuld $ahi,$bb,$ahib - - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - fmuld $ahi,$bc,$ahic - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - fmuld $ahi,$bd,$ahid - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - - faddd $dota,$nloa,$nloa - faddd $dotb,$nlob,$nlob - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - addcc $j,8,$j - std $nloc,[%sp+$bias+$frame+16] - bz,pn %icc,.L1stskip - std $nlod,[%sp+$bias+$frame+24] - -.align 32 ! incidentally already aligned ! -.L1st: - add $ap,$j,%o4 - add $np,$j,%o5 - ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words - fzeros $alo - ld [%o4+4],$ahi_ - fzeros $ahi - ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words - fzeros $nlo - ld [%o5+4],$nhi_ - fzeros $nhi - - fxtod $alo,$alo - fxtod $ahi,$ahi - fxtod $nlo,$nlo - fxtod $nhi,$nhi - - ldx [%sp+$bias+$frame+0],%o0 - fmuld $alo,$ba,$aloa - ldx [%sp+$bias+$frame+8],%o1 - fmuld $nlo,$na,$nloa - ldx [%sp+$bias+$frame+16],%o2 - fmuld $alo,$bb,$alob - ldx [%sp+$bias+$frame+24],%o3 - fmuld $nlo,$nb,$nlob - - srlx %o0,16,%o7 - std $alo,[$ap_l+$j] ! save smashed ap[j] in double format - fmuld $alo,$bc,$aloc - add %o7,%o1,%o1 - std $ahi,[$ap_h+$j] - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - srlx %o1,16,%o7 - std $nlo,[$np_l+$j] ! save smashed np[j] in double format - fmuld $alo,$bd,$alod - add %o7,%o2,%o2 - std $nhi,[$np_h+$j] - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - srlx %o2,16,%o7 - fmuld $ahi,$ba,$ahia - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - and %o1,$mask,%o1 - and %o2,$mask,%o2 - fmuld $ahi,$bb,$ahib - sllx %o1,16,%o1 - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - sllx %o2,32,%o2 - fmuld $ahi,$bc,$ahic - sllx %o3,48,%o7 - or %o1,%o0,%o0 - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - or %o2,%o0,%o0 - fmuld $ahi,$bd,$ahid - or %o7,%o0,%o0 ! 64-bit result - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - addcc %g1,%o0,%o0 - faddd $dota,$nloa,$nloa - srlx %o3,16,%g1 ! 34-bit carry - faddd $dotb,$nlob,$nlob - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1]= - - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - std $nloc,[%sp+$bias+$frame+16] - std $nlod,[%sp+$bias+$frame+24] - - addcc $j,8,$j - bnz,pt %icc,.L1st - add $tp,8,$tp - -.L1stskip: - fdtox $dota,$dota - fdtox $dotb,$dotb - - ldx [%sp+$bias+$frame+0],%o0 - ldx [%sp+$bias+$frame+8],%o1 - ldx [%sp+$bias+$frame+16],%o2 - ldx [%sp+$bias+$frame+24],%o3 - - srlx %o0,16,%o7 - std $dota,[%sp+$bias+$frame+32] - add %o7,%o1,%o1 - std $dotb,[%sp+$bias+$frame+40] - srlx %o1,16,%o7 - add %o7,%o2,%o2 - srlx %o2,16,%o7 - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - and %o1,$mask,%o1 - and %o2,$mask,%o2 - sllx %o1,16,%o1 - sllx %o2,32,%o2 - sllx %o3,48,%o7 - or %o1,%o0,%o0 - or %o2,%o0,%o0 - or %o7,%o0,%o0 ! 64-bit result - ldx [%sp+$bias+$frame+32],%o4 - addcc %g1,%o0,%o0 - ldx [%sp+$bias+$frame+40],%o5 - srlx %o3,16,%g1 ! 34-bit carry - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1]= - add $tp,8,$tp - - srlx %o4,16,%o7 - add %o7,%o5,%o5 - and %o4,$mask,%o4 - sllx %o5,16,%o7 - or %o7,%o4,%o4 - addcc %g1,%o4,%o4 - srlx %o5,48,%g1 - bcs,a %xcc,.+8 - add %g1,1,%g1 - - mov %g1,$carry - stx %o4,[$tp] ! tp[num-1]= - - ba .Louter - add $i,8,$i -.align 32 -.Louter: - sub %g0,$num,$j ! j=-num - add %sp,$bias+$frame+$locals,$tp - - add $ap,$j,%o3 - add $bp,$i,%o4 - - ld [%o3+4],%g1 ! bp[i] - ld [%o3+0],%o0 - ld [%o4+4],%g5 ! ap[0] - sllx %g1,32,%g1 - ld [%o4+0],%o1 - sllx %g5,32,%g5 - or %g1,%o0,%o0 - or %g5,%o1,%o1 - - ldx [$tp],%o2 ! tp[0] - mulx %o1,%o0,%o0 - addcc %o2,%o0,%o0 - mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0 - stx %o0,[%sp+$bias+$frame+0] - - ! transfer b[i] to FPU as 4x16-bit values - ldda [%o4+2]%asi,$ba - ldda [%o4+0]%asi,$bb - ldda [%o4+6]%asi,$bc - ldda [%o4+4]%asi,$bd - - ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values - ldda [%sp+$bias+$frame+6]%asi,$na - fxtod $ba,$ba - ldda [%sp+$bias+$frame+4]%asi,$nb - fxtod $bb,$bb - ldda [%sp+$bias+$frame+2]%asi,$nc - fxtod $bc,$bc - ldda [%sp+$bias+$frame+0]%asi,$nd - fxtod $bd,$bd - ldd [$ap_l+$j],$alo ! load a[j] in double format - fxtod $na,$na - ldd [$ap_h+$j],$ahi - fxtod $nb,$nb - ldd [$np_l+$j],$nlo ! load n[j] in double format - fxtod $nc,$nc - ldd [$np_h+$j],$nhi - fxtod $nd,$nd - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - fmuld $alo,$bd,$alod - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - fmuld $ahi,$ba,$ahia - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - fmuld $ahi,$bb,$ahib - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - fmuld $ahi,$bc,$ahic - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - fmuld $ahi,$bd,$ahid - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - std $nloc,[%sp+$bias+$frame+16] - add $j,8,$j - std $nlod,[%sp+$bias+$frame+24] - - ldd [$ap_l+$j],$alo ! load a[j] in double format - ldd [$ap_h+$j],$ahi - ldd [$np_l+$j],$nlo ! load n[j] in double format - ldd [$np_h+$j],$nhi - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - ldx [%sp+$bias+$frame+0],%o0 - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - ldx [%sp+$bias+$frame+8],%o1 - fmuld $alo,$bd,$alod - ldx [%sp+$bias+$frame+16],%o2 - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - ldx [%sp+$bias+$frame+24],%o3 - fmuld $ahi,$ba,$ahia - - srlx %o0,16,%o7 - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - add %o7,%o1,%o1 - fmuld $ahi,$bb,$ahib - srlx %o1,16,%o7 - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - add %o7,%o2,%o2 - fmuld $ahi,$bc,$ahic - srlx %o2,16,%o7 - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - ! why? - and %o0,$mask,%o0 - fmuld $ahi,$bd,$ahid - and %o1,$mask,%o1 - and %o2,$mask,%o2 - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - sllx %o1,16,%o1 - faddd $dota,$nloa,$nloa - sllx %o2,32,%o2 - faddd $dotb,$nlob,$nlob - sllx %o3,48,%o7 - or %o1,%o0,%o0 - faddd $ahic,$nhic,$dota ! $nhic - or %o2,%o0,%o0 - faddd $ahid,$nhid,$dotb ! $nhid - or %o7,%o0,%o0 ! 64-bit result - ldx [$tp],%o7 - faddd $nloc,$nhia,$nloc - addcc %o7,%o0,%o0 - ! end-of-why? - faddd $nlod,$nhib,$nlod - srlx %o3,16,%g1 ! 34-bit carry - fdtox $nloa,$nloa - bcs,a %xcc,.+8 - add %g1,1,%g1 - - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - addcc $j,8,$j - std $nloc,[%sp+$bias+$frame+16] - bz,pn %icc,.Linnerskip - std $nlod,[%sp+$bias+$frame+24] - - ba .Linner - nop -.align 32 -.Linner: - ldd [$ap_l+$j],$alo ! load a[j] in double format - ldd [$ap_h+$j],$ahi - ldd [$np_l+$j],$nlo ! load n[j] in double format - ldd [$np_h+$j],$nhi - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - ldx [%sp+$bias+$frame+0],%o0 - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - ldx [%sp+$bias+$frame+8],%o1 - fmuld $alo,$bd,$alod - ldx [%sp+$bias+$frame+16],%o2 - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - ldx [%sp+$bias+$frame+24],%o3 - fmuld $ahi,$ba,$ahia - - srlx %o0,16,%o7 - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - add %o7,%o1,%o1 - fmuld $ahi,$bb,$ahib - srlx %o1,16,%o7 - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - add %o7,%o2,%o2 - fmuld $ahi,$bc,$ahic - srlx %o2,16,%o7 - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - fmuld $ahi,$bd,$ahid - and %o1,$mask,%o1 - and %o2,$mask,%o2 - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - sllx %o1,16,%o1 - faddd $dota,$nloa,$nloa - sllx %o2,32,%o2 - faddd $dotb,$nlob,$nlob - sllx %o3,48,%o7 - or %o1,%o0,%o0 - faddd $ahic,$nhic,$dota ! $nhic - or %o2,%o0,%o0 - faddd $ahid,$nhid,$dotb ! $nhid - or %o7,%o0,%o0 ! 64-bit result - faddd $nloc,$nhia,$nloc - addcc %g1,%o0,%o0 - ldx [$tp+8],%o7 ! tp[j] - faddd $nlod,$nhib,$nlod - srlx %o3,16,%g1 ! 34-bit carry - fdtox $nloa,$nloa - bcs,a %xcc,.+8 - add %g1,1,%g1 - fdtox $nlob,$nlob - addcc %o7,%o0,%o0 - fdtox $nloc,$nloc - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1] - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - std $nloc,[%sp+$bias+$frame+16] - addcc $j,8,$j - std $nlod,[%sp+$bias+$frame+24] - bnz,pt %icc,.Linner - add $tp,8,$tp - -.Linnerskip: - fdtox $dota,$dota - fdtox $dotb,$dotb - - ldx [%sp+$bias+$frame+0],%o0 - ldx [%sp+$bias+$frame+8],%o1 - ldx [%sp+$bias+$frame+16],%o2 - ldx [%sp+$bias+$frame+24],%o3 - - srlx %o0,16,%o7 - std $dota,[%sp+$bias+$frame+32] - add %o7,%o1,%o1 - std $dotb,[%sp+$bias+$frame+40] - srlx %o1,16,%o7 - add %o7,%o2,%o2 - srlx %o2,16,%o7 - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - and %o1,$mask,%o1 - and %o2,$mask,%o2 - sllx %o1,16,%o1 - sllx %o2,32,%o2 - sllx %o3,48,%o7 - or %o1,%o0,%o0 - or %o2,%o0,%o0 - ldx [%sp+$bias+$frame+32],%o4 - or %o7,%o0,%o0 ! 64-bit result - ldx [%sp+$bias+$frame+40],%o5 - addcc %g1,%o0,%o0 - ldx [$tp+8],%o7 ! tp[j] - srlx %o3,16,%g1 ! 34-bit carry - bcs,a %xcc,.+8 - add %g1,1,%g1 - - addcc %o7,%o0,%o0 - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1] - add $tp,8,$tp - - srlx %o4,16,%o7 - add %o7,%o5,%o5 - and %o4,$mask,%o4 - sllx %o5,16,%o7 - or %o7,%o4,%o4 - addcc %g1,%o4,%o4 - srlx %o5,48,%g1 - bcs,a %xcc,.+8 - add %g1,1,%g1 - - addcc $carry,%o4,%o4 - stx %o4,[$tp] ! tp[num-1] - mov %g1,$carry - bcs,a %xcc,.+8 - add $carry,1,$carry - - addcc $i,8,$i - bnz %icc,.Louter - nop - - add $tp,8,$tp ! adjust tp to point at the end - orn %g0,%g0,%g4 - sub %g0,$num,%o7 ! n=-num - ba .Lsub - subcc %g0,%g0,%g0 ! clear %icc.c - -.align 32 -.Lsub: - ldx [$tp+%o7],%o0 - add $np,%o7,%g1 - ld [%g1+0],%o2 - ld [%g1+4],%o3 - srlx %o0,32,%o1 - subccc %o0,%o2,%o2 - add $rp,%o7,%g1 - subccc %o1,%o3,%o3 - st %o2,[%g1+0] - add %o7,8,%o7 - brnz,pt %o7,.Lsub - st %o3,[%g1+4] - subc $carry,0,%g4 - sub %g0,$num,%o7 ! n=-num - ba .Lcopy - nop - -.align 32 -.Lcopy: - ldx [$tp+%o7],%o0 - add $rp,%o7,%g1 - ld [%g1+0],%o2 - ld [%g1+4],%o3 - stx %g0,[$tp+%o7] - and %o0,%g4,%o0 - srlx %o0,32,%o1 - andn %o2,%g4,%o2 - andn %o3,%g4,%o3 - or %o2,%o0,%o0 - or %o3,%o1,%o1 - st %o0,[%g1+0] - add %o7,8,%o7 - brnz,pt %o7,.Lcopy - st %o1,[%g1+4] - sub %g0,$num,%o7 ! n=-num - -.Lzap: - stx %g0,[$ap_l+%o7] - stx %g0,[$ap_h+%o7] - stx %g0,[$np_l+%o7] - stx %g0,[$np_h+%o7] - add %o7,8,%o7 - brnz,pt %o7,.Lzap - nop - - ldx [%sp+$bias+$frame+48],%o7 - wr %g0,%o7,%asi ! restore %asi - - mov 1,%i0 -.Lret: - ret - restore -.type $fname,#function -.size $fname,(.-$fname) -.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>" -.align 32 -___ - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -# Below substitution makes it possible to compile without demanding -# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I -# dare to do this, because VIS capability is detected at run-time now -# and this routine is not called on CPU not capable to execute it. Do -# note that fzeros is not the only VIS dependency! Another dependency -# is implicit and is just _a_ numerical value loaded to %asi register, -# which assembler can't recognize as VIS specific... -$code =~ s/fzeros\s+%f([0-9]+)/ - sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1) - /gem; - -print $code; -# flush -close STDOUT; diff --git a/crypto/openssl/crypto/bn/asm/via-mont.pl b/crypto/openssl/crypto/bn/asm/via-mont.pl deleted file mode 100755 index c046a51..0000000 --- a/crypto/openssl/crypto/bn/asm/via-mont.pl +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# Wrapper around 'rep montmul', VIA-specific instruction accessing -# PadLock Montgomery Multiplier. The wrapper is designed as drop-in -# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9]. -# -# Below are interleaved outputs from 'openssl speed rsa dsa' for 4 -# different software configurations on 1.5GHz VIA Esther processor. -# Lines marked with "software integer" denote performance of hand- -# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2" -# refers to hand-coded SSE2 Montgomery multiplication procedure found -# OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from -# Padlock SDK 2.0.1 available for download from VIA, which naturally -# utilizes the magic 'repz montmul' instruction. And finally "hardware -# this" refers to *this* implementation which also uses 'repz montmul' -# -# sign verify sign/s verify/s -# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer -# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2 -# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK -# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this -# -# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer -# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2 -# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK -# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this -# -# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer -# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2 -# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK -# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this -# -# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer -# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2 -# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK -# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this -# -# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer -# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2 -# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK -# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this -# -# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer -# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2 -# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK -# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this -# -# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer -# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2 -# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK -# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this -# -# To give you some other reference point here is output for 2.4GHz P4 -# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software -# SSE2" in above terms. -# -# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0 -# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0 -# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9 -# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3 -# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1 -# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0 -# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1 -# -# Conclusions: -# - VIA SDK leaves a *lot* of room for improvement (which this -# implementation successfully fills:-); -# - 'rep montmul' gives up to >3x performance improvement depending on -# key length; -# - in terms of absolute performance it delivers approximately as much -# as modern out-of-order 32-bit cores [again, for longer keys]. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"via-mont.pl"); - -# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); -$func="bn_mul_mont_padlock"; - -$pad=16*1; # amount of reserved bytes on top of every vector - -# stack layout -$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA -$A=&DWP(4,"esp"); -$B=&DWP(8,"esp"); -$T=&DWP(12,"esp"); -$M=&DWP(16,"esp"); -$scratch=&DWP(20,"esp"); -$rp=&DWP(24,"esp"); # these are mine -$sp=&DWP(28,"esp"); -# &DWP(32,"esp") # 32 byte scratch area -# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num] -# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num] -# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num] -# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num] -# Note that SDK suggests to unconditionally allocate 2K per vector. This -# has quite an impact on performance. It naturally depends on key length, -# but to give an example 1024 bit private RSA key operations suffer >30% -# penalty. I allocate only as much as actually required... - -&function_begin($func); - &xor ("eax","eax"); - &mov ("ecx",&wparam(5)); # num - # meet VIA's limitations for num [note that the specification - # expresses them in bits, while we work with amount of 32-bit words] - &test ("ecx",3); - &jnz (&label("leave")); # num % 4 != 0 - &cmp ("ecx",8); - &jb (&label("leave")); # num < 8 - &cmp ("ecx",1024); - &ja (&label("leave")); # num > 1024 - - &pushf (); - &cld (); - - &mov ("edi",&wparam(0)); # rp - &mov ("eax",&wparam(1)); # ap - &mov ("ebx",&wparam(2)); # bp - &mov ("edx",&wparam(3)); # np - &mov ("esi",&wparam(4)); # n0 - &mov ("esi",&DWP(0,"esi")); # *n0 - - &lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes - &lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes - &neg ("ebp"); - &add ("ebp","esp"); - &and ("ebp",-64); # align to cache-line - &xchg ("ebp","esp"); # alloca - - &mov ($rp,"edi"); # save rp - &mov ($sp,"ebp"); # save esp - - &mov ($mZeroPrime,"esi"); - &lea ("esi",&DWP(64,"esp")); # tp - &mov ($T,"esi"); - &lea ("edi",&DWP(32,"esp")); # scratch area - &mov ($scratch,"edi"); - &mov ("esi","eax"); - - &lea ("ebp",&DWP(-$pad,"ecx")); - &shr ("ebp",2); # restore original num value in ebp - - &xor ("eax","eax"); - - &mov ("ecx","ebp"); - &lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch - &data_byte(0xf3,0xab); # rep stosl, bzero - - &mov ("ecx","ebp"); - &lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy - &mov ($A,"edi"); - &data_byte(0xf3,0xa5); # rep movsl, memcpy - &mov ("ecx",$pad/4); - &data_byte(0xf3,0xab); # rep stosl, bzero pad - # edi points at the end of padded ap copy... - - &mov ("ecx","ebp"); - &mov ("esi","ebx"); - &mov ($B,"edi"); - &data_byte(0xf3,0xa5); # rep movsl, memcpy - &mov ("ecx",$pad/4); - &data_byte(0xf3,0xab); # rep stosl, bzero pad - # edi points at the end of padded bp copy... - - &mov ("ecx","ebp"); - &mov ("esi","edx"); - &mov ($M,"edi"); - &data_byte(0xf3,0xa5); # rep movsl, memcpy - &mov ("ecx",$pad/4); - &data_byte(0xf3,0xab); # rep stosl, bzero pad - # edi points at the end of padded np copy... - - # let magic happen... - &mov ("ecx","ebp"); - &mov ("esi","esp"); - &shl ("ecx",5); # convert word counter to bit counter - &align (4); - &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul - - &mov ("ecx","ebp"); - &lea ("esi",&DWP(64,"esp")); # tp - # edi still points at the end of padded np copy... - &neg ("ebp"); - &lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind" - &mov ("edi",$rp); # restore rp - &xor ("edx","edx"); # i=0 and clear CF - -&set_label("sub",8); - &mov ("eax",&DWP(0,"esi","edx",4)); - &sbb ("eax",&DWP(0,"ebp","edx",4)); - &mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i] - &lea ("edx",&DWP(1,"edx")); # i++ - &loop (&label("sub")); # doesn't affect CF! - - &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit - &sbb ("eax",0); - &and ("esi","eax"); - ¬ ("eax"); - &mov ("ebp","edi"); - &and ("ebp","eax"); - &or ("esi","ebp"); # tp=carry?tp:rp - - &mov ("ecx","edx"); # num - &xor ("edx","edx"); # i=0 - -&set_label("copy",8); - &mov ("eax",&DWP(0,"esi","edx",4)); - &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp - &mov (&DWP(0,"edi","edx",4),"eax"); - &lea ("edx",&DWP(1,"edx")); # i++ - &loop (&label("copy")); - - &mov ("ebp",$sp); - &xor ("eax","eax"); - - &mov ("ecx",64/4); - &mov ("edi","esp"); # zap frame including scratch area - &data_byte(0xf3,0xab); # rep stosl, bzero - - # zap copies of ap, bp and np - &lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap - &lea ("ecx",&DWP(3*$pad/4,"edx","edx",2)); - &data_byte(0xf3,0xab); # rep stosl, bzero - - &mov ("esp","ebp"); - &inc ("eax"); # signal "done" - &popf (); -&set_label("leave"); -&function_end($func); - -&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>"); - -&asm_finish(); diff --git a/crypto/openssl/crypto/bn/asm/x86-mont.pl b/crypto/openssl/crypto/bn/asm/x86-mont.pl deleted file mode 100755 index 5cd3cd2..0000000 --- a/crypto/openssl/crypto/bn/asm/x86-mont.pl +++ /dev/null @@ -1,591 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# This is a "teaser" code, as it can be improved in several ways... -# First of all non-SSE2 path should be implemented (yes, for now it -# performs Montgomery multiplication/convolution only on SSE2-capable -# CPUs such as P4, others fall down to original code). Then inner loop -# can be unrolled and modulo-scheduled to improve ILP and possibly -# moved to 128-bit XMM register bank (though it would require input -# rearrangement and/or increase bus bandwidth utilization). Dedicated -# squaring procedure should give further performance improvement... -# Yet, for being draft, the code improves rsa512 *sign* benchmark by -# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-) - -# December 2006 -# -# Modulo-scheduling SSE2 loops results in further 15-20% improvement. -# Integer-only code [being equipped with dedicated squaring procedure] -# gives ~40% on rsa512 sign benchmark... - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -&function_begin("bn_mul_mont"); - -$i="edx"; -$j="ecx"; -$ap="esi"; $tp="esi"; # overlapping variables!!! -$rp="edi"; $bp="edi"; # overlapping variables!!! -$np="ebp"; -$num="ebx"; - -$_num=&DWP(4*0,"esp"); # stack top layout -$_rp=&DWP(4*1,"esp"); -$_ap=&DWP(4*2,"esp"); -$_bp=&DWP(4*3,"esp"); -$_np=&DWP(4*4,"esp"); -$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp"); -$_sp=&DWP(4*6,"esp"); -$_bpend=&DWP(4*7,"esp"); -$frame=32; # size of above frame rounded up to 16n - - &xor ("eax","eax"); - &mov ("edi",&wparam(5)); # int num - &cmp ("edi",4); - &jl (&label("just_leave")); - - &lea ("esi",&wparam(0)); # put aside pointer to argument block - &lea ("edx",&wparam(1)); # load ap - &mov ("ebp","esp"); # saved stack pointer! - &add ("edi",2); # extra two words on top of tp - &neg ("edi"); - &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2)) - &neg ("edi"); - - # minimize cache contention by arraning 2K window between stack - # pointer and ap argument [np is also position sensitive vector, - # but it's assumed to be near ap, as it's allocated at ~same - # time]. - &mov ("eax","esp"); - &sub ("eax","edx"); - &and ("eax",2047); - &sub ("esp","eax"); # this aligns sp and ap modulo 2048 - - &xor ("edx","esp"); - &and ("edx",2048); - &xor ("edx",2048); - &sub ("esp","edx"); # this splits them apart modulo 4096 - - &and ("esp",-64); # align to cache line - - ################################# load argument block... - &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp - &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap - &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp - &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np - &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0 - #&mov ("edi",&DWP(5*4,"esi"));# int num - - &mov ("esi",&DWP(0,"esi")); # pull n0[0] - &mov ($_rp,"eax"); # ... save a copy of argument block - &mov ($_ap,"ebx"); - &mov ($_bp,"ecx"); - &mov ($_np,"edx"); - &mov ($_n0,"esi"); - &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling - #&mov ($_num,$num); # redundant as $num is not reused - &mov ($_sp,"ebp"); # saved stack pointer! - -if($sse2) { -$acc0="mm0"; # mmx register bank layout -$acc1="mm1"; -$car0="mm2"; -$car1="mm3"; -$mul0="mm4"; -$mul1="mm5"; -$temp="mm6"; -$mask="mm7"; - - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"eax"),26); - &jnc (&label("non_sse2")); - - &mov ("eax",-1); - &movd ($mask,"eax"); # mask 32 lower bits - - &mov ($ap,$_ap); # load input pointers - &mov ($bp,$_bp); - &mov ($np,$_np); - - &xor ($i,$i); # i=0 - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp)); # bp[0] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($car1,&DWP(0,$np)); # np[0] - - &pmuludq($mul1,$mul0); # ap[0]*bp[0] - &movq ($car0,$mul1); - &movq ($acc0,$mul1); # I wish movd worked for - &pand ($acc0,$mask); # inter-register transfers - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0 - &paddq ($car1,$acc0); - - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &inc ($j); # j++ -&set_label("1st",16); - &pmuludq($acc0,$mul0); # ap[j]*bp[0] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[0]; - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]= - &psrlq ($car1,32); - - &lea ($j,&DWP(1,$j)); - &cmp ($j,$num); - &jl (&label("1st")); - - &pmuludq($acc0,$mul0); # ap[num-1]*bp[0] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[0]; - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &paddq ($car1,$car0); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &inc ($i); # i++ -&set_label("outer"); - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($temp,&DWP($frame,"esp")); # tp[0] - &movd ($car1,&DWP(0,$np)); # np[0] - &pmuludq($mul1,$mul0); # ap[0]*bp[i] - - &paddq ($mul1,$temp); # +=tp[0] - &movq ($acc0,$mul1); - &movq ($car0,$mul1); - &pand ($acc0,$mask); - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); - &paddq ($car1,$acc0); - - &movd ($temp,&DWP($frame+4,"esp")); # tp[1] - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[1] - - &inc ($j); # j++ - &dec ($num); -&set_label("inner"); - &pmuludq($acc0,$mul0); # ap[j]*bp[i] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1] - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j] - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]= - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[j+1] - - &dec ($num); - &lea ($j,&DWP(1,$j)); # j++ - &jnz (&label("inner")); - - &mov ($num,$j); - &pmuludq($acc0,$mul0); # ap[num-1]*bp[i] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1] - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - &psrlq ($car0,32); - &psrlq ($car1,32); - - &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num] - &paddq ($car1,$car0); - &paddq ($car1,$temp); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &lea ($i,&DWP(1,$i)); # i++ - &cmp ($i,$num); - &jle (&label("outer")); - - &emms (); # done with mmx bank - &jmp (&label("common_tail")); - -&set_label("non_sse2",16); -} - -if (0) { - &mov ("esp",$_sp); - &xor ("eax","eax"); # signal "not fast enough [yet]" - &jmp (&label("just_leave")); - # While the below code provides competitive performance for - # all key lengthes on modern Intel cores, it's still more - # than 10% slower for 4096-bit key elsewhere:-( "Competitive" - # means compared to the original integer-only assembler. - # 512-bit RSA sign is better by ~40%, but that's about all - # one can say about all CPUs... -} else { -$inp="esi"; # integer path uses these registers differently -$word="edi"; -$carry="ebp"; - - &mov ($inp,$_ap); - &lea ($carry,&DWP(1,$num)); - &mov ($word,$_bp); - &xor ($j,$j); # j=0 - &mov ("edx",$inp); - &and ($carry,1); # see if num is even - &sub ("edx",$word); # see if ap==bp - &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num] - &or ($carry,"edx"); - &mov ($word,&DWP(0,$word)); # bp[0] - &jz (&label("bn_sqr_mont")); - &mov ($_bpend,"eax"); - &mov ("eax",&DWP(0,$inp)); - &xor ("edx","edx"); - -&set_label("mull",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[0] - &add ($carry,"eax"); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("mull")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[0] - &mov ($word,$_n0); - &add ("eax",$carry); - &mov ($inp,$_np); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]= - &xor ($j,$j); - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mov ("eax",&DWP(0,$inp)); # np[0] - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &inc ($j); - - &jmp (&label("2ndmadd")); - -&set_label("1stmadd",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[i] - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("1stmadd")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[i] - &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &add ($carry,"eax"); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &xor ($j,$j); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]= - &adc ($j,0); - &mov ("eax",&DWP(0,$inp)); # np[0] - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &mov ($j,1); - -&set_label("2ndmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]= - &jl (&label("2ndmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &xor ("eax","eax"); - &mov ($j,$_bp); # &bp[i] - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &lea ($j,&DWP(4,$j)); - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$_bpend); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(0,$j)); # bp[i+1] - &mov ($inp,$_ap); - &mov ($_bp,$j); # &bp[++i] - &xor ($j,$j); - &xor ("edx","edx"); - &mov ("eax",&DWP(0,$inp)); - &jmp (&label("1stmadd")); - -&set_label("bn_sqr_mont",16); -$sbit=$num; - &mov ($_num,$num); - &mov ($_bp,$j); # i=0 - - &mov ("eax",$word); # ap[0] - &mul ($word); # ap[0]*ap[0] - &mov (&DWP($frame,"esp"),"eax"); # tp[0]= - &mov ($sbit,"edx"); - &shr ("edx",1); - &and ($sbit,1); - &inc ($j); -&set_label("sqr",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[0] - &add ("eax",$carry); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &shr ("eax",31); - &cmp ($j,$_num); - &mov ($sbit,"eax"); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("sqr")); - - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1] - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*ap[0] - &add ("eax",$carry); - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - &shr ("eax",31); - &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]= - - &lea ($carry,&DWP(0,"eax","edx",2)); - &mov ("eax",&DWP(0,$inp)); # np[0] - &shr ("edx",31); - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]= - &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ($num,$j); - &adc ("edx",0); - &mov ("eax",&DWP(4,$inp)); # np[1] - &mov ($j,1); - -&set_label("3rdmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]= - - &mov ($carry,"edx"); - &mul ($word); # np[j+1]*m - &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1] - &lea ($j,&DWP(2,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]= - &jl (&label("3rdmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &mov ($j,$_bp); # i - &xor ("eax","eax"); - &mov ($inp,$_ap); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$num); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(4,$inp,$j,4)); # ap[i] - &lea ($j,&DWP(1,$j)); - &mov ("eax",$word); - &mov ($_bp,$j); # ++i - &mul ($word); # ap[i]*ap[i] - &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i] - &adc ("edx",0); - &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]= - &xor ($carry,$carry); - &cmp ($j,$num); - &lea ($j,&DWP(1,$j)); - &je (&label("sqrlast")); - - &mov ($sbit,"edx"); # zaps $num - &shr ("edx",1); - &and ($sbit,1); -&set_label("sqradd",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[i] - &add ("eax",$carry); - &lea ($carry,&DWP(0,"eax","eax")); - &adc ("edx",0); - &shr ("eax",31); - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("eax",0); - &add ($carry,$sbit); - &adc ("eax",0); - &cmp ($j,$_num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &mov ($sbit,"eax"); - &jle (&label("sqradd")); - - &mov ($carry,"edx"); - &lea ("edx",&DWP(0,$sbit,"edx",2)); - &shr ($carry,31); -&set_label("sqrlast"); - &mov ($word,$_n0); - &mov ($inp,$_np); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num] - &mov ("eax",&DWP(0,$inp)); # np[0] - &adc ($carry,0); - &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]= - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &lea ($num,&DWP(-1,$j)); - &adc ("edx",0); - &mov ($j,1); - &mov ("eax",&DWP(4,$inp)); # np[1] - - &jmp (&label("3rdmadd")); -} - -&set_label("common_tail",16); - &mov ($np,$_np); # load modulus pointer - &mov ($rp,$_rp); # load result pointer - &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped] - - &mov ("eax",&DWP(0,$tp)); # tp[0] - &mov ($j,$num); # j=num-1 - &xor ($i,$i); # i=0 and clear CF! - -&set_label("sub",16); - &sbb ("eax",&DWP(0,$np,$i,4)); - &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i] - &dec ($j); # doesn't affect CF! - &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1] - &lea ($i,&DWP(1,$i)); # i++ - &jge (&label("sub")); - - &sbb ("eax",0); # handle upmost overflow bit - &and ($tp,"eax"); - ¬ ("eax"); - &mov ($np,$rp); - &and ($np,"eax"); - &or ($tp,$np); # tp=carry?tp:rp - -&set_label("copy",16); # copy or in-place refresh - &mov ("eax",&DWP(0,$tp,$num,4)); - &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i] - &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector - &dec ($num); - &jge (&label("copy")); - - &mov ("esp",$_sp); # pull saved stack pointer - &mov ("eax",1); -&set_label("just_leave"); -&function_end("bn_mul_mont"); - -&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); - -&asm_finish(); diff --git a/crypto/openssl/crypto/bn/asm/x86_64-gcc.c b/crypto/openssl/crypto/bn/asm/x86_64-gcc.c index f13f52d..2b2bc1e 100644 --- a/crypto/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/crypto/openssl/crypto/bn/asm/x86_64-gcc.c @@ -1,3 +1,4 @@ +#include "../bn_lcl.h" #ifdef __SUNPRO_C # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ #else @@ -56,6 +57,9 @@ #define BN_ULONG unsigned long +#undef mul +#undef mul_add + /* * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; * "g"(0) let the compiler to decide where does it @@ -97,7 +101,7 @@ : "a"(a) \ : "cc"); -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; @@ -121,7 +125,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c1); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; @@ -144,7 +148,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c1); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { if (n <= 0) return; @@ -175,7 +179,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) return ret; } -BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) +BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) { BN_ULONG ret=0,i=0; if (n <= 0) return 0; @@ -198,7 +202,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) } #ifndef SIMICS -BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) +BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) { BN_ULONG ret=0,i=0; if (n <= 0) return 0; @@ -485,7 +489,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[7]=c2; } -void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -561,7 +565,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) r[15]=c1; } -void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t1,t2; BN_ULONG c1,c2,c3; diff --git a/crypto/openssl/crypto/bn/bn_div.c b/crypto/openssl/crypto/bn/bn_div.c index 1e8e576..7c35545 100644 --- a/crypto/openssl/crypto/bn/bn_div.c +++ b/crypto/openssl/crypto/bn/bn_div.c @@ -102,7 +102,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, /* The next 2 are needed so we can do a dv->d[0]|=1 later * since BN_lshift1 will only work once there is a value :-) */ BN_zero(dv); - bn_wexpand(dv,1); + if(bn_wexpand(dv,1) == NULL) goto end; dv->top=1; if (!BN_lshift(D,D,nm-nd)) goto end; @@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (dv == NULL) res=BN_CTX_get(ctx); else res=dv; - if (sdiv == NULL || res == NULL) goto err; + if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) + goto err; /* First we normalise the numbers */ norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); diff --git a/crypto/openssl/crypto/bn/bn_exp.c b/crypto/openssl/crypto/bn/bn_exp.c index 70a33f0..d9b6c73 100644 --- a/crypto/openssl/crypto/bn/bn_exp.c +++ b/crypto/openssl/crypto/bn/bn_exp.c @@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) rr = BN_CTX_get(ctx); else rr = r; - if ((v = BN_CTX_get(ctx)) == NULL) goto err; + v = BN_CTX_get(ctx); + if (rr == NULL || v == NULL) goto err; if (BN_copy(v,a) == NULL) goto err; bits=BN_num_bits(p); diff --git a/crypto/openssl/crypto/bn/bn_gf2m.c b/crypto/openssl/crypto/bn/bn_gf2m.c index 306f029..ae642cc 100644 --- a/crypto/openssl/crypto/bn/bn_gf2m.c +++ b/crypto/openssl/crypto/bn/bn_gf2m.c @@ -294,7 +294,8 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) if (a->top < b->top) { at = b; bt = a; } else { at = a; bt = b; } - bn_wexpand(r, at->top); + if(bn_wexpand(r, at->top) == NULL) + return 0; for (i = 0; i < bt->top; i++) { diff --git a/crypto/openssl/crypto/bn/bn_mul.c b/crypto/openssl/crypto/bn/bn_mul.c index b848c8c..a0e9ec3 100644 --- a/crypto/openssl/crypto/bn/bn_mul.c +++ b/crypto/openssl/crypto/bn/bn_mul.c @@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) assert(j <= al || j <= bl); k = j+j; t = BN_CTX_get(ctx); + if (t == NULL) + goto err; if (al > j || bl > j) { - bn_wexpand(t,k*4); - bn_wexpand(rr,k*4); + if (bn_wexpand(t,k*4) == NULL) goto err; + if (bn_wexpand(rr,k*4) == NULL) goto err; bn_mul_part_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } else /* al <= j || bl <= j */ { - bn_wexpand(t,k*2); - bn_wexpand(rr,k*2); + if (bn_wexpand(t,k*2) == NULL) goto err; + if (bn_wexpand(rr,k*2) == NULL) goto err; bn_mul_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } diff --git a/crypto/openssl/crypto/bn/bntest.c b/crypto/openssl/crypto/bn/bntest.c index cf19038..d41daac 100644 --- a/crypto/openssl/crypto/bn/bntest.c +++ b/crypto/openssl/crypto/bn/bntest.c @@ -1027,7 +1027,7 @@ int test_exp(BIO *bp, BN_CTX *ctx) BN_bntest_rand(a,20+i*5,0,0); /**/ BN_bntest_rand(b,2+i,0,0); /**/ - if (!BN_exp(d,a,b,ctx)) + if (BN_exp(d,a,b,ctx) <= 0) return(0); if (bp != NULL) diff --git a/crypto/openssl/crypto/camellia/Makefile b/crypto/openssl/crypto/camellia/Makefile index dfb1295..dfd1a75 100644 --- a/crypto/openssl/crypto/camellia/Makefile +++ b/crypto/openssl/crypto/camellia/Makefile @@ -96,8 +96,11 @@ cmll_ctr.o: ../../include/openssl/camellia.h ../../include/openssl/e_os2.h cmll_ctr.o: ../../include/openssl/opensslconf.h cmll_ctr.c cmll_locl.h cmll_ecb.o: ../../include/openssl/camellia.h ../../include/openssl/e_os2.h cmll_ecb.o: ../../include/openssl/opensslconf.h cmll_ecb.c cmll_locl.h -cmll_misc.o: ../../include/openssl/camellia.h ../../include/openssl/e_os2.h +cmll_misc.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h +cmll_misc.o: ../../include/openssl/e_os2.h ../../include/openssl/fips.h cmll_misc.o: ../../include/openssl/opensslconf.h -cmll_misc.o: ../../include/openssl/opensslv.h cmll_locl.h cmll_misc.c +cmll_misc.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +cmll_misc.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +cmll_misc.o: ../../include/openssl/symhacks.h cmll_locl.h cmll_misc.c cmll_ofb.o: ../../include/openssl/camellia.h ../../include/openssl/e_os2.h cmll_ofb.o: ../../include/openssl/opensslconf.h cmll_locl.h cmll_ofb.c diff --git a/crypto/openssl/crypto/camellia/asm/cmll-x86.pl b/crypto/openssl/crypto/camellia/asm/cmll-x86.pl deleted file mode 100755 index 0812815..0000000 --- a/crypto/openssl/crypto/camellia/asm/cmll-x86.pl +++ /dev/null @@ -1,1138 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Copyright (c) 2008 Andy Polyakov <appro@openssl.org> -# -# This module may be used under the terms of either the GNU General -# Public License version 2 or later, the GNU Lesser General Public -# License version 2.1 or later, the Mozilla Public License version -# 1.1 or the BSD License. The exact terms of either license are -# distributed along with this module. For further details see -# http://www.openssl.org/~appro/camellia/. -# ==================================================================== - -# Performance in cycles per processed byte (less is better) in -# 'openssl speed ...' benchmark: -# -# AMD K8 Core2 PIII P4 -# -evp camellia-128-ecb 21.5 22.8 27.0 28.9 -# + over gcc 3.4.6 +90/11% +70/10% +53/4% +160/64% -# + over icc 8.0 +48/19% +21/15% +21/17% +55/37% -# -# camellia-128-cbc 17.3 21.1 23.9 25.9 -# -# 128-bit key setup 196 280 256 240 cycles/key -# + over gcc 3.4.6 +30/0% +17/11% +11/0% +63/40% -# + over icc 8.0 +18/3% +10/0% +10/3% +21/10% -# -# Pairs of numbers in "+" rows represent performance improvement over -# compiler generated position-independent code, PIC, and non-PIC -# respectively. PIC results are of greater relevance, as this module -# is position-independent, i.e. suitable for a shared library or PIE. -# Position independence "costs" one register, which is why compilers -# are so close with non-PIC results, they have an extra register to -# spare. CBC results are better than ECB ones thanks to "zero-copy" -# private _x86_* interface, and are ~30-40% better than with compiler -# generated cmll_cbc.o, and reach ~80-90% of x86_64 performance on -# same CPU (where applicable). - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -$OPENSSL=1; - -&asm_init($ARGV[0],"cmll-586.pl",$ARGV[$#ARGV] eq "386"); - -@T=("eax","ebx","ecx","edx"); -$idx="esi"; -$key="edi"; -$Tbl="ebp"; - -# stack frame layout in _x86_Camellia_* routines, frame is allocated -# by caller -$__ra=&DWP(0,"esp"); # return address -$__s0=&DWP(4,"esp"); # s0 backing store -$__s1=&DWP(8,"esp"); # s1 backing store -$__s2=&DWP(12,"esp"); # s2 backing store -$__s3=&DWP(16,"esp"); # s3 backing store -$__end=&DWP(20,"esp"); # pointer to end/start of key schedule - -# stack frame layout in Camellia_[en|crypt] routines, which differs from -# above by 4 and overlaps by pointer to end/start of key schedule -$_end=&DWP(16,"esp"); -$_esp=&DWP(20,"esp"); - -# const unsigned int Camellia_SBOX[4][256]; -# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][], -# and [2][] - with [3][]. This is done to optimize code size. -$SBOX1_1110=0; # Camellia_SBOX[0] -$SBOX4_4404=4; # Camellia_SBOX[1] -$SBOX2_0222=2048; # Camellia_SBOX[2] -$SBOX3_3033=2052; # Camellia_SBOX[3] -&static_label("Camellia_SIGMA"); -&static_label("Camellia_SBOX"); - -sub Camellia_Feistel { -my $i=@_[0]; -my $seed=defined(@_[1])?@_[1]:0; -my $scale=$seed<0?-8:8; -my $frame=defined(@_[2])?@_[2]:0; -my $j=($i&1)*2; -my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4]; - - &xor ($t0,$idx); # t0^=key[0] - &xor ($t1,&DWP($seed+$i*$scale+4,$key)); # t1^=key[1] - &movz ($idx,&HB($t0)); # (t0>>8)&0xff - &mov ($t3,&DWP($SBOX3_3033,$Tbl,$idx,8)); # t3=SBOX3_3033[0] - &movz ($idx,&LB($t0)); # (t0>>0)&0xff - &xor ($t3,&DWP($SBOX4_4404,$Tbl,$idx,8)); # t3^=SBOX4_4404[0] - &shr ($t0,16); - &movz ($idx,&LB($t1)); # (t1>>0)&0xff - &mov ($t2,&DWP($SBOX1_1110,$Tbl,$idx,8)); # t2=SBOX1_1110[1] - &movz ($idx,&HB($t0)); # (t0>>24)&0xff - &xor ($t3,&DWP($SBOX1_1110,$Tbl,$idx,8)); # t3^=SBOX1_1110[0] - &movz ($idx,&HB($t1)); # (t1>>8)&0xff - &xor ($t2,&DWP($SBOX4_4404,$Tbl,$idx,8)); # t2^=SBOX4_4404[1] - &shr ($t1,16); - &movz ($t0,&LB($t0)); # (t0>>16)&0xff - &xor ($t3,&DWP($SBOX2_0222,$Tbl,$t0,8)); # t3^=SBOX2_0222[0] - &movz ($idx,&HB($t1)); # (t1>>24)&0xff - &mov ($t0,&DWP($frame+4*(($j+3)%4),"esp")); # prefetch "s3" - &xor ($t2,$t3); # t2^=t3 - &rotr ($t3,8); # t3=RightRotate(t3,8) - &xor ($t2,&DWP($SBOX2_0222,$Tbl,$idx,8)); # t2^=SBOX2_0222[1] - &movz ($idx,&LB($t1)); # (t1>>16)&0xff - &mov ($t1,&DWP($frame+4*(($j+2)%4),"esp")); # prefetch "s2" - &xor ($t3,$t0); # t3^=s3 - &xor ($t2,&DWP($SBOX3_3033,$Tbl,$idx,8)); # t2^=SBOX3_3033[1] - &mov ($idx,&DWP($seed+($i+1)*$scale,$key)); # prefetch key[i+1] - &xor ($t3,$t2); # t3^=t2 - &mov (&DWP($frame+4*(($j+3)%4),"esp"),$t3); # s3=t3 - &xor ($t2,$t1); # t2^=s2 - &mov (&DWP($frame+4*(($j+2)%4),"esp"),$t2); # s2=t2 -} - -# void Camellia_EncryptBlock_Rounds( -# int grandRounds, -# const Byte plaintext[], -# const KEY_TABLE_TYPE keyTable, -# Byte ciphertext[]) -&function_begin("Camellia_EncryptBlock_Rounds"); - &mov ("eax",&wparam(0)); # load grandRounds - &mov ($idx,&wparam(1)); # load plaintext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &lea ("eax",&DWP(0,$key,"eax")); - &mov ($_esp,"ebx"); # save %esp - &mov ($_end,"eax"); # save keyEnd - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load plaintext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_encrypt"); - - &mov ("esp",$_esp); - &bswap (@T[0]); - &mov ($idx,&wparam(3)); # load ciphertext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write ciphertext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_EncryptBlock_Rounds"); -# V1.x API -&function_begin_B("Camellia_EncryptBlock"); - &mov ("eax",128); - &sub ("eax",&wparam(0)); # load keyBitLength - &mov ("eax",3); - &adc ("eax",0); # keyBitLength==128?3:4 - &mov (&wparam(0),"eax"); - &jmp (&label("Camellia_EncryptBlock_Rounds")); -&function_end_B("Camellia_EncryptBlock"); - -if ($OPENSSL) { -# void Camellia_encrypt( -# const unsigned char *in, -# unsigned char *out, -# const CAMELLIA_KEY *key) -&function_begin("Camellia_encrypt"); - &mov ($idx,&wparam(0)); # load plaintext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - &mov ("eax",&DWP(272,$key)); # load grandRounds counter - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &lea ("eax",&DWP(0,$key,"eax")); - &mov ($_esp,"ebx"); # save %esp - &mov ($_end,"eax"); # save keyEnd - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load plaintext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_encrypt"); - - &mov ("esp",$_esp); - &bswap (@T[0]); - &mov ($idx,&wparam(1)); # load ciphertext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write ciphertext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_encrypt"); -} - -&function_begin_B("_x86_Camellia_encrypt"); - &xor (@T[0],&DWP(0,$key)); # ^=key[0-3] - &xor (@T[1],&DWP(4,$key)); - &xor (@T[2],&DWP(8,$key)); - &xor (@T[3],&DWP(12,$key)); - &mov ($idx,&DWP(16,$key)); # prefetch key[4] - - &mov ($__s0,@T[0]); # save s[0-3] - &mov ($__s1,@T[1]); - &mov ($__s2,@T[2]); - &mov ($__s3,@T[3]); - -&set_label("loop",16); - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16,4); } - - &add ($key,16*4); - &cmp ($key,$__end); - &je (&label("done")); - - # @T[0-1] are preloaded, $idx is preloaded with key[0] - &and ($idx,@T[0]); - &mov (@T[3],$__s3); - &rotl ($idx,1); - &mov (@T[2],@T[3]); - &xor (@T[1],$idx); - &or (@T[2],&DWP(12,$key)); - &mov ($__s1,@T[1]); # s1^=LeftRotate(s0&key[0],1); - &xor (@T[2],$__s2); - - &mov ($idx,&DWP(4,$key)); - &mov ($__s2,@T[2]); # s2^=s3|key[3]; - &or ($idx,@T[1]); - &and (@T[2],&DWP(8,$key)); - &xor (@T[0],$idx); - &rotl (@T[2],1); - &mov ($__s0,@T[0]); # s0^=s1|key[1]; - &xor (@T[3],@T[2]); - &mov ($idx,&DWP(16,$key)); # prefetch key[4] - &mov ($__s3,@T[3]); # s3^=LeftRotate(s2&key[2],1); - &jmp (&label("loop")); - -&set_label("done",8); - &mov (@T[2],@T[0]); # SwapHalf - &mov (@T[3],@T[1]); - &mov (@T[0],$__s2); - &mov (@T[1],$__s3); - &xor (@T[0],$idx); # $idx is preloaded with key[0] - &xor (@T[1],&DWP(4,$key)); - &xor (@T[2],&DWP(8,$key)); - &xor (@T[3],&DWP(12,$key)); - &ret (); -&function_end_B("_x86_Camellia_encrypt"); - -# void Camellia_DecryptBlock_Rounds( -# int grandRounds, -# const Byte ciphertext[], -# const KEY_TABLE_TYPE keyTable, -# Byte plaintext[]) -&function_begin("Camellia_DecryptBlock_Rounds"); - &mov ("eax",&wparam(0)); # load grandRounds - &mov ($idx,&wparam(1)); # load ciphertext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &mov (&DWP(4*4,"esp"),$key); # save keyStart - &lea ($key,&DWP(0,$key,"eax")); - &mov (&DWP(5*4,"esp"),"ebx");# save %esp - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load ciphertext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_decrypt"); - - &mov ("esp",&DWP(5*4,"esp")); - &bswap (@T[0]); - &mov ($idx,&wparam(3)); # load plaintext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write plaintext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_DecryptBlock_Rounds"); -# V1.x API -&function_begin_B("Camellia_DecryptBlock"); - &mov ("eax",128); - &sub ("eax",&wparam(0)); # load keyBitLength - &mov ("eax",3); - &adc ("eax",0); # keyBitLength==128?3:4 - &mov (&wparam(0),"eax"); - &jmp (&label("Camellia_DecryptBlock_Rounds")); -&function_end_B("Camellia_DecryptBlock"); - -if ($OPENSSL) { -# void Camellia_decrypt( -# const unsigned char *in, -# unsigned char *out, -# const CAMELLIA_KEY *key) -&function_begin("Camellia_decrypt"); - &mov ($idx,&wparam(0)); # load ciphertext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - &mov ("eax",&DWP(272,$key)); # load grandRounds counter - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &mov (&DWP(4*4,"esp"),$key); # save keyStart - &lea ($key,&DWP(0,$key,"eax")); - &mov (&DWP(5*4,"esp"),"ebx");# save %esp - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load ciphertext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_decrypt"); - - &mov ("esp",&DWP(5*4,"esp")); - &bswap (@T[0]); - &mov ($idx,&wparam(1)); # load plaintext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write plaintext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_decrypt"); -} - -&function_begin_B("_x86_Camellia_decrypt"); - &xor (@T[0],&DWP(0,$key)); # ^=key[0-3] - &xor (@T[1],&DWP(4,$key)); - &xor (@T[2],&DWP(8,$key)); - &xor (@T[3],&DWP(12,$key)); - &mov ($idx,&DWP(-8,$key)); # prefetch key[-2] - - &mov ($__s0,@T[0]); # save s[0-3] - &mov ($__s1,@T[1]); - &mov ($__s2,@T[2]); - &mov ($__s3,@T[3]); - -&set_label("loop",16); - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8,4); } - - &sub ($key,16*4); - &cmp ($key,$__end); - &je (&label("done")); - - # @T[0-1] are preloaded, $idx is preloaded with key[2] - &and ($idx,@T[0]); - &mov (@T[3],$__s3); - &rotl ($idx,1); - &mov (@T[2],@T[3]); - &xor (@T[1],$idx); - &or (@T[2],&DWP(4,$key)); - &mov ($__s1,@T[1]); # s1^=LeftRotate(s0&key[0],1); - &xor (@T[2],$__s2); - - &mov ($idx,&DWP(12,$key)); - &mov ($__s2,@T[2]); # s2^=s3|key[3]; - &or ($idx,@T[1]); - &and (@T[2],&DWP(0,$key)); - &xor (@T[0],$idx); - &rotl (@T[2],1); - &mov ($__s0,@T[0]); # s0^=s1|key[1]; - &xor (@T[3],@T[2]); - &mov ($idx,&DWP(-8,$key)); # prefetch key[4] - &mov ($__s3,@T[3]); # s3^=LeftRotate(s2&key[2],1); - &jmp (&label("loop")); - -&set_label("done",8); - &mov (@T[2],@T[0]); # SwapHalf - &mov (@T[3],@T[1]); - &mov (@T[0],$__s2); - &mov (@T[1],$__s3); - &xor (@T[2],$idx); # $idx is preloaded with key[2] - &xor (@T[3],&DWP(12,$key)); - &xor (@T[0],&DWP(0,$key)); - &xor (@T[1],&DWP(4,$key)); - &ret (); -&function_end_B("_x86_Camellia_decrypt"); - -# shld is very slow on Intel P4 family. Even on AMD it limits -# instruction decode rate [because it's VectorPath] and consequently -# performance. PIII, PM and Core[2] seem to be the only ones which -# execute this code ~7% faster... -sub __rotl128 { - my ($i0,$i1,$i2,$i3,$rot,$rnd,@T)=@_; - - $rnd *= 2; - if ($rot) { - &mov ($idx,$i0); - &shld ($i0,$i1,$rot); - &shld ($i1,$i2,$rot); - &shld ($i2,$i3,$rot); - &shld ($i3,$idx,$rot); - } - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]); -} - -# ... Implementing 128-bit rotate without shld gives >3x performance -# improvement on P4, only ~7% degradation on other Intel CPUs and -# not worse performance on AMD. This is therefore preferred. -sub _rotl128 { - my ($i0,$i1,$i2,$i3,$rot,$rnd,@T)=@_; - - $rnd *= 2; - if ($rot) { - &mov ($Tbl,$i0); - &shl ($i0,$rot); - &mov ($idx,$i1); - &shr ($idx,32-$rot); - &shl ($i1,$rot); - &or ($i0,$idx); - &mov ($idx,$i2); - &shl ($i2,$rot); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); - &shr ($idx,32-$rot); - &or ($i1,$idx); - &shr ($Tbl,32-$rot); - &mov ($idx,$i3); - &shr ($idx,32-$rot); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]); - &shl ($i3,$rot); - &or ($i2,$idx); - &or ($i3,$Tbl); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]); - } else { - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]); - } -} - -sub _saveround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - - &mov (&DWP($bias+$rnd*8+0,$key),@T[0]); - &mov (&DWP($bias+$rnd*8+4,$key),@T[1]) if ($#T>=1); - &mov (&DWP($bias+$rnd*8+8,$key),@T[2]) if ($#T>=2); - &mov (&DWP($bias+$rnd*8+12,$key),@T[3]) if ($#T>=3); -} - -sub _loadround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - - &mov (@T[0],&DWP($bias+$rnd*8+0,$key)); - &mov (@T[1],&DWP($bias+$rnd*8+4,$key)) if ($#T>=1); - &mov (@T[2],&DWP($bias+$rnd*8+8,$key)) if ($#T>=2); - &mov (@T[3],&DWP($bias+$rnd*8+12,$key)) if ($#T>=3); -} - -# void Camellia_Ekeygen( -# const int keyBitLength, -# const Byte *rawKey, -# KEY_TABLE_TYPE keyTable) -&function_begin("Camellia_Ekeygen"); -{ my $step=0; - - &stack_push(4); # place for s[0-3] - - &mov ($Tbl,&wparam(0)); # load arguments - &mov ($idx,&wparam(1)); - &mov ($key,&wparam(2)); - - &mov (@T[0],&DWP(0,$idx)); # load 0-127 bits - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &mov (@T[3],&DWP(12,$idx)); - - &bswap (@T[0]); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &_saveround (0,$key,@T); # KL<<<0 - - &cmp ($Tbl,128); - &je (&label("1st128")); - - &mov (@T[0],&DWP(16,$idx)); # load 128-191 bits - &mov (@T[1],&DWP(20,$idx)); - &cmp ($Tbl,192); - &je (&label("1st192")); - &mov (@T[2],&DWP(24,$idx)); # load 192-255 bits - &mov (@T[3],&DWP(28,$idx)); - &jmp (&label("1st256")); -&set_label("1st192",4); - &mov (@T[2],@T[0]); - &mov (@T[3],@T[1]); - ¬ (@T[2]); - ¬ (@T[3]); -&set_label("1st256",4); - &bswap (@T[0]); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &_saveround (4,$key,@T); # temporary storage for KR! - - &xor (@T[0],&DWP(0*8+0,$key)); # KR^KL - &xor (@T[1],&DWP(0*8+4,$key)); - &xor (@T[2],&DWP(1*8+0,$key)); - &xor (@T[3],&DWP(1*8+4,$key)); - -&set_label("1st128",4); - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); - - &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] - &mov (&swtmp(0),@T[0]); # save s[0-3] - &mov (&swtmp(1),@T[1]); - &mov (&swtmp(2),@T[2]); - &mov (&swtmp(3),@T[3]); - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - &mov (@T[2],&swtmp(2)); - &mov (@T[3],&swtmp(3)); - - &mov ($idx,&wparam(2)); - &xor (@T[0],&DWP(0*8+0,$idx)); # ^KL - &xor (@T[1],&DWP(0*8+4,$idx)); - &xor (@T[2],&DWP(1*8+0,$idx)); - &xor (@T[3],&DWP(1*8+4,$idx)); - - &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[4] - &mov (&swtmp(0),@T[0]); # save s[0-3] - &mov (&swtmp(1),@T[1]); - &mov (&swtmp(2),@T[2]); - &mov (&swtmp(3),@T[3]); - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - &mov (@T[2],&swtmp(2)); - &mov (@T[3],&swtmp(3)); - - &mov ($idx,&wparam(0)); - &cmp ($idx,128); - &jne (&label("2nd256")); - - &mov ($key,&wparam(2)); - &lea ($key,&DWP(128,$key)); # size optimization - - ####### process KA - &_saveround (2,$key,-128,@T); # KA<<<0 - &_rotl128 (@T,15,6,@T); # KA<<<15 - &_rotl128 (@T,15,8,@T); # KA<<<(15+15=30) - &_rotl128 (@T,15,12,@T[0],@T[1]); # KA<<<(30+15=45) - &_rotl128 (@T,15,14,@T); # KA<<<(45+15=60) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,2,20,@T); # KA<<<(60+32+2=94) - &_rotl128 (@T,17,24,@T); # KA<<<(94+17=111) - - ####### process KL - &_loadround (0,$key,-128,@T); # load KL - &_rotl128 (@T,15,4,@T); # KL<<<15 - &_rotl128 (@T,30,10,@T); # KL<<<(15+30=45) - &_rotl128 (@T,15,13,@T[2],@T[3]); # KL<<<(45+15=60) - &_rotl128 (@T,17,16,@T); # KL<<<(60+17=77) - &_rotl128 (@T,17,18,@T); # KL<<<(77+17=94) - &_rotl128 (@T,17,22,@T); # KL<<<(94+17=111) - - while (@T[0] ne "eax") # restore order - { unshift (@T,pop(@T)); } - - &mov ("eax",3); # 3 grandRounds - &jmp (&label("done")); - -&set_label("2nd256",16); - &mov ($idx,&wparam(2)); - &_saveround (6,$idx,@T); # temporary storage for KA! - - &xor (@T[0],&DWP(4*8+0,$idx)); # KA^KR - &xor (@T[1],&DWP(4*8+4,$idx)); - &xor (@T[2],&DWP(5*8+0,$idx)); - &xor (@T[3],&DWP(5*8+4,$idx)); - - &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[8] - &mov (&swtmp(0),@T[0]); # save s[0-3] - &mov (&swtmp(1),@T[1]); - &mov (&swtmp(2),@T[2]); - &mov (&swtmp(3),@T[3]); - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - &mov (@T[2],&swtmp(2)); - &mov (@T[3],&swtmp(3)); - - &mov ($key,&wparam(2)); - &lea ($key,&DWP(128,$key)); # size optimization - - ####### process KB - &_saveround (2,$key,-128,@T); # KB<<<0 - &_rotl128 (@T,30,10,@T); # KB<<<30 - &_rotl128 (@T,30,20,@T); # KB<<<(30+30=60) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,19,32,@T); # KB<<<(60+32+19=111) - - ####### process KR - &_loadround (4,$key,-128,@T); # load KR - &_rotl128 (@T,15,4,@T); # KR<<<15 - &_rotl128 (@T,15,8,@T); # KR<<<(15+15=30) - &_rotl128 (@T,30,18,@T); # KR<<<(30+30=60) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,2,26,@T); # KR<<<(60+32+2=94) - - ####### process KA - &_loadround (6,$key,-128,@T); # load KA - &_rotl128 (@T,15,6,@T); # KA<<<15 - &_rotl128 (@T,30,14,@T); # KA<<<(15+30=45) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,0,24,@T); # KA<<<(45+32+0=77) - &_rotl128 (@T,17,28,@T); # KA<<<(77+17=94) - - ####### process KL - &_loadround (0,$key,-128,@T); # load KL - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,13,12,@T); # KL<<<(32+13=45) - &_rotl128 (@T,15,16,@T); # KL<<<(45+15=60) - &_rotl128 (@T,17,22,@T); # KL<<<(60+17=77) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,2,30,@T); # KL<<<(77+32+2=111) - - while (@T[0] ne "eax") # restore order - { unshift (@T,pop(@T)); } - - &mov ("eax",4); # 4 grandRounds -&set_label("done"); - &lea ("edx",&DWP(272-128,$key)); # end of key schedule - &stack_pop(4); -} -&function_end("Camellia_Ekeygen"); - -if ($OPENSSL) { -# int Camellia_set_key ( -# const unsigned char *userKey, -# int bits, -# CAMELLIA_KEY *key) -&function_begin_B("Camellia_set_key"); - &push ("ebx"); - &mov ("ecx",&wparam(0)); # pull arguments - &mov ("ebx",&wparam(1)); - &mov ("edx",&wparam(2)); - - &mov ("eax",-1); - &test ("ecx","ecx"); - &jz (&label("done")); # userKey==NULL? - &test ("edx","edx"); - &jz (&label("done")); # key==NULL? - - &mov ("eax",-2); - &cmp ("ebx",256); - &je (&label("arg_ok")); # bits==256? - &cmp ("ebx",192); - &je (&label("arg_ok")); # bits==192? - &cmp ("ebx",128); - &jne (&label("done")); # bits!=128? -&set_label("arg_ok",4); - - &push ("edx"); # push arguments - &push ("ecx"); - &push ("ebx"); - &call ("Camellia_Ekeygen"); - &stack_pop(3); - - # eax holds grandRounds and edx points at where to put it - &mov (&DWP(0,"edx"),"eax"); - &xor ("eax","eax"); -&set_label("done",4); - &pop ("ebx"); - &ret (); -&function_end_B("Camellia_set_key"); -} - -@SBOX=( -112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65, - 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189, -134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26, -166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77, -139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153, -223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215, - 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34, -254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80, -170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210, - 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148, -135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226, - 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46, -233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89, -120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250, -114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164, - 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158); - -sub S1110 { my $i=shift; $i=@SBOX[$i]; return $i<<24|$i<<16|$i<<8; } -sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<<16|$i; } -sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } -sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } - -&set_label("Camellia_SIGMA",64); -&data_word( - 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, - 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c, - 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd, - 0, 0, 0, 0); -&set_label("Camellia_SBOX",64); -# tables are interleaved, remember? -for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } -for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } - -# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const CAMELLIA_KEY *key, -# unsigned char *ivp,const int enc); -{ -# stack frame layout -# -4(%esp) # return address 0(%esp) -# 0(%esp) # s0 4(%esp) -# 4(%esp) # s1 8(%esp) -# 8(%esp) # s2 12(%esp) -# 12(%esp) # s3 16(%esp) -# 16(%esp) # end of key schedule 20(%esp) -# 20(%esp) # %esp backup -my $_inp=&DWP(24,"esp"); #copy of wparam(0) -my $_out=&DWP(28,"esp"); #copy of wparam(1) -my $_len=&DWP(32,"esp"); #copy of wparam(2) -my $_key=&DWP(36,"esp"); #copy of wparam(3) -my $_ivp=&DWP(40,"esp"); #copy of wparam(4) -my $ivec=&DWP(44,"esp"); #ivec[16] -my $_tmp=&DWP(44,"esp"); #volatile variable [yes, aliases with ivec] -my ($s0,$s1,$s2,$s3) = @T; - -&function_begin("Camellia_cbc_encrypt"); - &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len - &cmp ($s2,0); - &je (&label("enc_out")); - - &pushf (); - &cld (); - - &mov ($s0,&wparam(0)); # load inp - &mov ($s1,&wparam(1)); # load out - #&mov ($s2,&wparam(2)); # load len - &mov ($s3,&wparam(3)); # load key - &mov ($Tbl,&wparam(4)); # load ivp - - # allocate aligned stack frame... - &lea ($idx,&DWP(-64,"esp")); - &and ($idx,-64); - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ($key,&DWP(-64-63,$s3)); - &sub ($key,$idx); - &neg ($key); - &and ($key,0x3C0); # modulo 1024, but aligned to cache-line - &sub ($idx,$key); - - &mov ($key,&wparam(5)); # load enc - - &exch ("esp",$idx); - &add ("esp",4); # reserve for return address! - &mov ($_esp,$idx); # save %esp - - &mov ($_inp,$s0); # save copy of inp - &mov ($_out,$s1); # save copy of out - &mov ($_len,$s2); # save copy of len - &mov ($_key,$s3); # save copy of key - &mov ($_ivp,$Tbl); # save copy of ivp - - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov ($idx,32); - &set_label("prefetch_sbox",4); - &mov ($s0,&DWP(0,$Tbl)); - &mov ($s1,&DWP(32,$Tbl)); - &mov ($s2,&DWP(64,$Tbl)); - &mov ($s3,&DWP(96,$Tbl)); - &lea ($Tbl,&DWP(128,$Tbl)); - &dec ($idx); - &jnz (&label("prefetch_sbox")); - &mov ($s0,$_key); - &sub ($Tbl,4096); - &mov ($idx,$_inp); - &mov ($s3,&DWP(272,$s0)); # load grandRounds - - &cmp ($key,0); - &je (&label("DECRYPT")); - - &mov ($s2,$_len); - &mov ($key,$_ivp); - &shl ($s3,6); - &lea ($s3,&DWP(0,$s0,$s3)); - &mov ($_end,$s3); - - &test ($s2,0xFFFFFFF0); - &jz (&label("enc_tail")); # short input... - - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - - &set_label("enc_loop",4); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - - &xor ($s0,&DWP(0,$idx)); # xor input data - &xor ($s1,&DWP(4,$idx)); - &xor ($s2,&DWP(8,$idx)); - &bswap ($s0); - &xor ($s3,&DWP(12,$idx)); - &bswap ($s1); - &mov ($key,$_key); # load key - &bswap ($s2); - &bswap ($s3); - - &call ("_x86_Camellia_encrypt"); - - &mov ($idx,$_inp); # load inp - &mov ($key,$_out); # load out - - &bswap ($s0); - &bswap ($s1); - &bswap ($s2); - &mov (&DWP(0,$key),$s0); # save output data - &bswap ($s3); - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($s2,$_len); # load len - - &lea ($idx,&DWP(16,$idx)); - &mov ($_inp,$idx); # save inp - - &lea ($s3,&DWP(16,$key)); - &mov ($_out,$s3); # save out - - &sub ($s2,16); - &test ($s2,0xFFFFFFF0); - &mov ($_len,$s2); # save len - &jnz (&label("enc_loop")); - &test ($s2,15); - &jnz (&label("enc_tail")); - &mov ($idx,$_ivp); # load ivp - &mov ($s2,&DWP(8,$key)); # restore last dwords - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$idx),$s0); # save ivec - &mov (&DWP(4,$idx),$s1); - &mov (&DWP(8,$idx),$s2); - &mov (&DWP(12,$idx),$s3); - - &mov ("esp",$_esp); - &popf (); - &set_label("enc_out"); - &function_end_A(); - &pushf (); # kludge, never executed - - &set_label("enc_tail",4); - &mov ($s0,$key eq "edi" ? $key : ""); - &mov ($key,$_out); # load out - &push ($s0); # push ivp - &mov ($s1,16); - &sub ($s1,$s2); - &cmp ($key,$idx); # compare with inp - &je (&label("enc_in_place")); - &align (4); - &data_word(0xA4F3F689); # rep movsb # copy input - &jmp (&label("enc_skip_in_place")); - &set_label("enc_in_place"); - &lea ($key,&DWP(0,$key,$s2)); - &set_label("enc_skip_in_place"); - &mov ($s2,$s1); - &xor ($s0,$s0); - &align (4); - &data_word(0xAAF3F689); # rep stosb # zero tail - &pop ($key); # pop ivp - - &mov ($idx,$_out); # output as input - &mov ($s0,&DWP(0,$key)); - &mov ($s1,&DWP(4,$key)); - &mov ($_len,16); # len=16 - &jmp (&label("enc_loop")); # one more spin... - -#----------------------------- DECRYPT -----------------------------# -&set_label("DECRYPT",16); - &shl ($s3,6); - &lea ($s3,&DWP(0,$s0,$s3)); - &mov ($_end,$s0); - &mov ($_key,$s3); - - &cmp ($idx,$_out); - &je (&label("dec_in_place")); # in-place processing... - - &mov ($key,$_ivp); # load ivp - &mov ($_tmp,$key); - - &set_label("dec_loop",4); - &mov ($s0,&DWP(0,$idx)); # read input - &mov ($s1,&DWP(4,$idx)); - &mov ($s2,&DWP(8,$idx)); - &bswap ($s0); - &mov ($s3,&DWP(12,$idx)); - &bswap ($s1); - &mov ($key,$_key); # load key - &bswap ($s2); - &bswap ($s3); - - &call ("_x86_Camellia_decrypt"); - - &mov ($key,$_tmp); # load ivp - &mov ($idx,$_len); # load len - - &bswap ($s0); - &bswap ($s1); - &bswap ($s2); - &xor ($s0,&DWP(0,$key)); # xor iv - &bswap ($s3); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &sub ($idx,16); - &jc (&label("dec_partial")); - &mov ($_len,$idx); # save len - &mov ($idx,$_inp); # load inp - &mov ($key,$_out); # load out - - &mov (&DWP(0,$key),$s0); # write output - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($_tmp,$idx); # save ivp - &lea ($idx,&DWP(16,$idx)); - &mov ($_inp,$idx); # save inp - - &lea ($key,&DWP(16,$key)); - &mov ($_out,$key); # save out - - &jnz (&label("dec_loop")); - &mov ($key,$_tmp); # load temp ivp - &set_label("dec_end"); - &mov ($idx,$_ivp); # load user ivp - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$idx),$s0); # copy back to user - &mov (&DWP(4,$idx),$s1); - &mov (&DWP(8,$idx),$s2); - &mov (&DWP(12,$idx),$s3); - &jmp (&label("dec_out")); - - &set_label("dec_partial",4); - &lea ($key,$ivec); - &mov (&DWP(0,$key),$s0); # dump output to stack - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$idx)); - &mov ($idx eq "esi" ? $idx : "",$key); - &mov ($key eq "edi" ? $key : "",$_out); # load out - &data_word(0xA4F3F689); # rep movsb # copy output - &mov ($key,$_inp); # use inp as temp ivp - &jmp (&label("dec_end")); - - &set_label("dec_in_place",4); - &set_label("dec_in_place_loop"); - &lea ($key,$ivec); - &mov ($s0,&DWP(0,$idx)); # read input - &mov ($s1,&DWP(4,$idx)); - &mov ($s2,&DWP(8,$idx)); - &mov ($s3,&DWP(12,$idx)); - - &mov (&DWP(0,$key),$s0); # copy to temp - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &bswap ($s0); - &mov (&DWP(12,$key),$s3); - &bswap ($s1); - &mov ($key,$_key); # load key - &bswap ($s2); - &bswap ($s3); - - &call ("_x86_Camellia_decrypt"); - - &mov ($key,$_ivp); # load ivp - &mov ($idx,$_out); # load out - - &bswap ($s0); - &bswap ($s1); - &bswap ($s2); - &xor ($s0,&DWP(0,$key)); # xor iv - &bswap ($s3); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov (&DWP(0,$idx),$s0); # write output - &mov (&DWP(4,$idx),$s1); - &mov (&DWP(8,$idx),$s2); - &mov (&DWP(12,$idx),$s3); - - &lea ($idx,&DWP(16,$idx)); - &mov ($_out,$idx); # save out - - &lea ($idx,$ivec); - &mov ($s0,&DWP(0,$idx)); # read temp - &mov ($s1,&DWP(4,$idx)); - &mov ($s2,&DWP(8,$idx)); - &mov ($s3,&DWP(12,$idx)); - - &mov (&DWP(0,$key),$s0); # copy iv - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($idx,$_inp); # load inp - - &lea ($idx,&DWP(16,$idx)); - &mov ($_inp,$idx); # save inp - - &mov ($s2,$_len); # load len - &sub ($s2,16); - &jc (&label("dec_in_place_partial")); - &mov ($_len,$s2); # save len - &jnz (&label("dec_in_place_loop")); - &jmp (&label("dec_out")); - - &set_label("dec_in_place_partial",4); - # one can argue if this is actually required... - &mov ($key eq "edi" ? $key : "",$_out); - &lea ($idx eq "esi" ? $idx : "",$ivec); - &lea ($key,&DWP(0,$key,$s2)); - &lea ($idx,&DWP(16,$idx,$s2)); - &neg ($s2 eq "ecx" ? $s2 : ""); - &data_word(0xA4F3F689); # rep movsb # restore tail - - &set_label("dec_out",4); - &mov ("esp",$_esp); - &popf (); -&function_end("Camellia_cbc_encrypt"); -} - -&asciz("Camellia for x86 by <appro@openssl.org>"); - -&asm_finish(); diff --git a/crypto/openssl/crypto/camellia/asm/cmll-x86_64.pl b/crypto/openssl/crypto/camellia/asm/cmll-x86_64.pl deleted file mode 100755 index c683646..0000000 --- a/crypto/openssl/crypto/camellia/asm/cmll-x86_64.pl +++ /dev/null @@ -1,1080 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Copyright (c) 2008 Andy Polyakov <appro@openssl.org> -# -# This module may be used under the terms of either the GNU General -# Public License version 2 or later, the GNU Lesser General Public -# License version 2.1 or later, the Mozilla Public License version -# 1.1 or the BSD License. The exact terms of either license are -# distributed along with this module. For further details see -# http://www.openssl.org/~appro/camellia/. -# ==================================================================== - -# Performance in cycles per processed byte (less is better) in -# 'openssl speed ...' benchmark: -# -# AMD64 Core2 EM64T -# -evp camellia-128-ecb 16.7 21.0 22.7 -# + over gcc 3.4.6 +25% +5% 0% -# -# camellia-128-cbc 15.7 20.4 21.1 -# -# 128-bit key setup 128 216 205 cycles/key -# + over gcc 3.4.6 +54% +39% +15% -# -# Numbers in "+" rows represent performance improvement over compiler -# generated code. Key setup timings are impressive on AMD and Core2 -# thanks to 64-bit operations being covertly deployed. Improvement on -# EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it -# apparently emulates some of 64-bit operations in [32-bit] microcode. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour $output"; - -sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; } -sub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/; - $r =~ s/%[er]([sd]i)/%\1l/; - $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; } - -$t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx"; -@S=("%r8d","%r9d","%r10d","%r11d"); -$i0="%esi"; -$i1="%edi"; -$Tbl="%rbp"; # size optimization -$inp="%r12"; -$out="%r13"; -$key="%r14"; -$keyend="%r15"; -$arg0d=$win64?"%ecx":"%edi"; - -# const unsigned int Camellia_SBOX[4][256]; -# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][], -# and [2][] - with [3][]. This is done to minimize code size. -$SBOX1_1110=0; # Camellia_SBOX[0] -$SBOX4_4404=4; # Camellia_SBOX[1] -$SBOX2_0222=2048; # Camellia_SBOX[2] -$SBOX3_3033=2052; # Camellia_SBOX[3] - -sub Camellia_Feistel { -my $i=@_[0]; -my $seed=defined(@_[1])?@_[1]:0; -my $scale=$seed<0?-8:8; -my $j=($i&1)*2; -my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4]; - -$code.=<<___; - xor $s0,$t0 # t0^=key[0] - xor $s1,$t1 # t1^=key[1] - movz `&hi("$t0")`,$i0 # (t0>>8)&0xff - movz `&lo("$t1")`,$i1 # (t1>>0)&0xff - mov $SBOX3_3033($Tbl,$i0,8),$t3 # t3=SBOX3_3033[0] - mov $SBOX1_1110($Tbl,$i1,8),$t2 # t2=SBOX1_1110[1] - movz `&lo("$t0")`,$i0 # (t0>>0)&0xff - shr \$16,$t0 - movz `&hi("$t1")`,$i1 # (t1>>8)&0xff - xor $SBOX4_4404($Tbl,$i0,8),$t3 # t3^=SBOX4_4404[0] - shr \$16,$t1 - xor $SBOX4_4404($Tbl,$i1,8),$t2 # t2^=SBOX4_4404[1] - movz `&hi("$t0")`,$i0 # (t0>>24)&0xff - movz `&lo("$t1")`,$i1 # (t1>>16)&0xff - xor $SBOX1_1110($Tbl,$i0,8),$t3 # t3^=SBOX1_1110[0] - xor $SBOX3_3033($Tbl,$i1,8),$t2 # t2^=SBOX3_3033[1] - movz `&lo("$t0")`,$i0 # (t0>>16)&0xff - movz `&hi("$t1")`,$i1 # (t1>>24)&0xff - xor $SBOX2_0222($Tbl,$i0,8),$t3 # t3^=SBOX2_0222[0] - xor $SBOX2_0222($Tbl,$i1,8),$t2 # t2^=SBOX2_0222[1] - mov `$seed+($i+1)*$scale`($key),$t1 # prefetch key[i+1] - mov `$seed+($i+1)*$scale+4`($key),$t0 - xor $t3,$t2 # t2^=t3 - ror \$8,$t3 # t3=RightRotate(t3,8) - xor $t2,$s2 - xor $t2,$s3 - xor $t3,$s3 -___ -} - -# void Camellia_EncryptBlock_Rounds( -# int grandRounds, -# const Byte plaintext[], -# const KEY_TABLE_TYPE keyTable, -# Byte ciphertext[]) -$code=<<___; -.text - -# V1.x API -.globl Camellia_EncryptBlock -.type Camellia_EncryptBlock,\@abi-omnipotent -.align 16 -Camellia_EncryptBlock: - movl \$128,%eax - subl $arg0d,%eax - movl \$3,$arg0d - adcl \$0,$arg0d # keyBitLength==128?3:4 - jmp .Lenc_rounds -.size Camellia_EncryptBlock,.-Camellia_EncryptBlock -# V2 -.globl Camellia_EncryptBlock_Rounds -.type Camellia_EncryptBlock_Rounds,\@function,4 -.align 16 -.Lenc_rounds: -Camellia_EncryptBlock_Rounds: - push %rbx - push %rbp - push %r13 - push %r14 - push %r15 -.Lenc_prologue: - - #mov %rsi,$inp # put away arguments - mov %rcx,$out - mov %rdx,$key - - shl \$6,%edi # process grandRounds - lea .LCamellia_SBOX(%rip),$Tbl - lea ($key,%rdi),$keyend - - mov 0(%rsi),@S[0] # load plaintext - mov 4(%rsi),@S[1] - mov 8(%rsi),@S[2] - bswap @S[0] - mov 12(%rsi),@S[3] - bswap @S[1] - bswap @S[2] - bswap @S[3] - - call _x86_64_Camellia_encrypt - - bswap @S[0] - bswap @S[1] - bswap @S[2] - mov @S[0],0($out) - bswap @S[3] - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - mov 0(%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r13 - mov 24(%rsp),%rbp - mov 32(%rsp),%rbx - lea 40(%rsp),%rsp -.Lenc_epilogue: - ret -.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds - -.type _x86_64_Camellia_encrypt,\@abi-omnipotent -.align 16 -_x86_64_Camellia_encrypt: - xor 0($key),@S[1] - xor 4($key),@S[0] # ^=key[0-3] - xor 8($key),@S[3] - xor 12($key),@S[2] -.align 16 -.Leloop: - mov 16($key),$t1 # prefetch key[4-5] - mov 20($key),$t0 - -___ - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); } -$code.=<<___; - lea 16*4($key),$key - cmp $keyend,$key - mov 8($key),$t3 # prefetch key[2-3] - mov 12($key),$t2 - je .Ledone - - and @S[0],$t0 - or @S[3],$t3 - rol \$1,$t0 - xor $t3,@S[2] # s2^=s3|key[3]; - xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); - and @S[2],$t2 - or @S[1],$t1 - rol \$1,$t2 - xor $t1,@S[0] # s0^=s1|key[1]; - xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); - jmp .Leloop - -.align 16 -.Ledone: - xor @S[2],$t0 # SwapHalf - xor @S[3],$t1 - xor @S[0],$t2 - xor @S[1],$t3 - - mov $t0,@S[0] - mov $t1,@S[1] - mov $t2,@S[2] - mov $t3,@S[3] - - .byte 0xf3,0xc3 # rep ret -.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt - -# V1.x API -.globl Camellia_DecryptBlock -.type Camellia_DecryptBlock,\@abi-omnipotent -.align 16 -Camellia_DecryptBlock: - movl \$128,%eax - subl $arg0d,%eax - movl \$3,$arg0d - adcl \$0,$arg0d # keyBitLength==128?3:4 - jmp .Ldec_rounds -.size Camellia_DecryptBlock,.-Camellia_DecryptBlock -# V2 -.globl Camellia_DecryptBlock_Rounds -.type Camellia_DecryptBlock_Rounds,\@function,4 -.align 16 -.Ldec_rounds: -Camellia_DecryptBlock_Rounds: - push %rbx - push %rbp - push %r13 - push %r14 - push %r15 -.Ldec_prologue: - - #mov %rsi,$inp # put away arguments - mov %rcx,$out - mov %rdx,$keyend - - shl \$6,%edi # process grandRounds - lea .LCamellia_SBOX(%rip),$Tbl - lea ($keyend,%rdi),$key - - mov 0(%rsi),@S[0] # load plaintext - mov 4(%rsi),@S[1] - mov 8(%rsi),@S[2] - bswap @S[0] - mov 12(%rsi),@S[3] - bswap @S[1] - bswap @S[2] - bswap @S[3] - - call _x86_64_Camellia_decrypt - - bswap @S[0] - bswap @S[1] - bswap @S[2] - mov @S[0],0($out) - bswap @S[3] - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - mov 0(%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r13 - mov 24(%rsp),%rbp - mov 32(%rsp),%rbx - lea 40(%rsp),%rsp -.Ldec_epilogue: - ret -.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds - -.type _x86_64_Camellia_decrypt,\@abi-omnipotent -.align 16 -_x86_64_Camellia_decrypt: - xor 0($key),@S[1] - xor 4($key),@S[0] # ^=key[0-3] - xor 8($key),@S[3] - xor 12($key),@S[2] -.align 16 -.Ldloop: - mov -8($key),$t1 # prefetch key[4-5] - mov -4($key),$t0 - -___ - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); } -$code.=<<___; - lea -16*4($key),$key - cmp $keyend,$key - mov 0($key),$t3 # prefetch key[2-3] - mov 4($key),$t2 - je .Lddone - - and @S[0],$t0 - or @S[3],$t3 - rol \$1,$t0 - xor $t3,@S[2] # s2^=s3|key[3]; - xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); - and @S[2],$t2 - or @S[1],$t1 - rol \$1,$t2 - xor $t1,@S[0] # s0^=s1|key[1]; - xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); - - jmp .Ldloop - -.align 16 -.Lddone: - xor @S[2],$t2 - xor @S[3],$t3 - xor @S[0],$t0 - xor @S[1],$t1 - - mov $t2,@S[0] # SwapHalf - mov $t3,@S[1] - mov $t0,@S[2] - mov $t1,@S[3] - - .byte 0xf3,0xc3 # rep ret -.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt -___ - -sub _saveround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - - if ($#T==3) { - $code.=<<___; - mov @T[1],`$bias+$rnd*8+0`($key) - mov @T[0],`$bias+$rnd*8+4`($key) - mov @T[3],`$bias+$rnd*8+8`($key) - mov @T[2],`$bias+$rnd*8+12`($key) -___ - } else { - $code.=" mov @T[0],`$bias+$rnd*8+0`($key)\n"; - $code.=" mov @T[1],`$bias+$rnd*8+8`($key)\n" if ($#T>=1); - } -} - -sub _loadround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - -$code.=" mov `$bias+$rnd*8+0`($key),@T[0]\n"; -$code.=" mov `$bias+$rnd*8+8`($key),@T[1]\n" if ($#T>=1); -} - -# shld is very slow on Intel EM64T family. Even on AMD it limits -# instruction decode rate [because it's VectorPath] and consequently -# performance... -sub __rotl128 { -my ($i0,$i1,$rot)=@_; - - if ($rot) { - $code.=<<___; - mov $i0,%r11 - shld \$$rot,$i1,$i0 - shld \$$rot,%r11,$i1 -___ - } -} - -# ... Implementing 128-bit rotate without shld gives 80% better -# performance EM64T, +15% on AMD64 and only ~7% degradation on -# Core2. This is therefore preferred. -sub _rotl128 { -my ($i0,$i1,$rot)=@_; - - if ($rot) { - $code.=<<___; - mov $i0,%r11 - shl \$$rot,$i0 - mov $i1,%r9 - shr \$`64-$rot`,%r9 - shr \$`64-$rot`,%r11 - or %r9,$i0 - shl \$$rot,$i1 - or %r11,$i1 -___ - } -} - -{ my $step=0; - -$code.=<<___; -.globl Camellia_Ekeygen -.type Camellia_Ekeygen,\@function,3 -.align 16 -Camellia_Ekeygen: - push %rbx - push %rbp - push %r13 - push %r14 - push %r15 -.Lkey_prologue: - - mov %rdi,$keyend # put away arguments, keyBitLength - mov %rdx,$out # keyTable - - mov 0(%rsi),@S[0] # load 0-127 bits - mov 4(%rsi),@S[1] - mov 8(%rsi),@S[2] - mov 12(%rsi),@S[3] - - bswap @S[0] - bswap @S[1] - bswap @S[2] - bswap @S[3] -___ - &_saveround (0,$out,@S); # KL<<<0 -$code.=<<___; - cmp \$128,$keyend # check keyBitLength - je .L1st128 - - mov 16(%rsi),@S[0] # load 128-191 bits - mov 20(%rsi),@S[1] - cmp \$192,$keyend - je .L1st192 - mov 24(%rsi),@S[2] # load 192-255 bits - mov 28(%rsi),@S[3] - jmp .L1st256 -.L1st192: - mov @S[0],@S[2] - mov @S[1],@S[3] - not @S[2] - not @S[3] -.L1st256: - bswap @S[0] - bswap @S[1] - bswap @S[2] - bswap @S[3] -___ - &_saveround (4,$out,@S); # temp storage for KR! -$code.=<<___; - xor 0($out),@S[1] # KR^KL - xor 4($out),@S[0] - xor 8($out),@S[3] - xor 12($out),@S[2] - -.L1st128: - lea .LCamellia_SIGMA(%rip),$key - lea .LCamellia_SBOX(%rip),$Tbl - - mov 0($key),$t1 - mov 4($key),$t0 -___ - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); -$code.=<<___; - xor 0($out),@S[1] # ^KL - xor 4($out),@S[0] - xor 8($out),@S[3] - xor 12($out),@S[2] -___ - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); -$code.=<<___; - cmp \$128,$keyend - jne .L2nd256 - - lea 128($out),$out # size optimization - shl \$32,%r8 # @S[0]|| - shl \$32,%r10 # @S[2]|| - or %r9,%r8 # ||@S[1] - or %r11,%r10 # ||@S[3] -___ - &_loadround (0,$out,-128,"%rax","%rbx"); # KL - &_saveround (2,$out,-128,"%r8","%r10"); # KA<<<0 - &_rotl128 ("%rax","%rbx",15); - &_saveround (4,$out,-128,"%rax","%rbx"); # KL<<<15 - &_rotl128 ("%r8","%r10",15); - &_saveround (6,$out,-128,"%r8","%r10"); # KA<<<15 - &_rotl128 ("%r8","%r10",15); # 15+15=30 - &_saveround (8,$out,-128,"%r8","%r10"); # KA<<<30 - &_rotl128 ("%rax","%rbx",30); # 15+30=45 - &_saveround (10,$out,-128,"%rax","%rbx"); # KL<<<45 - &_rotl128 ("%r8","%r10",15); # 30+15=45 - &_saveround (12,$out,-128,"%r8"); # KA<<<45 - &_rotl128 ("%rax","%rbx",15); # 45+15=60 - &_saveround (13,$out,-128,"%rbx"); # KL<<<60 - &_rotl128 ("%r8","%r10",15); # 45+15=60 - &_saveround (14,$out,-128,"%r8","%r10"); # KA<<<60 - &_rotl128 ("%rax","%rbx",17); # 60+17=77 - &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<77 - &_rotl128 ("%rax","%rbx",17); # 77+17=94 - &_saveround (18,$out,-128,"%rax","%rbx"); # KL<<<94 - &_rotl128 ("%r8","%r10",34); # 60+34=94 - &_saveround (20,$out,-128,"%r8","%r10"); # KA<<<94 - &_rotl128 ("%rax","%rbx",17); # 94+17=111 - &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<111 - &_rotl128 ("%r8","%r10",17); # 94+17=111 - &_saveround (24,$out,-128,"%r8","%r10"); # KA<<<111 -$code.=<<___; - mov \$3,%eax - jmp .Ldone -.align 16 -.L2nd256: -___ - &_saveround (6,$out,@S); # temp storage for KA! -$code.=<<___; - xor `4*8+0`($out),@S[1] # KA^KR - xor `4*8+4`($out),@S[0] - xor `5*8+0`($out),@S[3] - xor `5*8+4`($out),@S[2] -___ - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - - &_loadround (0,$out,"%rax","%rbx"); # KL - &_loadround (4,$out,"%rcx","%rdx"); # KR - &_loadround (6,$out,"%r14","%r15"); # KA -$code.=<<___; - lea 128($out),$out # size optimization - shl \$32,%r8 # @S[0]|| - shl \$32,%r10 # @S[2]|| - or %r9,%r8 # ||@S[1] - or %r11,%r10 # ||@S[3] -___ - &_saveround (2,$out,-128,"%r8","%r10"); # KB<<<0 - &_rotl128 ("%rcx","%rdx",15); - &_saveround (4,$out,-128,"%rcx","%rdx"); # KR<<<15 - &_rotl128 ("%r14","%r15",15); - &_saveround (6,$out,-128,"%r14","%r15"); # KA<<<15 - &_rotl128 ("%rcx","%rdx",15); # 15+15=30 - &_saveround (8,$out,-128,"%rcx","%rdx"); # KR<<<30 - &_rotl128 ("%r8","%r10",30); - &_saveround (10,$out,-128,"%r8","%r10"); # KB<<<30 - &_rotl128 ("%rax","%rbx",45); - &_saveround (12,$out,-128,"%rax","%rbx"); # KL<<<45 - &_rotl128 ("%r14","%r15",30); # 15+30=45 - &_saveround (14,$out,-128,"%r14","%r15"); # KA<<<45 - &_rotl128 ("%rax","%rbx",15); # 45+15=60 - &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<60 - &_rotl128 ("%rcx","%rdx",30); # 30+30=60 - &_saveround (18,$out,-128,"%rcx","%rdx"); # KR<<<60 - &_rotl128 ("%r8","%r10",30); # 30+30=60 - &_saveround (20,$out,-128,"%r8","%r10"); # KB<<<60 - &_rotl128 ("%rax","%rbx",17); # 60+17=77 - &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<77 - &_rotl128 ("%r14","%r15",32); # 45+32=77 - &_saveround (24,$out,-128,"%r14","%r15"); # KA<<<77 - &_rotl128 ("%rcx","%rdx",34); # 60+34=94 - &_saveround (26,$out,-128,"%rcx","%rdx"); # KR<<<94 - &_rotl128 ("%r14","%r15",17); # 77+17=94 - &_saveround (28,$out,-128,"%r14","%r15"); # KA<<<77 - &_rotl128 ("%rax","%rbx",34); # 77+34=111 - &_saveround (30,$out,-128,"%rax","%rbx"); # KL<<<111 - &_rotl128 ("%r8","%r10",51); # 60+51=111 - &_saveround (32,$out,-128,"%r8","%r10"); # KB<<<111 -$code.=<<___; - mov \$4,%eax -.Ldone: - mov 0(%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r13 - mov 24(%rsp),%rbp - mov 32(%rsp),%rbx - lea 40(%rsp),%rsp -.Lkey_epilogue: - ret -.size Camellia_Ekeygen,.-Camellia_Ekeygen -___ -} - -@SBOX=( -112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65, - 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189, -134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26, -166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77, -139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153, -223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215, - 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34, -254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80, -170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210, - 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148, -135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226, - 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46, -233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89, -120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250, -114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164, - 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158); - -sub S1110 { my $i=shift; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i<<8; sprintf("0x%08x",$i); } -sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i; sprintf("0x%08x",$i); } -sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; $i=$i<<16|$i<<8|$i; sprintf("0x%08x",$i); } -sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; $i=$i<<24|$i<<8|$i; sprintf("0x%08x",$i); } - -$code.=<<___; -.align 64 -.LCamellia_SIGMA: -.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 -.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 -.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 -.long 0, 0, 0, 0 -.LCamellia_SBOX: -___ -# tables are interleaved, remember? -sub data_word { $code.=".long\t".join(',',@_)."\n"; } -for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } -for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } - -# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const CAMELLIA_KEY *key, -# unsigned char *ivp,const int enc); -{ -$_key="0(%rsp)"; -$_end="8(%rsp)"; # inp+len&~15 -$_res="16(%rsp)"; # len&15 -$ivec="24(%rsp)"; -$_ivp="40(%rsp)"; -$_rsp="48(%rsp)"; - -$code.=<<___; -.globl Camellia_cbc_encrypt -.type Camellia_cbc_encrypt,\@function,6 -.align 16 -Camellia_cbc_encrypt: - cmp \$0,%rdx - je .Lcbc_abort - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 -.Lcbc_prologue: - - mov %rsp,%rbp - sub \$64,%rsp - and \$-64,%rsp - - # place stack frame just "above mod 1024" the key schedule, - # this ensures that cache associativity suffices - lea -64-63(%rcx),%r10 - sub %rsp,%r10 - neg %r10 - and \$0x3C0,%r10 - sub %r10,%rsp - #add \$8,%rsp # 8 is reserved for callee's ra - - mov %rdi,$inp # inp argument - mov %rsi,$out # out argument - mov %r8,%rbx # ivp argument - mov %rcx,$key # key argument - mov 272(%rcx),$keyend # grandRounds - - mov %r8,$_ivp - mov %rbp,$_rsp - -.Lcbc_body: - lea .LCamellia_SBOX(%rip),$Tbl - - mov \$32,%ecx -.align 4 -.Lcbc_prefetch_sbox: - mov 0($Tbl),%rax - mov 32($Tbl),%rsi - mov 64($Tbl),%rdi - mov 96($Tbl),%r11 - lea 128($Tbl),$Tbl - loop .Lcbc_prefetch_sbox - sub \$4096,$Tbl - shl \$6,$keyend - mov %rdx,%rcx # len argument - lea ($key,$keyend),$keyend - - cmp \$0,%r9d # enc argument - je .LCBC_DECRYPT - - and \$-16,%rdx - and \$15,%rcx # length residue - lea ($inp,%rdx),%rdx - mov $key,$_key - mov %rdx,$_end - mov %rcx,$_res - - cmp $inp,%rdx - mov 0(%rbx),@S[0] # load IV - mov 4(%rbx),@S[1] - mov 8(%rbx),@S[2] - mov 12(%rbx),@S[3] - je .Lcbc_enc_tail - jmp .Lcbc_eloop - -.align 16 -.Lcbc_eloop: - xor 0($inp),@S[0] - xor 4($inp),@S[1] - xor 8($inp),@S[2] - bswap @S[0] - xor 12($inp),@S[3] - bswap @S[1] - bswap @S[2] - bswap @S[3] - - call _x86_64_Camellia_encrypt - - mov $_key,$key # "rewind" the key - bswap @S[0] - mov $_end,%rdx - bswap @S[1] - mov $_res,%rcx - bswap @S[2] - mov @S[0],0($out) - bswap @S[3] - mov @S[1],4($out) - mov @S[2],8($out) - lea 16($inp),$inp - mov @S[3],12($out) - cmp %rdx,$inp - lea 16($out),$out - jne .Lcbc_eloop - - cmp \$0,%rcx - jne .Lcbc_enc_tail - - mov $_ivp,$out - mov @S[0],0($out) # write out IV residue - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - jmp .Lcbc_done - -.align 16 -.Lcbc_enc_tail: - xor %rax,%rax - mov %rax,0+$ivec - mov %rax,8+$ivec - mov %rax,$_res - -.Lcbc_enc_pushf: - pushfq - cld - mov $inp,%rsi - lea 8+$ivec,%rdi - .long 0x9066A4F3 # rep movsb - popfq -.Lcbc_enc_popf: - - lea $ivec,$inp - lea 16+$ivec,%rax - mov %rax,$_end - jmp .Lcbc_eloop # one more time - -.align 16 -.LCBC_DECRYPT: - xchg $key,$keyend - add \$15,%rdx - and \$15,%rcx # length residue - and \$-16,%rdx - mov $key,$_key - lea ($inp,%rdx),%rdx - mov %rdx,$_end - mov %rcx,$_res - - mov (%rbx),%rax # load IV - mov 8(%rbx),%rbx - jmp .Lcbc_dloop -.align 16 -.Lcbc_dloop: - mov 0($inp),@S[0] - mov 4($inp),@S[1] - mov 8($inp),@S[2] - bswap @S[0] - mov 12($inp),@S[3] - bswap @S[1] - mov %rax,0+$ivec # save IV to temporary storage - bswap @S[2] - mov %rbx,8+$ivec - bswap @S[3] - - call _x86_64_Camellia_decrypt - - mov $_key,$key # "rewind" the key - mov $_end,%rdx - mov $_res,%rcx - - bswap @S[0] - mov ($inp),%rax # load IV for next iteration - bswap @S[1] - mov 8($inp),%rbx - bswap @S[2] - xor 0+$ivec,@S[0] - bswap @S[3] - xor 4+$ivec,@S[1] - xor 8+$ivec,@S[2] - lea 16($inp),$inp - xor 12+$ivec,@S[3] - cmp %rdx,$inp - je .Lcbc_ddone - - mov @S[0],0($out) - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - lea 16($out),$out - jmp .Lcbc_dloop - -.align 16 -.Lcbc_ddone: - mov $_ivp,%rdx - cmp \$0,%rcx - jne .Lcbc_dec_tail - - mov @S[0],0($out) - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - mov %rax,(%rdx) # write out IV residue - mov %rbx,8(%rdx) - jmp .Lcbc_done -.align 16 -.Lcbc_dec_tail: - mov @S[0],0+$ivec - mov @S[1],4+$ivec - mov @S[2],8+$ivec - mov @S[3],12+$ivec - -.Lcbc_dec_pushf: - pushfq - cld - lea 8+$ivec,%rsi - lea ($out),%rdi - .long 0x9066A4F3 # rep movsb - popfq -.Lcbc_dec_popf: - - mov %rax,(%rdx) # write out IV residue - mov %rbx,8(%rdx) - jmp .Lcbc_done - -.align 16 -.Lcbc_done: - mov $_rsp,%rcx - mov 0(%rcx),%r15 - mov 8(%rcx),%r14 - mov 16(%rcx),%r13 - mov 24(%rcx),%r12 - mov 32(%rcx),%rbp - mov 40(%rcx),%rbx - lea 48(%rcx),%rsp -.Lcbc_abort: - ret -.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt - -.asciz "Camellia for x86_64 by <appro@openssl.org>" -___ -} - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type common_se_handler,\@abi-omnipotent -.align 16 -common_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - lea -64(%rsp),%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->Rip<prologue label - jb .Lin_prologue - - mov 152($context),%rax # pull context->Rsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - lea 40(%rax),%rax - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r13 - mov -32(%rax),%r14 - mov -40(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - jmp .Lcommon_seh_exit -.size common_se_handler,.-common_se_handler - -.type cbc_se_handler,\@abi-omnipotent -.align 16 -cbc_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - lea -64(%rsp),%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lcbc_prologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_prologue - jb .Lin_cbc_prologue - - lea .Lcbc_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_body - jb .Lin_cbc_frame_setup - - mov 152($context),%rax # pull context->Rsp - - lea .Lcbc_abort(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lcbc_abort - jae .Lin_cbc_prologue - - # handle pushf/popf in Camellia_cbc_encrypt - lea .Lcbc_enc_pushf(%rip),%r10 - cmp %r10,%rbx # context->Rip<=.Lcbc_enc_pushf - jbe .Lin_cbc_no_flag - lea 8(%rax),%rax - lea .Lcbc_enc_popf(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_enc_popf - jb .Lin_cbc_no_flag - lea -8(%rax),%rax - lea .Lcbc_dec_pushf(%rip),%r10 - cmp %r10,%rbx # context->Rip<=.Lcbc_dec_pushf - jbe .Lin_cbc_no_flag - lea 8(%rax),%rax - lea .Lcbc_dec_popf(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_dec_popf - jb .Lin_cbc_no_flag - lea -8(%rax),%rax - -.Lin_cbc_no_flag: - mov 48(%rax),%rax # $_rsp - lea 48(%rax),%rax - -.Lin_cbc_frame_setup: - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_cbc_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - -.align 4 -.Lcommon_seh_exit: - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$`1232/8`,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - lea 64(%rsp),%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size cbc_se_handler,.-cbc_se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_Camellia_EncryptBlock_Rounds - .rva .LSEH_end_Camellia_EncryptBlock_Rounds - .rva .LSEH_info_Camellia_EncryptBlock_Rounds - - .rva .LSEH_begin_Camellia_DecryptBlock_Rounds - .rva .LSEH_end_Camellia_DecryptBlock_Rounds - .rva .LSEH_info_Camellia_DecryptBlock_Rounds - - .rva .LSEH_begin_Camellia_Ekeygen - .rva .LSEH_end_Camellia_Ekeygen - .rva .LSEH_info_Camellia_Ekeygen - - .rva .LSEH_begin_Camellia_cbc_encrypt - .rva .LSEH_end_Camellia_cbc_encrypt - .rva .LSEH_info_Camellia_cbc_encrypt - -.section .xdata -.align 8 -.LSEH_info_Camellia_EncryptBlock_Rounds: - .byte 9,0,0,0 - .rva common_se_handler - .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[] -.LSEH_info_Camellia_DecryptBlock_Rounds: - .byte 9,0,0,0 - .rva common_se_handler - .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] -.LSEH_info_Camellia_Ekeygen: - .byte 9,0,0,0 - .rva common_se_handler - .rva .Lkey_prologue,.Lkey_epilogue # HandlerData[] -.LSEH_info_Camellia_cbc_encrypt: - .byte 9,0,0,0 - .rva cbc_se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/cast/c_cfb64.c b/crypto/openssl/crypto/cast/c_cfb64.c index 514c005..dcec13a 100644 --- a/crypto/openssl/crypto/cast/c_cfb64.c +++ b/crypto/openssl/crypto/cast/c_cfb64.c @@ -65,7 +65,7 @@ */ void CAST_cfb64_encrypt(const unsigned char *in, unsigned char *out, - long length, CAST_KEY *schedule, unsigned char *ivec, + long length, const CAST_KEY *schedule, unsigned char *ivec, int *num, int enc) { register CAST_LONG v0,v1,t; @@ -119,4 +119,3 @@ void CAST_cfb64_encrypt(const unsigned char *in, unsigned char *out, v0=v1=ti[0]=ti[1]=t=c=cc=0; *num=n; } - diff --git a/crypto/openssl/crypto/cast/c_ecb.c b/crypto/openssl/crypto/cast/c_ecb.c index f2dc606..b6a3b1f 100644 --- a/crypto/openssl/crypto/cast/c_ecb.c +++ b/crypto/openssl/crypto/cast/c_ecb.c @@ -63,7 +63,7 @@ const char CAST_version[]="CAST" OPENSSL_VERSION_PTEXT; void CAST_ecb_encrypt(const unsigned char *in, unsigned char *out, - CAST_KEY *ks, int enc) + const CAST_KEY *ks, int enc) { CAST_LONG l,d[2]; @@ -77,4 +77,3 @@ void CAST_ecb_encrypt(const unsigned char *in, unsigned char *out, l=d[1]; l2n(l,out); l=d[0]=d[1]=0; } - diff --git a/crypto/openssl/crypto/cast/c_enc.c b/crypto/openssl/crypto/cast/c_enc.c index 0fe2cff..357c41e 100644 --- a/crypto/openssl/crypto/cast/c_enc.c +++ b/crypto/openssl/crypto/cast/c_enc.c @@ -59,9 +59,10 @@ #include <openssl/cast.h> #include "cast_lcl.h" -void CAST_encrypt(CAST_LONG *data, CAST_KEY *key) +void CAST_encrypt(CAST_LONG *data, const CAST_KEY *key) { - register CAST_LONG l,r,*k,t; + register CAST_LONG l,r,t; + const register CAST_LONG *k; k= &(key->data[0]); l=data[0]; @@ -91,9 +92,10 @@ void CAST_encrypt(CAST_LONG *data, CAST_KEY *key) data[0]=r&0xffffffffL; } -void CAST_decrypt(CAST_LONG *data, CAST_KEY *key) +void CAST_decrypt(CAST_LONG *data, const CAST_KEY *key) { - register CAST_LONG l,r,*k,t; + register CAST_LONG l,r,t; + const register CAST_LONG *k; k= &(key->data[0]); l=data[0]; @@ -124,7 +126,7 @@ void CAST_decrypt(CAST_LONG *data, CAST_KEY *key) } void CAST_cbc_encrypt(const unsigned char *in, unsigned char *out, long length, - CAST_KEY *ks, unsigned char *iv, int enc) + const CAST_KEY *ks, unsigned char *iv, int enc) { register CAST_LONG tin0,tin1; register CAST_LONG tout0,tout1,xor0,xor1; @@ -204,4 +206,3 @@ void CAST_cbc_encrypt(const unsigned char *in, unsigned char *out, long length, tin0=tin1=tout0=tout1=xor0=xor1=0; tin[0]=tin[1]=0; } - diff --git a/crypto/openssl/crypto/cast/c_ofb64.c b/crypto/openssl/crypto/cast/c_ofb64.c index fd0469a..cb32224 100644 --- a/crypto/openssl/crypto/cast/c_ofb64.c +++ b/crypto/openssl/crypto/cast/c_ofb64.c @@ -64,7 +64,7 @@ * 64bit block we have used is contained in *num; */ void CAST_ofb64_encrypt(const unsigned char *in, unsigned char *out, - long length, CAST_KEY *schedule, unsigned char *ivec, + long length, const CAST_KEY *schedule, unsigned char *ivec, int *num) { register CAST_LONG v0,v1,t; @@ -108,4 +108,3 @@ void CAST_ofb64_encrypt(const unsigned char *in, unsigned char *out, t=v0=v1=ti[0]=ti[1]=0; *num=n; } - diff --git a/crypto/openssl/crypto/cast/cast.h b/crypto/openssl/crypto/cast/cast.h index 1faf580..6e0cd31 100644 --- a/crypto/openssl/crypto/cast/cast.h +++ b/crypto/openssl/crypto/cast/cast.h @@ -87,17 +87,17 @@ typedef struct cast_key_st void private_CAST_set_key(CAST_KEY *key, int len, const unsigned char *data); #endif void CAST_set_key(CAST_KEY *key, int len, const unsigned char *data); -void CAST_ecb_encrypt(const unsigned char *in,unsigned char *out,CAST_KEY *key, +void CAST_ecb_encrypt(const unsigned char *in, unsigned char *out, const CAST_KEY *key, int enc); -void CAST_encrypt(CAST_LONG *data,CAST_KEY *key); -void CAST_decrypt(CAST_LONG *data,CAST_KEY *key); +void CAST_encrypt(CAST_LONG *data, const CAST_KEY *key); +void CAST_decrypt(CAST_LONG *data, const CAST_KEY *key); void CAST_cbc_encrypt(const unsigned char *in, unsigned char *out, long length, - CAST_KEY *ks, unsigned char *iv, int enc); + const CAST_KEY *ks, unsigned char *iv, int enc); void CAST_cfb64_encrypt(const unsigned char *in, unsigned char *out, - long length, CAST_KEY *schedule, unsigned char *ivec, + long length, const CAST_KEY *schedule, unsigned char *ivec, int *num, int enc); void CAST_ofb64_encrypt(const unsigned char *in, unsigned char *out, - long length, CAST_KEY *schedule, unsigned char *ivec, + long length, const CAST_KEY *schedule, unsigned char *ivec, int *num); #ifdef __cplusplus diff --git a/crypto/openssl/crypto/cms/cms_ess.c b/crypto/openssl/crypto/cms/cms_ess.c index ed34ff3..65613aa 100644 --- a/crypto/openssl/crypto/cms/cms_ess.c +++ b/crypto/openssl/crypto/cms/cms_ess.c @@ -344,7 +344,7 @@ int cms_Receipt_verify(CMS_ContentInfo *cms, CMS_ContentInfo *req_cms) /* Get original receipt request details */ - if (!CMS_get1_ReceiptRequest(osi, &rr)) + if (CMS_get1_ReceiptRequest(osi, &rr) <= 0) { CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NO_RECEIPT_REQUEST); goto err; @@ -385,7 +385,7 @@ ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si) /* Get original receipt request details */ - if (!CMS_get1_ReceiptRequest(si, &rr)) + if (CMS_get1_ReceiptRequest(si, &rr) <= 0) { CMSerr(CMS_F_CMS_ENCODE_RECEIPT, CMS_R_NO_RECEIPT_REQUEST); goto err; diff --git a/crypto/openssl/crypto/cms/cms_lib.c b/crypto/openssl/crypto/cms/cms_lib.c index 8e6c1d2..cc00526 100644 --- a/crypto/openssl/crypto/cms/cms_lib.c +++ b/crypto/openssl/crypto/cms/cms_lib.c @@ -415,7 +415,11 @@ int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain, return 0; } BIO_get_md_ctx(chain, &mtmp); - if (EVP_MD_CTX_type(mtmp) == nid) + if (EVP_MD_CTX_type(mtmp) == nid + /* Workaround for broken implementations that use signature + * algorithm OID instead of digest. + */ + || EVP_MD_pkey_type(EVP_MD_CTX_md(mtmp)) == nid) { EVP_MD_CTX_copy_ex(mctx, mtmp); return 1; diff --git a/crypto/openssl/crypto/comp/c_zlib.c b/crypto/openssl/crypto/comp/c_zlib.c index eccfd09..8df7792 100644 --- a/crypto/openssl/crypto/comp/c_zlib.c +++ b/crypto/openssl/crypto/comp/c_zlib.c @@ -136,15 +136,6 @@ struct zlib_state static int zlib_stateful_ex_idx = -1; -static void zlib_stateful_free_ex_data(void *obj, void *item, - CRYPTO_EX_DATA *ad, int ind,long argl, void *argp) - { - struct zlib_state *state = (struct zlib_state *)item; - inflateEnd(&state->istream); - deflateEnd(&state->ostream); - OPENSSL_free(state); - } - static int zlib_stateful_init(COMP_CTX *ctx) { int err; @@ -188,6 +179,12 @@ static int zlib_stateful_init(COMP_CTX *ctx) static void zlib_stateful_finish(COMP_CTX *ctx) { + struct zlib_state *state = + (struct zlib_state *)CRYPTO_get_ex_data(&ctx->ex_data, + zlib_stateful_ex_idx); + inflateEnd(&state->istream); + deflateEnd(&state->ostream); + OPENSSL_free(state); CRYPTO_free_ex_data(CRYPTO_EX_INDEX_COMP,ctx,&ctx->ex_data); } @@ -402,7 +399,7 @@ COMP_METHOD *COMP_zlib(void) if (zlib_stateful_ex_idx == -1) zlib_stateful_ex_idx = CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_COMP, - 0,NULL,NULL,NULL,zlib_stateful_free_ex_data); + 0,NULL,NULL,NULL,NULL); CRYPTO_w_unlock(CRYPTO_LOCK_COMP); if (zlib_stateful_ex_idx == -1) goto err; diff --git a/crypto/openssl/crypto/cryptlib.c b/crypto/openssl/crypto/cryptlib.c index 8f9e88e..497d003 100644 --- a/crypto/openssl/crypto/cryptlib.c +++ b/crypto/openssl/crypto/cryptlib.c @@ -513,7 +513,7 @@ void OPENSSL_showfatal (const char *fmta,...) #if defined(_WIN32_WINNT) && _WIN32_WINNT>=0x0333 /* this -------------v--- guards NT-specific calls */ - if (GetVersion() < 0x80000000 && OPENSSL_isservice()) + if (GetVersion() < 0x80000000 && OPENSSL_isservice() > 0) { HANDLE h = RegisterEventSource(0,_T("OPENSSL")); const TCHAR *pmsg=buf; ReportEvent(h,EVENTLOG_ERROR_TYPE,0,0,0,1,0,&pmsg,0); diff --git a/crypto/openssl/crypto/dsa/Makefile b/crypto/openssl/crypto/dsa/Makefile index 2cc45cd..6c9578c 100644 --- a/crypto/openssl/crypto/dsa/Makefile +++ b/crypto/openssl/crypto/dsa/Makefile @@ -84,8 +84,9 @@ dsa_asn1.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h dsa_asn1.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h dsa_asn1.o: ../../include/openssl/opensslconf.h dsa_asn1.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -dsa_asn1.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -dsa_asn1.o: ../../include/openssl/symhacks.h ../cryptlib.h dsa_asn1.c +dsa_asn1.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h +dsa_asn1.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +dsa_asn1.o: ../cryptlib.h dsa_asn1.c dsa_depr.o: ../../e_os.h ../../include/openssl/asn1.h dsa_depr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h dsa_depr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h diff --git a/crypto/openssl/crypto/dsa/dsa_asn1.c b/crypto/openssl/crypto/dsa/dsa_asn1.c index 0645fac..bc7d7a0 100644 --- a/crypto/openssl/crypto/dsa/dsa_asn1.c +++ b/crypto/openssl/crypto/dsa/dsa_asn1.c @@ -62,6 +62,7 @@ #include <openssl/asn1.h> #include <openssl/asn1t.h> #include <openssl/bn.h> +#include <openssl/rand.h> #ifdef OPENSSL_FIPS #include <openssl/fips.h> #endif @@ -155,6 +156,7 @@ int DSA_sign(int type, const unsigned char *dgst, int dlen, unsigned char *sig, return 0; } #endif + RAND_seed(dgst, dlen); s=DSA_do_sign(dgst,dlen,dsa); if (s == NULL) { diff --git a/crypto/openssl/crypto/dsa/dsa_lib.c b/crypto/openssl/crypto/dsa/dsa_lib.c index 7ac9dc8..85556d1 100644 --- a/crypto/openssl/crypto/dsa/dsa_lib.c +++ b/crypto/openssl/crypto/dsa/dsa_lib.c @@ -190,7 +190,7 @@ DSA *DSA_new_method(ENGINE *engine) ret->method_mont_p=NULL; ret->references=1; - ret->flags=ret->meth->flags; + ret->flags=ret->meth->flags & ~DSA_FLAG_NON_FIPS_ALLOW; CRYPTO_new_ex_data(CRYPTO_EX_INDEX_DSA, ret, &ret->ex_data); if ((ret->meth->init != NULL) && !ret->meth->init(ret)) { diff --git a/crypto/openssl/crypto/dso/dso_dlfcn.c b/crypto/openssl/crypto/dso/dso_dlfcn.c index 1fd1010..d91e821 100644 --- a/crypto/openssl/crypto/dso/dso_dlfcn.c +++ b/crypto/openssl/crypto/dso/dso_dlfcn.c @@ -237,7 +237,10 @@ static void *dlfcn_bind_var(DSO *dso, const char *symname) static DSO_FUNC_TYPE dlfcn_bind_func(DSO *dso, const char *symname) { void *ptr; - DSO_FUNC_TYPE sym, *tsym = &sym; + union { + DSO_FUNC_TYPE sym; + void *dlret; + } u; if((dso == NULL) || (symname == NULL)) { @@ -255,14 +258,14 @@ static DSO_FUNC_TYPE dlfcn_bind_func(DSO *dso, const char *symname) DSOerr(DSO_F_DLFCN_BIND_FUNC,DSO_R_NULL_HANDLE); return(NULL); } - *(void **)(tsym) = dlsym(ptr, symname); - if(sym == NULL) + u.dlret = dlsym(ptr, symname); + if(u.dlret == NULL) { DSOerr(DSO_F_DLFCN_BIND_FUNC,DSO_R_SYM_FAILURE); ERR_add_error_data(4, "symname(", symname, "): ", dlerror()); return(NULL); } - return(sym); + return u.sym; } static char *dlfcn_merger(DSO *dso, const char *filespec1, @@ -332,6 +335,15 @@ static char *dlfcn_merger(DSO *dso, const char *filespec1, return(merged); } +#ifdef OPENSSL_SYS_MACOSX +#define DSO_ext ".dylib" +#define DSO_extlen 6 +#else +#define DSO_ext ".so" +#define DSO_extlen 3 +#endif + + static char *dlfcn_name_converter(DSO *dso, const char *filename) { char *translated; @@ -342,8 +354,8 @@ static char *dlfcn_name_converter(DSO *dso, const char *filename) transform = (strstr(filename, "/") == NULL); if(transform) { - /* We will convert this to "%s.so" or "lib%s.so" */ - rsize += 3; /* The length of ".so" */ + /* We will convert this to "%s.so" or "lib%s.so" etc */ + rsize += DSO_extlen; /* The length of ".so" */ if ((DSO_flags(dso) & DSO_FLAG_NAME_TRANSLATION_EXT_ONLY) == 0) rsize += 3; /* The length of "lib" */ } @@ -357,9 +369,9 @@ static char *dlfcn_name_converter(DSO *dso, const char *filename) if(transform) { if ((DSO_flags(dso) & DSO_FLAG_NAME_TRANSLATION_EXT_ONLY) == 0) - sprintf(translated, "lib%s.so", filename); + sprintf(translated, "lib%s" DSO_ext, filename); else - sprintf(translated, "%s.so", filename); + sprintf(translated, "%s" DSO_ext, filename); } else sprintf(translated, "%s", filename); diff --git a/crypto/openssl/crypto/ec/ec2_smpl.c b/crypto/openssl/crypto/ec/ec2_smpl.c index 5cd1eac..522d036 100644 --- a/crypto/openssl/crypto/ec/ec2_smpl.c +++ b/crypto/openssl/crypto/ec/ec2_smpl.c @@ -174,8 +174,10 @@ int ec_GF2m_simple_group_copy(EC_GROUP *dest, const EC_GROUP *src) dest->poly[2] = src->poly[2]; dest->poly[3] = src->poly[3]; dest->poly[4] = src->poly[4]; - bn_wexpand(&dest->a, (int)(dest->poly[0] + BN_BITS2 - 1) / BN_BITS2); - bn_wexpand(&dest->b, (int)(dest->poly[0] + BN_BITS2 - 1) / BN_BITS2); + if(bn_wexpand(&dest->a, (int)(dest->poly[0] + BN_BITS2 - 1) / BN_BITS2) == NULL) + return 0; + if(bn_wexpand(&dest->b, (int)(dest->poly[0] + BN_BITS2 - 1) / BN_BITS2) == NULL) + return 0; for (i = dest->a.top; i < dest->a.dmax; i++) dest->a.d[i] = 0; for (i = dest->b.top; i < dest->b.dmax; i++) dest->b.d[i] = 0; return 1; @@ -199,12 +201,12 @@ int ec_GF2m_simple_group_set_curve(EC_GROUP *group, /* group->a */ if (!BN_GF2m_mod_arr(&group->a, a, group->poly)) goto err; - bn_wexpand(&group->a, (int)(group->poly[0] + BN_BITS2 - 1) / BN_BITS2); + if(bn_wexpand(&group->a, (int)(group->poly[0] + BN_BITS2 - 1) / BN_BITS2) == NULL) goto err; for (i = group->a.top; i < group->a.dmax; i++) group->a.d[i] = 0; /* group->b */ if (!BN_GF2m_mod_arr(&group->b, b, group->poly)) goto err; - bn_wexpand(&group->b, (int)(group->poly[0] + BN_BITS2 - 1) / BN_BITS2); + if(bn_wexpand(&group->b, (int)(group->poly[0] + BN_BITS2 - 1) / BN_BITS2) == NULL) goto err; for (i = group->b.top; i < group->b.dmax; i++) group->b.d[i] = 0; ret = 1; diff --git a/crypto/openssl/crypto/ecdsa/Makefile b/crypto/openssl/crypto/ecdsa/Makefile index 4865f3c..49e2681 100644 --- a/crypto/openssl/crypto/ecdsa/Makefile +++ b/crypto/openssl/crypto/ecdsa/Makefile @@ -123,10 +123,11 @@ ecs_sign.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h ecs_sign.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h ecs_sign.o: ../../include/openssl/opensslconf.h ecs_sign.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -ecs_sign.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h -ecs_sign.o: ../../include/openssl/sha.h ../../include/openssl/stack.h -ecs_sign.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h -ecs_sign.o: ../../include/openssl/x509_vfy.h ecs_locl.h ecs_sign.c +ecs_sign.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h +ecs_sign.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h +ecs_sign.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +ecs_sign.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h +ecs_sign.o: ecs_locl.h ecs_sign.c ecs_vrf.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h ecs_vrf.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h ecs_vrf.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h diff --git a/crypto/openssl/crypto/ecdsa/ecs_ossl.c b/crypto/openssl/crypto/ecdsa/ecs_ossl.c index 3ead1af9..551cf50 100644 --- a/crypto/openssl/crypto/ecdsa/ecs_ossl.c +++ b/crypto/openssl/crypto/ecdsa/ecs_ossl.c @@ -212,7 +212,7 @@ err: static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dgst_len, const BIGNUM *in_kinv, const BIGNUM *in_r, EC_KEY *eckey) { - int ok = 0; + int ok = 0, i; BIGNUM *kinv=NULL, *s, *m=NULL,*tmp=NULL,*order=NULL; const BIGNUM *ckinv; BN_CTX *ctx = NULL; @@ -251,22 +251,19 @@ static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dgst_len, ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_EC_LIB); goto err; } - if (8 * dgst_len > BN_num_bits(order)) + i = BN_num_bits(order); + /* Need to truncate digest if it is too long: first truncate whole + * bytes. + */ + if (8 * dgst_len > i) + dgst_len = (i + 7)/8; + if (!BN_bin2bn(dgst, dgst_len, m)) { - /* XXX - * - * Should provide for optional hash truncation: - * Keep the BN_num_bits(order) leftmost bits of dgst - * (see March 2006 FIPS 186-3 draft, which has a few - * confusing errors in this part though) - */ - - ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, - ECDSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); + ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_BN_LIB); goto err; } - - if (!BN_bin2bn(dgst, dgst_len, m)) + /* If still too long truncate remaining bits with a shift */ + if ((8 * dgst_len > i) && !BN_rshift(m, m, 8 - (i & 0x7))) { ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_BN_LIB); goto err; @@ -346,7 +343,7 @@ err: static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len, const ECDSA_SIG *sig, EC_KEY *eckey) { - int ret = -1; + int ret = -1, i; BN_CTX *ctx; BIGNUM *order, *u1, *u2, *m, *X; EC_POINT *point = NULL; @@ -384,21 +381,6 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len, ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_EC_LIB); goto err; } - if (8 * dgst_len > BN_num_bits(order)) - { - /* XXX - * - * Should provide for optional hash truncation: - * Keep the BN_num_bits(order) leftmost bits of dgst - * (see March 2006 FIPS 186-3 draft, which has a few - * confusing errors in this part though) - */ - - ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, - ECDSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); - ret = 0; - goto err; - } if (BN_is_zero(sig->r) || BN_is_negative(sig->r) || BN_ucmp(sig->r, order) >= 0 || BN_is_zero(sig->s) || @@ -415,11 +397,23 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len, goto err; } /* digest -> m */ + i = BN_num_bits(order); + /* Need to truncate digest if it is too long: first truncate whole + * bytes. + */ + if (8 * dgst_len > i) + dgst_len = (i + 7)/8; if (!BN_bin2bn(dgst, dgst_len, m)) { ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_BN_LIB); goto err; } + /* If still too long truncate remaining bits with a shift */ + if ((8 * dgst_len > i) && !BN_rshift(m, m, 8 - (i & 0x7))) + { + ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_BN_LIB); + goto err; + } /* u1 = m * tmp mod order */ if (!BN_mod_mul(u1, m, u2, order, ctx)) { diff --git a/crypto/openssl/crypto/ecdsa/ecs_sign.c b/crypto/openssl/crypto/ecdsa/ecs_sign.c index 74b1fe8..353d5af 100644 --- a/crypto/openssl/crypto/ecdsa/ecs_sign.c +++ b/crypto/openssl/crypto/ecdsa/ecs_sign.c @@ -57,6 +57,7 @@ #ifndef OPENSSL_NO_ENGINE #include <openssl/engine.h> #endif +#include <openssl/rand.h> ECDSA_SIG *ECDSA_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey) { @@ -83,6 +84,7 @@ int ECDSA_sign_ex(int type, const unsigned char *dgst, int dlen, unsigned char EC_KEY *eckey) { ECDSA_SIG *s; + RAND_seed(dgst, dlen); s = ECDSA_do_sign_ex(dgst, dlen, kinv, r, eckey); if (s == NULL) { diff --git a/crypto/openssl/crypto/engine/Makefile b/crypto/openssl/crypto/engine/Makefile index 0cc3722..b52fa48 100644 --- a/crypto/openssl/crypto/engine/Makefile +++ b/crypto/openssl/crypto/engine/Makefile @@ -112,19 +112,21 @@ eng_cnf.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h eng_cnf.o: ../../include/openssl/x509_vfy.h ../cryptlib.h eng_cnf.c eng_int.h eng_cryptodev.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h eng_cryptodev.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h -eng_cryptodev.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +eng_cryptodev.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h +eng_cryptodev.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h eng_cryptodev.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h eng_cryptodev.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h -eng_cryptodev.o: ../../include/openssl/evp.h ../../include/openssl/fips.h -eng_cryptodev.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +eng_cryptodev.o: ../../include/openssl/err.h ../../include/openssl/evp.h +eng_cryptodev.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h +eng_cryptodev.o: ../../include/openssl/obj_mac.h eng_cryptodev.o: ../../include/openssl/objects.h eng_cryptodev.o: ../../include/openssl/opensslconf.h eng_cryptodev.o: ../../include/openssl/opensslv.h eng_cryptodev.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h -eng_cryptodev.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h -eng_cryptodev.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -eng_cryptodev.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h -eng_cryptodev.o: eng_cryptodev.c +eng_cryptodev.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h +eng_cryptodev.o: ../../include/openssl/sha.h ../../include/openssl/stack.h +eng_cryptodev.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h +eng_cryptodev.o: ../../include/openssl/x509_vfy.h eng_cryptodev.c eng_ctrl.o: ../../e_os.h ../../include/openssl/asn1.h eng_ctrl.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h eng_ctrl.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h diff --git a/crypto/openssl/crypto/engine/eng_cnf.c b/crypto/openssl/crypto/engine/eng_cnf.c index 08066ce..95c4070 100644 --- a/crypto/openssl/crypto/engine/eng_cnf.c +++ b/crypto/openssl/crypto/engine/eng_cnf.c @@ -95,7 +95,7 @@ static int int_engine_configure(char *name, char *value, const CONF *cnf) int ret = 0; long do_init = -1; STACK_OF(CONF_VALUE) *ecmds; - CONF_VALUE *ecmd; + CONF_VALUE *ecmd = NULL; char *ctrlname, *ctrlvalue; ENGINE *e = NULL; int soft = 0; @@ -157,7 +157,7 @@ static int int_engine_configure(char *name, char *value, const CONF *cnf) return 1; } if (!e) - return 0; + goto err; } /* Allow "EMPTY" to mean no value: this allows a valid * "value" to be passed to ctrls of type NO_INPUT @@ -186,16 +186,27 @@ static int int_engine_configure(char *name, char *value, const CONF *cnf) } else if (!ENGINE_ctrl_cmd_string(e, ctrlname, ctrlvalue, 0)) - return 0; + goto err; } } if (e && (do_init == -1) && !int_engine_init(e)) + { + ecmd = NULL; goto err; + } ret = 1; err: + if (ret != 1) + { + ENGINEerr(ENGINE_F_INT_ENGINE_CONFIGURE, ENGINE_R_ENGINE_CONFIGURATION_ERROR); + if (ecmd) + ERR_add_error_data(6, "section=", ecmd->section, + ", name=", ecmd->name, + ", value=", ecmd->value); + } if (e) ENGINE_free(e); return ret; diff --git a/crypto/openssl/crypto/engine/eng_cryptodev.c b/crypto/openssl/crypto/engine/eng_cryptodev.c index 4f2ec69..1a1e1c2 100644 --- a/crypto/openssl/crypto/engine/eng_cryptodev.c +++ b/crypto/openssl/crypto/engine/eng_cryptodev.c @@ -25,12 +25,15 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ -/* $FreeBSD$ */ #include <openssl/objects.h> #include <openssl/engine.h> #include <openssl/evp.h> #include <openssl/bn.h> +#include <openssl/dsa.h> +#include <openssl/rsa.h> +#include <openssl/dh.h> +#include <openssl/err.h> #if (defined(__unix__) || defined(unix)) && !defined(USG) && \ (defined(OpenBSD) || defined(__FreeBSD__)) @@ -80,7 +83,7 @@ static int cryptodev_max_iv(int cipher); static int cryptodev_key_length_valid(int cipher, int len); static int cipher_nid_to_cryptodev(int nid); static int get_cryptodev_ciphers(const int **cnids); -static int get_cryptodev_digests(const int **cnids); +/*static int get_cryptodev_digests(const int **cnids);*/ static int cryptodev_usable_ciphers(const int **nids); static int cryptodev_usable_digests(const int **nids); static int cryptodev_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, @@ -101,7 +104,7 @@ static int cryptodev_asym(struct crypt_kop *kop, int rlen, BIGNUM *r, static int cryptodev_bn_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); static int cryptodev_rsa_nocrt_mod_exp(BIGNUM *r0, const BIGNUM *I, - RSA *rsa); + RSA *rsa, BN_CTX *ctx); static int cryptodev_rsa_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx); static int cryptodev_dsa_bn_mod_exp(DSA *dsa, BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); @@ -140,6 +143,7 @@ static struct { { 0, NID_undef, 0, 0, }, }; +#if 0 static struct { int id; int nid; @@ -152,6 +156,7 @@ static struct { { CRYPTO_SHA1, NID_undef, }, { 0, NID_undef, }, }; +#endif /* * Return a fd if /dev/crypto seems usable, 0 otherwise. @@ -286,6 +291,7 @@ get_cryptodev_ciphers(const int **cnids) return (count); } +#if 0 /* unused */ /* * Find out what digests /dev/crypto will let us have a session for. * XXX note, that some of these openssl doesn't deal with yet! @@ -322,6 +328,8 @@ get_cryptodev_digests(const int **cnids) return (count); } +#endif + /* * Find the useable ciphers|digests from dev/crypto - this is the first * thing called by the engine init crud which determines what it @@ -375,7 +383,7 @@ cryptodev_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, struct crypt_op cryp; struct dev_crypto_state *state = ctx->cipher_data; struct session_op *sess = &state->d_sess; - void *iiv; + const void *iiv; unsigned char save_iv[EVP_MAX_IV_LENGTH]; if (state->d_fd < 0) @@ -399,7 +407,7 @@ cryptodev_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (ctx->cipher->iv_len) { cryp.iv = (caddr_t) ctx->iv; if (!ctx->encrypt) { - iiv = (void *) in + inl - ctx->cipher->iv_len; + iiv = in + inl - ctx->cipher->iv_len; memcpy(save_iv, iiv, ctx->cipher->iv_len); } } else @@ -414,7 +422,7 @@ cryptodev_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (ctx->cipher->iv_len) { if (ctx->encrypt) - iiv = (void *) out + inl - ctx->cipher->iv_len; + iiv = out + inl - ctx->cipher->iv_len; else iiv = save_iv; memcpy(ctx->iv, iiv, ctx->cipher->iv_len); @@ -444,7 +452,7 @@ cryptodev_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if ((state->d_fd = get_dev_crypto()) < 0) return (0); - sess->key = (unsigned char *)key; + sess->key = (char *)key; sess->keylen = ctx->key_len; sess->cipher = cipher; @@ -626,7 +634,7 @@ static int bn2crparam(const BIGNUM *a, struct crparam *crp) { int i, j, k; - ssize_t words, bytes, bits; + ssize_t bytes, bits; u_char *b; crp->crp_p = NULL; @@ -639,7 +647,7 @@ bn2crparam(const BIGNUM *a, struct crparam *crp) if (b == NULL) return (1); - crp->crp_p = b; + crp->crp_p = (char *)b; crp->crp_nbits = bits; for (i = 0, j = 0; i < a->top; i++) { @@ -757,14 +765,11 @@ err: } static int -cryptodev_rsa_nocrt_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa) +cryptodev_rsa_nocrt_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx) { int r; - BN_CTX *ctx; - ctx = BN_CTX_new(); r = cryptodev_bn_mod_exp(r0, I, rsa->d, rsa->n, ctx, NULL); - BN_CTX_free(ctx); return (r); } @@ -995,7 +1000,7 @@ cryptodev_dh_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh) goto err; kop.crk_iparams = 3; - kop.crk_param[3].crp_p = key; + kop.crk_param[3].crp_p = (char *)key; kop.crk_param[3].crp_nbits = keylen * 8; kop.crk_oparams = 1; diff --git a/crypto/openssl/crypto/engine/eng_ctrl.c b/crypto/openssl/crypto/engine/eng_ctrl.c index 95b6b45..5ce25d9 100644 --- a/crypto/openssl/crypto/engine/eng_ctrl.c +++ b/crypto/openssl/crypto/engine/eng_ctrl.c @@ -280,7 +280,7 @@ int ENGINE_ctrl_cmd(ENGINE *e, const char *cmd_name, } /* Force the result of the control command to 0 or 1, for the reasons * mentioned before. */ - if (ENGINE_ctrl(e, num, i, p, f)) + if (ENGINE_ctrl(e, num, i, p, f) > 0) return 1; return 0; } @@ -345,7 +345,7 @@ int ENGINE_ctrl_cmd_string(ENGINE *e, const char *cmd_name, const char *arg, * usage of these commands is consistent across applications and * that certain applications don't understand it one way, and * others another. */ - if(ENGINE_ctrl(e, num, 0, (void *)arg, NULL)) + if(ENGINE_ctrl(e, num, 0, (void *)arg, NULL) > 0) return 1; return 0; } @@ -360,7 +360,7 @@ int ENGINE_ctrl_cmd_string(ENGINE *e, const char *cmd_name, const char *arg, if(flags & ENGINE_CMD_FLAG_STRING) { /* Same explanation as above */ - if(ENGINE_ctrl(e, num, 0, (void *)arg, NULL)) + if(ENGINE_ctrl(e, num, 0, (void *)arg, NULL) > 0) return 1; return 0; } @@ -383,7 +383,7 @@ int ENGINE_ctrl_cmd_string(ENGINE *e, const char *cmd_name, const char *arg, } /* Force the result of the control command to 0 or 1, for the reasons * mentioned before. */ - if(ENGINE_ctrl(e, num, l, NULL, NULL)) + if(ENGINE_ctrl(e, num, l, NULL, NULL) > 0) return 1; return 0; } diff --git a/crypto/openssl/crypto/engine/eng_err.c b/crypto/openssl/crypto/engine/eng_err.c index 574ffbb..ac74dd1 100644 --- a/crypto/openssl/crypto/engine/eng_err.c +++ b/crypto/openssl/crypto/engine/eng_err.c @@ -1,6 +1,6 @@ /* crypto/engine/eng_err.c */ /* ==================================================================== - * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2010 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -124,6 +124,7 @@ static ERR_STRING_DATA ENGINE_str_reasons[]= {ERR_REASON(ENGINE_R_DSO_FAILURE) ,"DSO failure"}, {ERR_REASON(ENGINE_R_DSO_NOT_FOUND) ,"dso not found"}, {ERR_REASON(ENGINE_R_ENGINES_SECTION_ERROR),"engines section error"}, +{ERR_REASON(ENGINE_R_ENGINE_CONFIGURATION_ERROR),"engine configuration error"}, {ERR_REASON(ENGINE_R_ENGINE_IS_NOT_IN_LIST),"engine is not in the list"}, {ERR_REASON(ENGINE_R_ENGINE_SECTION_ERROR),"engine section error"}, {ERR_REASON(ENGINE_R_FAILED_LOADING_PRIVATE_KEY),"failed loading private key"}, diff --git a/crypto/openssl/crypto/engine/eng_table.c b/crypto/openssl/crypto/engine/eng_table.c index 8879a26..8fc47b3 100644 --- a/crypto/openssl/crypto/engine/eng_table.c +++ b/crypto/openssl/crypto/engine/eng_table.c @@ -237,6 +237,7 @@ ENGINE *engine_table_select_tmp(ENGINE_TABLE **table, int nid, const char *f, in #endif return NULL; } + ERR_set_mark(); CRYPTO_w_lock(CRYPTO_LOCK_ENGINE); /* Check again inside the lock otherwise we could race against cleanup * operations. But don't worry about a fprintf(stderr). */ @@ -310,6 +311,6 @@ end: CRYPTO_w_unlock(CRYPTO_LOCK_ENGINE); /* Whatever happened, any failed init()s are not failures in this * context, so clear our error state. */ - ERR_clear_error(); + ERR_pop_to_mark(); return ret; } diff --git a/crypto/openssl/crypto/engine/engine.h b/crypto/openssl/crypto/engine/engine.h index f503595..d4bc1ef 100644 --- a/crypto/openssl/crypto/engine/engine.h +++ b/crypto/openssl/crypto/engine/engine.h @@ -339,9 +339,11 @@ void ENGINE_load_ubsec(void); void ENGINE_load_cryptodev(void); void ENGINE_load_padlock(void); void ENGINE_load_builtin_engines(void); +#ifdef OPENSSL_SYS_WIN32 #ifndef OPENSSL_NO_CAPIENG void ENGINE_load_capi(void); #endif +#endif /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation * "registry" handling. */ @@ -767,6 +769,7 @@ void ERR_load_ENGINE_strings(void); #define ENGINE_R_DSO_FAILURE 104 #define ENGINE_R_DSO_NOT_FOUND 132 #define ENGINE_R_ENGINES_SECTION_ERROR 148 +#define ENGINE_R_ENGINE_CONFIGURATION_ERROR 101 #define ENGINE_R_ENGINE_IS_NOT_IN_LIST 105 #define ENGINE_R_ENGINE_SECTION_ERROR 149 #define ENGINE_R_FAILED_LOADING_PRIVATE_KEY 128 diff --git a/crypto/openssl/crypto/err/Makefile b/crypto/openssl/crypto/err/Makefile index 91d1379..96d8a1a 100644 --- a/crypto/openssl/crypto/err/Makefile +++ b/crypto/openssl/crypto/err/Makefile @@ -83,23 +83,24 @@ err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h err.o: ../cryptlib.h err.c err_all.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h err_all.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h -err_all.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h -err_all.o: ../../include/openssl/dh.h ../../include/openssl/dsa.h -err_all.o: ../../include/openssl/dso.h ../../include/openssl/e_os2.h -err_all.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -err_all.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h -err_all.o: ../../include/openssl/err.h ../../include/openssl/evp.h -err_all.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h -err_all.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h -err_all.o: ../../include/openssl/ocsp.h ../../include/openssl/opensslconf.h -err_all.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -err_all.o: ../../include/openssl/pem2.h ../../include/openssl/pkcs12.h -err_all.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h -err_all.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h -err_all.o: ../../include/openssl/sha.h ../../include/openssl/stack.h -err_all.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h -err_all.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h -err_all.o: ../../include/openssl/x509v3.h err_all.c +err_all.o: ../../include/openssl/comp.h ../../include/openssl/conf.h +err_all.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h +err_all.o: ../../include/openssl/dsa.h ../../include/openssl/dso.h +err_all.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +err_all.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +err_all.o: ../../include/openssl/engine.h ../../include/openssl/err.h +err_all.o: ../../include/openssl/evp.h ../../include/openssl/fips.h +err_all.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +err_all.o: ../../include/openssl/objects.h ../../include/openssl/ocsp.h +err_all.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +err_all.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pem2.h +err_all.o: ../../include/openssl/pkcs12.h ../../include/openssl/pkcs7.h +err_all.o: ../../include/openssl/rand.h ../../include/openssl/rsa.h +err_all.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h +err_all.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +err_all.o: ../../include/openssl/ui.h ../../include/openssl/x509.h +err_all.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h +err_all.o: err_all.c err_bio.o: ../../e_os.h ../../include/openssl/bio.h err_bio.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h err_bio.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h diff --git a/crypto/openssl/crypto/err/err_all.c b/crypto/openssl/crypto/err/err_all.c index f21a527..39796f7 100644 --- a/crypto/openssl/crypto/err/err_all.c +++ b/crypto/openssl/crypto/err/err_all.c @@ -104,6 +104,7 @@ #ifndef OPENSSL_NO_JPAKE #include <openssl/jpake.h> #endif +#include <openssl/comp.h> void ERR_load_crypto_strings(void) { @@ -157,5 +158,6 @@ void ERR_load_crypto_strings(void) #ifndef OPENSSL_NO_JPAKE ERR_load_JPAKE_strings(); #endif + ERR_load_COMP_strings(); #endif } diff --git a/crypto/openssl/crypto/evp/c_allc.c b/crypto/openssl/crypto/evp/c_allc.c index 7054d81..e45cee8 100644 --- a/crypto/openssl/crypto/evp/c_allc.c +++ b/crypto/openssl/crypto/evp/c_allc.c @@ -71,6 +71,8 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_des_cfb8()); EVP_add_cipher(EVP_des_ede_cfb()); EVP_add_cipher(EVP_des_ede3_cfb()); + EVP_add_cipher(EVP_des_ede3_cfb1()); + EVP_add_cipher(EVP_des_ede3_cfb8()); EVP_add_cipher(EVP_des_ofb()); EVP_add_cipher(EVP_des_ede_ofb()); diff --git a/crypto/openssl/crypto/evp/c_alld.c b/crypto/openssl/crypto/evp/c_alld.c index d270b0e..e0841d1 100644 --- a/crypto/openssl/crypto/evp/c_alld.c +++ b/crypto/openssl/crypto/evp/c_alld.c @@ -64,9 +64,6 @@ void OpenSSL_add_all_digests(void) { -#ifndef OPENSSL_NO_MD2 - EVP_add_digest(EVP_md2()); -#endif #ifndef OPENSSL_NO_MD4 EVP_add_digest(EVP_md4()); #endif diff --git a/crypto/openssl/crypto/evp/digest.c b/crypto/openssl/crypto/evp/digest.c index 3bc2d12..6a8f39b 100644 --- a/crypto/openssl/crypto/evp/digest.c +++ b/crypto/openssl/crypto/evp/digest.c @@ -127,7 +127,8 @@ EVP_MD_CTX *EVP_MD_CTX_create(void) { EVP_MD_CTX *ctx=OPENSSL_malloc(sizeof *ctx); - EVP_MD_CTX_init(ctx); + if (ctx) + EVP_MD_CTX_init(ctx); return ctx; } @@ -299,7 +300,14 @@ int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl) OPENSSL_free(ctx->md_data); ctx->digest=type; if (type->ctx_size) + { ctx->md_data=OPENSSL_malloc(type->ctx_size); + if (!ctx->md_data) + { + EVPerr(EVP_F_EVP_DIGESTINIT_EX, ERR_R_MALLOC_FAILURE); + return 0; + } + } } #ifndef OPENSSL_NO_ENGINE skip_to_init: @@ -380,8 +388,17 @@ int EVP_MD_CTX_copy_ex(EVP_MD_CTX *out, const EVP_MD_CTX *in) if (out->digest->ctx_size) { - if (tmp_buf) out->md_data = tmp_buf; - else out->md_data=OPENSSL_malloc(out->digest->ctx_size); + if (tmp_buf) + out->md_data = tmp_buf; + else + { + out->md_data=OPENSSL_malloc(out->digest->ctx_size); + if (!out->md_data) + { + EVPerr(EVP_F_EVP_MD_CTX_COPY_EX,ERR_R_MALLOC_FAILURE); + return 0; + } + } memcpy(out->md_data,in->md_data,out->digest->ctx_size); } diff --git a/crypto/openssl/crypto/evp/evp_lib.c b/crypto/openssl/crypto/evp/evp_lib.c index 174cf6c..9c20061 100644 --- a/crypto/openssl/crypto/evp/evp_lib.c +++ b/crypto/openssl/crypto/evp/evp_lib.c @@ -163,6 +163,12 @@ int EVP_CIPHER_type(const EVP_CIPHER *ctx) return NID_des_cfb64; + case NID_des_ede3_cfb64: + case NID_des_ede3_cfb8: + case NID_des_ede3_cfb1: + + return NID_des_cfb64; + default: /* Check it has an OID and it is valid */ otmp = OBJ_nid2obj(nid); diff --git a/crypto/openssl/crypto/evp/evp_locl.h b/crypto/openssl/crypto/evp/evp_locl.h index eabcc96..72105b0 100644 --- a/crypto/openssl/crypto/evp/evp_locl.h +++ b/crypto/openssl/crypto/evp/evp_locl.h @@ -127,9 +127,9 @@ BLOCK_CIPHER_def1(cname, cbc, cbc, CBC, kstruct, nid, block_size, key_len, \ #define BLOCK_CIPHER_def_cfb(cname, kstruct, nid, key_len, \ iv_len, cbits, flags, init_key, cleanup, \ set_asn1, get_asn1, ctrl) \ -BLOCK_CIPHER_def1(cname, cfb##cbits, cfb##cbits, CFB, kstruct, nid, 1, \ - key_len, iv_len, flags, init_key, cleanup, set_asn1, \ - get_asn1, ctrl) +BLOCK_CIPHER_def1(cname, cfb##cbits, cfb##cbits, CFB, kstruct, nid, \ + (cbits + 7)/8, key_len, iv_len, \ + flags, init_key, cleanup, set_asn1, get_asn1, ctrl) #define BLOCK_CIPHER_def_ofb(cname, kstruct, nid, key_len, \ iv_len, cbits, flags, init_key, cleanup, \ @@ -139,10 +139,10 @@ BLOCK_CIPHER_def1(cname, ofb##cbits, ofb, OFB, kstruct, nid, 1, \ get_asn1, ctrl) #define BLOCK_CIPHER_def_ecb(cname, kstruct, nid, block_size, key_len, \ - iv_len, flags, init_key, cleanup, set_asn1, \ + flags, init_key, cleanup, set_asn1, \ get_asn1, ctrl) \ BLOCK_CIPHER_def1(cname, ecb, ecb, ECB, kstruct, nid, block_size, key_len, \ - iv_len, flags, init_key, cleanup, set_asn1, get_asn1, ctrl) + 0, flags, init_key, cleanup, set_asn1, get_asn1, ctrl) #define BLOCK_CIPHER_defs(cname, kstruct, \ nid, block_size, key_len, iv_len, cbits, flags, \ @@ -153,7 +153,7 @@ BLOCK_CIPHER_def_cfb(cname, kstruct, nid, key_len, iv_len, cbits, \ flags, init_key, cleanup, set_asn1, get_asn1, ctrl) \ BLOCK_CIPHER_def_ofb(cname, kstruct, nid, key_len, iv_len, cbits, \ flags, init_key, cleanup, set_asn1, get_asn1, ctrl) \ -BLOCK_CIPHER_def_ecb(cname, kstruct, nid, block_size, key_len, iv_len, flags, \ +BLOCK_CIPHER_def_ecb(cname, kstruct, nid, block_size, key_len, flags, \ init_key, cleanup, set_asn1, get_asn1, ctrl) diff --git a/crypto/openssl/crypto/lhash/lhash.c b/crypto/openssl/crypto/lhash/lhash.c index 04ea802..0b41f87 100644 --- a/crypto/openssl/crypto/lhash/lhash.c +++ b/crypto/openssl/crypto/lhash/lhash.c @@ -305,16 +305,40 @@ void lh_doall_arg(LHASH *lh, LHASH_DOALL_ARG_FN_TYPE func, void *arg) static void expand(LHASH *lh) { LHASH_NODE **n,**n1,**n2,*np; - unsigned int p,i,j; + unsigned int p,i,j,pmax; unsigned long hash,nni; + p=(int)lh->p++; + nni=lh->num_alloc_nodes; + pmax=lh->pmax; + + if ((lh->p) >= lh->pmax) + { + j=(int)lh->num_alloc_nodes*2; + n=(LHASH_NODE **)OPENSSL_realloc(lh->b, + (int)sizeof(LHASH_NODE *)*j); + if (n == NULL) + { +/* fputs("realloc error in lhash",stderr); */ + lh->error++; + lh->p=0; + return; + } + /* else */ + for (i=(int)lh->num_alloc_nodes; i<j; i++)/* 26/02/92 eay */ + n[i]=NULL; /* 02/03/92 eay */ + lh->pmax=lh->num_alloc_nodes; + lh->num_alloc_nodes=j; + lh->num_expand_reallocs++; + lh->p=0; + lh->b=n; + } + lh->num_nodes++; lh->num_expands++; - p=(int)lh->p++; n1= &(lh->b[p]); - n2= &(lh->b[p+(int)lh->pmax]); + n2= &(lh->b[p+pmax]); *n2=NULL; /* 27/07/92 - eay - undefined pointer bug */ - nni=lh->num_alloc_nodes; for (np= *n1; np != NULL; ) { @@ -335,35 +359,14 @@ static void expand(LHASH *lh) np= *n1; } - if ((lh->p) >= lh->pmax) - { - j=(int)lh->num_alloc_nodes*2; - n=(LHASH_NODE **)OPENSSL_realloc(lh->b, - (int)(sizeof(LHASH_NODE *)*j)); - if (n == NULL) - { -/* fputs("realloc error in lhash",stderr); */ - lh->error++; - lh->p=0; - return; - } - /* else */ - for (i=(int)lh->num_alloc_nodes; i<j; i++)/* 26/02/92 eay */ - n[i]=NULL; /* 02/03/92 eay */ - lh->pmax=lh->num_alloc_nodes; - lh->num_alloc_nodes=j; - lh->num_expand_reallocs++; - lh->p=0; - lh->b=n; - } } static void contract(LHASH *lh) { LHASH_NODE **n,*n1,*np; + int idx = lh->p+lh->pmax-1; - np=lh->b[lh->p+lh->pmax-1]; - lh->b[lh->p+lh->pmax-1]=NULL; /* 24/07-92 - eay - weird but :-( */ + np=lh->b[idx]; if (lh->p == 0) { n=(LHASH_NODE **)OPENSSL_realloc(lh->b, @@ -383,6 +386,7 @@ static void contract(LHASH *lh) else lh->p--; + lh->b[idx] = NULL; lh->num_nodes--; lh->num_contracts++; diff --git a/crypto/openssl/crypto/md5/asm/md5-x86_64.pl b/crypto/openssl/crypto/md5/asm/md5-x86_64.pl index 9a6fa67..05d040f 100755 --- a/crypto/openssl/crypto/md5/asm/md5-x86_64.pl +++ b/crypto/openssl/crypto/md5/asm/md5-x86_64.pl @@ -19,6 +19,7 @@ my $code; sub round1_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; + $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= <<EOF; @@ -42,6 +43,7 @@ EOF sub round2_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; + $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1); $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); $code .= <<EOF; @@ -65,6 +67,7 @@ EOF sub round3_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; + $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal $code .= " mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */\n" if ($pos == -1); $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); $code .= <<EOF; @@ -87,6 +90,7 @@ EOF sub round4_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; + $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); $code .= " mov \$0xffffffff, %r11d\n" if ($pos == -1); $code .= " xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/\n" diff --git a/crypto/openssl/crypto/o_init.c b/crypto/openssl/crypto/o_init.c index 00ed65a..2a5f5aa 100644 --- a/crypto/openssl/crypto/o_init.c +++ b/crypto/openssl/crypto/o_init.c @@ -58,6 +58,11 @@ #include <e_os.h> #include <openssl/err.h> +/* Internal only functions: only ever used here */ +extern void int_ERR_lib_init(void); +extern void int_EVP_MD_init_engine_callbacks(void ); +extern void int_EVP_CIPHER_init_engine_callbacks(void ); +extern void int_RAND_init_engine_callbacks(void ); /* Perform any essential OpenSSL initialization operations. * Currently only sets FIPS callbacks @@ -73,7 +78,7 @@ void OPENSSL_init(void) #ifdef CRYPTO_MDEBUG CRYPTO_malloc_debug_init(); #endif -#ifdef OPENSSL_ENGINE +#ifndef OPENSSL_NO_ENGINE int_EVP_MD_init_engine_callbacks(); int_EVP_CIPHER_init_engine_callbacks(); int_RAND_init_engine_callbacks(); diff --git a/crypto/openssl/crypto/o_str.c b/crypto/openssl/crypto/o_str.c index 59cc250..56104a6 100644 --- a/crypto/openssl/crypto/o_str.c +++ b/crypto/openssl/crypto/o_str.c @@ -60,7 +60,9 @@ #include <e_os.h> #include "o_str.h" -#if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && !defined(OPENSSL_SYSNAME_WIN32) +#if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && \ + !defined(OPENSSL_SYSNAME_WIN32) && \ + !defined(NETWARE_CLIB) # include <strings.h> #endif diff --git a/crypto/openssl/crypto/objects/obj_dat.c b/crypto/openssl/crypto/objects/obj_dat.c index 7fd7433..760af16 100644 --- a/crypto/openssl/crypto/objects/obj_dat.c +++ b/crypto/openssl/crypto/objects/obj_dat.c @@ -456,10 +456,13 @@ int OBJ_obj2txt(char *buf, int buf_len, const ASN1_OBJECT *a, int no_name) s=OBJ_nid2ln(nid); if (s == NULL) s=OBJ_nid2sn(nid); - if (buf) - BUF_strlcpy(buf,s,buf_len); - n=strlen(s); - return n; + if (s) + { + if (buf) + BUF_strlcpy(buf,s,buf_len); + n=strlen(s); + return n; + } } diff --git a/crypto/openssl/crypto/objects/obj_dat.h b/crypto/openssl/crypto/objects/obj_dat.h index dccc15e..23bdb46 100644 --- a/crypto/openssl/crypto/objects/obj_dat.h +++ b/crypto/openssl/crypto/objects/obj_dat.h @@ -62,12 +62,12 @@ * [including the GNU Public Licence.] */ -#define NUM_NID 859 -#define NUM_SN 852 -#define NUM_LN 852 -#define NUM_OBJ 806 +#define NUM_NID 893 +#define NUM_SN 886 +#define NUM_LN 886 +#define NUM_OBJ 840 -static unsigned char lvalues[5722]={ +static unsigned char lvalues[5824]={ 0x00, /* [ 0] OBJ_undef */ 0x2A,0x86,0x48,0x86,0xF7,0x0D, /* [ 1] OBJ_rsadsi */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01, /* [ 7] OBJ_pkcs */ @@ -707,7 +707,7 @@ static unsigned char lvalues[5722]={ 0x2B, /* [4582] OBJ_identified_organization */ 0x2B,0x81,0x04, /* [4583] OBJ_certicom_arc */ 0x67,0x2B, /* [4586] OBJ_wap */ -0x67,0x2B,0x0D, /* [4588] OBJ_wap_wsg */ +0x67,0x2B,0x01, /* [4588] OBJ_wap_wsg */ 0x2A,0x86,0x48,0xCE,0x3D,0x01,0x02,0x03, /* [4591] OBJ_X9_62_id_characteristic_two_basis */ 0x2A,0x86,0x48,0xCE,0x3D,0x01,0x02,0x03,0x01,/* [4599] OBJ_X9_62_onBasis */ 0x2A,0x86,0x48,0xCE,0x3D,0x01,0x02,0x03,0x02,/* [4608] OBJ_X9_62_tpBasis */ @@ -763,17 +763,17 @@ static unsigned char lvalues[5722]={ 0x2B,0x81,0x04,0x00,0x25, /* [4926] OBJ_sect409r1 */ 0x2B,0x81,0x04,0x00,0x26, /* [4931] OBJ_sect571k1 */ 0x2B,0x81,0x04,0x00,0x27, /* [4936] OBJ_sect571r1 */ -0x67,0x2B,0x0D,0x04,0x01, /* [4941] OBJ_wap_wsg_idm_ecid_wtls1 */ -0x67,0x2B,0x0D,0x04,0x03, /* [4946] OBJ_wap_wsg_idm_ecid_wtls3 */ -0x67,0x2B,0x0D,0x04,0x04, /* [4951] OBJ_wap_wsg_idm_ecid_wtls4 */ -0x67,0x2B,0x0D,0x04,0x05, /* [4956] OBJ_wap_wsg_idm_ecid_wtls5 */ -0x67,0x2B,0x0D,0x04,0x06, /* [4961] OBJ_wap_wsg_idm_ecid_wtls6 */ -0x67,0x2B,0x0D,0x04,0x07, /* [4966] OBJ_wap_wsg_idm_ecid_wtls7 */ -0x67,0x2B,0x0D,0x04,0x08, /* [4971] OBJ_wap_wsg_idm_ecid_wtls8 */ -0x67,0x2B,0x0D,0x04,0x09, /* [4976] OBJ_wap_wsg_idm_ecid_wtls9 */ -0x67,0x2B,0x0D,0x04,0x0A, /* [4981] OBJ_wap_wsg_idm_ecid_wtls10 */ -0x67,0x2B,0x0D,0x04,0x0B, /* [4986] OBJ_wap_wsg_idm_ecid_wtls11 */ -0x67,0x2B,0x0D,0x04,0x0C, /* [4991] OBJ_wap_wsg_idm_ecid_wtls12 */ +0x67,0x2B,0x01,0x04,0x01, /* [4941] OBJ_wap_wsg_idm_ecid_wtls1 */ +0x67,0x2B,0x01,0x04,0x03, /* [4946] OBJ_wap_wsg_idm_ecid_wtls3 */ +0x67,0x2B,0x01,0x04,0x04, /* [4951] OBJ_wap_wsg_idm_ecid_wtls4 */ +0x67,0x2B,0x01,0x04,0x05, /* [4956] OBJ_wap_wsg_idm_ecid_wtls5 */ +0x67,0x2B,0x01,0x04,0x06, /* [4961] OBJ_wap_wsg_idm_ecid_wtls6 */ +0x67,0x2B,0x01,0x04,0x07, /* [4966] OBJ_wap_wsg_idm_ecid_wtls7 */ +0x67,0x2B,0x01,0x04,0x08, /* [4971] OBJ_wap_wsg_idm_ecid_wtls8 */ +0x67,0x2B,0x01,0x04,0x09, /* [4976] OBJ_wap_wsg_idm_ecid_wtls9 */ +0x67,0x2B,0x01,0x04,0x0A, /* [4981] OBJ_wap_wsg_idm_ecid_wtls10 */ +0x67,0x2B,0x01,0x04,0x0B, /* [4986] OBJ_wap_wsg_idm_ecid_wtls11 */ +0x67,0x2B,0x01,0x04,0x0C, /* [4991] OBJ_wap_wsg_idm_ecid_wtls12 */ 0x55,0x1D,0x20,0x00, /* [4996] OBJ_any_policy */ 0x55,0x1D,0x21, /* [5000] OBJ_policy_mappings */ 0x55,0x1D,0x36, /* [5003] OBJ_inhibit_any_policy */ @@ -874,6 +874,40 @@ static unsigned char lvalues[5722]={ 0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x11,0x02,/* [5701] OBJ_LocalKeySet */ 0x55,0x1D,0x2E, /* [5710] OBJ_freshest_crl */ 0x2B,0x06,0x01,0x05,0x05,0x07,0x08,0x03, /* [5713] OBJ_id_on_permanentIdentifier */ +0x55,0x04,0x0E, /* [5721] OBJ_searchGuide */ +0x55,0x04,0x0F, /* [5724] OBJ_businessCategory */ +0x55,0x04,0x10, /* [5727] OBJ_postalAddress */ +0x55,0x04,0x12, /* [5730] OBJ_postOfficeBox */ +0x55,0x04,0x13, /* [5733] OBJ_physicalDeliveryOfficeName */ +0x55,0x04,0x14, /* [5736] OBJ_telephoneNumber */ +0x55,0x04,0x15, /* [5739] OBJ_telexNumber */ +0x55,0x04,0x16, /* [5742] OBJ_teletexTerminalIdentifier */ +0x55,0x04,0x17, /* [5745] OBJ_facsimileTelephoneNumber */ +0x55,0x04,0x18, /* [5748] OBJ_x121Address */ +0x55,0x04,0x19, /* [5751] OBJ_internationaliSDNNumber */ +0x55,0x04,0x1A, /* [5754] OBJ_registeredAddress */ +0x55,0x04,0x1B, /* [5757] OBJ_destinationIndicator */ +0x55,0x04,0x1C, /* [5760] OBJ_preferredDeliveryMethod */ +0x55,0x04,0x1D, /* [5763] OBJ_presentationAddress */ +0x55,0x04,0x1E, /* [5766] OBJ_supportedApplicationContext */ +0x55,0x04,0x1F, /* [5769] OBJ_member */ +0x55,0x04,0x20, /* [5772] OBJ_owner */ +0x55,0x04,0x21, /* [5775] OBJ_roleOccupant */ +0x55,0x04,0x22, /* [5778] OBJ_seeAlso */ +0x55,0x04,0x23, /* [5781] OBJ_userPassword */ +0x55,0x04,0x24, /* [5784] OBJ_userCertificate */ +0x55,0x04,0x25, /* [5787] OBJ_cACertificate */ +0x55,0x04,0x26, /* [5790] OBJ_authorityRevocationList */ +0x55,0x04,0x27, /* [5793] OBJ_certificateRevocationList */ +0x55,0x04,0x28, /* [5796] OBJ_crossCertificatePair */ +0x55,0x04,0x2F, /* [5799] OBJ_enhancedSearchGuide */ +0x55,0x04,0x30, /* [5802] OBJ_protocolInformation */ +0x55,0x04,0x31, /* [5805] OBJ_distinguishedName */ +0x55,0x04,0x32, /* [5808] OBJ_uniqueMember */ +0x55,0x04,0x33, /* [5811] OBJ_houseIdentifier */ +0x55,0x04,0x34, /* [5814] OBJ_supportedAlgorithms */ +0x55,0x04,0x35, /* [5817] OBJ_deltaRevocationList */ +0x55,0x04,0x36, /* [5820] OBJ_dmdName */ }; static ASN1_OBJECT nid_objs[NUM_NID]={ @@ -1928,7 +1962,7 @@ static ASN1_OBJECT nid_objs[NUM_NID]={ {"DES-CFB8","des-cfb8",NID_des_cfb8,0,NULL,0}, {"DES-EDE3-CFB1","des-ede3-cfb1",NID_des_ede3_cfb1,0,NULL,0}, {"DES-EDE3-CFB8","des-ede3-cfb8",NID_des_ede3_cfb8,0,NULL,0}, -{"streetAddress","streetAddress",NID_streetAddress,3,&(lvalues[4462]),0}, +{"street","streetAddress",NID_streetAddress,3,&(lvalues[4462]),0}, {"postalCode","postalCode",NID_postalCode,3,&(lvalues[4465]),0}, {"id-ppl","id-ppl",NID_id_ppl,7,&(lvalues[4468]),0}, {"proxyCertInfo","Proxy Certificate Information",NID_proxyCertInfo,8, @@ -2262,6 +2296,61 @@ static ASN1_OBJECT nid_objs[NUM_NID]={ &(lvalues[5710]),0}, {"id-on-permanentIdentifier","Permanent Identifier", NID_id_on_permanentIdentifier,8,&(lvalues[5713]),0}, +{"searchGuide","searchGuide",NID_searchGuide,3,&(lvalues[5721]),0}, +{"businessCategory","businessCategory",NID_businessCategory,3, + &(lvalues[5724]),0}, +{"postalAddress","postalAddress",NID_postalAddress,3,&(lvalues[5727]),0}, +{"postOfficeBox","postOfficeBox",NID_postOfficeBox,3,&(lvalues[5730]),0}, +{"physicalDeliveryOfficeName","physicalDeliveryOfficeName", + NID_physicalDeliveryOfficeName,3,&(lvalues[5733]),0}, +{"telephoneNumber","telephoneNumber",NID_telephoneNumber,3, + &(lvalues[5736]),0}, +{"telexNumber","telexNumber",NID_telexNumber,3,&(lvalues[5739]),0}, +{"teletexTerminalIdentifier","teletexTerminalIdentifier", + NID_teletexTerminalIdentifier,3,&(lvalues[5742]),0}, +{"facsimileTelephoneNumber","facsimileTelephoneNumber", + NID_facsimileTelephoneNumber,3,&(lvalues[5745]),0}, +{"x121Address","x121Address",NID_x121Address,3,&(lvalues[5748]),0}, +{"internationaliSDNNumber","internationaliSDNNumber", + NID_internationaliSDNNumber,3,&(lvalues[5751]),0}, +{"registeredAddress","registeredAddress",NID_registeredAddress,3, + &(lvalues[5754]),0}, +{"destinationIndicator","destinationIndicator", + NID_destinationIndicator,3,&(lvalues[5757]),0}, +{"preferredDeliveryMethod","preferredDeliveryMethod", + NID_preferredDeliveryMethod,3,&(lvalues[5760]),0}, +{"presentationAddress","presentationAddress",NID_presentationAddress, + 3,&(lvalues[5763]),0}, +{"supportedApplicationContext","supportedApplicationContext", + NID_supportedApplicationContext,3,&(lvalues[5766]),0}, +{"member","member",NID_member,3,&(lvalues[5769]),0}, +{"owner","owner",NID_owner,3,&(lvalues[5772]),0}, +{"roleOccupant","roleOccupant",NID_roleOccupant,3,&(lvalues[5775]),0}, +{"seeAlso","seeAlso",NID_seeAlso,3,&(lvalues[5778]),0}, +{"userPassword","userPassword",NID_userPassword,3,&(lvalues[5781]),0}, +{"userCertificate","userCertificate",NID_userCertificate,3, + &(lvalues[5784]),0}, +{"cACertificate","cACertificate",NID_cACertificate,3,&(lvalues[5787]),0}, +{"authorityRevocationList","authorityRevocationList", + NID_authorityRevocationList,3,&(lvalues[5790]),0}, +{"certificateRevocationList","certificateRevocationList", + NID_certificateRevocationList,3,&(lvalues[5793]),0}, +{"crossCertificatePair","crossCertificatePair", + NID_crossCertificatePair,3,&(lvalues[5796]),0}, +{"enhancedSearchGuide","enhancedSearchGuide",NID_enhancedSearchGuide, + 3,&(lvalues[5799]),0}, +{"protocolInformation","protocolInformation",NID_protocolInformation, + 3,&(lvalues[5802]),0}, +{"distinguishedName","distinguishedName",NID_distinguishedName,3, + &(lvalues[5805]),0}, +{"uniqueMember","uniqueMember",NID_uniqueMember,3,&(lvalues[5808]),0}, +{"houseIdentifier","houseIdentifier",NID_houseIdentifier,3, + &(lvalues[5811]),0}, +{"supportedAlgorithms","supportedAlgorithms",NID_supportedAlgorithms, + 3,&(lvalues[5814]),0}, +{"deltaRevocationList","deltaRevocationList",NID_deltaRevocationList, + 3,&(lvalues[5817]),0}, +{"dmdName","dmdName",NID_dmdName,3,&(lvalues[5820]),0}, }; static ASN1_OBJECT *sn_objs[NUM_SN]={ @@ -2458,10 +2547,12 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[501]),/* "audio" */ &(nid_objs[177]),/* "authorityInfoAccess" */ &(nid_objs[90]),/* "authorityKeyIdentifier" */ +&(nid_objs[882]),/* "authorityRevocationList" */ &(nid_objs[87]),/* "basicConstraints" */ &(nid_objs[365]),/* "basicOCSPResponse" */ &(nid_objs[285]),/* "biometricInfo" */ &(nid_objs[494]),/* "buildingName" */ +&(nid_objs[860]),/* "businessCategory" */ &(nid_objs[691]),/* "c2onb191v4" */ &(nid_objs[692]),/* "c2onb191v5" */ &(nid_objs[697]),/* "c2onb239v4" */ @@ -2482,6 +2573,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[696]),/* "c2tnb239v3" */ &(nid_objs[701]),/* "c2tnb359v1" */ &(nid_objs[703]),/* "c2tnb431r1" */ +&(nid_objs[881]),/* "cACertificate" */ &(nid_objs[483]),/* "cNAMERecord" */ &(nid_objs[179]),/* "caIssuers" */ &(nid_objs[785]),/* "caRepository" */ @@ -2490,6 +2582,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[677]),/* "certicom-arc" */ &(nid_objs[771]),/* "certificateIssuer" */ &(nid_objs[89]),/* "certificatePolicies" */ +&(nid_objs[883]),/* "certificateRevocationList" */ &(nid_objs[54]),/* "challengePassword" */ &(nid_objs[407]),/* "characteristic-two-field" */ &(nid_objs[395]),/* "clearance" */ @@ -2500,6 +2593,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[153]),/* "crlBag" */ &(nid_objs[103]),/* "crlDistributionPoints" */ &(nid_objs[88]),/* "crlNumber" */ +&(nid_objs[884]),/* "crossCertificatePair" */ &(nid_objs[806]),/* "cryptocom" */ &(nid_objs[805]),/* "cryptopro" */ &(nid_objs[500]),/* "dITRedirect" */ @@ -2508,9 +2602,13 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[434]),/* "data" */ &(nid_objs[390]),/* "dcobject" */ &(nid_objs[140]),/* "deltaCRL" */ +&(nid_objs[891]),/* "deltaRevocationList" */ &(nid_objs[107]),/* "description" */ +&(nid_objs[871]),/* "destinationIndicator" */ &(nid_objs[28]),/* "dhKeyAgreement" */ &(nid_objs[382]),/* "directory" */ +&(nid_objs[887]),/* "distinguishedName" */ +&(nid_objs[892]),/* "dmdName" */ &(nid_objs[174]),/* "dnQualifier" */ &(nid_objs[447]),/* "document" */ &(nid_objs[471]),/* "documentAuthor" */ @@ -2533,12 +2631,14 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[792]),/* "ecdsa-with-Specified" */ &(nid_objs[48]),/* "emailAddress" */ &(nid_objs[132]),/* "emailProtection" */ +&(nid_objs[885]),/* "enhancedSearchGuide" */ &(nid_objs[389]),/* "enterprises" */ &(nid_objs[384]),/* "experimental" */ &(nid_objs[172]),/* "extReq" */ &(nid_objs[56]),/* "extendedCertificateAttributes" */ &(nid_objs[126]),/* "extendedKeyUsage" */ &(nid_objs[372]),/* "extendedStatus" */ +&(nid_objs[867]),/* "facsimileTelephoneNumber" */ &(nid_objs[462]),/* "favouriteDrink" */ &(nid_objs[857]),/* "freshestCRL" */ &(nid_objs[453]),/* "friendlyCountry" */ @@ -2565,6 +2665,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[486]),/* "homePostalAddress" */ &(nid_objs[473]),/* "homeTelephoneNumber" */ &(nid_objs[466]),/* "host" */ +&(nid_objs[889]),/* "houseIdentifier" */ &(nid_objs[442]),/* "iA5StringSyntax" */ &(nid_objs[783]),/* "id-DHBasedMac" */ &(nid_objs[824]),/* "id-Gost28147-89-CryptoPro-A-ParamSet" */ @@ -2794,6 +2895,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[748]),/* "inhibitAnyPolicy" */ &(nid_objs[101]),/* "initials" */ &(nid_objs[647]),/* "international-organizations" */ +&(nid_objs[869]),/* "internationaliSDNNumber" */ &(nid_objs[142]),/* "invalidityDate" */ &(nid_objs[294]),/* "ipsecEndSystem" */ &(nid_objs[295]),/* "ipsecTunnel" */ @@ -2811,6 +2913,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[493]),/* "mailPreferenceOption" */ &(nid_objs[467]),/* "manager" */ &(nid_objs[809]),/* "md_gost94" */ +&(nid_objs[875]),/* "member" */ &(nid_objs[182]),/* "member-body" */ &(nid_objs[51]),/* "messageDigest" */ &(nid_objs[383]),/* "mgmt" */ @@ -2846,12 +2949,14 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[681]),/* "onBasis" */ &(nid_objs[491]),/* "organizationalStatus" */ &(nid_objs[475]),/* "otherMailbox" */ +&(nid_objs[876]),/* "owner" */ &(nid_objs[489]),/* "pagerTelephoneNumber" */ &(nid_objs[374]),/* "path" */ &(nid_objs[112]),/* "pbeWithMD5AndCast5CBC" */ &(nid_objs[499]),/* "personalSignature" */ &(nid_objs[487]),/* "personalTitle" */ &(nid_objs[464]),/* "photo" */ +&(nid_objs[863]),/* "physicalDeliveryOfficeName" */ &(nid_objs[437]),/* "pilot" */ &(nid_objs[439]),/* "pilotAttributeSyntax" */ &(nid_objs[438]),/* "pilotAttributeType" */ @@ -2877,8 +2982,12 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[47]),/* "pkcs9" */ &(nid_objs[401]),/* "policyConstraints" */ &(nid_objs[747]),/* "policyMappings" */ +&(nid_objs[862]),/* "postOfficeBox" */ +&(nid_objs[861]),/* "postalAddress" */ &(nid_objs[661]),/* "postalCode" */ &(nid_objs[683]),/* "ppBasis" */ +&(nid_objs[872]),/* "preferredDeliveryMethod" */ +&(nid_objs[873]),/* "presentationAddress" */ &(nid_objs[816]),/* "prf-gostr3411-94" */ &(nid_objs[406]),/* "prime-field" */ &(nid_objs[409]),/* "prime192v1" */ @@ -2890,13 +2999,16 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[415]),/* "prime256v1" */ &(nid_objs[385]),/* "private" */ &(nid_objs[84]),/* "privateKeyUsagePeriod" */ +&(nid_objs[886]),/* "protocolInformation" */ &(nid_objs[663]),/* "proxyCertInfo" */ &(nid_objs[510]),/* "pseudonym" */ &(nid_objs[435]),/* "pss" */ &(nid_objs[286]),/* "qcStatements" */ &(nid_objs[457]),/* "qualityLabelledData" */ &(nid_objs[450]),/* "rFC822localPart" */ +&(nid_objs[870]),/* "registeredAddress" */ &(nid_objs[400]),/* "role" */ +&(nid_objs[877]),/* "roleOccupant" */ &(nid_objs[448]),/* "room" */ &(nid_objs[463]),/* "roomNumber" */ &(nid_objs[ 6]),/* "rsaEncryption" */ @@ -2909,6 +3021,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[290]),/* "sbgp-ipAddrBlock" */ &(nid_objs[292]),/* "sbgp-routerIdentifier" */ &(nid_objs[159]),/* "sdsiCertificate" */ +&(nid_objs[859]),/* "searchGuide" */ &(nid_objs[704]),/* "secp112r1" */ &(nid_objs[705]),/* "secp112r2" */ &(nid_objs[706]),/* "secp128r1" */ @@ -2943,6 +3056,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[733]),/* "sect571k1" */ &(nid_objs[734]),/* "sect571r1" */ &(nid_objs[386]),/* "security" */ +&(nid_objs[878]),/* "seeAlso" */ &(nid_objs[394]),/* "selected-attribute-types" */ &(nid_objs[105]),/* "serialNumber" */ &(nid_objs[129]),/* "serverAuth" */ @@ -3081,14 +3195,19 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[454]),/* "simpleSecurityObject" */ &(nid_objs[496]),/* "singleLevelQuality" */ &(nid_objs[387]),/* "snmpv2" */ -&(nid_objs[660]),/* "streetAddress" */ +&(nid_objs[660]),/* "street" */ &(nid_objs[85]),/* "subjectAltName" */ &(nid_objs[769]),/* "subjectDirectoryAttributes" */ &(nid_objs[398]),/* "subjectInfoAccess" */ &(nid_objs[82]),/* "subjectKeyIdentifier" */ &(nid_objs[498]),/* "subtreeMaximumQuality" */ &(nid_objs[497]),/* "subtreeMinimumQuality" */ +&(nid_objs[890]),/* "supportedAlgorithms" */ +&(nid_objs[874]),/* "supportedApplicationContext" */ &(nid_objs[402]),/* "targetInformation" */ +&(nid_objs[864]),/* "telephoneNumber" */ +&(nid_objs[866]),/* "teletexTerminalIdentifier" */ +&(nid_objs[865]),/* "telexNumber" */ &(nid_objs[459]),/* "textEncodedORAddress" */ &(nid_objs[293]),/* "textNotice" */ &(nid_objs[133]),/* "timeStamping" */ @@ -3096,9 +3215,12 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[682]),/* "tpBasis" */ &(nid_objs[375]),/* "trustRoot" */ &(nid_objs[436]),/* "ucl" */ +&(nid_objs[888]),/* "uniqueMember" */ &(nid_objs[55]),/* "unstructuredAddress" */ &(nid_objs[49]),/* "unstructuredName" */ +&(nid_objs[880]),/* "userCertificate" */ &(nid_objs[465]),/* "userClass" */ +&(nid_objs[879]),/* "userPassword" */ &(nid_objs[373]),/* "valid" */ &(nid_objs[678]),/* "wap" */ &(nid_objs[679]),/* "wap-wsg" */ @@ -3114,6 +3236,7 @@ static ASN1_OBJECT *sn_objs[NUM_SN]={ &(nid_objs[741]),/* "wap-wsg-idm-ecid-wtls8" */ &(nid_objs[742]),/* "wap-wsg-idm-ecid-wtls9" */ &(nid_objs[804]),/* "whirlpool" */ +&(nid_objs[868]),/* "x121Address" */ &(nid_objs[503]),/* "x500UniqueIdentifier" */ &(nid_objs[158]),/* "x509Certificate" */ &(nid_objs[160]),/* "x509Crl" */ @@ -3284,11 +3407,13 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[484]),/* "associatedDomain" */ &(nid_objs[485]),/* "associatedName" */ &(nid_objs[501]),/* "audio" */ +&(nid_objs[882]),/* "authorityRevocationList" */ &(nid_objs[91]),/* "bf-cbc" */ &(nid_objs[93]),/* "bf-cfb" */ &(nid_objs[92]),/* "bf-ecb" */ &(nid_objs[94]),/* "bf-ofb" */ &(nid_objs[494]),/* "buildingName" */ +&(nid_objs[860]),/* "businessCategory" */ &(nid_objs[691]),/* "c2onb191v4" */ &(nid_objs[692]),/* "c2onb191v5" */ &(nid_objs[697]),/* "c2onb239v4" */ @@ -3309,6 +3434,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[696]),/* "c2tnb239v3" */ &(nid_objs[701]),/* "c2tnb359v1" */ &(nid_objs[703]),/* "c2tnb431r1" */ +&(nid_objs[881]),/* "cACertificate" */ &(nid_objs[483]),/* "cNAMERecord" */ &(nid_objs[751]),/* "camellia-128-cbc" */ &(nid_objs[757]),/* "camellia-128-cfb" */ @@ -3336,6 +3462,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[152]),/* "certBag" */ &(nid_objs[677]),/* "certicom-arc" */ &(nid_objs[517]),/* "certificate extensions" */ +&(nid_objs[883]),/* "certificateRevocationList" */ &(nid_objs[54]),/* "challengePassword" */ &(nid_objs[407]),/* "characteristic-two-field" */ &(nid_objs[395]),/* "clearance" */ @@ -3346,6 +3473,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[53]),/* "countersignature" */ &(nid_objs[14]),/* "countryName" */ &(nid_objs[153]),/* "crlBag" */ +&(nid_objs[884]),/* "crossCertificatePair" */ &(nid_objs[806]),/* "cryptocom" */ &(nid_objs[805]),/* "cryptopro" */ &(nid_objs[500]),/* "dITRedirect" */ @@ -3353,6 +3481,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[495]),/* "dSAQuality" */ &(nid_objs[434]),/* "data" */ &(nid_objs[390]),/* "dcObject" */ +&(nid_objs[891]),/* "deltaRevocationList" */ &(nid_objs[31]),/* "des-cbc" */ &(nid_objs[643]),/* "des-cdmf" */ &(nid_objs[30]),/* "des-cfb" */ @@ -3371,10 +3500,13 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[63]),/* "des-ede3-ofb" */ &(nid_objs[45]),/* "des-ofb" */ &(nid_objs[107]),/* "description" */ +&(nid_objs[871]),/* "destinationIndicator" */ &(nid_objs[80]),/* "desx-cbc" */ &(nid_objs[28]),/* "dhKeyAgreement" */ &(nid_objs[11]),/* "directory services (X.500)" */ &(nid_objs[378]),/* "directory services - algorithms" */ +&(nid_objs[887]),/* "distinguishedName" */ +&(nid_objs[892]),/* "dmdName" */ &(nid_objs[174]),/* "dnQualifier" */ &(nid_objs[447]),/* "document" */ &(nid_objs[471]),/* "documentAuthor" */ @@ -3404,7 +3536,9 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[792]),/* "ecdsa-with-Specified" */ &(nid_objs[48]),/* "emailAddress" */ &(nid_objs[632]),/* "encrypted track 2" */ +&(nid_objs[885]),/* "enhancedSearchGuide" */ &(nid_objs[56]),/* "extendedCertificateAttributes" */ +&(nid_objs[867]),/* "facsimileTelephoneNumber" */ &(nid_objs[462]),/* "favouriteDrink" */ &(nid_objs[453]),/* "friendlyCountry" */ &(nid_objs[490]),/* "friendlyCountryName" */ @@ -3426,6 +3560,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[486]),/* "homePostalAddress" */ &(nid_objs[473]),/* "homeTelephoneNumber" */ &(nid_objs[466]),/* "host" */ +&(nid_objs[889]),/* "houseIdentifier" */ &(nid_objs[442]),/* "iA5StringSyntax" */ &(nid_objs[381]),/* "iana" */ &(nid_objs[824]),/* "id-Gost28147-89-CryptoPro-A-ParamSet" */ @@ -3640,6 +3775,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[676]),/* "identified-organization" */ &(nid_objs[461]),/* "info" */ &(nid_objs[101]),/* "initials" */ +&(nid_objs[869]),/* "internationaliSDNNumber" */ &(nid_objs[749]),/* "ipsec3" */ &(nid_objs[750]),/* "ipsec4" */ &(nid_objs[181]),/* "iso" */ @@ -3666,6 +3802,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[ 8]),/* "md5WithRSAEncryption" */ &(nid_objs[95]),/* "mdc2" */ &(nid_objs[96]),/* "mdc2WithRSA" */ +&(nid_objs[875]),/* "member" */ &(nid_objs[602]),/* "merchant initiated auth" */ &(nid_objs[514]),/* "message extensions" */ &(nid_objs[51]),/* "messageDigest" */ @@ -3680,6 +3817,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[491]),/* "organizationalStatus" */ &(nid_objs[18]),/* "organizationalUnitName" */ &(nid_objs[475]),/* "otherMailbox" */ +&(nid_objs[876]),/* "owner" */ &(nid_objs[489]),/* "pagerTelephoneNumber" */ &(nid_objs[782]),/* "password based MAC" */ &(nid_objs[374]),/* "path" */ @@ -3700,6 +3838,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[499]),/* "personalSignature" */ &(nid_objs[487]),/* "personalTitle" */ &(nid_objs[464]),/* "photo" */ +&(nid_objs[863]),/* "physicalDeliveryOfficeName" */ &(nid_objs[437]),/* "pilot" */ &(nid_objs[439]),/* "pilotAttributeSyntax" */ &(nid_objs[438]),/* "pilotAttributeType" */ @@ -3722,8 +3861,12 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[22]),/* "pkcs7-signedData" */ &(nid_objs[151]),/* "pkcs8ShroudedKeyBag" */ &(nid_objs[47]),/* "pkcs9" */ +&(nid_objs[862]),/* "postOfficeBox" */ +&(nid_objs[861]),/* "postalAddress" */ &(nid_objs[661]),/* "postalCode" */ &(nid_objs[683]),/* "ppBasis" */ +&(nid_objs[872]),/* "preferredDeliveryMethod" */ +&(nid_objs[873]),/* "presentationAddress" */ &(nid_objs[406]),/* "prime-field" */ &(nid_objs[409]),/* "prime192v1" */ &(nid_objs[410]),/* "prime192v2" */ @@ -3732,6 +3875,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[413]),/* "prime239v2" */ &(nid_objs[414]),/* "prime239v3" */ &(nid_objs[415]),/* "prime256v1" */ +&(nid_objs[886]),/* "protocolInformation" */ &(nid_objs[510]),/* "pseudonym" */ &(nid_objs[435]),/* "pss" */ &(nid_objs[286]),/* "qcStatements" */ @@ -3749,10 +3893,12 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[122]),/* "rc5-cfb" */ &(nid_objs[121]),/* "rc5-ecb" */ &(nid_objs[123]),/* "rc5-ofb" */ +&(nid_objs[870]),/* "registeredAddress" */ &(nid_objs[460]),/* "rfc822Mailbox" */ &(nid_objs[117]),/* "ripemd160" */ &(nid_objs[119]),/* "ripemd160WithRSA" */ &(nid_objs[400]),/* "role" */ +&(nid_objs[877]),/* "roleOccupant" */ &(nid_objs[448]),/* "room" */ &(nid_objs[463]),/* "roomNumber" */ &(nid_objs[19]),/* "rsa" */ @@ -3766,6 +3912,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[290]),/* "sbgp-ipAddrBlock" */ &(nid_objs[292]),/* "sbgp-routerIdentifier" */ &(nid_objs[159]),/* "sdsiCertificate" */ +&(nid_objs[859]),/* "searchGuide" */ &(nid_objs[704]),/* "secp112r1" */ &(nid_objs[705]),/* "secp112r2" */ &(nid_objs[706]),/* "secp128r1" */ @@ -3800,6 +3947,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[733]),/* "sect571k1" */ &(nid_objs[734]),/* "sect571r1" */ &(nid_objs[635]),/* "secure device signature" */ +&(nid_objs[878]),/* "seeAlso" */ &(nid_objs[777]),/* "seed-cbc" */ &(nid_objs[779]),/* "seed-cfb" */ &(nid_objs[776]),/* "seed-ecb" */ @@ -3942,17 +4090,25 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[660]),/* "streetAddress" */ &(nid_objs[498]),/* "subtreeMaximumQuality" */ &(nid_objs[497]),/* "subtreeMinimumQuality" */ +&(nid_objs[890]),/* "supportedAlgorithms" */ +&(nid_objs[874]),/* "supportedApplicationContext" */ &(nid_objs[100]),/* "surname" */ +&(nid_objs[864]),/* "telephoneNumber" */ +&(nid_objs[866]),/* "teletexTerminalIdentifier" */ +&(nid_objs[865]),/* "telexNumber" */ &(nid_objs[459]),/* "textEncodedORAddress" */ &(nid_objs[293]),/* "textNotice" */ &(nid_objs[106]),/* "title" */ &(nid_objs[682]),/* "tpBasis" */ &(nid_objs[436]),/* "ucl" */ &(nid_objs[ 0]),/* "undefined" */ +&(nid_objs[888]),/* "uniqueMember" */ &(nid_objs[55]),/* "unstructuredAddress" */ &(nid_objs[49]),/* "unstructuredName" */ +&(nid_objs[880]),/* "userCertificate" */ &(nid_objs[465]),/* "userClass" */ &(nid_objs[458]),/* "userId" */ +&(nid_objs[879]),/* "userPassword" */ &(nid_objs[373]),/* "valid" */ &(nid_objs[678]),/* "wap" */ &(nid_objs[679]),/* "wap-wsg" */ @@ -3968,6 +4124,7 @@ static ASN1_OBJECT *ln_objs[NUM_LN]={ &(nid_objs[741]),/* "wap-wsg-idm-ecid-wtls8" */ &(nid_objs[742]),/* "wap-wsg-idm-ecid-wtls9" */ &(nid_objs[804]),/* "whirlpool" */ +&(nid_objs[868]),/* "x121Address" */ &(nid_objs[503]),/* "x500UniqueIdentifier" */ &(nid_objs[158]),/* "x509Certificate" */ &(nid_objs[160]),/* "x509Crl" */ @@ -4009,13 +4166,47 @@ static ASN1_OBJECT *obj_objs[NUM_OBJ]={ &(nid_objs[18]),/* OBJ_organizationalUnitName 2 5 4 11 */ &(nid_objs[106]),/* OBJ_title 2 5 4 12 */ &(nid_objs[107]),/* OBJ_description 2 5 4 13 */ +&(nid_objs[859]),/* OBJ_searchGuide 2 5 4 14 */ +&(nid_objs[860]),/* OBJ_businessCategory 2 5 4 15 */ +&(nid_objs[861]),/* OBJ_postalAddress 2 5 4 16 */ &(nid_objs[661]),/* OBJ_postalCode 2 5 4 17 */ +&(nid_objs[862]),/* OBJ_postOfficeBox 2 5 4 18 */ +&(nid_objs[863]),/* OBJ_physicalDeliveryOfficeName 2 5 4 19 */ +&(nid_objs[864]),/* OBJ_telephoneNumber 2 5 4 20 */ +&(nid_objs[865]),/* OBJ_telexNumber 2 5 4 21 */ +&(nid_objs[866]),/* OBJ_teletexTerminalIdentifier 2 5 4 22 */ +&(nid_objs[867]),/* OBJ_facsimileTelephoneNumber 2 5 4 23 */ +&(nid_objs[868]),/* OBJ_x121Address 2 5 4 24 */ +&(nid_objs[869]),/* OBJ_internationaliSDNNumber 2 5 4 25 */ +&(nid_objs[870]),/* OBJ_registeredAddress 2 5 4 26 */ +&(nid_objs[871]),/* OBJ_destinationIndicator 2 5 4 27 */ +&(nid_objs[872]),/* OBJ_preferredDeliveryMethod 2 5 4 28 */ +&(nid_objs[873]),/* OBJ_presentationAddress 2 5 4 29 */ +&(nid_objs[874]),/* OBJ_supportedApplicationContext 2 5 4 30 */ +&(nid_objs[875]),/* OBJ_member 2 5 4 31 */ +&(nid_objs[876]),/* OBJ_owner 2 5 4 32 */ +&(nid_objs[877]),/* OBJ_roleOccupant 2 5 4 33 */ +&(nid_objs[878]),/* OBJ_seeAlso 2 5 4 34 */ +&(nid_objs[879]),/* OBJ_userPassword 2 5 4 35 */ +&(nid_objs[880]),/* OBJ_userCertificate 2 5 4 36 */ +&(nid_objs[881]),/* OBJ_cACertificate 2 5 4 37 */ +&(nid_objs[882]),/* OBJ_authorityRevocationList 2 5 4 38 */ +&(nid_objs[883]),/* OBJ_certificateRevocationList 2 5 4 39 */ +&(nid_objs[884]),/* OBJ_crossCertificatePair 2 5 4 40 */ &(nid_objs[173]),/* OBJ_name 2 5 4 41 */ &(nid_objs[99]),/* OBJ_givenName 2 5 4 42 */ &(nid_objs[101]),/* OBJ_initials 2 5 4 43 */ &(nid_objs[509]),/* OBJ_generationQualifier 2 5 4 44 */ &(nid_objs[503]),/* OBJ_x500UniqueIdentifier 2 5 4 45 */ &(nid_objs[174]),/* OBJ_dnQualifier 2 5 4 46 */ +&(nid_objs[885]),/* OBJ_enhancedSearchGuide 2 5 4 47 */ +&(nid_objs[886]),/* OBJ_protocolInformation 2 5 4 48 */ +&(nid_objs[887]),/* OBJ_distinguishedName 2 5 4 49 */ +&(nid_objs[888]),/* OBJ_uniqueMember 2 5 4 50 */ +&(nid_objs[889]),/* OBJ_houseIdentifier 2 5 4 51 */ +&(nid_objs[890]),/* OBJ_supportedAlgorithms 2 5 4 52 */ +&(nid_objs[891]),/* OBJ_deltaRevocationList 2 5 4 53 */ +&(nid_objs[892]),/* OBJ_dmdName 2 5 4 54 */ &(nid_objs[510]),/* OBJ_pseudonym 2 5 4 65 */ &(nid_objs[400]),/* OBJ_role 2 5 4 72 */ &(nid_objs[769]),/* OBJ_subject_directory_attributes 2 5 29 9 */ @@ -4049,7 +4240,7 @@ static ASN1_OBJECT *obj_objs[NUM_OBJ]={ &(nid_objs[516]),/* OBJ_set_policy 2 23 42 5 */ &(nid_objs[517]),/* OBJ_set_certExt 2 23 42 7 */ &(nid_objs[518]),/* OBJ_set_brand 2 23 42 8 */ -&(nid_objs[679]),/* OBJ_wap_wsg 2 23 43 13 */ +&(nid_objs[679]),/* OBJ_wap_wsg 2 23 43 1 */ &(nid_objs[382]),/* OBJ_Directory 1 3 6 1 1 */ &(nid_objs[383]),/* OBJ_Management 1 3 6 1 2 */ &(nid_objs[384]),/* OBJ_Experimental 1 3 6 1 3 */ @@ -4235,17 +4426,17 @@ static ASN1_OBJECT *obj_objs[NUM_OBJ]={ &(nid_objs[629]),/* OBJ_setAttr_IssCap_T2 2 23 42 3 3 4 */ &(nid_objs[630]),/* OBJ_setAttr_IssCap_Sig 2 23 42 3 3 5 */ &(nid_objs[642]),/* OBJ_set_brand_Novus 2 23 42 8 6011 */ -&(nid_objs[735]),/* OBJ_wap_wsg_idm_ecid_wtls1 2 23 43 13 4 1 */ -&(nid_objs[736]),/* OBJ_wap_wsg_idm_ecid_wtls3 2 23 43 13 4 3 */ -&(nid_objs[737]),/* OBJ_wap_wsg_idm_ecid_wtls4 2 23 43 13 4 4 */ -&(nid_objs[738]),/* OBJ_wap_wsg_idm_ecid_wtls5 2 23 43 13 4 5 */ -&(nid_objs[739]),/* OBJ_wap_wsg_idm_ecid_wtls6 2 23 43 13 4 6 */ -&(nid_objs[740]),/* OBJ_wap_wsg_idm_ecid_wtls7 2 23 43 13 4 7 */ -&(nid_objs[741]),/* OBJ_wap_wsg_idm_ecid_wtls8 2 23 43 13 4 8 */ -&(nid_objs[742]),/* OBJ_wap_wsg_idm_ecid_wtls9 2 23 43 13 4 9 */ -&(nid_objs[743]),/* OBJ_wap_wsg_idm_ecid_wtls10 2 23 43 13 4 10 */ -&(nid_objs[744]),/* OBJ_wap_wsg_idm_ecid_wtls11 2 23 43 13 4 11 */ -&(nid_objs[745]),/* OBJ_wap_wsg_idm_ecid_wtls12 2 23 43 13 4 12 */ +&(nid_objs[735]),/* OBJ_wap_wsg_idm_ecid_wtls1 2 23 43 1 4 1 */ +&(nid_objs[736]),/* OBJ_wap_wsg_idm_ecid_wtls3 2 23 43 1 4 3 */ +&(nid_objs[737]),/* OBJ_wap_wsg_idm_ecid_wtls4 2 23 43 1 4 4 */ +&(nid_objs[738]),/* OBJ_wap_wsg_idm_ecid_wtls5 2 23 43 1 4 5 */ +&(nid_objs[739]),/* OBJ_wap_wsg_idm_ecid_wtls6 2 23 43 1 4 6 */ +&(nid_objs[740]),/* OBJ_wap_wsg_idm_ecid_wtls7 2 23 43 1 4 7 */ +&(nid_objs[741]),/* OBJ_wap_wsg_idm_ecid_wtls8 2 23 43 1 4 8 */ +&(nid_objs[742]),/* OBJ_wap_wsg_idm_ecid_wtls9 2 23 43 1 4 9 */ +&(nid_objs[743]),/* OBJ_wap_wsg_idm_ecid_wtls10 2 23 43 1 4 10 */ +&(nid_objs[744]),/* OBJ_wap_wsg_idm_ecid_wtls11 2 23 43 1 4 11 */ +&(nid_objs[745]),/* OBJ_wap_wsg_idm_ecid_wtls12 2 23 43 1 4 12 */ &(nid_objs[804]),/* OBJ_whirlpool 1 0 10118 3 0 55 */ &(nid_objs[124]),/* OBJ_rle_compression 1 1 1 1 666 1 */ &(nid_objs[773]),/* OBJ_kisa 1 2 410 200004 */ diff --git a/crypto/openssl/crypto/objects/obj_mac.h b/crypto/openssl/crypto/objects/obj_mac.h index ad5f7cf..282f11a 100644 --- a/crypto/openssl/crypto/objects/obj_mac.h +++ b/crypto/openssl/crypto/objects/obj_mac.h @@ -122,7 +122,7 @@ #define SN_wap_wsg "wap-wsg" #define NID_wap_wsg 679 -#define OBJ_wap_wsg OBJ_wap,13L +#define OBJ_wap_wsg OBJ_wap,1L #define SN_selected_attribute_types "selected-attribute-types" #define LN_selected_attribute_types "Selected Attribute Types" @@ -2049,6 +2049,7 @@ #define NID_stateOrProvinceName 16 #define OBJ_stateOrProvinceName OBJ_X509,8L +#define SN_streetAddress "street" #define LN_streetAddress "streetAddress" #define NID_streetAddress 660 #define OBJ_streetAddress OBJ_X509,9L @@ -2063,6 +2064,7 @@ #define NID_organizationalUnitName 18 #define OBJ_organizationalUnitName OBJ_X509,11L +#define SN_title "title" #define LN_title "title" #define NID_title 106 #define OBJ_title OBJ_X509,12L @@ -2071,10 +2073,114 @@ #define NID_description 107 #define OBJ_description OBJ_X509,13L +#define LN_searchGuide "searchGuide" +#define NID_searchGuide 859 +#define OBJ_searchGuide OBJ_X509,14L + +#define LN_businessCategory "businessCategory" +#define NID_businessCategory 860 +#define OBJ_businessCategory OBJ_X509,15L + +#define LN_postalAddress "postalAddress" +#define NID_postalAddress 861 +#define OBJ_postalAddress OBJ_X509,16L + #define LN_postalCode "postalCode" #define NID_postalCode 661 #define OBJ_postalCode OBJ_X509,17L +#define LN_postOfficeBox "postOfficeBox" +#define NID_postOfficeBox 862 +#define OBJ_postOfficeBox OBJ_X509,18L + +#define LN_physicalDeliveryOfficeName "physicalDeliveryOfficeName" +#define NID_physicalDeliveryOfficeName 863 +#define OBJ_physicalDeliveryOfficeName OBJ_X509,19L + +#define LN_telephoneNumber "telephoneNumber" +#define NID_telephoneNumber 864 +#define OBJ_telephoneNumber OBJ_X509,20L + +#define LN_telexNumber "telexNumber" +#define NID_telexNumber 865 +#define OBJ_telexNumber OBJ_X509,21L + +#define LN_teletexTerminalIdentifier "teletexTerminalIdentifier" +#define NID_teletexTerminalIdentifier 866 +#define OBJ_teletexTerminalIdentifier OBJ_X509,22L + +#define LN_facsimileTelephoneNumber "facsimileTelephoneNumber" +#define NID_facsimileTelephoneNumber 867 +#define OBJ_facsimileTelephoneNumber OBJ_X509,23L + +#define LN_x121Address "x121Address" +#define NID_x121Address 868 +#define OBJ_x121Address OBJ_X509,24L + +#define LN_internationaliSDNNumber "internationaliSDNNumber" +#define NID_internationaliSDNNumber 869 +#define OBJ_internationaliSDNNumber OBJ_X509,25L + +#define LN_registeredAddress "registeredAddress" +#define NID_registeredAddress 870 +#define OBJ_registeredAddress OBJ_X509,26L + +#define LN_destinationIndicator "destinationIndicator" +#define NID_destinationIndicator 871 +#define OBJ_destinationIndicator OBJ_X509,27L + +#define LN_preferredDeliveryMethod "preferredDeliveryMethod" +#define NID_preferredDeliveryMethod 872 +#define OBJ_preferredDeliveryMethod OBJ_X509,28L + +#define LN_presentationAddress "presentationAddress" +#define NID_presentationAddress 873 +#define OBJ_presentationAddress OBJ_X509,29L + +#define LN_supportedApplicationContext "supportedApplicationContext" +#define NID_supportedApplicationContext 874 +#define OBJ_supportedApplicationContext OBJ_X509,30L + +#define SN_member "member" +#define NID_member 875 +#define OBJ_member OBJ_X509,31L + +#define SN_owner "owner" +#define NID_owner 876 +#define OBJ_owner OBJ_X509,32L + +#define LN_roleOccupant "roleOccupant" +#define NID_roleOccupant 877 +#define OBJ_roleOccupant OBJ_X509,33L + +#define SN_seeAlso "seeAlso" +#define NID_seeAlso 878 +#define OBJ_seeAlso OBJ_X509,34L + +#define LN_userPassword "userPassword" +#define NID_userPassword 879 +#define OBJ_userPassword OBJ_X509,35L + +#define LN_userCertificate "userCertificate" +#define NID_userCertificate 880 +#define OBJ_userCertificate OBJ_X509,36L + +#define LN_cACertificate "cACertificate" +#define NID_cACertificate 881 +#define OBJ_cACertificate OBJ_X509,37L + +#define LN_authorityRevocationList "authorityRevocationList" +#define NID_authorityRevocationList 882 +#define OBJ_authorityRevocationList OBJ_X509,38L + +#define LN_certificateRevocationList "certificateRevocationList" +#define NID_certificateRevocationList 883 +#define OBJ_certificateRevocationList OBJ_X509,39L + +#define LN_crossCertificatePair "crossCertificatePair" +#define NID_crossCertificatePair 884 +#define OBJ_crossCertificatePair OBJ_X509,40L + #define SN_name "name" #define LN_name "name" #define NID_name 173 @@ -2085,6 +2191,7 @@ #define NID_givenName 99 #define OBJ_givenName OBJ_X509,42L +#define SN_initials "initials" #define LN_initials "initials" #define NID_initials 101 #define OBJ_initials OBJ_X509,43L @@ -2102,6 +2209,38 @@ #define NID_dnQualifier 174 #define OBJ_dnQualifier OBJ_X509,46L +#define LN_enhancedSearchGuide "enhancedSearchGuide" +#define NID_enhancedSearchGuide 885 +#define OBJ_enhancedSearchGuide OBJ_X509,47L + +#define LN_protocolInformation "protocolInformation" +#define NID_protocolInformation 886 +#define OBJ_protocolInformation OBJ_X509,48L + +#define LN_distinguishedName "distinguishedName" +#define NID_distinguishedName 887 +#define OBJ_distinguishedName OBJ_X509,49L + +#define LN_uniqueMember "uniqueMember" +#define NID_uniqueMember 888 +#define OBJ_uniqueMember OBJ_X509,50L + +#define LN_houseIdentifier "houseIdentifier" +#define NID_houseIdentifier 889 +#define OBJ_houseIdentifier OBJ_X509,51L + +#define LN_supportedAlgorithms "supportedAlgorithms" +#define NID_supportedAlgorithms 890 +#define OBJ_supportedAlgorithms OBJ_X509,52L + +#define LN_deltaRevocationList "deltaRevocationList" +#define NID_deltaRevocationList 891 +#define OBJ_deltaRevocationList OBJ_X509,53L + +#define SN_dmdName "dmdName" +#define NID_dmdName 892 +#define OBJ_dmdName OBJ_X509,54L + #define LN_pseudonym "pseudonym" #define NID_pseudonym 510 #define OBJ_pseudonym OBJ_X509,65L diff --git a/crypto/openssl/crypto/objects/obj_mac.num b/crypto/openssl/crypto/objects/obj_mac.num index e3f56bc..8c50aac 100644 --- a/crypto/openssl/crypto/objects/obj_mac.num +++ b/crypto/openssl/crypto/objects/obj_mac.num @@ -856,3 +856,37 @@ hmac 855 LocalKeySet 856 freshest_crl 857 id_on_permanentIdentifier 858 +searchGuide 859 +businessCategory 860 +postalAddress 861 +postOfficeBox 862 +physicalDeliveryOfficeName 863 +telephoneNumber 864 +telexNumber 865 +teletexTerminalIdentifier 866 +facsimileTelephoneNumber 867 +x121Address 868 +internationaliSDNNumber 869 +registeredAddress 870 +destinationIndicator 871 +preferredDeliveryMethod 872 +presentationAddress 873 +supportedApplicationContext 874 +member 875 +owner 876 +roleOccupant 877 +seeAlso 878 +userPassword 879 +userCertificate 880 +cACertificate 881 +authorityRevocationList 882 +certificateRevocationList 883 +crossCertificatePair 884 +enhancedSearchGuide 885 +protocolInformation 886 +distinguishedName 887 +uniqueMember 888 +houseIdentifier 889 +supportedAlgorithms 890 +deltaRevocationList 891 +dmdName 892 diff --git a/crypto/openssl/crypto/objects/objects.txt b/crypto/openssl/crypto/objects/objects.txt index a6a811b..e61fe60 100644 --- a/crypto/openssl/crypto/objects/objects.txt +++ b/crypto/openssl/crypto/objects/objects.txt @@ -20,7 +20,7 @@ identified-organization 132 : certicom-arc joint-iso-itu-t 23 : international-organizations : International Organizations international-organizations 43 : wap -wap 13 : wap-wsg +wap 1 : wap-wsg joint-iso-itu-t 5 1 5 : selected-attribute-types : Selected Attribute Types @@ -664,18 +664,52 @@ X509 5 : : serialNumber X509 6 : C : countryName X509 7 : L : localityName X509 8 : ST : stateOrProvinceName -X509 9 : : streetAddress +X509 9 : street : streetAddress X509 10 : O : organizationName X509 11 : OU : organizationalUnitName -X509 12 : : title +X509 12 : title : title X509 13 : : description -X509 17 : : postalCode +X509 14 : : searchGuide +X509 15 : : businessCategory +X509 16 : : postalAddress +X509 17 : : postalCode +X509 18 : : postOfficeBox +X509 19 : : physicalDeliveryOfficeName +X509 20 : : telephoneNumber +X509 21 : : telexNumber +X509 22 : : teletexTerminalIdentifier +X509 23 : : facsimileTelephoneNumber +X509 24 : : x121Address +X509 25 : : internationaliSDNNumber +X509 26 : : registeredAddress +X509 27 : : destinationIndicator +X509 28 : : preferredDeliveryMethod +X509 29 : : presentationAddress +X509 30 : : supportedApplicationContext +X509 31 : member : +X509 32 : owner : +X509 33 : : roleOccupant +X509 34 : seeAlso : +X509 35 : : userPassword +X509 36 : : userCertificate +X509 37 : : cACertificate +X509 38 : : authorityRevocationList +X509 39 : : certificateRevocationList +X509 40 : : crossCertificatePair X509 41 : name : name X509 42 : GN : givenName -X509 43 : : initials +X509 43 : initials : initials X509 44 : : generationQualifier X509 45 : : x500UniqueIdentifier X509 46 : dnQualifier : dnQualifier +X509 47 : : enhancedSearchGuide +X509 48 : : protocolInformation +X509 49 : : distinguishedName +X509 50 : : uniqueMember +X509 51 : : houseIdentifier +X509 52 : : supportedAlgorithms +X509 53 : : deltaRevocationList +X509 54 : dmdName : X509 65 : : pseudonym X509 72 : role : role diff --git a/crypto/openssl/crypto/ocsp/ocsp_prn.c b/crypto/openssl/crypto/ocsp/ocsp_prn.c index 3dfb51c..01f81e7 100644 --- a/crypto/openssl/crypto/ocsp/ocsp_prn.c +++ b/crypto/openssl/crypto/ocsp/ocsp_prn.c @@ -266,12 +266,12 @@ int OCSP_RESPONSE_print(BIO *bp, OCSP_RESPONSE* o, unsigned long flags) if (!ASN1_GENERALIZEDTIME_print(bp,single->nextUpdate)) goto err; } - if (!BIO_write(bp,"\n",1)) goto err; + if (BIO_write(bp,"\n",1) <= 0) goto err; if (!X509V3_extensions_print(bp, "Response Single Extensions", single->singleExtensions, flags, 8)) goto err; - if (!BIO_write(bp,"\n",1)) goto err; + if (BIO_write(bp,"\n",1) <= 0) goto err; } if (!X509V3_extensions_print(bp, "Response Extensions", rd->responseExtensions, flags, 4)) diff --git a/crypto/openssl/crypto/opensslv.h b/crypto/openssl/crypto/opensslv.h index c6207f7..3d794d9 100644 --- a/crypto/openssl/crypto/opensslv.h +++ b/crypto/openssl/crypto/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x009080bfL +#define OPENSSL_VERSION_NUMBER 0x009080dfL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 0.9.8k-fips 25 Mar 2009" +#define OPENSSL_VERSION_TEXT "OpenSSL 0.9.8m-fips 25 Feb 2010" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 0.9.8k 25 Mar 2009" +#define OPENSSL_VERSION_TEXT "OpenSSL 0.9.8m 25 Feb 2010" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/crypto/openssl/crypto/pem/pem_seal.c b/crypto/openssl/crypto/pem/pem_seal.c index 4e554e5..59690b5 100644 --- a/crypto/openssl/crypto/pem/pem_seal.c +++ b/crypto/openssl/crypto/pem/pem_seal.c @@ -100,7 +100,7 @@ int PEM_SealInit(PEM_ENCODE_SEAL_CTX *ctx, EVP_CIPHER *type, EVP_MD *md_type, EVP_CIPHER_CTX_init(&ctx->cipher); ret=EVP_SealInit(&ctx->cipher,type,ek,ekl,iv,pubk,npubk); - if (!ret) goto err; + if (ret <= 0) goto err; /* base64 encode the keys */ for (i=0; i<npubk; i++) diff --git a/crypto/openssl/crypto/perlasm/x86_64-xlate.pl b/crypto/openssl/crypto/perlasm/x86_64-xlate.pl index a4af769..fe348b9 100755 --- a/crypto/openssl/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/openssl/crypto/perlasm/x86_64-xlate.pl @@ -189,8 +189,10 @@ my $current_function; if (!$masm) { # Solaris /usr/ccs/bin/as can't handle multiplications # in $self->{label} + use integer; $self->{label} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi; $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg; if (defined($self->{index})) { sprintf "%s(%%%s,%%%s,%d)", diff --git a/crypto/openssl/crypto/pkcs12/p12_attr.c b/crypto/openssl/crypto/pkcs12/p12_attr.c index 68d6c5a..856933d 100644 --- a/crypto/openssl/crypto/pkcs12/p12_attr.c +++ b/crypto/openssl/crypto/pkcs12/p12_attr.c @@ -60,6 +60,12 @@ #include "cryptlib.h" #include <openssl/pkcs12.h> +#ifdef OPENSSL_SYS_NETWARE +/* Rename these functions to avoid name clashes on NetWare OS */ +#define uni2asc OPENSSL_uni2asc +#define asc2uni OPENSSL_asc2uni +#endif + /* Add a local keyid to a safebag */ int PKCS12_add_localkeyid(PKCS12_SAFEBAG *bag, unsigned char *name, diff --git a/crypto/openssl/crypto/pkcs12/p12_key.c b/crypto/openssl/crypto/pkcs12/p12_key.c index 9e57eee..5cfe727 100644 --- a/crypto/openssl/crypto/pkcs12/p12_key.c +++ b/crypto/openssl/crypto/pkcs12/p12_key.c @@ -69,6 +69,12 @@ extern BIO *bio_err; void h__dump (unsigned char *p, int len); #endif +#ifdef OPENSSL_SYS_NETWARE +/* Rename these functions to avoid name clashes on NetWare OS */ +#define uni2asc OPENSSL_uni2asc +#define asc2uni OPENSSL_asc2uni +#endif + /* PKCS12 compatible key/IV generation */ #ifndef min #define min(a,b) ((a) < (b) ? (a) : (b)) diff --git a/crypto/openssl/crypto/pkcs12/p12_utl.c b/crypto/openssl/crypto/pkcs12/p12_utl.c index ca30ac4..2edbf90 100644 --- a/crypto/openssl/crypto/pkcs12/p12_utl.c +++ b/crypto/openssl/crypto/pkcs12/p12_utl.c @@ -60,6 +60,12 @@ #include "cryptlib.h" #include <openssl/pkcs12.h> +#ifdef OPENSSL_SYS_NETWARE +/* Rename these functions to avoid name clashes on NetWare OS */ +#define uni2asc OPENSSL_uni2asc +#define asc2uni OPENSSL_asc2uni +#endif + /* Cheap and nasty Unicode stuff */ unsigned char *asc2uni(const char *asc, int asclen, unsigned char **uni, int *unilen) diff --git a/crypto/openssl/crypto/pkcs12/pkcs12.h b/crypto/openssl/crypto/pkcs12/pkcs12.h index 4bee605..78317fb 100644 --- a/crypto/openssl/crypto/pkcs12/pkcs12.h +++ b/crypto/openssl/crypto/pkcs12/pkcs12.h @@ -232,9 +232,14 @@ int PKCS12_set_mac(PKCS12 *p12, const char *pass, int passlen, const EVP_MD *md_type); int PKCS12_setup_mac(PKCS12 *p12, int iter, unsigned char *salt, int saltlen, const EVP_MD *md_type); +#if defined(NETWARE) || defined(OPENSSL_SYS_NETWARE) +/* Rename these functions to avoid name clashes on NetWare OS */ +unsigned char *OPENSSL_asc2uni(const char *asc, int asclen, unsigned char **uni, int *unilen); +char *OPENSSL_uni2asc(unsigned char *uni, int unilen); +#else unsigned char *asc2uni(const char *asc, int asclen, unsigned char **uni, int *unilen); char *uni2asc(unsigned char *uni, int unilen); - +#endif DECLARE_ASN1_FUNCTIONS(PKCS12) DECLARE_ASN1_FUNCTIONS(PKCS12_MAC_DATA) DECLARE_ASN1_FUNCTIONS(PKCS12_SAFEBAG) diff --git a/crypto/openssl/crypto/pkcs7/pk7_mime.c b/crypto/openssl/crypto/pkcs7/pk7_mime.c index bf19036..7762d64 100644 --- a/crypto/openssl/crypto/pkcs7/pk7_mime.c +++ b/crypto/openssl/crypto/pkcs7/pk7_mime.c @@ -50,10 +50,6 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * */ #include <stdio.h> @@ -61,200 +57,47 @@ #include "cryptlib.h" #include <openssl/rand.h> #include <openssl/x509.h> +#include <openssl/asn1.h> -/* MIME and related routines */ - -/* MIME format structures - * Note that all are translated to lower case apart from - * parameter values. Quotes are stripped off - */ - -typedef struct { -char *param_name; /* Param name e.g. "micalg" */ -char *param_value; /* Param value e.g. "sha1" */ -} MIME_PARAM; - -DECLARE_STACK_OF(MIME_PARAM) -IMPLEMENT_STACK_OF(MIME_PARAM) - -typedef struct { -char *name; /* Name of line e.g. "content-type" */ -char *value; /* Value of line e.g. "text/plain" */ -STACK_OF(MIME_PARAM) *params; /* Zero or more parameters */ -} MIME_HEADER; - -DECLARE_STACK_OF(MIME_HEADER) -IMPLEMENT_STACK_OF(MIME_HEADER) - -static int pkcs7_output_data(BIO *bio, BIO *data, PKCS7 *p7, int flags); -static int B64_write_PKCS7(BIO *bio, PKCS7 *p7); -static PKCS7 *B64_read_PKCS7(BIO *bio); -static char * strip_ends(char *name); -static char * strip_start(char *name); -static char * strip_end(char *name); -static MIME_HEADER *mime_hdr_new(char *name, char *value); -static int mime_hdr_addparam(MIME_HEADER *mhdr, char *name, char *value); -static STACK_OF(MIME_HEADER) *mime_parse_hdr(BIO *bio); -static int mime_hdr_cmp(const MIME_HEADER * const *a, - const MIME_HEADER * const *b); -static int mime_param_cmp(const MIME_PARAM * const *a, - const MIME_PARAM * const *b); -static void mime_param_free(MIME_PARAM *param); -static int mime_bound_check(char *line, int linelen, char *bound, int blen); -static int multi_split(BIO *bio, char *bound, STACK_OF(BIO) **ret); -static int strip_eol(char *linebuf, int *plen); -static MIME_HEADER *mime_hdr_find(STACK_OF(MIME_HEADER) *hdrs, char *name); -static MIME_PARAM *mime_param_find(MIME_HEADER *hdr, char *name); -static void mime_hdr_free(MIME_HEADER *hdr); +/* PKCS#7 wrappers round generalised MIME routines */ -#define MAX_SMLEN 1024 -#define mime_debug(x) /* x */ - -/* Base 64 read and write of PKCS#7 structure */ - -static int B64_write_PKCS7(BIO *bio, PKCS7 *p7) -{ - BIO *b64; - if(!(b64 = BIO_new(BIO_f_base64()))) { - PKCS7err(PKCS7_F_B64_WRITE_PKCS7,ERR_R_MALLOC_FAILURE); - return 0; - } - bio = BIO_push(b64, bio); - i2d_PKCS7_bio(bio, p7); - (void)BIO_flush(bio); - bio = BIO_pop(bio); - BIO_free(b64); - return 1; -} - -static PKCS7 *B64_read_PKCS7(BIO *bio) -{ - BIO *b64; - PKCS7 *p7; - if(!(b64 = BIO_new(BIO_f_base64()))) { - PKCS7err(PKCS7_F_B64_READ_PKCS7,ERR_R_MALLOC_FAILURE); - return 0; - } - bio = BIO_push(b64, bio); - if(!(p7 = d2i_PKCS7_bio(bio, NULL))) - PKCS7err(PKCS7_F_B64_READ_PKCS7,PKCS7_R_DECODE_ERROR); - (void)BIO_flush(bio); - bio = BIO_pop(bio); - BIO_free(b64); - return p7; -} - -/* SMIME sender */ - -int SMIME_write_PKCS7(BIO *bio, PKCS7 *p7, BIO *data, int flags) -{ - char bound[33], c; - int i; - char *mime_prefix, *mime_eol, *msg_type=NULL; - if (flags & PKCS7_NOOLDMIMETYPE) - mime_prefix = "application/pkcs7-"; - else - mime_prefix = "application/x-pkcs7-"; - - if (flags & PKCS7_CRLFEOL) - mime_eol = "\r\n"; - else - mime_eol = "\n"; - if((flags & PKCS7_DETACHED) && data) { - /* We want multipart/signed */ - /* Generate a random boundary */ - RAND_pseudo_bytes((unsigned char *)bound, 32); - for(i = 0; i < 32; i++) { - c = bound[i] & 0xf; - if(c < 10) c += '0'; - else c += 'A' - 10; - bound[i] = c; - } - bound[32] = 0; - BIO_printf(bio, "MIME-Version: 1.0%s", mime_eol); - BIO_printf(bio, "Content-Type: multipart/signed;"); - BIO_printf(bio, " protocol=\"%ssignature\";", mime_prefix); - BIO_printf(bio, " micalg=sha1; boundary=\"----%s\"%s%s", - bound, mime_eol, mime_eol); - BIO_printf(bio, "This is an S/MIME signed message%s%s", - mime_eol, mime_eol); - /* Now write out the first part */ - BIO_printf(bio, "------%s%s", bound, mime_eol); - pkcs7_output_data(bio, data, p7, flags); - BIO_printf(bio, "%s------%s%s", mime_eol, bound, mime_eol); - - /* Headers for signature */ - - BIO_printf(bio, "Content-Type: %ssignature;", mime_prefix); - BIO_printf(bio, " name=\"smime.p7s\"%s", mime_eol); - BIO_printf(bio, "Content-Transfer-Encoding: base64%s", - mime_eol); - BIO_printf(bio, "Content-Disposition: attachment;"); - BIO_printf(bio, " filename=\"smime.p7s\"%s%s", - mime_eol, mime_eol); - B64_write_PKCS7(bio, p7); - BIO_printf(bio,"%s------%s--%s%s", mime_eol, bound, - mime_eol, mime_eol); - return 1; +PKCS7 *SMIME_read_PKCS7(BIO *bio, BIO **bcont) + { + return (PKCS7 *)SMIME_read_ASN1(bio, bcont, ASN1_ITEM_rptr(PKCS7)); } - /* Determine smime-type header */ - - if (PKCS7_type_is_enveloped(p7)) - msg_type = "enveloped-data"; - else if (PKCS7_type_is_signed(p7)) - { - /* If we have any signers it is signed-data othewise - * certs-only. - */ - STACK_OF(PKCS7_SIGNER_INFO) *sinfos; - sinfos = PKCS7_get_signer_info(p7); - if (sk_PKCS7_SIGNER_INFO_num(sinfos) > 0) - msg_type = "signed-data"; - else - msg_type = "certs-only"; - } - /* MIME headers */ - BIO_printf(bio, "MIME-Version: 1.0%s", mime_eol); - BIO_printf(bio, "Content-Disposition: attachment;"); - BIO_printf(bio, " filename=\"smime.p7m\"%s", mime_eol); - BIO_printf(bio, "Content-Type: %smime;", mime_prefix); - if (msg_type) - BIO_printf(bio, " smime-type=%s;", msg_type); - BIO_printf(bio, " name=\"smime.p7m\"%s", mime_eol); - BIO_printf(bio, "Content-Transfer-Encoding: base64%s%s", - mime_eol, mime_eol); - B64_write_PKCS7(bio, p7); - BIO_printf(bio, "%s", mime_eol); - return 1; -} - -/* Handle output of PKCS#7 data */ +/* Callback for int_smime_write_ASN1 */ - -static int pkcs7_output_data(BIO *out, BIO *data, PKCS7 *p7, int flags) +static int pk7_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags, + const ASN1_ITEM *it) { + PKCS7 *p7 = (PKCS7 *)val; BIO *tmpbio, *p7bio; + int r = 0; - if (!(flags & PKCS7_STREAM)) + if (!(flags & SMIME_DETACHED)) { SMIME_crlf_copy(data, out, flags); return 1; } - /* Partial sign operation */ + /* Let PKCS7 code prepend any needed BIOs */ - /* Initialize sign operation */ p7bio = PKCS7_dataInit(p7, out); - /* Copy data across, computing digests etc */ + if (!p7bio) + return 0; + + /* Copy data across, passing through filter BIOs for processing */ SMIME_crlf_copy(data, p7bio, flags); - /* Must be detached */ - PKCS7_set_detached(p7, 1); + /* Finalize structure */ + if (PKCS7_dataFinal(p7, p7bio) <= 0) + goto err; + + r = 1; - /* Finalize signatures */ - PKCS7_dataFinal(p7, p7bio); + err: /* Now remove any digests prepended to the BIO */ @@ -269,454 +112,17 @@ static int pkcs7_output_data(BIO *out, BIO *data, PKCS7 *p7, int flags) } -/* SMIME reader: handle multipart/signed and opaque signing. - * in multipart case the content is placed in a memory BIO - * pointed to by "bcont". In opaque this is set to NULL - */ - -PKCS7 *SMIME_read_PKCS7(BIO *bio, BIO **bcont) -{ - BIO *p7in; - STACK_OF(MIME_HEADER) *headers = NULL; - STACK_OF(BIO) *parts = NULL; - MIME_HEADER *hdr; - MIME_PARAM *prm; - PKCS7 *p7; - int ret; - - if(bcont) *bcont = NULL; - - if (!(headers = mime_parse_hdr(bio))) { - PKCS7err(PKCS7_F_SMIME_READ_PKCS7,PKCS7_R_MIME_PARSE_ERROR); - return NULL; - } - - if(!(hdr = mime_hdr_find(headers, "content-type")) || !hdr->value) { - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - PKCS7err(PKCS7_F_SMIME_READ_PKCS7, PKCS7_R_NO_CONTENT_TYPE); - return NULL; - } - - /* Handle multipart/signed */ - - if(!strcmp(hdr->value, "multipart/signed")) { - /* Split into two parts */ - prm = mime_param_find(hdr, "boundary"); - if(!prm || !prm->param_value) { - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - PKCS7err(PKCS7_F_SMIME_READ_PKCS7, PKCS7_R_NO_MULTIPART_BOUNDARY); - return NULL; - } - ret = multi_split(bio, prm->param_value, &parts); - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - if(!ret || (sk_BIO_num(parts) != 2) ) { - PKCS7err(PKCS7_F_SMIME_READ_PKCS7, PKCS7_R_NO_MULTIPART_BODY_FAILURE); - sk_BIO_pop_free(parts, BIO_vfree); - return NULL; - } - - /* Parse the signature piece */ - p7in = sk_BIO_value(parts, 1); - - if (!(headers = mime_parse_hdr(p7in))) { - PKCS7err(PKCS7_F_SMIME_READ_PKCS7,PKCS7_R_MIME_SIG_PARSE_ERROR); - sk_BIO_pop_free(parts, BIO_vfree); - return NULL; - } - - /* Get content type */ - - if(!(hdr = mime_hdr_find(headers, "content-type")) || - !hdr->value) { - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - PKCS7err(PKCS7_F_SMIME_READ_PKCS7, PKCS7_R_NO_SIG_CONTENT_TYPE); - return NULL; - } - - if(strcmp(hdr->value, "application/x-pkcs7-signature") && - strcmp(hdr->value, "application/pkcs7-signature")) { - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - PKCS7err(PKCS7_F_SMIME_READ_PKCS7,PKCS7_R_SIG_INVALID_MIME_TYPE); - ERR_add_error_data(2, "type: ", hdr->value); - sk_BIO_pop_free(parts, BIO_vfree); - return NULL; - } - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - /* Read in PKCS#7 */ - if(!(p7 = B64_read_PKCS7(p7in))) { - PKCS7err(PKCS7_F_SMIME_READ_PKCS7,PKCS7_R_PKCS7_SIG_PARSE_ERROR); - sk_BIO_pop_free(parts, BIO_vfree); - return NULL; - } - - if(bcont) { - *bcont = sk_BIO_value(parts, 0); - BIO_free(p7in); - sk_BIO_free(parts); - } else sk_BIO_pop_free(parts, BIO_vfree); - return p7; - } - - /* OK, if not multipart/signed try opaque signature */ - - if (strcmp (hdr->value, "application/x-pkcs7-mime") && - strcmp (hdr->value, "application/pkcs7-mime")) { - PKCS7err(PKCS7_F_SMIME_READ_PKCS7,PKCS7_R_INVALID_MIME_TYPE); - ERR_add_error_data(2, "type: ", hdr->value); - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - return NULL; - } - - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); - - if(!(p7 = B64_read_PKCS7(bio))) { - PKCS7err(PKCS7_F_SMIME_READ_PKCS7, PKCS7_R_PKCS7_PARSE_ERROR); - return NULL; - } - return p7; - -} - -/* Split a multipart/XXX message body into component parts: result is - * canonical parts in a STACK of bios - */ - -static int multi_split(BIO *bio, char *bound, STACK_OF(BIO) **ret) -{ - char linebuf[MAX_SMLEN]; - int len, blen; - int eol = 0, next_eol = 0; - BIO *bpart = NULL; - STACK_OF(BIO) *parts; - char state, part, first; - - blen = strlen(bound); - part = 0; - state = 0; - first = 1; - parts = sk_BIO_new_null(); - *ret = parts; - while ((len = BIO_gets(bio, linebuf, MAX_SMLEN)) > 0) { - state = mime_bound_check(linebuf, len, bound, blen); - if(state == 1) { - first = 1; - part++; - } else if(state == 2) { - sk_BIO_push(parts, bpart); - return 1; - } else if(part) { - /* Strip CR+LF from linebuf */ - next_eol = strip_eol(linebuf, &len); - if(first) { - first = 0; - if(bpart) sk_BIO_push(parts, bpart); - bpart = BIO_new(BIO_s_mem()); - BIO_set_mem_eof_return(bpart, 0); - } else if (eol) - BIO_write(bpart, "\r\n", 2); - eol = next_eol; - if (len) - BIO_write(bpart, linebuf, len); - } - } - return 0; -} - -/* This is the big one: parse MIME header lines up to message body */ - -#define MIME_INVALID 0 -#define MIME_START 1 -#define MIME_TYPE 2 -#define MIME_NAME 3 -#define MIME_VALUE 4 -#define MIME_QUOTE 5 -#define MIME_COMMENT 6 - - -static STACK_OF(MIME_HEADER) *mime_parse_hdr(BIO *bio) -{ - char *p, *q, c; - char *ntmp; - char linebuf[MAX_SMLEN]; - MIME_HEADER *mhdr = NULL; - STACK_OF(MIME_HEADER) *headers; - int len, state, save_state = 0; - - headers = sk_MIME_HEADER_new(mime_hdr_cmp); - while ((len = BIO_gets(bio, linebuf, MAX_SMLEN)) > 0) { - /* If whitespace at line start then continuation line */ - if(mhdr && isspace((unsigned char)linebuf[0])) state = MIME_NAME; - else state = MIME_START; - ntmp = NULL; - /* Go through all characters */ - for(p = linebuf, q = linebuf; (c = *p) && (c!='\r') && (c!='\n'); p++) { - - /* State machine to handle MIME headers - * if this looks horrible that's because it *is* - */ - - switch(state) { - case MIME_START: - if(c == ':') { - state = MIME_TYPE; - *p = 0; - ntmp = strip_ends(q); - q = p + 1; - } - break; - - case MIME_TYPE: - if(c == ';') { - mime_debug("Found End Value\n"); - *p = 0; - mhdr = mime_hdr_new(ntmp, strip_ends(q)); - sk_MIME_HEADER_push(headers, mhdr); - ntmp = NULL; - q = p + 1; - state = MIME_NAME; - } else if(c == '(') { - save_state = state; - state = MIME_COMMENT; - } - break; - - case MIME_COMMENT: - if(c == ')') { - state = save_state; - } - break; - - case MIME_NAME: - if(c == '=') { - state = MIME_VALUE; - *p = 0; - ntmp = strip_ends(q); - q = p + 1; - } - break ; - - case MIME_VALUE: - if(c == ';') { - state = MIME_NAME; - *p = 0; - mime_hdr_addparam(mhdr, ntmp, strip_ends(q)); - ntmp = NULL; - q = p + 1; - } else if (c == '"') { - mime_debug("Found Quote\n"); - state = MIME_QUOTE; - } else if(c == '(') { - save_state = state; - state = MIME_COMMENT; - } - break; - - case MIME_QUOTE: - if(c == '"') { - mime_debug("Found Match Quote\n"); - state = MIME_VALUE; - } - break; - } - } - - if(state == MIME_TYPE) { - mhdr = mime_hdr_new(ntmp, strip_ends(q)); - sk_MIME_HEADER_push(headers, mhdr); - } else if(state == MIME_VALUE) - mime_hdr_addparam(mhdr, ntmp, strip_ends(q)); - if(p == linebuf) break; /* Blank line means end of headers */ -} - -return headers; - -} - -static char *strip_ends(char *name) -{ - return strip_end(strip_start(name)); -} - -/* Strip a parameter of whitespace from start of param */ -static char *strip_start(char *name) -{ - char *p, c; - /* Look for first non white space or quote */ - for(p = name; (c = *p) ;p++) { - if(c == '"') { - /* Next char is start of string if non null */ - if(p[1]) return p + 1; - /* Else null string */ - return NULL; - } - if(!isspace((unsigned char)c)) return p; - } - return NULL; -} - -/* As above but strip from end of string : maybe should handle brackets? */ -static char *strip_end(char *name) -{ - char *p, c; - if(!name) return NULL; - /* Look for first non white space or quote */ - for(p = name + strlen(name) - 1; p >= name ;p--) { - c = *p; - if(c == '"') { - if(p - 1 == name) return NULL; - *p = 0; - return name; - } - if(isspace((unsigned char)c)) *p = 0; - else return name; - } - return NULL; -} - -static MIME_HEADER *mime_hdr_new(char *name, char *value) -{ - MIME_HEADER *mhdr; - char *tmpname, *tmpval, *p; - int c; - if(name) { - if(!(tmpname = BUF_strdup(name))) return NULL; - for(p = tmpname ; *p; p++) { - c = *p; - if(isupper(c)) { - c = tolower(c); - *p = c; - } - } - } else tmpname = NULL; - if(value) { - if(!(tmpval = BUF_strdup(value))) return NULL; - for(p = tmpval ; *p; p++) { - c = *p; - if(isupper(c)) { - c = tolower(c); - *p = c; - } - } - } else tmpval = NULL; - mhdr = (MIME_HEADER *) OPENSSL_malloc(sizeof(MIME_HEADER)); - if(!mhdr) return NULL; - mhdr->name = tmpname; - mhdr->value = tmpval; - if(!(mhdr->params = sk_MIME_PARAM_new(mime_param_cmp))) return NULL; - return mhdr; -} - -static int mime_hdr_addparam(MIME_HEADER *mhdr, char *name, char *value) -{ - char *tmpname, *tmpval, *p; - int c; - MIME_PARAM *mparam; - if(name) { - tmpname = BUF_strdup(name); - if(!tmpname) return 0; - for(p = tmpname ; *p; p++) { - c = *p; - if(isupper(c)) { - c = tolower(c); - *p = c; - } - } - } else tmpname = NULL; - if(value) { - tmpval = BUF_strdup(value); - if(!tmpval) return 0; - } else tmpval = NULL; - /* Parameter values are case sensitive so leave as is */ - mparam = (MIME_PARAM *) OPENSSL_malloc(sizeof(MIME_PARAM)); - if(!mparam) return 0; - mparam->param_name = tmpname; - mparam->param_value = tmpval; - sk_MIME_PARAM_push(mhdr->params, mparam); - return 1; -} - -static int mime_hdr_cmp(const MIME_HEADER * const *a, - const MIME_HEADER * const *b) -{ - return(strcmp((*a)->name, (*b)->name)); -} - -static int mime_param_cmp(const MIME_PARAM * const *a, - const MIME_PARAM * const *b) -{ - return(strcmp((*a)->param_name, (*b)->param_name)); -} - -/* Find a header with a given name (if possible) */ - -static MIME_HEADER *mime_hdr_find(STACK_OF(MIME_HEADER) *hdrs, char *name) -{ - MIME_HEADER htmp; - int idx; - htmp.name = name; - idx = sk_MIME_HEADER_find(hdrs, &htmp); - if(idx < 0) return NULL; - return sk_MIME_HEADER_value(hdrs, idx); -} - -static MIME_PARAM *mime_param_find(MIME_HEADER *hdr, char *name) -{ - MIME_PARAM param; - int idx; - param.param_name = name; - idx = sk_MIME_PARAM_find(hdr->params, ¶m); - if(idx < 0) return NULL; - return sk_MIME_PARAM_value(hdr->params, idx); -} - -static void mime_hdr_free(MIME_HEADER *hdr) -{ - if(hdr->name) OPENSSL_free(hdr->name); - if(hdr->value) OPENSSL_free(hdr->value); - if(hdr->params) sk_MIME_PARAM_pop_free(hdr->params, mime_param_free); - OPENSSL_free(hdr); -} - -static void mime_param_free(MIME_PARAM *param) -{ - if(param->param_name) OPENSSL_free(param->param_name); - if(param->param_value) OPENSSL_free(param->param_value); - OPENSSL_free(param); -} - -/* Check for a multipart boundary. Returns: - * 0 : no boundary - * 1 : part boundary - * 2 : final boundary - */ -static int mime_bound_check(char *line, int linelen, char *bound, int blen) -{ - if(linelen == -1) linelen = strlen(line); - if(blen == -1) blen = strlen(bound); - /* Quickly eliminate if line length too short */ - if(blen + 2 > linelen) return 0; - /* Check for part boundary */ - if(!strncmp(line, "--", 2) && !strncmp(line + 2, bound, blen)) { - if(!strncmp(line + blen + 2, "--", 2)) return 2; - else return 1; - } - return 0; -} - -static int strip_eol(char *linebuf, int *plen) +int SMIME_write_PKCS7(BIO *bio, PKCS7 *p7, BIO *data, int flags) { - int len = *plen; - char *p, c; - int is_eol = 0; - p = linebuf + len - 1; - for (p = linebuf + len - 1; len > 0; len--, p--) - { - c = *p; - if (c == '\n') - is_eol = 1; - else if (c != '\r') - break; - } - *plen = len; - return is_eol; + STACK_OF(X509_ALGOR) *mdalgs; + int ctype_nid = OBJ_obj2nid(p7->type); + if (ctype_nid == NID_pkcs7_signed) + mdalgs = p7->d.sign->md_algs; + else + mdalgs = NULL; + + return int_smime_write_ASN1(bio, (ASN1_VALUE *)p7, data, flags, + ctype_nid, NID_undef, mdalgs, + pk7_output_data, + ASN1_ITEM_rptr(PKCS7)); } diff --git a/crypto/openssl/crypto/ppccpuid.pl b/crypto/openssl/crypto/ppccpuid.pl deleted file mode 100755 index fe44ff0..0000000 --- a/crypto/openssl/crypto/ppccpuid.pl +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env perl - -$flavour = shift; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -if ($flavour=~/64/) { - $CMPLI="cmpldi"; - $SHRLI="srdi"; - $SIGNX="extsw"; -} else { - $CMPLI="cmplwi"; - $SHRLI="srwi"; - $SIGNX="mr"; -} - -$code=<<___; -.machine "any" -.text - -.globl .OPENSSL_cpuid_setup -.align 4 -.OPENSSL_cpuid_setup: - blr - -.globl .OPENSSL_wipe_cpu -.align 4 -.OPENSSL_wipe_cpu: - xor r0,r0,r0 - mr r3,r1 - xor r4,r4,r4 - xor r5,r5,r5 - xor r6,r6,r6 - xor r7,r7,r7 - xor r8,r8,r8 - xor r9,r9,r9 - xor r10,r10,r10 - xor r11,r11,r11 - xor r12,r12,r12 - blr - -.globl .OPENSSL_atomic_add -.align 4 -.OPENSSL_atomic_add: -Loop: lwarx r5,0,r3 - add r0,r4,r5 - stwcx. r0,0,r3 - bne- Loop - $SIGNX r3,r0 - blr - -.globl .OPENSSL_rdtsc -.align 4 -.OPENSSL_rdtsc: - mftb r3 - mftbu r4 - blr - -.globl .OPENSSL_cleanse -.align 4 -.OPENSSL_cleanse: - $CMPLI r4,7 - li r0,0 - bge Lot -Little: mtctr r4 - stb r0,0(r3) - addi r3,r3,1 - bdnz- \$-8 - blr -Lot: andi. r5,r3,3 - beq Laligned - stb r0,0(r3) - subi r4,r4,1 - addi r3,r3,1 - b Lot -Laligned: - $SHRLI r5,r4,2 - mtctr r5 - stw r0,0(r3) - addi r3,r3,4 - bdnz- \$-8 - andi. r4,r4,3 - bne Little - blr -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/crypto/openssl/crypto/rand/rand_win.c b/crypto/openssl/crypto/rand/rand_win.c index 00dbe42..5198351 100644 --- a/crypto/openssl/crypto/rand/rand_win.c +++ b/crypto/openssl/crypto/rand/rand_win.c @@ -463,7 +463,7 @@ int RAND_poll(void) PROCESSENTRY32 p; THREADENTRY32 t; MODULEENTRY32 m; - DWORD stoptime = 0; + DWORD starttime = 0; snap = (CREATETOOLHELP32SNAPSHOT) GetProcAddress(kernel, "CreateToolhelp32Snapshot"); @@ -494,12 +494,29 @@ int RAND_poll(void) * each entry. Consider each field a source of 1 byte * of entropy. */ + ZeroMemory(&hlist, sizeof(HEAPLIST32)); hlist.dwSize = sizeof(HEAPLIST32); - if (good) stoptime = GetTickCount() + MAXDELAY; + if (good) starttime = GetTickCount(); +#ifdef _MSC_VER if (heaplist_first(handle, &hlist)) + { + /* + following discussion on dev ML, exception on WinCE (or other Win + platform) is theoretically of unknown origin; prevent infinite + loop here when this theoretical case occurs; otherwise cope with + the expected (MSDN documented) exception-throwing behaviour of + Heap32Next() on WinCE. + + based on patch in original message by Tanguy Fautré (2009/03/02) + Subject: RAND_poll() and CreateToolhelp32Snapshot() stability + */ + int ex_cnt_limit = 42; do { RAND_add(&hlist, hlist.dwSize, 3); + __try + { + ZeroMemory(&hentry, sizeof(HEAPENTRY32)); hentry.dwSize = sizeof(HEAPENTRY32); if (heap_first(&hentry, hlist.th32ProcessID, @@ -510,10 +527,42 @@ int RAND_poll(void) RAND_add(&hentry, hentry.dwSize, 5); while (heap_next(&hentry) + && (!good || (GetTickCount()-starttime)<MAXDELAY) && --entrycnt > 0); } - } while (heaplist_next(handle, - &hlist) && GetTickCount() < stoptime); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + /* ignore access violations when walking the heap list */ + ex_cnt_limit--; + } + } while (heaplist_next(handle, &hlist) + && (!good || (GetTickCount()-starttime)<MAXDELAY) + && ex_cnt_limit > 0); + } + +#else + if (heaplist_first(handle, &hlist)) + { + do + { + RAND_add(&hlist, hlist.dwSize, 3); + hentry.dwSize = sizeof(HEAPENTRY32); + if (heap_first(&hentry, + hlist.th32ProcessID, + hlist.th32HeapID)) + { + int entrycnt = 80; + do + RAND_add(&hentry, + hentry.dwSize, 5); + while (heap_next(&hentry) + && --entrycnt > 0); + } + } while (heaplist_next(handle, &hlist) + && (!good || (GetTickCount()-starttime)<MAXDELAY)); + } +#endif /* process walking */ /* PROCESSENTRY32 contains 9 fields that will change @@ -522,11 +571,11 @@ int RAND_poll(void) */ p.dwSize = sizeof(PROCESSENTRY32); - if (good) stoptime = GetTickCount() + MAXDELAY; + if (good) starttime = GetTickCount(); if (process_first(handle, &p)) do RAND_add(&p, p.dwSize, 9); - while (process_next(handle, &p) && GetTickCount() < stoptime); + while (process_next(handle, &p) && (!good || (GetTickCount()-starttime)<MAXDELAY)); /* thread walking */ /* THREADENTRY32 contains 6 fields that will change @@ -534,11 +583,11 @@ int RAND_poll(void) * 1 byte of entropy. */ t.dwSize = sizeof(THREADENTRY32); - if (good) stoptime = GetTickCount() + MAXDELAY; + if (good) starttime = GetTickCount(); if (thread_first(handle, &t)) do RAND_add(&t, t.dwSize, 6); - while (thread_next(handle, &t) && GetTickCount() < stoptime); + while (thread_next(handle, &t) && (!good || (GetTickCount()-starttime)<MAXDELAY)); /* module walking */ /* MODULEENTRY32 contains 9 fields that will change @@ -546,12 +595,12 @@ int RAND_poll(void) * 1 byte of entropy. */ m.dwSize = sizeof(MODULEENTRY32); - if (good) stoptime = GetTickCount() + MAXDELAY; + if (good) starttime = GetTickCount(); if (module_first(handle, &m)) do RAND_add(&m, m.dwSize, 9); while (module_next(handle, &m) - && (GetTickCount() < stoptime)); + && (!good || (GetTickCount()-starttime)<MAXDELAY)); if (close_snap) close_snap(handle); else diff --git a/crypto/openssl/crypto/rand/randfile.c b/crypto/openssl/crypto/rand/randfile.c index d108353..84276d7 100644 --- a/crypto/openssl/crypto/rand/randfile.c +++ b/crypto/openssl/crypto/rand/randfile.c @@ -117,6 +117,15 @@ int RAND_load_file(const char *file, long bytes) if (file == NULL) return(0); +#ifdef PURIFY + /* struct stat can have padding and unused fields that may not be + * initialized in the call to stat(). We need to clear the entire + * structure before calling RAND_add() to avoid complaints from + * applications such as Valgrind. + */ + memset(&sb, 0, sizeof(sb)); +#endif + if (stat(file,&sb) < 0) return(0); RAND_add(&sb,sizeof(sb),0.0); if (bytes == 0) return(ret); @@ -127,8 +136,8 @@ int RAND_load_file(const char *file, long bytes) in=fopen(file,"rb"); #endif if (in == NULL) goto err; -#if defined(S_IFBLK) && defined(S_IFCHR) - if (sb.st_mode & (S_IFBLK | S_IFCHR)) { +#if defined(S_ISBLK) && defined(S_ISCHR) + if (S_ISBLK(sb.st_mode) || S_ISCHR(sb.st_mode)) { /* this file is a device. we don't want read an infinite number * of bytes from a random device, nor do we want to use buffered * I/O because we will waste system entropy. @@ -174,8 +183,8 @@ int RAND_write_file(const char *file) i=stat(file,&sb); if (i != -1) { -#if defined(S_IFBLK) && defined(S_IFCHR) - if (sb.st_mode & (S_IFBLK | S_IFCHR)) { +#if defined(S_ISBLK) && defined(S_ISCHR) + if (S_ISBLK(sb.st_mode) || S_ISCHR(sb.st_mode)) { /* this file is a device. we don't write back to it. * we "succeed" on the assumption this is some sort * of random device. Otherwise attempting to write to diff --git a/crypto/openssl/crypto/rsa/rsa.h b/crypto/openssl/crypto/rsa/rsa.h index b7903d3..5bb932a 100644 --- a/crypto/openssl/crypto/rsa/rsa.h +++ b/crypto/openssl/crypto/rsa/rsa.h @@ -55,7 +55,6 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ -/* $FreeBSD$ */ #ifndef HEADER_RSA_H #define HEADER_RSA_H diff --git a/crypto/openssl/crypto/rsa/rsa_eay.c b/crypto/openssl/crypto/rsa/rsa_eay.c index 7d3b260..0ac6418 100644 --- a/crypto/openssl/crypto/rsa/rsa_eay.c +++ b/crypto/openssl/crypto/rsa/rsa_eay.c @@ -108,7 +108,6 @@ * Hudson (tjh@cryptsoft.com). * */ -/* $FreeBSD$ */ #include <stdio.h> #include "cryptlib.h" diff --git a/crypto/openssl/crypto/rsa/rsa_eng.c b/crypto/openssl/crypto/rsa/rsa_eng.c index 383a704..2f21ddb 100644 --- a/crypto/openssl/crypto/rsa/rsa_eng.c +++ b/crypto/openssl/crypto/rsa/rsa_eng.c @@ -207,8 +207,17 @@ RSA *RSA_new_method(ENGINE *engine) ret->blinding=NULL; ret->mt_blinding=NULL; ret->bignum_data=NULL; - ret->flags=ret->meth->flags; - CRYPTO_new_ex_data(CRYPTO_EX_INDEX_RSA, ret, &ret->ex_data); + ret->flags=ret->meth->flags & ~RSA_FLAG_NON_FIPS_ALLOW; + if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_RSA, ret, &ret->ex_data)) + { +#ifndef OPENSSL_NO_ENGINE + if (ret->engine) + ENGINE_finish(ret->engine); +#endif + OPENSSL_free(ret); + return(NULL); + } + if ((ret->meth->init != NULL) && !ret->meth->init(ret)) { #ifndef OPENSSL_NO_ENGINE diff --git a/crypto/openssl/crypto/rsa/rsa_oaep.c b/crypto/openssl/crypto/rsa/rsa_oaep.c index 4d30c9d..546ae5f 100644 --- a/crypto/openssl/crypto/rsa/rsa_oaep.c +++ b/crypto/openssl/crypto/rsa/rsa_oaep.c @@ -52,13 +52,6 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, return 0; } - dbmask = OPENSSL_malloc(emlen - SHA_DIGEST_LENGTH); - if (dbmask == NULL) - { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); - return 0; - } - to[0] = 0; seed = to + 1; db = to + SHA_DIGEST_LENGTH + 1; @@ -76,6 +69,13 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, 20); #endif + dbmask = OPENSSL_malloc(emlen - SHA_DIGEST_LENGTH); + if (dbmask == NULL) + { + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); + return 0; + } + MGF1(dbmask, emlen - SHA_DIGEST_LENGTH, seed, SHA_DIGEST_LENGTH); for (i = 0; i < emlen - SHA_DIGEST_LENGTH; i++) db[i] ^= dbmask[i]; diff --git a/crypto/openssl/crypto/rsa/rsa_pss.c b/crypto/openssl/crypto/rsa/rsa_pss.c index 9b993ac..2bda491 100644 --- a/crypto/openssl/crypto/rsa/rsa_pss.c +++ b/crypto/openssl/crypto/rsa/rsa_pss.c @@ -217,7 +217,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, ERR_R_MALLOC_FAILURE); goto err; } - if (!RAND_bytes(salt, sLen)) + if (RAND_bytes(salt, sLen) <= 0) goto err; } maskedDBLen = emLen - hLen - 1; diff --git a/crypto/openssl/crypto/rsa/rsa_sign.c b/crypto/openssl/crypto/rsa/rsa_sign.c index 5488c06..743dfd7 100644 --- a/crypto/openssl/crypto/rsa/rsa_sign.c +++ b/crypto/openssl/crypto/rsa/rsa_sign.c @@ -137,7 +137,12 @@ int RSA_sign(int type, const unsigned char *m, unsigned int m_len, i2d_X509_SIG(&sig,&p); s=tmps; } +#ifdef OPENSSL_FIPS + /* Bypass algorithm blocking: this is allowed if we get this far */ + i=rsa->meth->rsa_priv_enc(i,s,sigret,rsa,RSA_PKCS1_PADDING); +#else i=RSA_private_encrypt(i,s,sigret,rsa,RSA_PKCS1_PADDING); +#endif if (i <= 0) ret=0; else @@ -190,8 +195,11 @@ int RSA_verify(int dtype, const unsigned char *m, unsigned int m_len, RSAerr(RSA_F_RSA_VERIFY, RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE); return 0; } -#endif + /* Bypass algorithm blocking: this is allowed */ + i=rsa->meth->rsa_pub_dec((int)siglen,sigbuf,s,rsa,RSA_PKCS1_PADDING); +#else i=RSA_public_decrypt((int)siglen,sigbuf,s,rsa,RSA_PKCS1_PADDING); +#endif if (i <= 0) goto err; diff --git a/crypto/openssl/crypto/s390xcpuid.S b/crypto/openssl/crypto/s390xcpuid.S deleted file mode 100644 index 8500133..0000000 --- a/crypto/openssl/crypto/s390xcpuid.S +++ /dev/null @@ -1,90 +0,0 @@ -.text - -.globl OPENSSL_cpuid_setup -.type OPENSSL_cpuid_setup,@function -.align 16 -OPENSSL_cpuid_setup: - br %r14 # reserved for future -.size OPENSSL_cpuid_setup,.-OPENSSL_cpuid_setup - -.globl OPENSSL_s390x_facilities -.type OPENSSL_s390x_facilities,@function -.align 16 -OPENSSL_s390x_facilities: - lghi %r0,0 - .long 0xb2b0f010 # stfle 16(%r15) - lg %r2,16(%r15) - br %r14 -.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities - -.globl OPENSSL_rdtsc -.type OPENSSL_rdtsc,@function -.align 16 -OPENSSL_rdtsc: - stck 16(%r15) - lg %r2,16(%r15) - br %r14 -.size OPENSSL_rdtsc,.-OPENSSL_rdtsc - -.globl OPENSSL_atomic_add -.type OPENSSL_atomic_add,@function -.align 16 -OPENSSL_atomic_add: - l %r1,0(%r2) -.Lspin: lr %r0,%r1 - ar %r0,%r3 - cs %r1,%r0,0(%r2) - brc 4,.Lspin - lgfr %r2,%r0 # OpenSSL expects the new value - br %r14 -.size OPENSSL_atomic_add,.-OPENSSL_atomic_add - -.globl OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,@function -.align 16 -OPENSSL_wipe_cpu: - xgr %r0,%r0 - xgr %r1,%r1 - lgr %r2,%r15 - xgr %r3,%r3 - xgr %r4,%r4 - lzdr %f0 - lzdr %f1 - lzdr %f2 - lzdr %f3 - lzdr %f4 - lzdr %f5 - lzdr %f6 - lzdr %f7 - br %r14 -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu - -.globl OPENSSL_cleanse -.type OPENSSL_cleanse,@function -.align 16 -OPENSSL_cleanse: - lghi %r4,15 - lghi %r0,0 - clgr %r3,%r4 - jh .Lot -.Little: - stc %r0,0(%r2) - la %r2,1(%r2) - brctg %r3,.Little - br %r14 -.align 4 -.Lot: tmll %r2,7 - jz .Laligned - stc %r0,0(%r2) - la %r2,1(%r2) - brctg %r3,.Lot -.Laligned: - srlg %r4,%r3,3 -.Loop: stg %r0,0(%r2) - la %r2,8(%r2) - brctg %r4,.Loop - lghi %r4,7 - ngr %r3,%r4 - jnz .Little - br %r14 -.size OPENSSL_cleanse,.-OPENSSL_cleanse diff --git a/crypto/openssl/crypto/sha/sha512.c b/crypto/openssl/crypto/sha/sha512.c index f5ed468..9e91bca 100644 --- a/crypto/openssl/crypto/sha/sha512.c +++ b/crypto/openssl/crypto/sha/sha512.c @@ -544,4 +544,13 @@ static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num #endif /* SHA512_ASM */ +#else /* OPENSSL_NO_SHA512 */ + +/* Sensitive compilers ("Compaq C V6.4-005 on OpenVMS VAX V7.3", for + * example) dislike a statement-free file, complaining: + * "%CC-W-EMPTYFILE, Source file does not contain any declarations." + */ + +int sha512_dummy(); + #endif /* OPENSSL_NO_SHA512 */ diff --git a/crypto/openssl/crypto/sparcv9cap.c b/crypto/openssl/crypto/sparcv9cap.c deleted file mode 100644 index 5f31d20..0000000 --- a/crypto/openssl/crypto/sparcv9cap.c +++ /dev/null @@ -1,154 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <openssl/bn.h> - -#define SPARCV9_TICK_PRIVILEGED (1<<0) -#define SPARCV9_PREFER_FPU (1<<1) -#define SPARCV9_VIS1 (1<<2) -#define SPARCV9_VIS2 (1<<3) /* reserved */ -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ -static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; - -int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) - { - int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); - int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); - - if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == - (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) - return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); - else - return bn_mul_mont_int(rp,ap,bp,np,n0,num); - } - -unsigned long OPENSSL_rdtsc(void) - { - unsigned long _sparcv9_rdtick(void); - - if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) -#if defined(__sun) && defined(__SVR4) - return gethrtime(); -#else - return 0; -#endif - else - return _sparcv9_rdtick(); - } - -#if defined(__sun) && defined(__SVR4) - -#include <dlfcn.h> -#include <libdevinfo.h> -#include <sys/systeminfo.h> - -typedef di_node_t (*di_init_t)(const char *,uint_t); -typedef void (*di_fini_t)(di_node_t); -typedef char * (*di_node_name_t)(di_node_t); -typedef int (*di_walk_node_t)(di_node_t,uint_t,di_node_name_t,int (*)(di_node_t,di_node_name_t)); - -#define DLLINK(h,name) (name=(name##_t)dlsym((h),#name)) - -static int walk_nodename(di_node_t node, di_node_name_t di_node_name) - { - char *name = (*di_node_name)(node); - - /* This is expected to catch all UltraSPARC flavors prior T1 */ - if (!strcmp (name,"SUNW,UltraSPARC") || - !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ - { - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; - - /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ - if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0') - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; - - return DI_WALK_TERMINATE; - } - /* This is expected to catch remaining UltraSPARCs, such as T1 */ - else if (!strncmp(name,"SUNW,UltraSPARC",15)) - { - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; - - return DI_WALK_TERMINATE; - } - - return DI_WALK_CONTINUE; - } - -void OPENSSL_cpuid_setup(void) - { - void *h; - char *e,si[256]; - static int trigger=0; - - if (trigger) return; - trigger=1; - - if ((e=getenv("OPENSSL_sparcv9cap"))) - { - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); - return; - } - - if (sysinfo(SI_MACHINE,si,sizeof(si))>0) - { - if (strcmp(si,"sun4v")) - /* FPU is preferred for all CPUs, but US-T1/2 */ - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; - } - - if (sysinfo(SI_ISALIST,si,sizeof(si))>0) - { - if (strstr(si,"+vis")) - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; - if (strstr(si,"+vis2")) - { - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; - return; - } - } - - if ((h = dlopen("libdevinfo.so.1",RTLD_LAZY))) do - { - di_init_t di_init; - di_fini_t di_fini; - di_walk_node_t di_walk_node; - di_node_name_t di_node_name; - di_node_t root_node; - - if (!DLLINK(h,di_init)) break; - if (!DLLINK(h,di_fini)) break; - if (!DLLINK(h,di_walk_node)) break; - if (!DLLINK(h,di_node_name)) break; - - if ((root_node = (*di_init)("/",DINFOSUBTREE))!=DI_NODE_NIL) - { - (*di_walk_node)(root_node,DI_WALK_SIBFIRST, - di_node_name,walk_nodename); - (*di_fini)(root_node); - } - } while(0); - - if (h) dlclose(h); - } - -#else - -void OPENSSL_cpuid_setup(void) - { - char *e; - - if ((e=getenv("OPENSSL_sparcv9cap"))) - { - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); - return; - } - - /* For now we assume that the rest supports UltraSPARC-I* only */ - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; - } - -#endif diff --git a/crypto/openssl/crypto/stack/safestack.h b/crypto/openssl/crypto/stack/safestack.h index 40b1790..78cc485 100644 --- a/crypto/openssl/crypto/stack/safestack.h +++ b/crypto/openssl/crypto/stack/safestack.h @@ -986,50 +986,6 @@ STACK_OF(type) \ #define sk_MIME_HEADER_sort(st) SKM_sk_sort(MIME_HEADER, (st)) #define sk_MIME_HEADER_is_sorted(st) SKM_sk_is_sorted(MIME_HEADER, (st)) -#define sk_MIME_HEADER_new(st) SKM_sk_new(MIME_HEADER, (st)) -#define sk_MIME_HEADER_new_null() SKM_sk_new_null(MIME_HEADER) -#define sk_MIME_HEADER_free(st) SKM_sk_free(MIME_HEADER, (st)) -#define sk_MIME_HEADER_num(st) SKM_sk_num(MIME_HEADER, (st)) -#define sk_MIME_HEADER_value(st, i) SKM_sk_value(MIME_HEADER, (st), (i)) -#define sk_MIME_HEADER_set(st, i, val) SKM_sk_set(MIME_HEADER, (st), (i), (val)) -#define sk_MIME_HEADER_zero(st) SKM_sk_zero(MIME_HEADER, (st)) -#define sk_MIME_HEADER_push(st, val) SKM_sk_push(MIME_HEADER, (st), (val)) -#define sk_MIME_HEADER_unshift(st, val) SKM_sk_unshift(MIME_HEADER, (st), (val)) -#define sk_MIME_HEADER_find(st, val) SKM_sk_find(MIME_HEADER, (st), (val)) -#define sk_MIME_HEADER_find_ex(st, val) SKM_sk_find_ex(MIME_HEADER, (st), (val)) -#define sk_MIME_HEADER_delete(st, i) SKM_sk_delete(MIME_HEADER, (st), (i)) -#define sk_MIME_HEADER_delete_ptr(st, ptr) SKM_sk_delete_ptr(MIME_HEADER, (st), (ptr)) -#define sk_MIME_HEADER_insert(st, val, i) SKM_sk_insert(MIME_HEADER, (st), (val), (i)) -#define sk_MIME_HEADER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MIME_HEADER, (st), (cmp)) -#define sk_MIME_HEADER_dup(st) SKM_sk_dup(MIME_HEADER, st) -#define sk_MIME_HEADER_pop_free(st, free_func) SKM_sk_pop_free(MIME_HEADER, (st), (free_func)) -#define sk_MIME_HEADER_shift(st) SKM_sk_shift(MIME_HEADER, (st)) -#define sk_MIME_HEADER_pop(st) SKM_sk_pop(MIME_HEADER, (st)) -#define sk_MIME_HEADER_sort(st) SKM_sk_sort(MIME_HEADER, (st)) -#define sk_MIME_HEADER_is_sorted(st) SKM_sk_is_sorted(MIME_HEADER, (st)) - -#define sk_MIME_PARAM_new(st) SKM_sk_new(MIME_PARAM, (st)) -#define sk_MIME_PARAM_new_null() SKM_sk_new_null(MIME_PARAM) -#define sk_MIME_PARAM_free(st) SKM_sk_free(MIME_PARAM, (st)) -#define sk_MIME_PARAM_num(st) SKM_sk_num(MIME_PARAM, (st)) -#define sk_MIME_PARAM_value(st, i) SKM_sk_value(MIME_PARAM, (st), (i)) -#define sk_MIME_PARAM_set(st, i, val) SKM_sk_set(MIME_PARAM, (st), (i), (val)) -#define sk_MIME_PARAM_zero(st) SKM_sk_zero(MIME_PARAM, (st)) -#define sk_MIME_PARAM_push(st, val) SKM_sk_push(MIME_PARAM, (st), (val)) -#define sk_MIME_PARAM_unshift(st, val) SKM_sk_unshift(MIME_PARAM, (st), (val)) -#define sk_MIME_PARAM_find(st, val) SKM_sk_find(MIME_PARAM, (st), (val)) -#define sk_MIME_PARAM_find_ex(st, val) SKM_sk_find_ex(MIME_PARAM, (st), (val)) -#define sk_MIME_PARAM_delete(st, i) SKM_sk_delete(MIME_PARAM, (st), (i)) -#define sk_MIME_PARAM_delete_ptr(st, ptr) SKM_sk_delete_ptr(MIME_PARAM, (st), (ptr)) -#define sk_MIME_PARAM_insert(st, val, i) SKM_sk_insert(MIME_PARAM, (st), (val), (i)) -#define sk_MIME_PARAM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MIME_PARAM, (st), (cmp)) -#define sk_MIME_PARAM_dup(st) SKM_sk_dup(MIME_PARAM, st) -#define sk_MIME_PARAM_pop_free(st, free_func) SKM_sk_pop_free(MIME_PARAM, (st), (free_func)) -#define sk_MIME_PARAM_shift(st) SKM_sk_shift(MIME_PARAM, (st)) -#define sk_MIME_PARAM_pop(st) SKM_sk_pop(MIME_PARAM, (st)) -#define sk_MIME_PARAM_sort(st) SKM_sk_sort(MIME_PARAM, (st)) -#define sk_MIME_PARAM_is_sorted(st) SKM_sk_is_sorted(MIME_PARAM, (st)) - #define sk_MIME_PARAM_new(st) SKM_sk_new(MIME_PARAM, (st)) #define sk_MIME_PARAM_new_null() SKM_sk_new_null(MIME_PARAM) #define sk_MIME_PARAM_free(st) SKM_sk_free(MIME_PARAM, (st)) diff --git a/crypto/openssl/crypto/symhacks.h b/crypto/openssl/crypto/symhacks.h index 8728e61..0114093 100644 --- a/crypto/openssl/crypto/symhacks.h +++ b/crypto/openssl/crypto/symhacks.h @@ -60,6 +60,11 @@ /* Hacks to solve the problem with linkers incapable of handling very long symbol names. In the case of VMS, the limit is 31 characters on VMS for VAX. */ +/* Note that this affects util/libeay.num and util/ssleay.num... you may + change those manually, but that's not recommended, as those files are + controlled centrally and updated on Unix, and the central definition + may disagree with yours, which in turn may come with shareable library + incompatibilities. */ #ifdef OPENSSL_SYS_VMS /* Hack a long name in crypto/cryptlib.c */ @@ -137,6 +142,8 @@ #define X509_policy_node_get0_qualifiers X509_pcy_node_get0_qualifiers #undef X509_STORE_CTX_get_explicit_policy #define X509_STORE_CTX_get_explicit_policy X509_STORE_CTX_get_expl_policy +#undef X509_STORE_CTX_get0_current_issuer +#define X509_STORE_CTX_get0_current_issuer X509_STORE_CTX_get0_cur_issuer /* Hack some long CRYPTO names */ #undef CRYPTO_set_dynlock_destroy_callback @@ -174,6 +181,15 @@ #undef SSL_COMP_get_compression_methods #define SSL_COMP_get_compression_methods SSL_COMP_get_compress_methods +#undef ssl_add_clienthello_renegotiate_ext +#define ssl_add_clienthello_renegotiate_ext ssl_add_clienthello_reneg_ext +#undef ssl_add_serverhello_renegotiate_ext +#define ssl_add_serverhello_renegotiate_ext ssl_add_serverhello_reneg_ext +#undef ssl_parse_clienthello_renegotiate_ext +#define ssl_parse_clienthello_renegotiate_ext ssl_parse_clienthello_reneg_ext +#undef ssl_parse_serverhello_renegotiate_ext +#define ssl_parse_serverhello_renegotiate_ext ssl_parse_serverhello_reneg_ext + /* Hack some long ENGINE names */ #undef ENGINE_get_default_BN_mod_exp_crt #define ENGINE_get_default_BN_mod_exp_crt ENGINE_get_def_BN_mod_exp_crt @@ -365,6 +381,10 @@ #undef cms_SignerIdentifier_get0_signer_id #define cms_SignerIdentifier_get0_signer_id cms_SignerId_get0_signer_id +/* Hack some long DTLS1 names */ +#undef dtls1_retransmit_buffered_messages +#define dtls1_retransmit_buffered_messages dtls1_retransmit_buffered_msgs + #endif /* defined OPENSSL_SYS_VMS */ diff --git a/crypto/openssl/crypto/ui/ui_openssl.c b/crypto/openssl/crypto/ui/ui_openssl.c index ef930bf..06270f0 100644 --- a/crypto/openssl/crypto/ui/ui_openssl.c +++ b/crypto/openssl/crypto/ui/ui_openssl.c @@ -297,7 +297,7 @@ static int is_a_tty; /* Declare static functions */ #if !defined(OPENSSL_SYS_WIN16) && !defined(OPENSSL_SYS_WINCE) -static void read_till_nl(FILE *); +static int read_till_nl(FILE *); static void recsig(int); static void pushsig(void); static void popsig(void); @@ -390,14 +390,16 @@ static int read_string(UI *ui, UI_STRING *uis) #if !defined(OPENSSL_SYS_WIN16) && !defined(OPENSSL_SYS_WINCE) /* Internal functions to read a string without echoing */ -static void read_till_nl(FILE *in) +static int read_till_nl(FILE *in) { #define SIZE 4 char buf[SIZE+1]; do { - fgets(buf,SIZE,in); + if (!fgets(buf,SIZE,in)) + return 0; } while (strchr(buf,'\n') == NULL); + return 1; } static volatile sig_atomic_t intr_signal; @@ -445,7 +447,8 @@ static int read_string_inner(UI *ui, UI_STRING *uis, int echo, int strip_nl) *p='\0'; } else - read_till_nl(tty_in); + if (!read_till_nl(tty_in)) + goto error; if (UI_set_result(ui, uis, result) >= 0) ok=1; diff --git a/crypto/openssl/crypto/x509/by_dir.c b/crypto/openssl/crypto/x509/by_dir.c index 341e0ba..b3acd80 100644 --- a/crypto/openssl/crypto/x509/by_dir.c +++ b/crypto/openssl/crypto/x509/by_dir.c @@ -360,11 +360,11 @@ static int get_cert_by_subject(X509_LOOKUP *xl, int type, X509_NAME *name, /* we have added it to the cache so now pull * it out again */ - CRYPTO_r_lock(CRYPTO_LOCK_X509_STORE); + CRYPTO_w_lock(CRYPTO_LOCK_X509_STORE); j = sk_X509_OBJECT_find(xl->store_ctx->objs,&stmp); if(j != -1) tmp=sk_X509_OBJECT_value(xl->store_ctx->objs,j); else tmp = NULL; - CRYPTO_r_unlock(CRYPTO_LOCK_X509_STORE); + CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE); if (tmp != NULL) { @@ -383,4 +383,3 @@ finish: if (b != NULL) BUF_MEM_free(b); return(ok); } - diff --git a/crypto/openssl/crypto/x509/x509.h b/crypto/openssl/crypto/x509/x509.h index e71b525..8958e34 100644 --- a/crypto/openssl/crypto/x509/x509.h +++ b/crypto/openssl/crypto/x509/x509.h @@ -116,6 +116,7 @@ extern "C" { /* Under Win32 these are defined in wincrypt.h */ #undef X509_NAME #undef X509_CERT_PAIR +#undef X509_EXTENSIONS #endif #define X509_FILETYPE_PEM 1 diff --git a/crypto/openssl/crypto/x509/x509_lu.c b/crypto/openssl/crypto/x509/x509_lu.c index cd2cfb6..b486171 100644 --- a/crypto/openssl/crypto/x509/x509_lu.c +++ b/crypto/openssl/crypto/x509/x509_lu.c @@ -198,7 +198,13 @@ X509_STORE *X509_STORE_new(void) ret->cert_crl = 0; ret->cleanup = 0; - CRYPTO_new_ex_data(CRYPTO_EX_INDEX_X509_STORE, ret, &ret->ex_data); + if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_X509_STORE, ret, &ret->ex_data)) + { + sk_X509_OBJECT_free(ret->objs); + OPENSSL_free(ret); + return NULL; + } + ret->references=1; return ret; } @@ -286,7 +292,9 @@ int X509_STORE_get_by_subject(X509_STORE_CTX *vs, int type, X509_NAME *name, X509_OBJECT stmp,*tmp; int i,j; + CRYPTO_w_lock(CRYPTO_LOCK_X509_STORE); tmp=X509_OBJECT_retrieve_by_subject(ctx->objs,type,name); + CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE); if (tmp == NULL) { @@ -340,7 +348,6 @@ int X509_STORE_add_cert(X509_STORE *ctx, X509 *x) X509_OBJECT_up_ref_count(obj); - if (X509_OBJECT_retrieve_match(ctx->objs, obj)) { X509_OBJECT_free_contents(obj); @@ -446,15 +453,15 @@ int X509_OBJECT_idx_by_subject(STACK_OF(X509_OBJECT) *h, int type, X509_OBJECT *X509_OBJECT_retrieve_by_subject(STACK_OF(X509_OBJECT) *h, int type, X509_NAME *name) -{ + { int idx; idx = X509_OBJECT_idx_by_subject(h, type, name); if (idx==-1) return NULL; return sk_X509_OBJECT_value(h, idx); -} + } X509_OBJECT *X509_OBJECT_retrieve_match(STACK_OF(X509_OBJECT) *h, X509_OBJECT *x) -{ + { int idx, i; X509_OBJECT *obj; idx = sk_X509_OBJECT_find(h, x); @@ -469,13 +476,13 @@ X509_OBJECT *X509_OBJECT_retrieve_match(STACK_OF(X509_OBJECT) *h, X509_OBJECT *x return obj; } return NULL; -} + } /* Try to get issuer certificate from store. Due to limitations * of the API this can only retrieve a single certificate matching * a given subject name. However it will fill the cache with all - * matching certificates, so we can examine the cache for all + * matching certificates, so we can examine the cache for all * matches. * * Return values are: @@ -483,13 +490,11 @@ X509_OBJECT *X509_OBJECT_retrieve_match(STACK_OF(X509_OBJECT) *h, X509_OBJECT *x * 0 certificate not found. * -1 some other error. */ - - int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x) -{ + { X509_NAME *xn; X509_OBJECT obj, *pobj; - int i, ok, idx; + int i, ok, idx, ret; xn=X509_get_issuer_name(x); ok=X509_STORE_get_by_subject(ctx,X509_LU_X509,xn,&obj); if (ok != X509_LU_X509) @@ -515,27 +520,34 @@ int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x) return 1; } X509_OBJECT_free_contents(&obj); - /* Else find index of first matching cert */ - idx = X509_OBJECT_idx_by_subject(ctx->ctx->objs, X509_LU_X509, xn); - /* This shouldn't normally happen since we already have one match */ - if (idx == -1) return 0; - /* Look through all matching certificates for a suitable issuer */ - for (i = idx; i < sk_X509_OBJECT_num(ctx->ctx->objs); i++) + /* Else find index of first cert accepted by 'check_issued' */ + ret = 0; + CRYPTO_w_lock(CRYPTO_LOCK_X509_STORE); + idx = X509_OBJECT_idx_by_subject(ctx->ctx->objs, X509_LU_X509, xn); + if (idx != -1) /* should be true as we've had at least one match */ { - pobj = sk_X509_OBJECT_value(ctx->ctx->objs, i); - /* See if we've ran out of matches */ - if (pobj->type != X509_LU_X509) return 0; - if (X509_NAME_cmp(xn, X509_get_subject_name(pobj->data.x509))) return 0; - if (ctx->check_issued(ctx, x, pobj->data.x509)) + /* Look through all matching certs for suitable issuer */ + for (i = idx; i < sk_X509_OBJECT_num(ctx->ctx->objs); i++) { - *issuer = pobj->data.x509; - X509_OBJECT_up_ref_count(pobj); - return 1; + pobj = sk_X509_OBJECT_value(ctx->ctx->objs, i); + /* See if we've run past the matches */ + if (pobj->type != X509_LU_X509) + break; + if (X509_NAME_cmp(xn, X509_get_subject_name(pobj->data.x509))) + break; + if (ctx->check_issued(ctx, x, pobj->data.x509)) + { + *issuer = pobj->data.x509; + X509_OBJECT_up_ref_count(pobj); + ret = 1; + break; + } } } - return 0; -} + CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE); + return ret; + } int X509_STORE_set_flags(X509_STORE *ctx, unsigned long flags) { diff --git a/crypto/openssl/crypto/x509/x509_vfy.c b/crypto/openssl/crypto/x509/x509_vfy.c index 336c40d..b85456e 100644 --- a/crypto/openssl/crypto/x509/x509_vfy.c +++ b/crypto/openssl/crypto/x509/x509_vfy.c @@ -986,7 +986,12 @@ static int internal_verify(X509_STORE_CTX *ctx) while (n >= 0) { ctx->error_depth=n; - if (!xs->valid) + + /* Skip signature check for self signed certificates unless + * explicitly asked for. It doesn't add any security and + * just wastes time. + */ + if (!xs->valid && (xs != xi || (ctx->param->flags & X509_V_FLAG_CHECK_SS_SIGNATURE))) { if ((pkey=X509_get_pubkey(xi)) == NULL) { @@ -996,13 +1001,6 @@ static int internal_verify(X509_STORE_CTX *ctx) if (!ok) goto end; } else if (X509_verify(xs,pkey) <= 0) - /* XXX For the final trusted self-signed cert, - * this is a waste of time. That check should - * optional so that e.g. 'openssl x509' can be - * used to detect invalid self-signatures, but - * we don't verify again and again in SSL - * handshakes and the like once the cert has - * been declared trusted. */ { ctx->error=X509_V_ERR_CERT_SIGNATURE_FAILURE; ctx->current_cert=xs; diff --git a/crypto/openssl/crypto/x509/x509_vfy.h b/crypto/openssl/crypto/x509/x509_vfy.h index 76c76e1..86ae35f 100644 --- a/crypto/openssl/crypto/x509/x509_vfy.h +++ b/crypto/openssl/crypto/x509/x509_vfy.h @@ -363,6 +363,9 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); /* Notify callback that policy is OK */ #define X509_V_FLAG_NOTIFY_POLICY 0x800 +/* Check selfsigned CA signature */ +#define X509_V_FLAG_CHECK_SS_SIGNATURE 0x4000 + #define X509_VP_FLAG_DEFAULT 0x1 #define X509_VP_FLAG_OVERWRITE 0x2 #define X509_VP_FLAG_RESET_FLAGS 0x4 diff --git a/crypto/openssl/crypto/x509/x509_vpm.c b/crypto/openssl/crypto/x509/x509_vpm.c index 2b06718..01c5541 100644 --- a/crypto/openssl/crypto/x509/x509_vpm.c +++ b/crypto/openssl/crypto/x509/x509_vpm.c @@ -198,8 +198,12 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest, int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to, const X509_VERIFY_PARAM *from) { + unsigned long save_flags = to->inh_flags; + int ret; to->inh_flags |= X509_VP_FLAG_DEFAULT; - return X509_VERIFY_PARAM_inherit(to, from); + ret = X509_VERIFY_PARAM_inherit(to, from); + to->inh_flags = save_flags; + return ret; } int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name) diff --git a/crypto/openssl/crypto/x509v3/pcy_tree.c b/crypto/openssl/crypto/x509v3/pcy_tree.c index 6c87a7f..89f84bf 100644 --- a/crypto/openssl/crypto/x509v3/pcy_tree.c +++ b/crypto/openssl/crypto/x509v3/pcy_tree.c @@ -160,7 +160,7 @@ static int tree_init(X509_POLICY_TREE **ptree, STACK_OF(X509) *certs, tree->auth_policies = NULL; tree->user_policies = NULL; - if (!tree) + if (!tree->levels) { OPENSSL_free(tree); return 0; diff --git a/crypto/openssl/crypto/x509v3/v3_alt.c b/crypto/openssl/crypto/x509v3/v3_alt.c index 58b2952..69244e4 100644 --- a/crypto/openssl/crypto/x509v3/v3_alt.c +++ b/crypto/openssl/crypto/x509v3/v3_alt.c @@ -360,6 +360,7 @@ static int copy_email(X509V3_CTX *ctx, GENERAL_NAMES *gens, int move_p) if (move_p) { X509_NAME_delete_entry(nm, i); + X509_NAME_ENTRY_free(ne); i--; } if(!email || !(gen = GENERAL_NAME_new())) { @@ -577,6 +578,8 @@ static int do_dirname(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx) if (!ret) X509_NAME_free(nm); gen->d.dirn = nm; + + X509V3_section_free(ctx, sk); return ret; } diff --git a/crypto/openssl/crypto/x509v3/v3_ocsp.c b/crypto/openssl/crypto/x509v3/v3_ocsp.c index e426ea9..5c19cf4 100644 --- a/crypto/openssl/crypto/x509v3/v3_ocsp.c +++ b/crypto/openssl/crypto/x509v3/v3_ocsp.c @@ -153,21 +153,21 @@ static int i2r_ocsp_crlid(X509V3_EXT_METHOD *method, void *in, BIO *bp, int ind) OCSP_CRLID *a = in; if (a->crlUrl) { - if (!BIO_printf(bp, "%*scrlUrl: ", ind, "")) goto err; + if (BIO_printf(bp, "%*scrlUrl: ", ind, "") <= 0) goto err; if (!ASN1_STRING_print(bp, (ASN1_STRING*)a->crlUrl)) goto err; - if (!BIO_write(bp, "\n", 1)) goto err; + if (BIO_write(bp, "\n", 1) <= 0) goto err; } if (a->crlNum) { - if (!BIO_printf(bp, "%*scrlNum: ", ind, "")) goto err; - if (!i2a_ASN1_INTEGER(bp, a->crlNum)) goto err; - if (!BIO_write(bp, "\n", 1)) goto err; + if (BIO_printf(bp, "%*scrlNum: ", ind, "") <= 0) goto err; + if (i2a_ASN1_INTEGER(bp, a->crlNum) <= 0) goto err; + if (BIO_write(bp, "\n", 1) <= 0) goto err; } if (a->crlTime) { - if (!BIO_printf(bp, "%*scrlTime: ", ind, "")) goto err; + if (BIO_printf(bp, "%*scrlTime: ", ind, "") <= 0) goto err; if (!ASN1_GENERALIZEDTIME_print(bp, a->crlTime)) goto err; - if (!BIO_write(bp, "\n", 1)) goto err; + if (BIO_write(bp, "\n", 1) <= 0) goto err; } return 1; err: @@ -176,7 +176,7 @@ static int i2r_ocsp_crlid(X509V3_EXT_METHOD *method, void *in, BIO *bp, int ind) static int i2r_ocsp_acutoff(X509V3_EXT_METHOD *method, void *cutoff, BIO *bp, int ind) { - if (!BIO_printf(bp, "%*s", ind, "")) return 0; + if (BIO_printf(bp, "%*s", ind, "") <= 0) return 0; if(!ASN1_GENERALIZEDTIME_print(bp, cutoff)) return 0; return 1; } @@ -184,8 +184,8 @@ static int i2r_ocsp_acutoff(X509V3_EXT_METHOD *method, void *cutoff, BIO *bp, in static int i2r_object(X509V3_EXT_METHOD *method, void *oid, BIO *bp, int ind) { - if (!BIO_printf(bp, "%*s", ind, "")) return 0; - if(!i2a_ASN1_OBJECT(bp, oid)) return 0; + if (BIO_printf(bp, "%*s", ind, "") <= 0) return 0; + if(i2a_ASN1_OBJECT(bp, oid) <= 0) return 0; return 1; } |