diff options
author | simon <simon@FreeBSD.org> | 2006-07-29 19:10:21 +0000 |
---|---|---|
committer | simon <simon@FreeBSD.org> | 2006-07-29 19:10:21 +0000 |
commit | fb3c70eda88d3175627edc6a3316b4508b3d29c5 (patch) | |
tree | 213a0c4d5ba3869f66ecf970819532048fed4a9d /crypto/openssl/crypto/bn | |
parent | 3c8d7d9993705e30bc69e55cd19d8a298e582292 (diff) | |
download | FreeBSD-src-fb3c70eda88d3175627edc6a3316b4508b3d29c5.zip FreeBSD-src-fb3c70eda88d3175627edc6a3316b4508b3d29c5.tar.gz |
Vendor import of OpenSSL 0.9.8b
Diffstat (limited to 'crypto/openssl/crypto/bn')
38 files changed, 7944 insertions, 886 deletions
diff --git a/crypto/openssl/crypto/bn/Makefile b/crypto/openssl/crypto/bn/Makefile index 76ced9f..5c3e08f 100644 --- a/crypto/openssl/crypto/bn/Makefile +++ b/crypto/openssl/crypto/bn/Makefile @@ -1,5 +1,5 @@ # -# SSLeay/crypto/bn/Makefile +# OpenSSL/crypto/bn/Makefile # DIR= bn @@ -8,11 +8,6 @@ CC= cc CPP= $(CC) -E INCLUDES= -I.. -I$(TOP) -I../../include CFLAG=-g -INSTALL_PREFIX= -OPENSSLDIR= /usr/local/ssl -INSTALLTOP=/usr/local/ssl -MAKEDEPPROG= makedepend -MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG) MAKEFILE= Makefile AR= ar r @@ -22,6 +17,7 @@ BN_ASM= bn_asm.o CFLAGS= $(INCLUDES) $(CFLAG) ASFLAGS= $(INCLUDES) $(ASFLAG) +AFLAGS= $(ASFLAGS) GENERAL=Makefile TEST=bntest.c exptest.c @@ -31,12 +27,14 @@ LIB=$(TOP)/libcrypto.a LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \ bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \ - bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c + bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ + bn_depr.c bn_const.c LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \ bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \ - bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o + bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \ + bn_depr.o bn_const.o SRC= $(LIBSRC) @@ -64,63 +62,52 @@ lib: $(LIBOBJ) $(RANLIB) $(LIB) || echo Never mind. 
@touch lib -# elf -asm/bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > bn86-elf.s) - -asm/co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > co86-elf.s) - +# ELF +bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > ../$@) +co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > ../$@) +# COFF +bn86-cof.s: asm/bn-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) bn-586.pl coff $(CFLAGS) > ../$@) +co86-cof.s: asm/co-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) co-586.pl coff $(CFLAGS) > ../$@) # a.out -asm/bn86-out.o: asm/bn86unix.cpp - $(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o - -asm/co86-out.o: asm/co86unix.cpp - $(CPP) -DOUT asm/co86unix.cpp | as -o asm/co86-out.o - -# bsdi -asm/bn86bsdi.o: asm/bn86unix.cpp - $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o asm/bn86bsdi.o - -asm/co86bsdi.o: asm/co86unix.cpp - $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o asm/co86bsdi.o - -asm/bn86unix.cpp: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl cpp >bn86unix.cpp ) - -asm/co86unix.cpp: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl cpp >co86unix.cpp ) - -asm/sparcv8.o: asm/sparcv8.S - -asm/sparcv8plus.o: asm/sparcv8plus.S - -# Old GNU assembler doesn't understand V9 instructions, so we -# hire /usr/ccs/bin/as to do the job. Note that option is called -# *-gcc27, but even gcc 2>=8 users may experience similar problem -# if they didn't bother to upgrade GNU assembler. Such users should -# not choose this option, but be adviced to *remove* GNU assembler -# or upgrade it. 
-asm/sparcv8plus-gcc27.o: asm/sparcv8plus.S - $(CC) $(ASFLAGS) -E asm/sparcv8plus.S | \ - /usr/ccs/bin/as -xarch=v8plus - -o asm/sparcv8plus-gcc27.o - - -asm/ia64.o: asm/ia64.S - -# Some compiler drivers (most notably HP-UX and Intel C++) don't -# understand .S extension:-( I wish I could pipe output from cc -E, -# but it's too compiler driver/ABI dependent to cover with a single -# rule... <appro@fy.chalmers.se> -asm/ia64-cpp.o: asm/ia64.S - $(CC) $(ASFLAGS) -E asm/ia64.S > /tmp/ia64.$$$$.s && \ - $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \ - rm -f /tmp/ia64.$$$$.s - -asm/x86_64-gcc.o: asm/x86_64-gcc.c - -asm/pa-risc2W.o: asm/pa-risc2W.s - /usr/ccs/bin/as -o asm/pa-risc2W.o asm/pa-risc2W.s +bn86-out.s: asm/bn-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) bn-586.pl a.out $(CFLAGS) > ../$@) +co86-out.s: asm/co-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) co-586.pl a.out $(CFLAGS) > ../$@) + +sparcv8.o: asm/sparcv8.S + $(CC) $(CFLAGS) -c asm/sparcv8.S +sparcv8plus.o: asm/sparcv8plus.S + $(CC) $(CFLAGS) -c asm/sparcv8plus.S + +bn-mips3.o: asm/mips3.s + @if [ "$(CC)" = "gcc" ]; then \ + ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \ + as -$$ABI -O -o $@ asm/mips3.s; \ + else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi + +x86_64-gcc.o: asm/x86_64-gcc.c + $(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c + +bn-ia64.s: asm/ia64.S + $(CC) $(CFLAGS) -E asm/ia64.S > $@ + +# GNU assembler fails to compile PA-RISC2 modules, insist on calling +# vendor assembler... +pa-risc2W.o: asm/pa-risc2W.s + /usr/ccs/bin/as -o pa-risc2W.o asm/pa-risc2W.s +pa-risc2.o: asm/pa-risc2.s + /usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s + +# ppc - AIX, Linux, MacOS X... 
+linux_ppc32.s: asm/ppc.pl; $(PERL) $< $@ +linux_ppc64.s: asm/ppc.pl; $(PERL) $< $@ +aix_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@ +aix_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@ +osx_ppc32.s: asm/ppc.pl; $(PERL) $< $@ files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -131,7 +118,8 @@ links: @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) install: - @for i in $(EXHEADER) ; \ + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ do \ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ @@ -154,6 +142,7 @@ lint: lint -DLINT $(INCLUDES) $(SRC)>fluff depend: + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) dclean: @@ -161,7 +150,7 @@ dclean: mv -f Makefile.new $(MAKEFILE) clean: - rm -f asm/co86unix.cpp asm/bn86unix.cpp asm/*-elf.* *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s + rm -f *.s *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff # DO NOT DELETE THIS LINE -- make depend depends on it. 
@@ -169,101 +158,131 @@ bn_add.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_add.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_add.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_add.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_add.o: ../cryptlib.h bn_add.c bn_lcl.h +bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_add.c bn_lcl.h bn_asm.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_asm.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_asm.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_asm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_asm.o: ../cryptlib.h bn_asm.c bn_lcl.h +bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_asm.c bn_lcl.h bn_blind.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_blind.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_blind.o: ../cryptlib.h bn_blind.c bn_lcl.h +bn_blind.o: 
../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_blind.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_blind.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_blind.c bn_lcl.h +bn_const.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h +bn_const.o: ../../include/openssl/ossl_typ.h bn.h bn_const.c bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_ctx.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_ctx.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_ctx.o: ../cryptlib.h bn_ctx.c bn_lcl.h +bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_ctx.c bn_lcl.h +bn_depr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h +bn_depr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h +bn_depr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h +bn_depr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h +bn_depr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_depr.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h +bn_depr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +bn_depr.o: ../cryptlib.h bn_depr.c bn_lcl.h bn_div.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_div.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_div.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_div.o: ../../include/openssl/opensslv.h 
../../include/openssl/safestack.h -bn_div.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_div.o: ../cryptlib.h bn_div.c bn_lcl.h +bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_div.c bn_lcl.h bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h bn_err.o: ../../include/openssl/err.h ../../include/openssl/lhash.h bn_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h -bn_err.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_err.o: ../../include/openssl/symhacks.h bn_err.c +bn_err.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h +bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +bn_err.o: bn_err.c bn_exp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_exp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h +bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp.c bn_lcl.h bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_exp2.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_exp2.o: 
../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_exp2.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_exp2.o: ../cryptlib.h bn_exp2.c bn_lcl.h +bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp2.c bn_lcl.h bn_gcd.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_gcd.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_gcd.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_gcd.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_gcd.o: ../cryptlib.h bn_gcd.c bn_lcl.h -bn_kron.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h -bn_kron.o: ../../include/openssl/opensslconf.h bn_kron.c bn_lcl.h +bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_gcd.c bn_lcl.h +bn_gf2m.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h +bn_gf2m.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h +bn_gf2m.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h +bn_gf2m.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h +bn_gf2m.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_gf2m.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_gf2m.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_gf2m.c bn_lcl.h +bn_kron.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h +bn_kron.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h +bn_kron.o: 
../../include/openssl/e_os2.h ../../include/openssl/err.h +bn_kron.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h +bn_kron.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_kron.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_kron.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_kron.c bn_lcl.h bn_lib.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_lib.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_lib.o: ../cryptlib.h bn_lcl.h bn_lib.c +bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_lib.c bn_mod.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mod.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mod.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_mod.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mod.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mod.o: ../cryptlib.h bn_lcl.h bn_mod.c +bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_mod.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mod.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mod.c bn_mont.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mont.o: ../../include/openssl/e_os2.h 
../../include/openssl/err.h bn_mont.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mont.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mont.o: ../cryptlib.h bn_lcl.h bn_mont.c +bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mont.c bn_mpi.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mpi.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_mpi.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mpi.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mpi.o: ../cryptlib.h bn_lcl.h bn_mpi.c +bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mpi.c bn_mul.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mul.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_mul.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mul.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mul.o: ../cryptlib.h bn_lcl.h bn_mul.c +bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mul.c +bn_nist.o: ../../e_os.h 
../../include/openssl/bio.h ../../include/openssl/bn.h +bn_nist.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h +bn_nist.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h +bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h +bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -276,9 +295,9 @@ bn_print.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_print.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_print.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_print.o: ../cryptlib.h bn_lcl.h bn_print.c +bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_print.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_print.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_print.c bn_rand.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_rand.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -291,34 +310,34 @@ bn_recp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_recp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_recp.o: ../../include/openssl/lhash.h 
../../include/openssl/opensslconf.h -bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_recp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_recp.o: ../cryptlib.h bn_lcl.h bn_recp.c +bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_recp.c bn_shift.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_shift.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_shift.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_shift.o: ../cryptlib.h bn_lcl.h bn_shift.c +bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_shift.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_shift.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_shift.c bn_sqr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_sqr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_sqr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_sqr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_sqr.o: ../cryptlib.h bn_lcl.h bn_sqr.c +bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_sqr.c bn_sqrt.o: ../../e_os.h ../../include/openssl/bio.h 
../../include/openssl/bn.h bn_sqrt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_sqrt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_sqrt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_sqrt.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_sqrt.o: ../cryptlib.h bn_lcl.h bn_sqrt.c +bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_sqrt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_sqrt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_sqrt.c bn_word.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_word.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_word.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_word.o: ../cryptlib.h bn_lcl.h bn_word.c +bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c diff --git a/crypto/openssl/crypto/bn/asm/bn-586.pl b/crypto/openssl/crypto/bn/asm/bn-586.pl index c4de4a2..26c2685 100644 --- a/crypto/openssl/crypto/bn/asm/bn-586.pl +++ b/crypto/openssl/crypto/bn/asm/bn-586.pl @@ -5,13 +5,18 @@ require "x86asm.pl"; &asm_init($ARGV[0],$0); +$sse2=0; +for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } + +&external_label("OPENSSL_ia32cap_P") if ($sse2); + &bn_mul_add_words("bn_mul_add_words"); &bn_mul_words("bn_mul_words"); &bn_sqr_words("bn_sqr_words"); &bn_div_words("bn_div_words"); &bn_add_words("bn_add_words"); &bn_sub_words("bn_sub_words"); 
-#&bn_sub_part_words("bn_sub_part_words"); +&bn_sub_part_words("bn_sub_part_words"); &asm_finish(); @@ -19,7 +24,7 @@ sub bn_mul_add_words { local($name)=@_; - &function_begin($name,""); + &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); &comment(""); $Low="eax"; @@ -42,6 +47,83 @@ sub bn_mul_add_words &jz(&label("maw_finish")); + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("maw_loop")); + + &movd("mm0",$w); # mm0 = w + &pxor("mm1","mm1"); # mm1 = carry_in + + &set_label("maw_sse2_loop",0); + &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] + &paddq("mm1","mm3"); # mm1 = carry_in + r[0] + &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] + &pmuludq("mm2","mm0"); # mm2 = w*a[0] + &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1] + &pmuludq("mm4","mm0"); # mm4 = w*a[1] + &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2] + &pmuludq("mm6","mm0"); # mm6 = w*a[2] + &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3] + &pmuludq("mm7","mm0"); # mm7 = w*a[3] + &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0] + &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1] + &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1] + &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2] + &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2] + &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3] + &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3] + &movd(&DWP(0,$r,"",0),"mm1"); + &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4] + &pmuludq("mm2","mm0"); # mm2 = w*a[4] + &psrlq("mm1",32); # mm1 = carry0 + &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5] + &pmuludq("mm4","mm0"); # mm4 = w*a[5] + &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1] + &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6] + &pmuludq("mm6","mm0"); # mm6 = w*a[6] + &movd(&DWP(4,$r,"",0),"mm1"); + &psrlq("mm1",32); # mm1 = carry1 + &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7] + &add($a,32); + &pmuludq("mm3","mm0"); # mm3 = w*a[7] + &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2] + &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4] + &paddq("mm2","mm5"); # mm2 = 
r[4] + w*a[4] + &movd(&DWP(8,$r,"",0),"mm1"); + &psrlq("mm1",32); # mm1 = carry2 + &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3] + &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5] + &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5] + &movd(&DWP(12,$r,"",0),"mm1"); + &psrlq("mm1",32); # mm1 = carry3 + &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4] + &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6] + &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6] + &movd(&DWP(16,$r,"",0),"mm1"); + &psrlq("mm1",32); # mm1 = carry4 + &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5] + &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7] + &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7] + &movd(&DWP(20,$r,"",0),"mm1"); + &psrlq("mm1",32); # mm1 = carry5 + &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6] + &movd(&DWP(24,$r,"",0),"mm1"); + &psrlq("mm1",32); # mm1 = carry6 + &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] + &movd(&DWP(28,$r,"",0),"mm1"); + &add($r,32); + &psrlq("mm1",32); # mm1 = carry_out + + &sub("ecx",8); + &jnz(&label("maw_sse2_loop")); + + &movd($c,"mm1"); # c = carry_out + &emms(); + + &jmp(&label("maw_finish")); + } + &set_label("maw_loop",0); &mov(&swtmp(0),"ecx"); # diff --git a/crypto/openssl/crypto/bn/asm/ppc.pl b/crypto/openssl/crypto/bn/asm/ppc.pl new file mode 100644 index 0000000..08e0053 --- /dev/null +++ b/crypto/openssl/crypto/bn/asm/ppc.pl @@ -0,0 +1,2078 @@ +#!/usr/bin/env perl +# +# Implemented as a Perl wrapper as we want to support several different +# architectures with single file. We pick up the target based on the +# file name we are asked to generate. +# +# It should be noted though that this perl code is nothing like +# <openssl>/crypto/perlasm/x86*. In this case perl is used pretty much +# as pre-processor to cover for platform differences in name decoration, +# linker tables, 32-/64-bit instruction sets... +# +# As you might know there're several PowerPC ABI in use. Most notably +# Linux and AIX use different 32-bit ABIs. 
The good news is that these ABIs +# are similar enough to implement leaf(!) functions, which would be ABI +# neutral. And that's what you find here: ABI neutral leaf functions. +# In case you wonder what that is... +# +# AIX performance +# +# MEASUREMENTS WITH cc ON a 200 MHz PowerPC 604e. +# +# The following is the performance of 32-bit compiler +# generated code: +# +# OpenSSL 0.9.6c 21 dec 2001 +# built on: Tue Jun 11 11:06:51 EDT 2002 +# options:bn(64,32) ... +#compiler: cc -DTHREADS -DAIX -DB_ENDIAN -DBN_LLONG -O3 +# sign verify sign/s verify/s +#rsa 512 bits 0.0098s 0.0009s 102.0 1170.6 +#rsa 1024 bits 0.0507s 0.0026s 19.7 387.5 +#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1 +#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4 +#dsa 512 bits 0.0087s 0.0106s 114.3 94.5 +#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 +# +# Same benchmark with this assembler code: +# +#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2 +#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1 +#rsa 2048 bits 0.1744s 0.0050s 5.7 201.2 +#rsa 4096 bits 1.1644s 0.0179s 0.9 55.7 +#dsa 512 bits 0.0052s 0.0062s 191.6 162.0 +#dsa 1024 bits 0.0149s 0.0180s 67.0 55.5 +# +# Number of operations increases by almost 75% +# +# Here are performance numbers for 64-bit compiler +# generated code: +# +# OpenSSL 0.9.6g [engine] 9 Aug 2002 +# built on: Fri Apr 18 16:59:20 EDT 2003 +# options:bn(64,64) ... 
+# compiler: cc -DTHREADS -D_REENTRANT -q64 -DB_ENDIAN -O3 +# sign verify sign/s verify/s +#rsa 512 bits 0.0028s 0.0003s 357.1 3844.4 +#rsa 1024 bits 0.0148s 0.0008s 67.5 1239.7 +#rsa 2048 bits 0.0963s 0.0028s 10.4 353.0 +#rsa 4096 bits 0.6538s 0.0102s 1.5 98.1 +#dsa 512 bits 0.0026s 0.0032s 382.5 313.7 +#dsa 1024 bits 0.0081s 0.0099s 122.8 100.6 +# +# Same benchmark with this assembler code: +# +#rsa 512 bits 0.0020s 0.0002s 510.4 6273.7 +#rsa 1024 bits 0.0088s 0.0005s 114.1 2128.3 +#rsa 2048 bits 0.0540s 0.0016s 18.5 622.5 +#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0 +#dsa 512 bits 0.0016s 0.0020s 610.7 507.1 +#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2 +# +# Again, performance increases by about 75% +# +# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code) +# OpenSSL 0.9.7c 30 Sep 2003 +# +# Original code. +# +#rsa 512 bits 0.0011s 0.0001s 906.1 11012.5 +#rsa 1024 bits 0.0060s 0.0003s 166.6 3363.1 +#rsa 2048 bits 0.0370s 0.0010s 27.1 982.4 +#rsa 4096 bits 0.2426s 0.0036s 4.1 280.4 +#dsa 512 bits 0.0010s 0.0012s 1038.1 841.5 +#dsa 1024 bits 0.0030s 0.0037s 329.6 269.7 +#dsa 2048 bits 0.0101s 0.0127s 98.9 78.6 +# +# Same benchmark with this assembler code: +# +#rsa 512 bits 0.0007s 0.0001s 1416.2 16645.9 +#rsa 1024 bits 0.0036s 0.0002s 274.4 5380.6 +#rsa 2048 bits 0.0222s 0.0006s 45.1 1589.5 +#rsa 4096 bits 0.1469s 0.0022s 6.8 449.6 +#dsa 512 bits 0.0006s 0.0007s 1664.2 1376.2 +#dsa 1024 bits 0.0018s 0.0023s 545.0 442.2 +#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8 +# +# Performance increase of ~60% +# +# If you have comments or suggestions to improve code send +# me a note at schari@us.ibm.com +# + +$opf = shift; + +if ($opf =~ /32\.s/) { + $BITS= 32; + $BNSZ= $BITS/8; + $ISA= "\"ppc\""; + + $LD= "lwz"; # load + $LDU= "lwzu"; # load and update + $ST= "stw"; # store + $STU= "stwu"; # store and update + $UMULL= "mullw"; # unsigned multiply low + $UMULH= "mulhwu"; # unsigned multiply high + $UDIV= "divwu"; # unsigned divide + $UCMPI= "cmplwi"; # unsigned compare with 
immediate + $UCMP= "cmplw"; # unsigned compare + $CNTLZ= "cntlzw"; # count leading zeros + $SHL= "slw"; # shift left + $SHR= "srw"; # unsigned shift right + $SHRI= "srwi"; # unsigned shift right by immediate + $SHLI= "slwi"; # shift left by immediate + $CLRU= "clrlwi"; # clear upper bits + $INSR= "insrwi"; # insert right + $ROTL= "rotlwi"; # rotate left by immediate + $TR= "tw"; # conditional trap +} elsif ($opf =~ /64\.s/) { + $BITS= 64; + $BNSZ= $BITS/8; + $ISA= "\"ppc64\""; + + # same as above, but 64-bit mnemonics... + $LD= "ld"; # load + $LDU= "ldu"; # load and update + $ST= "std"; # store + $STU= "stdu"; # store and update + $UMULL= "mulld"; # unsigned multiply low + $UMULH= "mulhdu"; # unsigned multiply high + $UDIV= "divdu"; # unsigned divide + $UCMPI= "cmpldi"; # unsigned compare with immediate + $UCMP= "cmpld"; # unsigned compare + $CNTLZ= "cntlzd"; # count leading zeros + $SHL= "sld"; # shift left + $SHR= "srd"; # unsigned shift right + $SHRI= "srdi"; # unsigned shift right by immediate + $SHLI= "sldi"; # shift left by immediate + $CLRU= "clrldi"; # clear upper bits + $INSR= "insrdi"; # insert right + $ROTL= "rotldi"; # rotate left by immediate + $TR= "td"; # conditional trap +} else { die "nonsense $opf"; } + +( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; + +# function entry points from the AIX code +# +# There are other, more elegant, ways to handle this. We (IBM) chose +# this approach as it plays well with scripts we run to 'namespace' +# OpenSSL .i.e. we add a prefix to all the public symbols so we can +# co-exist in the same process with other implementations of OpenSSL. +# 'cleverer' ways of doing these substitutions tend to hide data we +# need to be obvious. 
+# +my @items = ("bn_sqr_comba4", + "bn_sqr_comba8", + "bn_mul_comba4", + "bn_mul_comba8", + "bn_sub_words", + "bn_add_words", + "bn_div_words", + "bn_sqr_words", + "bn_mul_words", + "bn_mul_add_words"); + +# Choose the output flavour from the file name: linux, aix or osx, with +# do_bsd as the fallback for anything else. +if ($opf =~ /linux/) { do_linux(); } +elsif ($opf =~ /aix/) { do_aix(); } +elsif ($opf =~ /osx/) { do_osx(); } +else { do_bsd(); } + +# Linux: in the 64-bit case every ".name:" label from @items is expanded into +# an ".opd" section entry named "name" followed by the ".name" code label; +# otherwise the leading dot is simply removed from each listed name. +# Lppcasm_ labels then get a "." prefix before the text is printed. +sub do_linux { + $d=&data(); + + if ($BITS==64) { + foreach $t (@items) { + $d =~ s/\.$t:/\ +\t.section\t".opd","aw"\ +\t.align\t3\ +\t.globl\t$t\ +$t:\ +\t.quad\t.$t,.TOC.\@tocbase,0\ +\t.size\t$t,24\ +\t.previous\n\ +\t.type\t.$t,\@function\ +\t.globl\t.$t\ +.$t:/g; + } + } + else { + foreach $t (@items) { + $d=~s/\.$t/$t/g; + } + } + # hide internal labels to avoid pollution of name table... + $d=~s/Lppcasm_/.Lppcasm_/gm; + print $d; +} + +# AIX: print the template as returned by data(), with no rewriting. +sub do_aix { + # AIX assembler is smart enough to please the linker without + # making us do something special... + print &data(); +} + +# MacOSX 32 bit +sub do_osx { + $d=&data(); + # Change the bn symbol prefix from '.' to '_' + foreach $t (@items) { + $d=~s/\.$t/_$t/g; + } + # Change .machine to something OS X asm will accept + $d=~s/\.machine.*/.text/g; + $d=~s/\#/;/g; # change comment from '#' to ';' + print $d; +} + +# BSD (Untested) +# Same '.' to '_' symbol renaming as do_osx, without its other rewrites. +sub do_bsd { + $d=&data(); + foreach $t (@items) { + $d=~s/\.$t/_$t/g; + } + print $d; +} + +# data(): source of the assembler template consumed by the do_* subs above. +sub data { + local($data)=<<EOF; +#-------------------------------------------------------------------- +# +# +# +# +# File: ppc32.s +# +# Created by: Suresh Chari +# IBM Thomas J. Watson Research Library +# Hawthorne, NY +# +# +# Description: Optimized assembly routines for OpenSSL crypto +# on the 32 bitPowerPC platform. +# +# +# Version History +# +# 2. Fixed bn_add,bn_sub and bn_div_words, added comments, +# cleaned up code. Also made a single version which can +# be used for both the AIX and Linux compilers. See NOTE +# below. +# 12/05/03 Suresh Chari +# (with lots of help from) Andy Polyakov +## +# 1. 
Initial version 10/20/02 Suresh Chari +# +# +# The following file works for the xlc,cc +# and gcc compilers. +# +# NOTE: To get the file to link correctly with the gcc compiler +# you have to change the names of the routines and remove +# the first .(dot) character. This should automatically +# be done in the build process. +# +# Hand optimized assembly code for the following routines +# +# bn_sqr_comba4 +# bn_sqr_comba8 +# bn_mul_comba4 +# bn_mul_comba8 +# bn_sub_words +# bn_add_words +# bn_div_words +# bn_sqr_words +# bn_mul_words +# bn_mul_add_words +# +# NOTE: It is possible to optimize this code more for +# specific PowerPC or Power architectures. On the Northstar +# architecture the optimizations in this file do +# NOT provide much improvement. +# +# If you have comments or suggestions to improve code send +# me a note at schari\@us.ibm.com +# +#-------------------------------------------------------------------------- +# +# Defines to be used in the assembly code. +# +.set r0,0 # we use it as storage for value of 0 +.set SP,1 # preserved +.set RTOC,2 # preserved +.set r3,3 # 1st argument/return value +.set r4,4 # 2nd argument/volatile register +.set r5,5 # 3rd argument/volatile register +.set r6,6 # ... +.set r7,7 +.set r8,8 +.set r9,9 +.set r10,10 +.set r11,11 +.set r12,12 +.set r13,13 # not used, nor any other "below" it... + +.set BO_IF_NOT,4 +.set BO_IF,12 +.set BO_dCTR_NZERO,16 +.set BO_dCTR_ZERO,18 +.set BO_ALWAYS,20 +.set CR0_LT,0; +.set CR0_GT,1; +.set CR0_EQ,2 +.set CR1_FX,4; +.set CR1_FEX,5; +.set CR1_VX,6 +.set LR,8 + +# Declare function names to be global +# NOTE: For gcc these names MUST be changed to remove +# the first . i.e. for example change ".bn_sqr_comba4" +# to "bn_sqr_comba4". This should be automatically done +# in the build. 
+ + .globl .bn_sqr_comba4 + .globl .bn_sqr_comba8 + .globl .bn_mul_comba4 + .globl .bn_mul_comba8 + .globl .bn_sub_words + .globl .bn_add_words + .globl .bn_div_words + .globl .bn_sqr_words + .globl .bn_mul_words + .globl .bn_mul_add_words + +# .text section + + .machine $ISA + +# +# NOTE: The following label name should be changed to +# "bn_sqr_comba4" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_sqr_comba4: +# +# Optimized version of bn_sqr_comba4. +# +# void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +# r3 contains r +# r4 contains a +# +# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: +# +# r5,r6 are the two BN_ULONGs being multiplied. +# r7,r8 are the results of the 32x32 giving 64 bit multiply. +# r9,r10, r11 are the equivalents of c1,c2, c3. +# Here's the assembly +# +# + xor r0,r0,r0 # set r0 = 0. Used in the addze + # instructions below + + #sqr_add_c(a,0,c1,c2,c3) + $LD r5,`0*$BNSZ`(r4) + $UMULL r9,r5,r5 + $UMULH r10,r5,r5 #in first iteration. No need + #to add since c1=c2=c3=0. + # Note c3(r11) is NOT set to 0 + # but will be. + + $ST r9,`0*$BNSZ`(r3) # r[0]=c1; + # sqr_add_c2(a,1,0,c2,c3,c1); + $LD r6,`1*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8) + adde r8,r8,r8 + addze r9,r0 # catch carry if any. + # r9= r0(=0) and carry + + addc r10,r7,r10 # now add to temp result. 
+ addze r11,r8 # r8 added to r11 which is 0 + addze r9,r9 + + $ST r10,`1*$BNSZ`(r3) #r[1]=c2; + #sqr_add_c(a,1,c3,c1,c2) + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r0 + #sqr_add_c2(a,2,0,c3,c1,c2) + $LD r6,`2*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r7,r7,r7 + adde r8,r8,r8 + addze r10,r10 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + $ST r11,`2*$BNSZ`(r3) #r[2]=c3 + #sqr_add_c2(a,3,0,c1,c2,c3); + $LD r6,`3*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r7,r7,r7 + adde r8,r8,r8 + addze r11,r0 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + #sqr_add_c2(a,2,1,c1,c2,c3); + $LD r5,`1*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r7,r7,r7 + adde r8,r8,r8 + addze r11,r11 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + $ST r9,`3*$BNSZ`(r3) #r[3]=c1 + #sqr_add_c(a,2,c2,c3,c1); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r0 + #sqr_add_c2(a,3,1,c2,c3,c1); + $LD r6,`3*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r7,r7,r7 + adde r8,r8,r8 + addze r9,r9 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + $ST r10,`4*$BNSZ`(r3) #r[4]=c2 + #sqr_add_c2(a,3,2,c3,c1,c2); + $LD r5,`2*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r7,r7,r7 + adde r8,r8,r8 + addze r10,r0 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + $ST r11,`5*$BNSZ`(r3) #r[5] = c3 + #sqr_add_c(a,3,c1,c2,c3); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r9,r7,r9 + adde r10,r8,r10 + + $ST r9,`6*$BNSZ`(r3) #r[6]=c1 + $ST r10,`7*$BNSZ`(r3) #r[7]=c2 + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_sqr_comba8" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_sqr_comba8: +# +# This is an optimized version of the bn_sqr_comba8 routine. 
+# Tightly uses the adde instruction +# +# +# void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +# r3 contains r +# r4 contains a +# +# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: +# +# r5,r6 are the two BN_ULONGs being multiplied. +# r7,r8 are the results of the 32x32 giving 64 bit multiply. +# r9,r10, r11 are the equivalents of c1,c2, c3. +# +# Possible optimization of loading all 8 longs of a into registers +# doesnt provide any speedup +# + + xor r0,r0,r0 #set r0 = 0.Used in addze + #instructions below. + + #sqr_add_c(a,0,c1,c2,c3); + $LD r5,`0*$BNSZ`(r4) + $UMULL r9,r5,r5 #1st iteration: no carries. + $UMULH r10,r5,r5 + $ST r9,`0*$BNSZ`(r3) # r[0]=c1; + #sqr_add_c2(a,1,0,c2,c3,c1); + $LD r6,`1*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r10,r7,r10 #add the two register number + adde r11,r8,r0 # (r8,r7) to the three register + addze r9,r0 # number (r9,r11,r10).NOTE:r0=0 + + addc r10,r7,r10 #add the two register number + adde r11,r8,r11 # (r8,r7) to the three register + addze r9,r9 # number (r9,r11,r10). + + $ST r10,`1*$BNSZ`(r3) # r[1]=c2 + + #sqr_add_c(a,1,c3,c1,c2); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r0 + #sqr_add_c2(a,2,0,c3,c1,c2); + $LD r6,`2*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + + $ST r11,`2*$BNSZ`(r3) #r[2]=c3 + #sqr_add_c2(a,3,0,c1,c2,c3); + $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0]. 
+ $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r0 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + #sqr_add_c2(a,2,1,c1,c2,c3); + $LD r5,`1*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + + $ST r9,`3*$BNSZ`(r3) #r[3]=c1; + #sqr_add_c(a,2,c2,c3,c1); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r0 + #sqr_add_c2(a,3,1,c2,c3,c1); + $LD r6,`3*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + #sqr_add_c2(a,4,0,c2,c3,c1); + $LD r5,`0*$BNSZ`(r4) + $LD r6,`4*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + $ST r10,`4*$BNSZ`(r3) #r[4]=c2; + #sqr_add_c2(a,5,0,c3,c1,c2); + $LD r6,`5*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r0 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + #sqr_add_c2(a,4,1,c3,c1,c2); + $LD r5,`1*$BNSZ`(r4) + $LD r6,`4*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + #sqr_add_c2(a,3,2,c3,c1,c2); + $LD r5,`2*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + $ST r11,`5*$BNSZ`(r3) #r[5]=c3; + #sqr_add_c(a,3,c1,c2,c3); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r0 + #sqr_add_c2(a,4,2,c1,c2,c3); + $LD r6,`4*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + #sqr_add_c2(a,5,1,c1,c2,c3); + $LD r5,`1*$BNSZ`(r4) + $LD r6,`5*$BNSZ`(r4) + 
$UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + #sqr_add_c2(a,6,0,c1,c2,c3); + $LD r5,`0*$BNSZ`(r4) + $LD r6,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + $ST r9,`6*$BNSZ`(r3) #r[6]=c1; + #sqr_add_c2(a,7,0,c2,c3,c1); + $LD r6,`7*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r0 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + #sqr_add_c2(a,6,1,c2,c3,c1); + $LD r5,`1*$BNSZ`(r4) + $LD r6,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + #sqr_add_c2(a,5,2,c2,c3,c1); + $LD r5,`2*$BNSZ`(r4) + $LD r6,`5*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + #sqr_add_c2(a,4,3,c2,c3,c1); + $LD r5,`3*$BNSZ`(r4) + $LD r6,`4*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + $ST r10,`7*$BNSZ`(r3) #r[7]=c2; + #sqr_add_c(a,4,c3,c1,c2); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r0 + #sqr_add_c2(a,5,3,c3,c1,c2); + $LD r6,`5*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + #sqr_add_c2(a,6,2,c3,c1,c2); + $LD r5,`2*$BNSZ`(r4) + $LD r6,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + #sqr_add_c2(a,7,1,c3,c1,c2); + $LD r5,`1*$BNSZ`(r4) + $LD r6,`7*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + $ST 
r11,`8*$BNSZ`(r3) #r[8]=c3; + #sqr_add_c2(a,7,2,c1,c2,c3); + $LD r5,`2*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r0 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + #sqr_add_c2(a,6,3,c1,c2,c3); + $LD r5,`3*$BNSZ`(r4) + $LD r6,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + #sqr_add_c2(a,5,4,c1,c2,c3); + $LD r5,`4*$BNSZ`(r4) + $LD r6,`5*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + $ST r9,`9*$BNSZ`(r3) #r[9]=c1; + #sqr_add_c(a,5,c2,c3,c1); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r0 + #sqr_add_c2(a,6,4,c2,c3,c1); + $LD r6,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + #sqr_add_c2(a,7,3,c2,c3,c1); + $LD r5,`3*$BNSZ`(r4) + $LD r6,`7*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + $ST r10,`10*$BNSZ`(r3) #r[10]=c2; + #sqr_add_c2(a,7,4,c3,c1,c2); + $LD r5,`4*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r0 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + #sqr_add_c2(a,6,5,c3,c1,c2); + $LD r5,`5*$BNSZ`(r4) + $LD r6,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + addc r11,r7,r11 + adde r9,r8,r9 + addze r10,r10 + $ST r11,`11*$BNSZ`(r3) #r[11]=c3; + #sqr_add_c(a,6,c1,c2,c3); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r0 + #sqr_add_c2(a,7,5,c1,c2,c3) + $LD r6,`7*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + addc r9,r7,r9 + adde r10,r8,r10 + addze r11,r11 + $ST r9,`12*$BNSZ`(r3) 
#r[12]=c1; + + #sqr_add_c2(a,7,6,c2,c3,c1) + $LD r5,`6*$BNSZ`(r4) + $UMULL r7,r5,r6 + $UMULH r8,r5,r6 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r0 + addc r10,r7,r10 + adde r11,r8,r11 + addze r9,r9 + $ST r10,`13*$BNSZ`(r3) #r[13]=c2; + #sqr_add_c(a,7,c3,c1,c2); + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + addc r11,r7,r11 + adde r9,r8,r9 + $ST r11,`14*$BNSZ`(r3) #r[14]=c3; + $ST r9, `15*$BNSZ`(r3) #r[15]=c1; + + + bclr BO_ALWAYS,CR0_LT + + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_mul_comba4" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_mul_comba4: +# +# This is an optimized version of the bn_mul_comba4 routine. +# +# void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +# r3 contains r +# r4 contains a +# r5 contains b +# r6, r7 are the 2 BN_ULONGs being multiplied. +# r8, r9 are the results of the 32x32 giving 64 multiply. +# r10, r11, r12 are the equivalents of c1, c2, and c3. +# + xor r0,r0,r0 #r0=0. Used in addze below. 
+ #mul_add_c(a[0],b[0],c1,c2,c3); + $LD r6,`0*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r10,r6,r7 + $UMULH r11,r6,r7 + $ST r10,`0*$BNSZ`(r3) #r[0]=c1 + #mul_add_c(a[0],b[1],c2,c3,c1); + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r8,r11 + adde r12,r9,r0 + addze r10,r0 + #mul_add_c(a[1],b[0],c2,c3,c1); + $LD r6, `1*$BNSZ`(r4) + $LD r7, `0*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r8,r11 + adde r12,r9,r12 + addze r10,r10 + $ST r11,`1*$BNSZ`(r3) #r[1]=c2 + #mul_add_c(a[2],b[0],c3,c1,c2); + $LD r6,`2*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r8,r12 + adde r10,r9,r10 + addze r11,r0 + #mul_add_c(a[1],b[1],c3,c1,c2); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r8,r12 + adde r10,r9,r10 + addze r11,r11 + #mul_add_c(a[0],b[2],c3,c1,c2); + $LD r6,`0*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r8,r12 + adde r10,r9,r10 + addze r11,r11 + $ST r12,`2*$BNSZ`(r3) #r[2]=c3 + #mul_add_c(a[0],b[3],c1,c2,c3); + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r8,r10 + adde r11,r9,r11 + addze r12,r0 + #mul_add_c(a[1],b[2],c1,c2,c3); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r8,r10 + adde r11,r9,r11 + addze r12,r12 + #mul_add_c(a[2],b[1],c1,c2,c3); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r8,r10 + adde r11,r9,r11 + addze r12,r12 + #mul_add_c(a[3],b[0],c1,c2,c3); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r8,r10 + adde r11,r9,r11 + addze r12,r12 + $ST r10,`3*$BNSZ`(r3) #r[3]=c1 + #mul_add_c(a[3],b[1],c2,c3,c1); + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r8,r11 + adde r12,r9,r12 + addze r10,r0 + #mul_add_c(a[2],b[2],c2,c3,c1); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r8,r11 + adde r12,r9,r12 + addze 
r10,r10 + #mul_add_c(a[1],b[3],c2,c3,c1); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r8,r11 + adde r12,r9,r12 + addze r10,r10 + $ST r11,`4*$BNSZ`(r3) #r[4]=c2 + #mul_add_c(a[2],b[3],c3,c1,c2); + $LD r6,`2*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r8,r12 + adde r10,r9,r10 + addze r11,r0 + #mul_add_c(a[3],b[2],c3,c1,c2); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) #b[2] is read via r5 (b), not r4 (a) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r8,r12 + adde r10,r9,r10 + addze r11,r11 + $ST r12,`5*$BNSZ`(r3) #r[5]=c3 + #mul_add_c(a[3],b[3],c1,c2,c3); + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r8,r10 + adde r11,r9,r11 + + $ST r10,`6*$BNSZ`(r3) #r[6]=c1 + $ST r11,`7*$BNSZ`(r3) #r[7]=c2 + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_mul_comba8" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_mul_comba8: +# +# Optimized version of the bn_mul_comba8 routine. +# +# void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +# r3 contains r +# r4 contains a +# r5 contains b +# r6, r7 are the 2 BN_ULONGs being multiplied. +# r8, r9 are the results of the 32x32 giving 64 multiply. +# r10, r11, r12 are the equivalents of c1, c2, and c3. +# + xor r0,r0,r0 #r0=0. Used in addze below. + + #mul_add_c(a[0],b[0],c1,c2,c3); + $LD r6,`0*$BNSZ`(r4) #a[0] + $LD r7,`0*$BNSZ`(r5) #b[0] + $UMULL r10,r6,r7 + $UMULH r11,r6,r7 + $ST r10,`0*$BNSZ`(r3) #r[0]=c1; + #mul_add_c(a[0],b[1],c2,c3,c1); + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + addze r12,r9 # since we didnt set r12 to zero before. 
+ addze r10,r0 + #mul_add_c(a[1],b[0],c2,c3,c1); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + $ST r11,`1*$BNSZ`(r3) #r[1]=c2; + #mul_add_c(a[2],b[0],c3,c1,c2); + $LD r6,`2*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r0 + #mul_add_c(a[1],b[1],c3,c1,c2); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[0],b[2],c3,c1,c2); + $LD r6,`0*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + $ST r12,`2*$BNSZ`(r3) #r[2]=c3; + #mul_add_c(a[0],b[3],c1,c2,c3); + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r0 + #mul_add_c(a[1],b[2],c1,c2,c3); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + + #mul_add_c(a[2],b[1],c1,c2,c3); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[3],b[0],c1,c2,c3); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + $ST r10,`3*$BNSZ`(r3) #r[3]=c1; + #mul_add_c(a[4],b[0],c2,c3,c1); + $LD r6,`4*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r0 + #mul_add_c(a[3],b[1],c2,c3,c1); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[2],b[2],c2,c3,c1); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[1],b[3],c2,c3,c1); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + 
$UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[0],b[4],c2,c3,c1); + $LD r6,`0*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + $ST r11,`4*$BNSZ`(r3) #r[4]=c2; + #mul_add_c(a[0],b[5],c3,c1,c2); + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r0 + #mul_add_c(a[1],b[4],c3,c1,c2); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[2],b[3],c3,c1,c2); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[3],b[2],c3,c1,c2); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[4],b[1],c3,c1,c2); + $LD r6,`4*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[5],b[0],c3,c1,c2); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + $ST r12,`5*$BNSZ`(r3) #r[5]=c3; + #mul_add_c(a[6],b[0],c1,c2,c3); + $LD r6,`6*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r0 + #mul_add_c(a[5],b[1],c1,c2,c3); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[4],b[2],c1,c2,c3); + $LD r6,`4*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[3],b[3],c1,c2,c3); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[2],b[4],c1,c2,c3); + $LD r6,`2*$BNSZ`(r4) + $LD 
r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[1],b[5],c1,c2,c3); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[0],b[6],c1,c2,c3); + $LD r6,`0*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + $ST r10,`6*$BNSZ`(r3) #r[6]=c1; + #mul_add_c(a[0],b[7],c2,c3,c1); + $LD r7,`7*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r0 + #mul_add_c(a[1],b[6],c2,c3,c1); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[2],b[5],c2,c3,c1); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[3],b[4],c2,c3,c1); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[4],b[3],c2,c3,c1); + $LD r6,`4*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[5],b[2],c2,c3,c1); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[6],b[1],c2,c3,c1); + $LD r6,`6*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[7],b[0],c2,c3,c1); + $LD r6,`7*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + $ST r11,`7*$BNSZ`(r3) #r[7]=c2; + #mul_add_c(a[7],b[1],c3,c1,c2); + $LD r7,`1*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r0 + 
#mul_add_c(a[6],b[2],c3,c1,c2); + $LD r6,`6*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[5],b[3],c3,c1,c2); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[4],b[4],c3,c1,c2); + $LD r6,`4*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[3],b[5],c3,c1,c2); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[2],b[6],c3,c1,c2); + $LD r6,`2*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[1],b[7],c3,c1,c2); + $LD r6,`1*$BNSZ`(r4) + $LD r7,`7*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + $ST r12,`8*$BNSZ`(r3) #r[8]=c3; + #mul_add_c(a[2],b[7],c1,c2,c3); + $LD r6,`2*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r0 + #mul_add_c(a[3],b[6],c1,c2,c3); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[4],b[5],c1,c2,c3); + $LD r6,`4*$BNSZ`(r4) + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[5],b[4],c1,c2,c3); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[6],b[3],c1,c2,c3); + $LD r6,`6*$BNSZ`(r4) + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[7],b[2],c1,c2,c3); + $LD r6,`7*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde 
r11,r11,r9 + addze r12,r12 + $ST r10,`9*$BNSZ`(r3) #r[9]=c1; + #mul_add_c(a[7],b[3],c2,c3,c1); + $LD r7,`3*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r0 + #mul_add_c(a[6],b[4],c2,c3,c1); + $LD r6,`6*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[5],b[5],c2,c3,c1); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[4],b[6],c2,c3,c1); + $LD r6,`4*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + #mul_add_c(a[3],b[7],c2,c3,c1); + $LD r6,`3*$BNSZ`(r4) + $LD r7,`7*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + $ST r11,`10*$BNSZ`(r3) #r[10]=c2; + #mul_add_c(a[4],b[7],c3,c1,c2); + $LD r6,`4*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r0 + #mul_add_c(a[5],b[6],c3,c1,c2); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[6],b[5],c3,c1,c2); + $LD r6,`6*$BNSZ`(r4) + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + #mul_add_c(a[7],b[4],c3,c1,c2); + $LD r6,`7*$BNSZ`(r4) + $LD r7,`4*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + addze r11,r11 + $ST r12,`11*$BNSZ`(r3) #r[11]=c3; + #mul_add_c(a[7],b[5],c1,c2,c3); + $LD r7,`5*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r0 + #mul_add_c(a[6],b[6],c1,c2,c3); + $LD r6,`6*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + #mul_add_c(a[5],b[7],c1,c2,c3); + $LD r6,`5*$BNSZ`(r4) + $LD r7,`7*$BNSZ`(r5) + $UMULL 
r8,r6,r7 + $UMULH r9,r6,r7 + addc r10,r10,r8 + adde r11,r11,r9 + addze r12,r12 + $ST r10,`12*$BNSZ`(r3) #r[12]=c1; + #mul_add_c(a[6],b[7],c2,c3,c1); + $LD r6,`6*$BNSZ`(r4) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r0 + #mul_add_c(a[7],b[6],c2,c3,c1); + $LD r6,`7*$BNSZ`(r4) + $LD r7,`6*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r11,r11,r8 + adde r12,r12,r9 + addze r10,r10 + $ST r11,`13*$BNSZ`(r3) #r[13]=c2; + #mul_add_c(a[7],b[7],c3,c1,c2); + $LD r7,`7*$BNSZ`(r5) + $UMULL r8,r6,r7 + $UMULH r9,r6,r7 + addc r12,r12,r8 + adde r10,r10,r9 + $ST r12,`14*$BNSZ`(r3) #r[14]=c3; + $ST r10,`15*$BNSZ`(r3) #r[15]=c1; + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_sub_words" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# +# +.align 4 +.bn_sub_words: +# +# Handcoded version of bn_sub_words +# +#BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +# +# r3 = r +# r4 = a +# r5 = b +# r6 = n +# +# Note: No loop unrolling done since this is not a performance +# critical loop. + + xor r0,r0,r0 #set r0 = 0 +# +# check for r6 = 0 AND set carry bit. +# + subfc. r7,r0,r6 # If r6 is 0 then result is 0. + # if r6 > 0 then result !=0 + # In either case carry bit is set. + bc BO_IF,CR0_EQ,Lppcasm_sub_adios + addi r4,r4,-$BNSZ + addi r3,r3,-$BNSZ + addi r5,r5,-$BNSZ + mtctr r6 +Lppcasm_sub_mainloop: + $LDU r7,$BNSZ(r4) + $LDU r8,$BNSZ(r5) + subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8) + # if carry = 1 this is r7-r8. Else it + # is r7-r8 -1 as we need. + $STU r6,$BNSZ(r3) + bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop +Lppcasm_sub_adios: + subfze r3,r0 # if carry bit is set then r3 = 0 else -1 + andi. r3,r3,1 # keep only last bit. + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + + +# +# NOTE: The following label name should be changed to +# "bn_add_words" i.e. 
remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_add_words: +# +# Handcoded version of bn_add_words +# +#BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +# +# r3 = r +# r4 = a +# r5 = b +# r6 = n +# +# Note: No loop unrolling done since this is not a performance +# critical loop. + + xor r0,r0,r0 +# +# check for r6 = 0. Is this needed? +# + addic. r6,r6,0 #test r6 and clear carry bit. + bc BO_IF,CR0_EQ,Lppcasm_add_adios + addi r4,r4,-$BNSZ + addi r3,r3,-$BNSZ + addi r5,r5,-$BNSZ + mtctr r6 +Lppcasm_add_mainloop: + $LDU r7,$BNSZ(r4) + $LDU r8,$BNSZ(r5) + adde r8,r7,r8 + $STU r8,$BNSZ(r3) + bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop +Lppcasm_add_adios: + addze r3,r0 #return carry bit. + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_div_words" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_div_words: +# +# This is a cleaned up version of code generated by +# the AIX compiler. The only optimization is to use +# the PPC instruction to count leading zeros instead +# of call to num_bits_word. Since this was compiled +# only at level -O2 we can possibly squeeze it more? +# +# r3 = h +# r4 = l +# r5 = d + + $UCMPI 0,r5,0 # compare r5 and 0 + bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0 + li r3,-1 # d=0 return -1 + bclr BO_ALWAYS,CR0_LT +Lppcasm_div1: + xor r0,r0,r0 #r0=0 + li r8,$BITS + $CNTLZ. r7,r5 #r7 = num leading 0s in d. + bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros + subf r8,r7,r8 #r8 = BN_num_bits_word(d) + $SHR. r9,r3,r8 #are there any bits above r8'th? + $TR 16,r9,r0 #if there're, signal to dump core... +Lppcasm_div2: + $UCMP 0,r3,r5 #h>=d? + bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not + subf r3,r5,r3 #h-=d ; +Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i + cmpi 0,0,r7,0 # is (i == 0)? 
+ bc BO_IF,CR0_EQ,Lppcasm_div4 + $SHL r3,r3,r7 # h = (h<< i) + $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) + $SHL r5,r5,r7 # d<<=i + or r3,r3,r8 # h = (h<<i)|(l>>(BN_BITS2-i)) + $SHL r4,r4,r7 # l <<=i +Lppcasm_div4: + $SHRI r9,r5,`$BITS/2` # r9 = dh + # dl will be computed when needed + # as it saves registers. + li r6,2 #r6=2 + mtctr r6 #counter will be in count. +Lppcasm_divouterloop: + $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4) + $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 + # compute here for innerloop. + $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh + bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not + + li r8,-1 + $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l + b Lppcasm_div6 +Lppcasm_div5: + $UDIV r8,r3,r9 #q = h/dh +Lppcasm_div6: + $UMULL r12,r9,r8 #th = q*dh + $CLRU r10,r5,`$BITS/2` #r10=dl + $UMULL r6,r8,r10 #tl = q*dl + +Lppcasm_divinnerloop: + subf r10,r12,r3 #t = h -th + $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of... + addic. r7,r7,0 #test if r7 == 0. used below. + # now want to compute + # r7 = (t<<BN_BITS4)|((l&BN_MASK2h)>>BN_BITS4) + # the following 2 instructions do that + $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4) + or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4) + $UCMP 1,r6,r7 # compare (tl <= r7) + bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit + bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit + addi r8,r8,-1 #q-- + subf r12,r9,r12 #th -=dh + $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. 
+ subf r6,r10,r6 #tl -=dl + b Lppcasm_divinnerloop +Lppcasm_divinnerexit: + $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) + $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h; + $UCMP 1,r4,r11 # compare l and tl + add r12,r12,r10 # th+=t + bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 + addi r12,r12,1 # th++ +Lppcasm_div7: + subf r11,r11,r4 #r11=l-tl + $UCMP 1,r3,r12 #compare h and th + bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 + addi r8,r8,-1 # q-- + add r3,r5,r3 # h+=d +Lppcasm_div8: + subf r12,r12,r3 #r12 = h-th + $SHLI r4,r11,`$BITS/2` #l=(l&BN_MASK2l)<<BN_BITS4 + # want to compute + # h = ((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2 + # the following 2 instructions will do this. + $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. + $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 + bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ; + $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4 + b Lppcasm_divouterloop +Lppcasm_div9: + or r3,r8,r0 + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_sqr_words" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# +.align 4 +.bn_sqr_words: +# +# Optimized version of bn_sqr_words +# +# void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +# +# r3 = r +# r4 = a +# r5 = n +# +# r6 = a[i]. +# r7,r8 = product. +# +# No unrolling done here. Not performance critical. + + addic. r5,r5,0 #test r5. + bc BO_IF,CR0_EQ,Lppcasm_sqr_adios + addi r4,r4,-$BNSZ + addi r3,r3,-$BNSZ + mtctr r5 +Lppcasm_sqr_mainloop: + #sqr(r[0],r[1],a[0]); + $LDU r6,$BNSZ(r4) + $UMULL r7,r6,r6 + $UMULH r8,r6,r6 + $STU r7,$BNSZ(r3) + $STU r8,$BNSZ(r3) + bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop +Lppcasm_sqr_adios: + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + + +# +# NOTE: The following label name should be changed to +# "bn_mul_words" i.e. remove the first dot +# for the gcc compiler. 
This should be automatically +# done in the build +# + +.align 4 +.bn_mul_words: +# +# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +# +# r3 = rp +# r4 = ap +# r5 = num +# r6 = w + xor r0,r0,r0 + xor r12,r12,r12 # used for carry + rlwinm. r7,r5,30,2,31 # num >> 2 + bc BO_IF,CR0_EQ,Lppcasm_mw_REM + mtctr r7 +Lppcasm_mw_LOOP: + #mul(rp[0],ap[0],w,c1); + $LD r8,`0*$BNSZ`(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + addc r9,r9,r12 + #addze r10,r10 #carry is NOT ignored. + #will be taken care of + #in second spin below + #using adde. + $ST r9,`0*$BNSZ`(r3) + #mul(rp[1],ap[1],w,c1); + $LD r8,`1*$BNSZ`(r4) + $UMULL r11,r6,r8 + $UMULH r12,r6,r8 + adde r11,r11,r10 + #addze r12,r12 + $ST r11,`1*$BNSZ`(r3) + #mul(rp[2],ap[2],w,c1); + $LD r8,`2*$BNSZ`(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + adde r9,r9,r12 + #addze r10,r10 + $ST r9,`2*$BNSZ`(r3) + #mul_add(rp[3],ap[3],w,c1); + $LD r8,`3*$BNSZ`(r4) + $UMULL r11,r6,r8 + $UMULH r12,r6,r8 + adde r11,r11,r10 + addze r12,r12 #this spin we collect carry into + #r12 + $ST r11,`3*$BNSZ`(r3) + + addi r3,r3,`4*$BNSZ` + addi r4,r4,`4*$BNSZ` + bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP + +Lppcasm_mw_REM: + andi. 
r5,r5,0x3 + bc BO_IF,CR0_EQ,Lppcasm_mw_OVER + #mul(rp[0],ap[0],w,c1); + $LD r8,`0*$BNSZ`(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + addc r9,r9,r12 + addze r10,r10 + $ST r9,`0*$BNSZ`(r3) + addi r12,r10,0 + + addi r5,r5,-1 + cmpli 0,0,r5,0 + bc BO_IF,CR0_EQ,Lppcasm_mw_OVER + + + #mul(rp[1],ap[1],w,c1); + $LD r8,`1*$BNSZ`(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + addc r9,r9,r12 + addze r10,r10 + $ST r9,`1*$BNSZ`(r3) + addi r12,r10,0 + + addi r5,r5,-1 + cmpli 0,0,r5,0 + bc BO_IF,CR0_EQ,Lppcasm_mw_OVER + + #mul_add(rp[2],ap[2],w,c1); + $LD r8,`2*$BNSZ`(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + addc r9,r9,r12 + addze r10,r10 + $ST r9,`2*$BNSZ`(r3) + addi r12,r10,0 + +Lppcasm_mw_OVER: + addi r3,r12,0 + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + +# +# NOTE: The following label name should be changed to +# "bn_mul_add_words" i.e. remove the first dot +# for the gcc compiler. This should be automatically +# done in the build +# + +.align 4 +.bn_mul_add_words: +# +# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +# +# r3 = rp +# r4 = ap +# r5 = num +# r6 = w +# +# empirical evidence suggests that unrolled version performs best!! +# + xor r0,r0,r0 #r0 = 0 + xor r12,r12,r12 #r12 = 0 . used for carry + rlwinm. r7,r5,30,2,31 # num >> 2 + bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover + mtctr r7 +Lppcasm_maw_mainloop: + #mul_add(rp[0],ap[0],w,c1); + $LD r8,`0*$BNSZ`(r4) + $LD r11,`0*$BNSZ`(r3) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + addc r9,r9,r12 #r12 is carry. + addze r10,r10 + addc r9,r9,r11 + #addze r10,r10 + #the above instruction addze + #is NOT needed. Carry will NOT + #be ignored. It's not affected + #by multiply and will be collected + #in the next spin + $ST r9,`0*$BNSZ`(r3) + + #mul_add(rp[1],ap[1],w,c1); + $LD r8,`1*$BNSZ`(r4) + $LD r9,`1*$BNSZ`(r3) + $UMULL r11,r6,r8 + $UMULH r12,r6,r8 + adde r11,r11,r10 #r10 is carry. 
+ addze r12,r12 + addc r11,r11,r9 + #addze r12,r12 + $ST r11,`1*$BNSZ`(r3) + + #mul_add(rp[2],ap[2],w,c1); + $LD r8,`2*$BNSZ`(r4) + $UMULL r9,r6,r8 + $LD r11,`2*$BNSZ`(r3) + $UMULH r10,r6,r8 + adde r9,r9,r12 + addze r10,r10 + addc r9,r9,r11 + #addze r10,r10 + $ST r9,`2*$BNSZ`(r3) + + #mul_add(rp[3],ap[3],w,c1); + $LD r8,`3*$BNSZ`(r4) + $UMULL r11,r6,r8 + $LD r9,`3*$BNSZ`(r3) + $UMULH r12,r6,r8 + adde r11,r11,r10 + addze r12,r12 + addc r11,r11,r9 + addze r12,r12 + $ST r11,`3*$BNSZ`(r3) + addi r3,r3,`4*$BNSZ` + addi r4,r4,`4*$BNSZ` + bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop + +Lppcasm_maw_leftover: + andi. r5,r5,0x3 + bc BO_IF,CR0_EQ,Lppcasm_maw_adios + addi r3,r3,-$BNSZ + addi r4,r4,-$BNSZ + #mul_add(rp[0],ap[0],w,c1); + mtctr r5 + $LDU r8,$BNSZ(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + $LDU r11,$BNSZ(r3) + addc r9,r9,r11 + addze r10,r10 + addc r9,r9,r12 + addze r12,r10 + $ST r9,0(r3) + + bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios + #mul_add(rp[1],ap[1],w,c1); + $LDU r8,$BNSZ(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + $LDU r11,$BNSZ(r3) + addc r9,r9,r11 + addze r10,r10 + addc r9,r9,r12 + addze r12,r10 + $ST r9,0(r3) + + bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios + #mul_add(rp[2],ap[2],w,c1); + $LDU r8,$BNSZ(r4) + $UMULL r9,r6,r8 + $UMULH r10,r6,r8 + $LDU r11,$BNSZ(r3) + addc r9,r9,r11 + addze r10,r10 + addc r9,r9,r12 + addze r12,r10 + $ST r9,0(r3) + +Lppcasm_maw_adios: + addi r3,r12,0 + bclr BO_ALWAYS,CR0_LT + .long 0x00000000 + .align 4 +EOF + $data =~ s/\`([^\`]*)\`/eval $1/gem; + + # if some assembler chokes on some simplified mnemonic, + # this is the spot to fix it up, e.g.: + # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare + $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm; + # assembler X doesn't accept li, load immediate value + #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm; + return($data); +} diff --git a/crypto/openssl/crypto/bn/asm/sparcv8plus.S b/crypto/openssl/crypto/bn/asm/sparcv8plus.S index 0074dfd..8c56e2e 
100644 --- a/crypto/openssl/crypto/bn/asm/sparcv8plus.S +++ b/crypto/openssl/crypto/bn/asm/sparcv8plus.S @@ -162,10 +162,14 @@ * BN_ULONG w; */ bn_mul_add_words: + sra %o2,%g0,%o2 ! signx %o2 brgz,a %o2,.L_bn_mul_add_words_proceed lduw [%o1],%g2 retl clr %o0 + nop + nop + nop .L_bn_mul_add_words_proceed: srl %o3,%g0,%o3 ! clruw %o3 @@ -260,10 +264,14 @@ bn_mul_add_words: * BN_ULONG w; */ bn_mul_words: + sra %o2,%g0,%o2 ! signx %o2 brgz,a %o2,.L_bn_mul_words_proceeed lduw [%o1],%g2 retl clr %o0 + nop + nop + nop .L_bn_mul_words_proceeed: srl %o3,%g0,%o3 ! clruw %o3 @@ -344,10 +352,14 @@ bn_mul_words: * int n; */ bn_sqr_words: + sra %o2,%g0,%o2 ! signx %o2 brgz,a %o2,.L_bn_sqr_words_proceeed lduw [%o1],%g2 retl clr %o0 + nop + nop + nop .L_bn_sqr_words_proceeed: andcc %o2,-4,%g0 @@ -445,6 +457,7 @@ bn_div_words: * int n; */ bn_add_words: + sra %o3,%g0,%o3 ! signx %o3 brgz,a %o3,.L_bn_add_words_proceed lduw [%o1],%o4 retl @@ -454,7 +467,6 @@ bn_add_words: andcc %o3,-4,%g0 bz,pn %icc,.L_bn_add_words_tail addcc %g0,0,%g0 ! clear carry flag - nop .L_bn_add_words_loop: ! wow! 32 aligned! dec 4,%o3 @@ -523,6 +535,7 @@ bn_add_words: * int n; */ bn_sub_words: + sra %o3,%g0,%o3 ! signx %o3 brgz,a %o3,.L_bn_sub_words_proceed lduw [%o1],%o4 retl @@ -532,7 +545,6 @@ bn_sub_words: andcc %o3,-4,%g0 bz,pn %icc,.L_bn_sub_words_tail addcc %g0,0,%g0 ! clear carry flag - nop .L_bn_sub_words_loop: ! wow! 32 aligned! dec 4,%o3 diff --git a/crypto/openssl/crypto/bn/asm/x86_64-gcc.c b/crypto/openssl/crypto/bn/asm/x86_64-gcc.c index 450e8e4..7378344 100644 --- a/crypto/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/crypto/openssl/crypto/bn/asm/x86_64-gcc.c @@ -13,20 +13,42 @@ * A. Well, that's because this code is basically a quick-n-dirty * proof-of-concept hack. As you can see it's implemented with * inline assembler, which means that you're bound to GCC and that - * there must be a room for fine-tuning. + * there might be enough room for further improvement. * * Q. Why inline assembler? - * A. 
x86_64 features own ABI I'm not familiar with. Which is why - * I decided to let the compiler take care of subroutine - * prologue/epilogue as well as register allocation. + * A. x86_64 features own ABI which I'm not familiar with. This is + * why I decided to let the compiler take care of subroutine + * prologue/epilogue as well as register allocation. For reference. + * Win64 implements different ABI for AMD64, different from Linux. * * Q. How much faster does it get? - * A. Unfortunately people sitting on x86_64 hardware are prohibited - * to disclose the performance numbers, so they (SuSE labs to be - * specific) wouldn't tell me. However! Very similar coding technique - * (reaching out for 128-bit result from 64x64-bit multiplication) - * results in >3 times performance improvement on MIPS and I see no - * reason why gain on x86_64 would be so much different:-) + * A. 'apps/openssl speed rsa dsa' output with no-asm: + * + * sign verify sign/s verify/s + * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2 + * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0 + * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8 + * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6 + * sign verify sign/s verify/s + * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3 + * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2 + * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0 + * + * 'apps/openssl speed rsa dsa' output with this module: + * + * sign verify sign/s verify/s + * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9 + * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7 + * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0 + * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8 + * sign verify sign/s verify/s + * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3 + * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4 + * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6 + * + * For the reference. IA-32 assembler implementation performs + * very much like 64-bit code compiled with no-asm on the same + * machine. 
*/ #define BN_ULONG unsigned long @@ -151,7 +173,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) } BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) -{ BN_ULONG ret,i; +{ BN_ULONG ret=0,i=0; if (n <= 0) return 0; @@ -164,7 +186,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) " leaq 1(%2),%2 \n" " loop 1b \n" " sbbq %0,%0 \n" - : "+a"(ret),"+c"(n),"+r"(i) + : "=&a"(ret),"+c"(n),"=&r"(i) : "r"(rp),"r"(ap),"r"(bp) : "cc" ); @@ -174,7 +196,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) #ifndef SIMICS BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) -{ BN_ULONG ret,i; +{ BN_ULONG ret=0,i=0; if (n <= 0) return 0; @@ -187,7 +209,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) " leaq 1(%2),%2 \n" " loop 1b \n" " sbbq %0,%0 \n" - : "+a"(ret),"+c"(n),"+r"(i) + : "=&a"(ret),"+c"(n),"=&r"(i) : "r"(rp),"r"(ap),"r"(bp) : "cc" ); @@ -318,7 +340,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -423,7 +444,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -464,7 +484,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -541,7 +560,6 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; diff --git a/crypto/openssl/crypto/bn/bn.h b/crypto/openssl/crypto/bn/bn.h index 3da6d8c..95c5d64 100644 --- a/crypto/openssl/crypto/bn/bn.h +++ b/crypto/openssl/crypto/bn/bn.h @@ -55,6 +55,19 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] 
*/ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * Portions of the attached software ("Contribution") are developed by + * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. + * + * The Contribution is licensed pursuant to the Eric Young open source + * license provided above. + * + * The binary polynomial arithmetic software is originally written by + * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. + * + */ #ifndef HEADER_BN_H #define HEADER_BN_H @@ -63,14 +76,23 @@ #ifndef OPENSSL_NO_FP_API #include <stdio.h> /* FILE */ #endif +#include <openssl/ossl_typ.h> #ifdef __cplusplus extern "C" { #endif -#ifdef OPENSSL_SYS_VMS -#undef BN_LLONG /* experimental, so far... */ -#endif +/* These preprocessor symbols control various aspects of the bignum headers and + * library code. They're not defined by any "normal" configuration, as they are + * intended for development and testing purposes. NB: defining all three can be + * useful for debugging application code as well as openssl itself. + * + * BN_DEBUG - turn on various debugging alterations to the bignum code + * BN_DEBUG_RAND - uses random poisoning of unused words to trip up + * mismanagement of bignum internals. You must also define BN_DEBUG. 
+ */ +/* #define BN_DEBUG */ +/* #define BN_DEBUG_RAND */ #define BN_MUL_COMBA #define BN_SQR_COMBA @@ -143,10 +165,12 @@ extern "C" { #endif #ifdef THIRTY_TWO_BIT -#if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) -#define BN_ULLONG unsigned _int64 -#else -#define BN_ULLONG unsigned long long +#ifdef BN_LLONG +# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) +# define BN_ULLONG unsigned __int64 +# else +# define BN_ULLONG unsigned long long +# endif #endif #define BN_ULONG unsigned long #define BN_LONG long @@ -219,17 +243,39 @@ extern "C" { #define BN_DEFAULT_BITS 1280 -#ifdef BIGNUM -#undef BIGNUM -#endif - #define BN_FLG_MALLOCED 0x01 #define BN_FLG_STATIC_DATA 0x02 +#define BN_FLG_EXP_CONSTTIME 0x04 /* avoid leaking exponent information through timings + * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */ +#ifndef OPENSSL_NO_DEPRECATED #define BN_FLG_FREE 0x8000 /* used for debuging */ +#endif #define BN_set_flags(b,n) ((b)->flags|=(n)) #define BN_get_flags(b,n) ((b)->flags&(n)) -typedef struct bignum_st +/* get a clone of a BIGNUM with changed flags, for *temporary* use only + * (the two BIGNUMs cannot not be used in parallel!) */ +#define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \ + (dest)->top=(b)->top, \ + (dest)->dmax=(b)->dmax, \ + (dest)->neg=(b)->neg, \ + (dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \ + | ((b)->flags & ~BN_FLG_MALLOCED) \ + | BN_FLG_STATIC_DATA \ + | (n))) + +/* Already declared in ossl_typ.h */ +#if 0 +typedef struct bignum_st BIGNUM; +/* Used for temp variables (declaration hidden in bn_lcl.h) */ +typedef struct bignum_ctx BN_CTX; +typedef struct bn_blinding_st BN_BLINDING; +typedef struct bn_mont_ctx_st BN_MONT_CTX; +typedef struct bn_recp_ctx_st BN_RECP_CTX; +typedef struct bn_gencb_st BN_GENCB; +#endif + +struct bignum_st { BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ int top; /* Index of last used d +1. */ @@ -237,23 +283,10 @@ typedef struct bignum_st int dmax; /* Size of the d array. 
*/ int neg; /* one if the number is negative */ int flags; - } BIGNUM; - -/* Used for temp variables (declaration hidden in bn_lcl.h) */ -typedef struct bignum_ctx BN_CTX; - -typedef struct bn_blinding_st - { - int init; - BIGNUM *A; - BIGNUM *Ai; - BIGNUM *mod; /* just a reference */ - unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; - * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ - } BN_BLINDING; + }; /* Used for montgomery multiplication */ -typedef struct bn_mont_ctx_st +struct bn_mont_ctx_st { int ri; /* number of bits in R */ BIGNUM RR; /* used to convert to montgomery form */ @@ -262,19 +295,47 @@ typedef struct bn_mont_ctx_st * (Ni is only stored for bignum algorithm) */ BN_ULONG n0; /* least significant word of Ni */ int flags; - } BN_MONT_CTX; + }; /* Used for reciprocal division/mod functions * It cannot be shared between threads */ -typedef struct bn_recp_ctx_st +struct bn_recp_ctx_st { BIGNUM N; /* the divisor */ BIGNUM Nr; /* the reciprocal */ int num_bits; int shift; int flags; - } BN_RECP_CTX; + }; + +/* Used for slow "generation" functions. 
*/ +struct bn_gencb_st + { + unsigned int ver; /* To handle binary (in)compatibility */ + void *arg; /* callback-specific data */ + union + { + /* if(ver==1) - handles old style callbacks */ + void (*cb_1)(int, int, void *); + /* if(ver==2) - new callback style */ + int (*cb_2)(int, int, BN_GENCB *); + } cb; + }; +/* Wrapper function to make using BN_GENCB easier, */ +int BN_GENCB_call(BN_GENCB *cb, int a, int b); +/* Macro to populate a BN_GENCB structure with an "old"-style callback */ +#define BN_GENCB_set_old(gencb, callback, cb_arg) { \ + BN_GENCB *tmp_gencb = (gencb); \ + tmp_gencb->ver = 1; \ + tmp_gencb->arg = (cb_arg); \ + tmp_gencb->cb.cb_1 = (callback); } +/* Macro to populate a BN_GENCB structure with a "new"-style callback */ +#define BN_GENCB_set(gencb, callback, cb_arg) { \ + BN_GENCB *tmp_gencb = (gencb); \ + tmp_gencb->ver = 2; \ + tmp_gencb->arg = (cb_arg); \ + tmp_gencb->cb.cb_2 = (callback); } #define BN_prime_checks 0 /* default: select number of iterations based on the size of the number */ @@ -299,24 +360,33 @@ typedef struct bn_recp_ctx_st #define BN_num_bytes(a) ((BN_num_bits(a)+7)/8) -/* Note that BN_abs_is_word does not work reliably for w == 0 */ -#define BN_abs_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) -#define BN_is_zero(a) (((a)->top == 0) || BN_abs_is_word(a,0)) +/* Note that BN_abs_is_word didn't work reliably for w == 0 until 0.9.8 */ +#define BN_abs_is_word(a,w) ((((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) || \ + (((w) == 0) && ((a)->top == 0))) +#define BN_is_zero(a) ((a)->top == 0) #define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg) -#define BN_is_word(a,w) ((w) ? 
BN_abs_is_word((a),(w)) && !(a)->neg : \ - BN_is_zero((a))) +#define BN_is_word(a,w) (BN_abs_is_word((a),(w)) && (!(w) || !(a)->neg)) #define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1)) #define BN_one(a) (BN_set_word((a),1)) +#define BN_zero_ex(a) \ + do { \ + BIGNUM *_tmp_bn = (a); \ + _tmp_bn->top = 0; \ + _tmp_bn->neg = 0; \ + } while(0) +#ifdef OPENSSL_NO_DEPRECATED +#define BN_zero(a) BN_zero_ex(a) +#else #define BN_zero(a) (BN_set_word((a),0)) - -/*#define BN_ascii2bn(a) BN_hex2bn(a) */ -/*#define BN_bn2ascii(a) BN_bn2hex(a) */ +#endif const BIGNUM *BN_value_one(void); char * BN_options(void); BN_CTX *BN_CTX_new(void); +#ifndef OPENSSL_NO_DEPRECATED void BN_CTX_init(BN_CTX *c); +#endif void BN_CTX_free(BN_CTX *c); void BN_CTX_start(BN_CTX *ctx); BIGNUM *BN_CTX_get(BN_CTX *ctx); @@ -342,6 +412,16 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx); +/** BN_set_negative sets sign of a BIGNUM + * \param b pointer to the BIGNUM object + * \param n 0 if the BIGNUM b should be positive and a value != 0 otherwise + */ +void BN_set_negative(BIGNUM *b, int n); +/** BN_is_negative returns 1 if the BIGNUM is negative + * \param a pointer to the BIGNUM object + * \return 1 if a < 0 and 0 otherwise + */ +#define BN_is_negative(a) ((a)->neg != 0) int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx); @@ -378,6 +458,8 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,BN_CTX *ctx); int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); +int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont); int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX 
*m_ctx); int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1, @@ -413,6 +495,9 @@ BIGNUM *BN_mod_inverse(BIGNUM *ret, const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); BIGNUM *BN_mod_sqrt(BIGNUM *ret, const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); + +/* Deprecated versions */ +#ifndef OPENSSL_NO_DEPRECATED BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe, const BIGNUM *add, const BIGNUM *rem, void (*callback)(int,int,void *),void *cb_arg); @@ -422,6 +507,14 @@ int BN_is_prime(const BIGNUM *p,int nchecks, int BN_is_prime_fasttest(const BIGNUM *p,int nchecks, void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg, int do_trial_division); +#endif /* !defined(OPENSSL_NO_DEPRECATED) */ + +/* Newer versions */ +int BN_generate_prime_ex(BIGNUM *ret,int bits,int safe, const BIGNUM *add, + const BIGNUM *rem, BN_GENCB *cb); +int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb); +int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, + int do_trial_division, BN_GENCB *cb); BN_MONT_CTX *BN_MONT_CTX_new(void ); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); @@ -434,15 +527,34 @@ int BN_from_montgomery(BIGNUM *r,const BIGNUM *a, void BN_MONT_CTX_free(BN_MONT_CTX *mont); int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *mod,BN_CTX *ctx); BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from); +BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, + const BIGNUM *mod, BN_CTX *ctx); + +/* BN_BLINDING flags */ +#define BN_BLINDING_NO_UPDATE 0x00000001 +#define BN_BLINDING_NO_RECREATE 0x00000002 -BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod); +BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod); void BN_BLINDING_free(BN_BLINDING *b); int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx); -int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx); +int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, 
BN_CTX *ctx); - +int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *); +int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *); +unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *); +void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); +unsigned long BN_BLINDING_get_flags(const BN_BLINDING *); +void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long); +BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, + const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, + int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), + BN_MONT_CTX *m_ctx); + +#ifndef OPENSSL_NO_DEPRECATED void BN_set_params(int mul,int high,int low,int mont); int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */ +#endif void BN_RECP_CTX_init(BN_RECP_CTX *recp); BN_RECP_CTX *BN_RECP_CTX_new(void); @@ -455,15 +567,162 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BN_RECP_CTX *recp, BN_CTX *ctx); +/* Functions for arithmetic over binary polynomials represented by BIGNUMs. + * + * The BIGNUM::neg property of BIGNUMs representing binary polynomials is + * ignored. + * + * Note that input arguments are not const so that their bit arrays can + * be expanded to the appropriate size if needed. 
+ */ + +int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); /*r = a + b*/ +#define BN_GF2m_sub(r, a, b) BN_GF2m_add(r, a, b) +int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p); /*r=a mod p*/ +int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *p, BN_CTX *ctx); /* r = (a * b) mod p */ +int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + BN_CTX *ctx); /* r = (a * a) mod p */ +int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *b, const BIGNUM *p, + BN_CTX *ctx); /* r = (1 / b) mod p */ +int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *p, BN_CTX *ctx); /* r = (a / b) mod p */ +int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *p, BN_CTX *ctx); /* r = (a ^ b) mod p */ +int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + BN_CTX *ctx); /* r = sqrt(a) mod p */ +int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + BN_CTX *ctx); /* r^2 + r = a mod p */ +#define BN_GF2m_cmp(a, b) BN_ucmp((a), (b)) +/* Some functions allow for representation of the irreducible polynomials + * as an unsigned int[], say p. The irreducible f(t) is then of the form: + * t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
+ */ +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]); + /* r = a mod p */ +int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */ +int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], + BN_CTX *ctx); /* r = (a * a) mod p */ +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[], + BN_CTX *ctx); /* r = (1 / b) mod p */ +int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */ +int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ +int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, + const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ +int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, + const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ +int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max); +int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a); + +/* faster mod functions for the 'NIST primes' + * 0 <= a < p^2 */ +int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); +int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); +int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); +int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); +int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); + +const BIGNUM *BN_get0_nist_prime_192(void); +const BIGNUM *BN_get0_nist_prime_224(void); +const BIGNUM *BN_get0_nist_prime_256(void); +const BIGNUM *BN_get0_nist_prime_384(void); +const BIGNUM *BN_get0_nist_prime_521(void); + /* library internal functions */ #define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ - (a):bn_expand2((a),(bits)/BN_BITS2+1)) + 
(a):bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2)) #define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) BIGNUM *bn_expand2(BIGNUM *a, int words); -BIGNUM *bn_dup_expand(const BIGNUM *a, int words); +#ifndef OPENSSL_NO_DEPRECATED +BIGNUM *bn_dup_expand(const BIGNUM *a, int words); /* unused */ +#endif + +/* Bignum consistency macros + * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from + * bignum data after direct manipulations on the data. There is also an + * "internal" macro, bn_check_top(), for verifying that there are no leading + * zeroes. Unfortunately, some auditing is required due to the fact that + * bn_fix_top() has become an overabused duct-tape because bignum data is + * occasionally passed around in an inconsistent state. So the following + * changes have been made to sort this out; + * - bn_fix_top()s implementation has been moved to bn_correct_top() + * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and + * bn_check_top() is as before. + * - if BN_DEBUG *is* defined; + * - bn_check_top() tries to pollute unused words even if the bignum 'top' is + * consistent. (ed: only if BN_DEBUG_RAND is defined) + * - bn_fix_top() maps to bn_check_top() rather than "fixing" anything. + * The idea is to have debug builds flag up inconsistent bignums when they + * occur. If that occurs in a bn_fix_top(), we examine the code in question; if + * the use of bn_fix_top() was appropriate (ie. it follows directly after code + * that manipulates the bignum) it is converted to bn_correct_top(), and if it + * was not appropriate, we convert it permanently to bn_check_top() and track + * down the cause of the bug. Eventually, no internal code should be using the + * bn_fix_top() macro. 
External applications and libraries should try this with + * their own code too, both in terms of building against the openssl headers + * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it + * defined. This not only improves external code, it provides more test + * coverage for openssl's own code. + */ + +#ifdef BN_DEBUG + +/* We only need assert() when debugging */ +#include <assert.h> -#define bn_fix_top(a) \ +#ifdef BN_DEBUG_RAND +/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */ +#ifndef RAND_pseudo_bytes +int RAND_pseudo_bytes(unsigned char *buf,int num); +#define BN_DEBUG_TRIX +#endif +#define bn_pollute(a) \ + do { \ + const BIGNUM *_bnum1 = (a); \ + if(_bnum1->top < _bnum1->dmax) { \ + unsigned char _tmp_char; \ + /* We cast away const without the compiler knowing, any \ + * *genuinely* constant variables that aren't mutable \ + * wouldn't be constructed with top!=dmax. */ \ + BN_ULONG *_not_const; \ + memcpy(&_not_const, &_bnum1->d, sizeof(BN_ULONG*)); \ + RAND_pseudo_bytes(&_tmp_char, 1); \ + memset((unsigned char *)(_not_const + _bnum1->top), _tmp_char, \ + (_bnum1->dmax - _bnum1->top) * sizeof(BN_ULONG)); \ + } \ + } while(0) +#ifdef BN_DEBUG_TRIX +#undef RAND_pseudo_bytes +#endif +#else +#define bn_pollute(a) +#endif +#define bn_check_top(a) \ + do { \ + const BIGNUM *_bnum2 = (a); \ + if (_bnum2 != NULL) { \ + assert((_bnum2->top == 0) || \ + (_bnum2->d[_bnum2->top - 1] != 0)); \ + bn_pollute(_bnum2); \ + } \ + } while(0) + +#define bn_fix_top(a) bn_check_top(a) + +#else /* !BN_DEBUG */ + +#define bn_pollute(a) +#define bn_check_top(a) +#define bn_fix_top(a) bn_correct_top(a) + +#endif + +#define bn_correct_top(a) \ { \ BN_ULONG *ftl; \ if ((a)->top > 0) \ @@ -471,6 +730,7 @@ BIGNUM *bn_dup_expand(const BIGNUM *a, int words); for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ if (*(ftl--)) break; \ } \ + bn_pollute(a); \ } BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, 
BN_ULONG w); @@ -480,15 +740,17 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num); BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num); -#ifdef BN_DEBUG -void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n); -# define bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \ - fprintf(stderr,"\n");} -# define bn_dump(a,n) bn_dump1(stderr,#a,a,n); -#else -# define bn_print(a) -# define bn_dump(a,b) -#endif +/* Primes from RFC 2409 */ +BIGNUM *get_rfc2409_prime_768(BIGNUM *bn); +BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn); + +/* Primes from RFC 3526 */ +BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn); +BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn); +BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn); +BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn); +BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn); +BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn); int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom); @@ -501,20 +763,35 @@ void ERR_load_BN_strings(void); /* Error codes for the BN functions. */ /* Function codes. 
*/ -#define BN_F_BN_BLINDING_CONVERT 100 -#define BN_F_BN_BLINDING_INVERT 101 +#define BN_F_BNRAND 127 +#define BN_F_BN_BLINDING_CONVERT_EX 100 +#define BN_F_BN_BLINDING_CREATE_PARAM 128 +#define BN_F_BN_BLINDING_INVERT_EX 101 #define BN_F_BN_BLINDING_NEW 102 #define BN_F_BN_BLINDING_UPDATE 103 #define BN_F_BN_BN2DEC 104 #define BN_F_BN_BN2HEX 105 #define BN_F_BN_CTX_GET 116 #define BN_F_BN_CTX_NEW 106 +#define BN_F_BN_CTX_START 129 #define BN_F_BN_DIV 107 +#define BN_F_BN_DIV_RECP 130 +#define BN_F_BN_EXP 123 #define BN_F_BN_EXPAND2 108 #define BN_F_BN_EXPAND_INTERNAL 120 +#define BN_F_BN_GF2M_MOD 131 +#define BN_F_BN_GF2M_MOD_EXP 132 +#define BN_F_BN_GF2M_MOD_MUL 133 +#define BN_F_BN_GF2M_MOD_SOLVE_QUAD 134 +#define BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR 135 +#define BN_F_BN_GF2M_MOD_SQR 136 +#define BN_F_BN_GF2M_MOD_SQRT 137 #define BN_F_BN_MOD_EXP2_MONT 118 #define BN_F_BN_MOD_EXP_MONT 109 +#define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124 #define BN_F_BN_MOD_EXP_MONT_WORD 117 +#define BN_F_BN_MOD_EXP_RECP 125 +#define BN_F_BN_MOD_EXP_SIMPLE 126 #define BN_F_BN_MOD_INVERSE 110 #define BN_F_BN_MOD_LSHIFT_QUICK 119 #define BN_F_BN_MOD_MUL_RECIPROCAL 111 @@ -539,6 +816,7 @@ void ERR_load_BN_strings(void); #define BN_R_NOT_A_SQUARE 111 #define BN_R_NOT_INITIALIZED 107 #define BN_R_NO_INVERSE 108 +#define BN_R_NO_SOLUTION 116 #define BN_R_P_IS_NOT_PRIME 112 #define BN_R_TOO_MANY_ITERATIONS 113 #define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109 diff --git a/crypto/openssl/crypto/bn/bn_add.c b/crypto/openssl/crypto/bn/bn_add.c index 6cba07e..9405163 100644 --- a/crypto/openssl/crypto/bn/bn_add.c +++ b/crypto/openssl/crypto/bn/bn_add.c @@ -64,7 +64,7 @@ int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { const BIGNUM *tmp; - int a_neg = a->neg; + int a_neg = a->neg, ret; bn_check_top(a); bn_check_top(b); @@ -95,20 +95,17 @@ int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) return(1); } - if (!BN_uadd(r,a,b)) return(0); - if (a_neg) /* both are neg */ - r->neg=1; - else - 
r->neg=0; - return(1); + ret = BN_uadd(r,a,b); + r->neg = a_neg; + bn_check_top(r); + return ret; } -/* unsigned add of b to a, r must be large enough */ +/* unsigned add of b to a */ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { - register int i; - int max,min; - BN_ULONG *ap,*bp,*rp,carry,t1; + int max,min,dif; + BN_ULONG *ap,*bp,*rp,carry,t1,t2; const BIGNUM *tmp; bn_check_top(a); @@ -116,11 +113,12 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) if (a->top < b->top) { tmp=a; a=b; b=tmp; } - max=a->top; - min=b->top; + max = a->top; + min = b->top; + dif = max - min; if (bn_wexpand(r,max+1) == NULL) - return(0); + return 0; r->top=max; @@ -128,46 +126,46 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) ap=a->d; bp=b->d; rp=r->d; - carry=0; carry=bn_add_words(rp,ap,bp,min); rp+=min; ap+=min; bp+=min; - i=min; if (carry) { - while (i < max) + while (dif) { - i++; - t1= *(ap++); - if ((*(rp++)=(t1+1)&BN_MASK2) >= t1) + dif--; + t1 = *(ap++); + t2 = (t1+1) & BN_MASK2; + *(rp++) = t2; + if (t2) { carry=0; break; } } - if ((i >= max) && carry) + if (carry) { - *(rp++)=1; + /* carry != 0 => dif == 0 */ + *rp = 1; r->top++; } } - if (rp != ap) - { - for (; i<max; i++) - *(rp++)= *(ap++); - } - /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/ + if (dif && rp != ap) + while (dif--) + /* copy remaining words if ap != rp */ + *(rp++) = *(ap++); r->neg = 0; - return(1); + bn_check_top(r); + return 1; } /* unsigned subtraction of b from a, a must be larger than b. */ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { - int max,min; + int max,min,dif; register BN_ULONG t1,t2,*ap,*bp,*rp; int i,carry; #if defined(IRIX_CC_BUG) && !defined(LINT) @@ -177,14 +175,16 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) bn_check_top(a); bn_check_top(b); - if (a->top < b->top) /* hmm... should not be happening */ + max = a->top; + min = b->top; + dif = max - min; + + if (dif < 0) /* hmm... 
should not be happening */ { BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3); return(0); } - max=a->top; - min=b->top; if (bn_wexpand(r,max) == NULL) return(0); ap=a->d; @@ -193,7 +193,7 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) #if 1 carry=0; - for (i=0; i<min; i++) + for (i = min; i != 0; i--) { t1= *(ap++); t2= *(bp++); @@ -217,17 +217,20 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) ap+=min; bp+=min; rp+=min; - i=min; #endif if (carry) /* subtracted */ { - while (i < max) + if (!dif) + /* error: a < b */ + return 0; + while (dif) { - i++; - t1= *(ap++); - t2=(t1-1)&BN_MASK2; - *(rp++)=t2; - if (t1 > t2) break; + dif--; + t1 = *(ap++); + t2 = (t1-1)&BN_MASK2; + *(rp++) = t2; + if (t1) + break; } } #if 0 @@ -237,13 +240,13 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { for (;;) { - if (i++ >= max) break; + if (!dif--) break; rp[0]=ap[0]; - if (i++ >= max) break; + if (!dif--) break; rp[1]=ap[1]; - if (i++ >= max) break; + if (!dif--) break; rp[2]=ap[2]; - if (i++ >= max) break; + if (!dif--) break; rp[3]=ap[3]; rp+=4; ap+=4; @@ -253,7 +256,7 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) r->top=max; r->neg=0; - bn_fix_top(r); + bn_correct_top(r); return(1); } @@ -304,6 +307,7 @@ int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) if (!BN_usub(r,a,b)) return(0); r->neg=0; } + bn_check_top(r); return(1); } diff --git a/crypto/openssl/crypto/bn/bn_asm.c b/crypto/openssl/crypto/bn/bn_asm.c index be8aa3f..99bc2de 100644 --- a/crypto/openssl/crypto/bn/bn_asm.c +++ b/crypto/openssl/crypto/bn/bn_asm.c @@ -237,7 +237,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) if (d == 0) return(BN_MASK2); i=BN_num_bits_word(d); - assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i)); + assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i)); i=BN_BITS2-i; if (h >= d) h-=d; @@ -459,6 +459,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define sqr_add_c2(a,i,j,c0,c1,c2) \ 
mul_add_c2((a)[i],(a)[j],c0,c1,c2) +#elif defined(BN_UMULT_LOHI) + +#define mul_add_c(a,b,c0,c1,c2) { \ + BN_ULONG ta=(a),tb=(b); \ + BN_UMULT_LOHI(t1,t2,ta,tb); \ + c0 += t1; t2 += (c0<t1)?1:0; \ + c1 += t2; c2 += (c1<t2)?1:0; \ + } + +#define mul_add_c2(a,b,c0,c1,c2) { \ + BN_ULONG ta=(a),tb=(b),t0; \ + BN_UMULT_LOHI(t0,t1,ta,tb); \ + t2 = t1+t1; c2 += (t2<t1)?1:0; \ + t1 = t0+t0; t2 += (t1<t0)?1:0; \ + c0 += t1; t2 += (c0<t1)?1:0; \ + c1 += t2; c2 += (c1<t2)?1:0; \ + } + +#define sqr_add_c(a,i,c0,c1,c2) { \ + BN_ULONG ta=(a)[i]; \ + BN_UMULT_LOHI(t1,t2,ta,ta); \ + c0 += t1; t2 += (c0<t1)?1:0; \ + c1 += t2; c2 += (c1<t2)?1:0; \ + } + +#define sqr_add_c2(a,i,j,c0,c1,c2) \ + mul_add_c2((a)[i],(a)[j],c0,c1,c2) + #elif defined(BN_UMULT_HIGH) #define mul_add_c(a,b,c0,c1,c2) { \ diff --git a/crypto/openssl/crypto/bn/bn_blind.c b/crypto/openssl/crypto/bn/bn_blind.c index 2d287e6..ca22d4f 100644 --- a/crypto/openssl/crypto/bn/bn_blind.c +++ b/crypto/openssl/crypto/bn/bn_blind.c @@ -1,4 +1,57 @@ /* crypto/bn/bn_blind.c */ +/* ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. 
* @@ -60,11 +113,28 @@ #include "cryptlib.h" #include "bn_lcl.h" -BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod) +#define BN_BLINDING_COUNTER 32 + +struct bn_blinding_st + { + BIGNUM *A; + BIGNUM *Ai; + BIGNUM *e; + BIGNUM *mod; /* just a reference */ + unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; + * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ + unsigned int counter; + unsigned long flags; + BN_MONT_CTX *m_ctx; + int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, + BN_MONT_CTX *m_ctx); + }; + +BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) { BN_BLINDING *ret=NULL; - bn_check_top(Ai); bn_check_top(mod); if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL) @@ -73,11 +143,16 @@ BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod) return(NULL); } memset(ret,0,sizeof(BN_BLINDING)); - if ((ret->A=BN_new()) == NULL) goto err; - if ((ret->Ai=BN_new()) == NULL) goto err; - if (!BN_copy(ret->A,A)) goto err; - if (!BN_copy(ret->Ai,Ai)) goto err; - ret->mod=mod; + if (A != NULL) + { + if ((ret->A = BN_dup(A)) == NULL) goto err; + } + if (Ai != NULL) + { + if ((ret->Ai = BN_dup(Ai)) == NULL) goto err; + } + ret->mod = mod; + ret->counter = BN_BLINDING_COUNTER; return(ret); err: if (ret != NULL) BN_BLINDING_free(ret); @@ -91,6 +166,7 @@ void BN_BLINDING_free(BN_BLINDING *r) if (r->A != NULL) BN_free(r->A ); if (r->Ai != NULL) BN_free(r->Ai); + if (r->e != NULL) BN_free(r->e ); OPENSSL_free(r); } @@ -103,42 +179,181 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED); goto err; } - - if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err; - if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err; + + if (--(b->counter) == 0 && b->e != NULL && + !(b->flags & BN_BLINDING_NO_RECREATE)) + { + /* re-create blinding parameters */ + if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, 
NULL)) + goto err; + } + else if (!(b->flags & BN_BLINDING_NO_UPDATE)) + { + if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err; + if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err; + } ret=1; err: + if (b->counter == 0) + b->counter = BN_BLINDING_COUNTER; return(ret); } int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx) { + return BN_BLINDING_convert_ex(n, NULL, b, ctx); + } + +int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx) + { + int ret = 1; + bn_check_top(n); if ((b->A == NULL) || (b->Ai == NULL)) { - BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITIALIZED); + BNerr(BN_F_BN_BLINDING_CONVERT_EX,BN_R_NOT_INITIALIZED); return(0); } - return(BN_mod_mul(n,n,b->A,b->mod,ctx)); + + if (r != NULL) + { + if (!BN_copy(r, b->Ai)) ret=0; + } + + if (!BN_mod_mul(n,n,b->A,b->mod,ctx)) ret=0; + + return ret; } int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx) { + return BN_BLINDING_invert_ex(n, NULL, b, ctx); + } + +int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx) + { int ret; bn_check_top(n); if ((b->A == NULL) || (b->Ai == NULL)) { - BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITIALIZED); + BNerr(BN_F_BN_BLINDING_INVERT_EX,BN_R_NOT_INITIALIZED); return(0); } - if ((ret=BN_mod_mul(n,n,b->Ai,b->mod,ctx)) >= 0) + + if (r != NULL) + ret = BN_mod_mul(n, n, r, b->mod, ctx); + else + ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx); + + if (ret >= 0) { if (!BN_BLINDING_update(b,ctx)) return(0); } + bn_check_top(n); return(ret); } +unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b) + { + return b->thread_id; + } + +void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n) + { + b->thread_id = n; + } + +unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b) + { + return b->flags; + } + +void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags) + { + b->flags = flags; + } + +BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, + const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, + 
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), + BN_MONT_CTX *m_ctx) +{ + int retry_counter = 32; + BN_BLINDING *ret = NULL; + + if (b == NULL) + ret = BN_BLINDING_new(NULL, NULL, m); + else + ret = b; + + if (ret == NULL) + goto err; + + if (ret->A == NULL && (ret->A = BN_new()) == NULL) + goto err; + if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL) + goto err; + + if (e != NULL) + { + if (ret->e != NULL) + BN_free(ret->e); + ret->e = BN_dup(e); + } + if (ret->e == NULL) + goto err; + + if (bn_mod_exp != NULL) + ret->bn_mod_exp = bn_mod_exp; + if (m_ctx != NULL) + ret->m_ctx = m_ctx; + + do { + if (!BN_rand_range(ret->A, ret->mod)) goto err; + if (BN_mod_inverse(ret->Ai, ret->A, ret->mod, ctx) == NULL) + { + /* this should almost never happen for good RSA keys */ + unsigned long error = ERR_peek_last_error(); + if (ERR_GET_REASON(error) == BN_R_NO_INVERSE) + { + if (retry_counter-- == 0) + { + BNerr(BN_F_BN_BLINDING_CREATE_PARAM, + BN_R_TOO_MANY_ITERATIONS); + goto err; + } + ERR_clear_error(); + } + else + goto err; + } + else + break; + } while (1); + + if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL) + { + if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx)) + goto err; + } + else + { + if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx)) + goto err; + } + + return ret; +err: + if (b == NULL && ret != NULL) + { + BN_BLINDING_free(ret); + ret = NULL; + } + + return ret; +} diff --git a/crypto/openssl/crypto/bn/bn_const.c b/crypto/openssl/crypto/bn/bn_const.c new file mode 100755 index 0000000..eb60a25 --- /dev/null +++ b/crypto/openssl/crypto/bn/bn_const.c @@ -0,0 +1,402 @@ +/* crypto/bn/knownprimes.c */ +/* Insert boilerplate */ + +#include "bn.h" + +/* "First Oakley Default Group" from RFC2409, section 6.1. + * + * The prime is: 2^768 - 2 ^704 - 1 + 2^64 * { [2^638 pi] + 149686 } + * + * RFC2409 specifies a generator of 2. 
+ * RFC2412 specifies a generator of of 22. + */ + +BIGNUM *get_rfc2409_prime_768(BIGNUM *bn) + { + static const unsigned char RFC2409_PRIME_768[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x3A,0x36,0x20,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC2409_PRIME_768,sizeof(RFC2409_PRIME_768),bn); + } + +/* "Second Oakley Default Group" from RFC2409, section 6.2. + * + * The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }. + * + * RFC2409 specifies a generator of 2. + * RFC2412 specifies a generator of 22. + */ + +BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn) + { + static const unsigned char RFC2409_PRIME_1024[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE6,0x53,0x81, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC2409_PRIME_1024,sizeof(RFC2409_PRIME_1024),bn); + } + +/* "1536-bit MODP Group" from RFC3526, Section 2. + * + * The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 } + * + * RFC3526 specifies a generator of 2. 
+ * RFC2312 specifies a generator of 22. + */ + +BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn) + { + static const unsigned char RFC3526_PRIME_1536[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D, + 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36, + 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F, + 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56, + 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D, + 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08, + 0xCA,0x23,0x73,0x27,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC3526_PRIME_1536,sizeof(RFC3526_PRIME_1536),bn); + } + +/* "2048-bit MODP Group" from RFC3526, Section 3. + * + * The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 } + * + * RFC3526 specifies a generator of 2. 
+ */ + +BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn) + { + static const unsigned char RFC3526_PRIME_2048[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D, + 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36, + 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F, + 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56, + 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D, + 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08, + 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B, + 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2, + 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9, + 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C, + 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10, + 0x15,0x72,0x8E,0x5A,0x8A,0xAC,0xAA,0x68,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC3526_PRIME_2048,sizeof(RFC3526_PRIME_2048),bn); + } + +/* "3072-bit MODP Group" from RFC3526, Section 4. + * + * The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 } + * + * RFC3526 specifies a generator of 2. 
+ */ + +BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn) + { + static const unsigned char RFC3526_PRIME_3072[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D, + 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36, + 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F, + 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56, + 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D, + 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08, + 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B, + 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2, + 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9, + 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C, + 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10, + 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D, + 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64, + 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57, + 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7, + 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0, + 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B, + 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73, + 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C, + 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0, + 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31, + 
0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20, + 0xA9,0x3A,0xD2,0xCA,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC3526_PRIME_3072,sizeof(RFC3526_PRIME_3072),bn); + } + +/* "4096-bit MODP Group" from RFC3526, Section 5. + * + * The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 } + * + * RFC3526 specifies a generator of 2. + */ + +BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn) + { + static const unsigned char RFC3526_PRIME_4096[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D, + 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36, + 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F, + 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56, + 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D, + 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08, + 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B, + 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2, + 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9, + 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C, + 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10, + 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D, + 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64, + 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57, + 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7, + 
0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0, + 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B, + 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73, + 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C, + 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0, + 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31, + 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20, + 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7, + 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18, + 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA, + 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB, + 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6, + 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F, + 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED, + 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76, + 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9, + 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC, + 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x06,0x31,0x99, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC3526_PRIME_4096,sizeof(RFC3526_PRIME_4096),bn); + } + +/* "6144-bit MODP Group" from RFC3526, Section 6. + * + * The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 } + * + * RFC3526 specifies a generator of 2. 
+ */ + +BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn) + { + static const unsigned char RFC3526_PRIME_6144[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D, + 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36, + 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F, + 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56, + 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D, + 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08, + 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B, + 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2, + 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9, + 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C, + 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10, + 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D, + 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64, + 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57, + 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7, + 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0, + 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B, + 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73, + 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C, + 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0, + 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31, + 
0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20, + 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7, + 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18, + 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA, + 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB, + 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6, + 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F, + 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED, + 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76, + 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9, + 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC, + 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92, + 0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2, + 0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD, + 0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F, + 0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31, + 0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB, + 0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B, + 0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51, + 0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF, + 0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15, + 0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6, + 0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31, + 0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3, + 0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7, + 0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA, + 0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2, + 0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28, + 0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D, + 0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C, + 0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7, + 
0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE, + 0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E, + 0x6D,0xCC,0x40,0x24,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + }; + return BN_bin2bn(RFC3526_PRIME_6144,sizeof(RFC3526_PRIME_6144),bn); + } + +/* "8192-bit MODP Group" from RFC3526, Section 7. + * + * The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 } + * + * RFC3526 specifies a generator of 2. + */ + +BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn) + { + static const unsigned char RFC3526_PRIME_8192[]={ + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2, + 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1, + 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6, + 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD, + 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D, + 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45, + 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9, + 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED, + 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11, + 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D, + 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36, + 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F, + 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56, + 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D, + 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08, + 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B, + 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2, + 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9, + 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C, + 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10, + 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D, + 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64, + 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57, + 
0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7, + 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0, + 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B, + 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73, + 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C, + 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0, + 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31, + 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20, + 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7, + 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18, + 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA, + 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB, + 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6, + 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F, + 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED, + 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76, + 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9, + 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC, + 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92, + 0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2, + 0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD, + 0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F, + 0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31, + 0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB, + 0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B, + 0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51, + 0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF, + 0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15, + 0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6, + 0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31, + 0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3, + 
0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7, + 0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA, + 0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2, + 0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28, + 0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D, + 0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C, + 0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7, + 0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE, + 0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E, + 0x6D,0xBE,0x11,0x59,0x74,0xA3,0x92,0x6F,0x12,0xFE,0xE5,0xE4, + 0x38,0x77,0x7C,0xB6,0xA9,0x32,0xDF,0x8C,0xD8,0xBE,0xC4,0xD0, + 0x73,0xB9,0x31,0xBA,0x3B,0xC8,0x32,0xB6,0x8D,0x9D,0xD3,0x00, + 0x74,0x1F,0xA7,0xBF,0x8A,0xFC,0x47,0xED,0x25,0x76,0xF6,0x93, + 0x6B,0xA4,0x24,0x66,0x3A,0xAB,0x63,0x9C,0x5A,0xE4,0xF5,0x68, + 0x34,0x23,0xB4,0x74,0x2B,0xF1,0xC9,0x78,0x23,0x8F,0x16,0xCB, + 0xE3,0x9D,0x65,0x2D,0xE3,0xFD,0xB8,0xBE,0xFC,0x84,0x8A,0xD9, + 0x22,0x22,0x2E,0x04,0xA4,0x03,0x7C,0x07,0x13,0xEB,0x57,0xA8, + 0x1A,0x23,0xF0,0xC7,0x34,0x73,0xFC,0x64,0x6C,0xEA,0x30,0x6B, + 0x4B,0xCB,0xC8,0x86,0x2F,0x83,0x85,0xDD,0xFA,0x9D,0x4B,0x7F, + 0xA2,0xC0,0x87,0xE8,0x79,0x68,0x33,0x03,0xED,0x5B,0xDD,0x3A, + 0x06,0x2B,0x3C,0xF5,0xB3,0xA2,0x78,0xA6,0x6D,0x2A,0x13,0xF8, + 0x3F,0x44,0xF8,0x2D,0xDF,0x31,0x0E,0xE0,0x74,0xAB,0x6A,0x36, + 0x45,0x97,0xE8,0x99,0xA0,0x25,0x5D,0xC1,0x64,0xF3,0x1C,0xC5, + 0x08,0x46,0x85,0x1D,0xF9,0xAB,0x48,0x19,0x5D,0xED,0x7E,0xA1, + 0xB1,0xD5,0x10,0xBD,0x7E,0xE7,0x4D,0x73,0xFA,0xF3,0x6B,0xC3, + 0x1E,0xCF,0xA2,0x68,0x35,0x90,0x46,0xF4,0xEB,0x87,0x9F,0x92, + 0x40,0x09,0x43,0x8B,0x48,0x1C,0x6C,0xD7,0x88,0x9A,0x00,0x2E, + 0xD5,0xEE,0x38,0x2B,0xC9,0x19,0x0D,0xA6,0xFC,0x02,0x6E,0x47, + 0x95,0x58,0xE4,0x47,0x56,0x77,0xE9,0xAA,0x9E,0x30,0x50,0xE2, + 0x76,0x56,0x94,0xDF,0xC8,0x1F,0x56,0xE8,0x80,0xB9,0x6E,0x71, + 0x60,0xC9,0x80,0xDD,0x98,0xED,0xD3,0xDF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF, + }; + return 
BN_bin2bn(RFC3526_PRIME_8192,sizeof(RFC3526_PRIME_8192),bn); + } + diff --git a/crypto/openssl/crypto/bn/bn_ctx.c b/crypto/openssl/crypto/bn/bn_ctx.c index 7daf19e..b3452f1 100644 --- a/crypto/openssl/crypto/bn/bn_ctx.c +++ b/crypto/openssl/crypto/bn/bn_ctx.c @@ -1,7 +1,7 @@ /* crypto/bn/bn_ctx.c */ /* Written by Ulf Moeller for the OpenSSL project. */ /* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -54,9 +54,10 @@ * */ -#ifndef BN_CTX_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG +#if !defined(BN_CTX_DEBUG) && !defined(BN_DEBUG) +#ifndef NDEBUG +#define NDEBUG +#endif #endif #include <stdio.h> @@ -65,91 +66,389 @@ #include "cryptlib.h" #include "bn_lcl.h" +/* TODO list + * + * 1. Check a bunch of "(words+1)" type hacks in various bignum functions and + * check they can be safely removed. + * - Check +1 and other ugliness in BN_from_montgomery() + * + * 2. Consider allowing a BN_new_ex() that, at least, lets you specify an + * appropriate 'block' size that will be honoured by bn_expand_internal() to + * prevent piddly little reallocations. OTOH, profiling bignum expansions in + * BN_CTX doesn't show this to be a big issue. 
+ */ + +/* How many bignums are in each "pool item"; */ +#define BN_CTX_POOL_SIZE 16 +/* The stack frame info is resizing, set a first-time expansion size; */ +#define BN_CTX_START_FRAMES 32 -BN_CTX *BN_CTX_new(void) +/***********/ +/* BN_POOL */ +/***********/ + +/* A bundle of bignums that can be linked with other bundles */ +typedef struct bignum_pool_item + { + /* The bignum values */ + BIGNUM vals[BN_CTX_POOL_SIZE]; + /* Linked-list admin */ + struct bignum_pool_item *prev, *next; + } BN_POOL_ITEM; +/* A linked-list of bignums grouped in bundles */ +typedef struct bignum_pool + { + /* Linked-list admin */ + BN_POOL_ITEM *head, *current, *tail; + /* Stack depth and allocation size */ + unsigned used, size; + } BN_POOL; +static void BN_POOL_init(BN_POOL *); +static void BN_POOL_finish(BN_POOL *); +#ifndef OPENSSL_NO_DEPRECATED +static void BN_POOL_reset(BN_POOL *); +#endif +static BIGNUM * BN_POOL_get(BN_POOL *); +static void BN_POOL_release(BN_POOL *, unsigned int); + +/************/ +/* BN_STACK */ +/************/ + +/* A wrapper to manage the "stack frames" */ +typedef struct bignum_ctx_stack { - BN_CTX *ret; + /* Array of indexes into the bignum stack */ + unsigned int *indexes; + /* Number of stack frames, and the size of the allocated array */ + unsigned int depth, size; + } BN_STACK; +static void BN_STACK_init(BN_STACK *); +static void BN_STACK_finish(BN_STACK *); +#ifndef OPENSSL_NO_DEPRECATED +static void BN_STACK_reset(BN_STACK *); +#endif +static int BN_STACK_push(BN_STACK *, unsigned int); +static unsigned int BN_STACK_pop(BN_STACK *); + +/**********/ +/* BN_CTX */ +/**********/ + +/* The opaque BN_CTX type */ +struct bignum_ctx + { + /* The bignum bundles */ + BN_POOL pool; + /* The "stack frames", if you will */ + BN_STACK stack; + /* The number of bignums currently assigned */ + unsigned int used; + /* Depth of stack overflow */ + int err_stack; + /* Block "gets" until an "end" (compatibility behaviour) */ + int too_many; + }; - ret=(BN_CTX 
*)OPENSSL_malloc(sizeof(BN_CTX)); - if (ret == NULL) +/* Enable this to find BN_CTX bugs */ +#ifdef BN_CTX_DEBUG +static const char *ctxdbg_cur = NULL; +static void ctxdbg(BN_CTX *ctx) + { + unsigned int bnidx = 0, fpidx = 0; + BN_POOL_ITEM *item = ctx->pool.head; + BN_STACK *stack = &ctx->stack; + fprintf(stderr,"(%08x): ", (unsigned int)ctx); + while(bnidx < ctx->used) { - BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); - return(NULL); + fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); + if(!(bnidx % BN_CTX_POOL_SIZE)) + item = item->next; } - - BN_CTX_init(ret); - ret->flags=BN_FLG_MALLOCED; - return(ret); + fprintf(stderr,"\n"); + bnidx = 0; + fprintf(stderr," : "); + while(fpidx < stack->depth) + { + while(bnidx++ < stack->indexes[fpidx]) + fprintf(stderr," "); + fprintf(stderr,"^^ "); + bnidx++; + fpidx++; + } + fprintf(stderr,"\n"); } +#define CTXDBG_ENTRY(str, ctx) do { \ + ctxdbg_cur = (str); \ + fprintf(stderr,"Starting %s\n", ctxdbg_cur); \ + ctxdbg(ctx); \ + } while(0) +#define CTXDBG_EXIT(ctx) do { \ + fprintf(stderr,"Ending %s\n", ctxdbg_cur); \ + ctxdbg(ctx); \ + } while(0) +#define CTXDBG_RET(ctx,ret) +#else +#define CTXDBG_ENTRY(str, ctx) +#define CTXDBG_EXIT(ctx) +#define CTXDBG_RET(ctx,ret) +#endif +/* This function is an evil legacy and should not be used. This implementation + * is WYSIWYG, though I've done my best. */ +#ifndef OPENSSL_NO_DEPRECATED void BN_CTX_init(BN_CTX *ctx) { -#if 0 /* explicit version */ - int i; - ctx->tos = 0; - ctx->flags = 0; - ctx->depth = 0; + /* Assume the caller obtained the context via BN_CTX_new() and so is + * trying to reset it for use. Nothing else makes sense, least of all + * binary compatibility from a time when they could declare a static + * variable. 
*/ + BN_POOL_reset(&ctx->pool); + BN_STACK_reset(&ctx->stack); + ctx->used = 0; + ctx->err_stack = 0; ctx->too_many = 0; - for (i = 0; i < BN_CTX_NUM; i++) - BN_init(&(ctx->bn[i])); -#else - memset(ctx, 0, sizeof *ctx); + } #endif + +BN_CTX *BN_CTX_new(void) + { + BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX)); + if(!ret) + { + BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); + return NULL; + } + /* Initialise the structure */ + BN_POOL_init(&ret->pool); + BN_STACK_init(&ret->stack); + ret->used = 0; + ret->err_stack = 0; + ret->too_many = 0; + return ret; } void BN_CTX_free(BN_CTX *ctx) { - int i; - - if (ctx == NULL) return; - assert(ctx->depth == 0); - - for (i=0; i < BN_CTX_NUM; i++) - BN_clear_free(&(ctx->bn[i])); - if (ctx->flags & BN_FLG_MALLOCED) - OPENSSL_free(ctx); + if (ctx == NULL) + return; +#ifdef BN_CTX_DEBUG + { + BN_POOL_ITEM *pool = ctx->pool.head; + fprintf(stderr,"BN_CTX_free, stack-size=%d, pool-bignums=%d\n", + ctx->stack.size, ctx->pool.size); + fprintf(stderr,"dmaxs: "); + while(pool) { + unsigned loop = 0; + while(loop < BN_CTX_POOL_SIZE) + fprintf(stderr,"%02x ", pool->vals[loop++].dmax); + pool = pool->next; + } + fprintf(stderr,"\n"); + } +#endif + BN_STACK_finish(&ctx->stack); + BN_POOL_finish(&ctx->pool); + OPENSSL_free(ctx); } void BN_CTX_start(BN_CTX *ctx) { - if (ctx->depth < BN_CTX_NUM_POS) - ctx->pos[ctx->depth] = ctx->tos; - ctx->depth++; + CTXDBG_ENTRY("BN_CTX_start", ctx); + /* If we're already overflowing ... */ + if(ctx->err_stack || ctx->too_many) + ctx->err_stack++; + /* (Try to) get a new frame pointer */ + else if(!BN_STACK_push(&ctx->stack, ctx->used)) + { + BNerr(BN_F_BN_CTX_START,BN_R_TOO_MANY_TEMPORARY_VARIABLES); + ctx->err_stack++; + } + CTXDBG_EXIT(ctx); } +void BN_CTX_end(BN_CTX *ctx) + { + CTXDBG_ENTRY("BN_CTX_end", ctx); + if(ctx->err_stack) + ctx->err_stack--; + else + { + unsigned int fp = BN_STACK_pop(&ctx->stack); + /* Does this stack frame have anything to release? 
*/ + if(fp < ctx->used) + BN_POOL_release(&ctx->pool, ctx->used - fp); + ctx->used = fp; + /* Unjam "too_many" in case "get" had failed */ + ctx->too_many = 0; + } + CTXDBG_EXIT(ctx); + } BIGNUM *BN_CTX_get(BN_CTX *ctx) { - /* Note: If BN_CTX_get is ever changed to allocate BIGNUMs dynamically, - * make sure that if BN_CTX_get fails once it will return NULL again - * until BN_CTX_end is called. (This is so that callers have to check - * only the last return value.) - */ - if (ctx->depth > BN_CTX_NUM_POS || ctx->tos >= BN_CTX_NUM) + BIGNUM *ret; + CTXDBG_ENTRY("BN_CTX_get", ctx); + if(ctx->err_stack || ctx->too_many) return NULL; + if((ret = BN_POOL_get(&ctx->pool)) == NULL) + { + /* Setting too_many prevents repeated "get" attempts from + * cluttering the error stack. */ + ctx->too_many = 1; + BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES); + return NULL; + } + /* OK, make sure the returned bignum is "zero" */ + BN_zero(ret); + ctx->used++; + CTXDBG_RET(ctx, ret); + return ret; + } + +/************/ +/* BN_STACK */ +/************/ + +static void BN_STACK_init(BN_STACK *st) + { + st->indexes = NULL; + st->depth = st->size = 0; + } + +static void BN_STACK_finish(BN_STACK *st) + { + if(st->size) OPENSSL_free(st->indexes); + } + +#ifndef OPENSSL_NO_DEPRECATED +static void BN_STACK_reset(BN_STACK *st) + { + st->depth = 0; + } +#endif + +static int BN_STACK_push(BN_STACK *st, unsigned int idx) + { + if(st->depth == st->size) + /* Need to expand */ + { + unsigned int newsize = (st->size ? 
+ (st->size * 3 / 2) : BN_CTX_START_FRAMES); + unsigned int *newitems = OPENSSL_malloc(newsize * + sizeof(unsigned int)); + if(!newitems) return 0; + if(st->depth) + memcpy(newitems, st->indexes, st->depth * + sizeof(unsigned int)); + if(st->size) OPENSSL_free(st->indexes); + st->indexes = newitems; + st->size = newsize; + } + st->indexes[(st->depth)++] = idx; + return 1; + } + +static unsigned int BN_STACK_pop(BN_STACK *st) + { + return st->indexes[--(st->depth)]; + } + +/***********/ +/* BN_POOL */ +/***********/ + +static void BN_POOL_init(BN_POOL *p) + { + p->head = p->current = p->tail = NULL; + p->used = p->size = 0; + } + +static void BN_POOL_finish(BN_POOL *p) + { + while(p->head) { - if (!ctx->too_many) + unsigned int loop = 0; + BIGNUM *bn = p->head->vals; + while(loop++ < BN_CTX_POOL_SIZE) { - BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES); - /* disable error code until BN_CTX_end is called: */ - ctx->too_many = 1; + if(bn->d) BN_clear_free(bn); + bn++; } - return NULL; + p->current = p->head->next; + OPENSSL_free(p->head); + p->head = p->current; } - return (&(ctx->bn[ctx->tos++])); } -void BN_CTX_end(BN_CTX *ctx) +#ifndef OPENSSL_NO_DEPRECATED +static void BN_POOL_reset(BN_POOL *p) { - if (ctx == NULL) return; - assert(ctx->depth > 0); - if (ctx->depth == 0) - /* should never happen, but we can tolerate it if not in - * debug mode (could be a 'goto err' in the calling function - * before BN_CTX_start was reached) */ - BN_CTX_start(ctx); + BN_POOL_ITEM *item = p->head; + while(item) + { + unsigned int loop = 0; + BIGNUM *bn = item->vals; + while(loop++ < BN_CTX_POOL_SIZE) + { + if(bn->d) BN_clear(bn); + bn++; + } + item = item->next; + } + p->current = p->head; + p->used = 0; + } +#endif - ctx->too_many = 0; - ctx->depth--; - if (ctx->depth < BN_CTX_NUM_POS) - ctx->tos = ctx->pos[ctx->depth]; +static BIGNUM *BN_POOL_get(BN_POOL *p) + { + if(p->used == p->size) + { + BIGNUM *bn; + unsigned int loop = 0; + BN_POOL_ITEM *item = 
OPENSSL_malloc(sizeof(BN_POOL_ITEM)); + if(!item) return NULL; + /* Initialise the structure */ + bn = item->vals; + while(loop++ < BN_CTX_POOL_SIZE) + BN_init(bn++); + item->prev = p->tail; + item->next = NULL; + /* Link it in */ + if(!p->head) + p->head = p->current = p->tail = item; + else + { + p->tail->next = item; + p->tail = item; + p->current = item; + } + p->size += BN_CTX_POOL_SIZE; + p->used++; + /* Return the first bignum from the new pool */ + return item->vals; + } + if(!p->used) + p->current = p->head; + else if((p->used % BN_CTX_POOL_SIZE) == 0) + p->current = p->current->next; + return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE); + } + +static void BN_POOL_release(BN_POOL *p, unsigned int num) + { + unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE; + p->used -= num; + while(num--) + { + bn_check_top(p->current->vals + offset); + if(!offset) + { + offset = BN_CTX_POOL_SIZE - 1; + p->current = p->current->prev; + } + else + offset--; + } } + diff --git a/crypto/openssl/crypto/bn/bn_depr.c b/crypto/openssl/crypto/bn/bn_depr.c new file mode 100644 index 0000000..27535e4 --- /dev/null +++ b/crypto/openssl/crypto/bn/bn_depr.c @@ -0,0 +1,112 @@ +/* crypto/bn/bn_depr.c */ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ + +/* Support for deprecated functions goes here - static linkage will only slurp + * this code if applications are using them directly. */ + +#include <stdio.h> +#include <time.h> +#include "cryptlib.h" +#include "bn_lcl.h" +#include <openssl/rand.h> + +static void *dummy=&dummy; + +#ifndef OPENSSL_NO_DEPRECATED +BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, + const BIGNUM *add, const BIGNUM *rem, + void (*callback)(int,int,void *), void *cb_arg) + { + BN_GENCB cb; + BIGNUM *rnd=NULL; + int found = 0; + + BN_GENCB_set_old(&cb, callback, cb_arg); + + if (ret == NULL) + { + if ((rnd=BN_new()) == NULL) goto err; + } + else + rnd=ret; + if(!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb)) + goto err; + + /* we have a prime :-) */ + found = 1; +err: + if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd); + return(found ? rnd : NULL); + } + +int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *), + BN_CTX *ctx_passed, void *cb_arg) + { + BN_GENCB cb; + BN_GENCB_set_old(&cb, callback, cb_arg); + return BN_is_prime_ex(a, checks, ctx_passed, &cb); + } + +int BN_is_prime_fasttest(const BIGNUM *a, int checks, + void (*callback)(int,int,void *), + BN_CTX *ctx_passed, void *cb_arg, + int do_trial_division) + { + BN_GENCB cb; + BN_GENCB_set_old(&cb, callback, cb_arg); + return BN_is_prime_fasttest_ex(a, checks, ctx_passed, + do_trial_division, &cb); + } +#endif diff --git a/crypto/openssl/crypto/bn/bn_div.c b/crypto/openssl/crypto/bn/bn_div.c index 580d120..2857f44 100644 --- a/crypto/openssl/crypto/bn/bn_div.c +++ b/crypto/openssl/crypto/bn/bn_div.c @@ -179,12 +179,14 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_CTX *ctx) { - int norm_shift,i,j,loop; + int norm_shift,i,loop; BIGNUM *tmp,wnum,*snum,*sdiv,*res; BN_ULONG *resp,*wnump; BN_ULONG d0,d1; int num_n,div_n; + bn_check_top(dv); + bn_check_top(rm); 
bn_check_top(num); bn_check_top(divisor); @@ -210,7 +212,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, res=BN_CTX_get(ctx); else res=dv; if (sdiv == NULL || res == NULL) goto err; - tmp->neg=0; /* First we normalise the numbers */ norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); @@ -222,17 +223,17 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, div_n=sdiv->top; num_n=snum->top; loop=num_n-div_n; - /* Lets setup a 'window' into snum * This is the part that corresponds to the current * 'area' being divided */ - BN_init(&wnum); - wnum.d= &(snum->d[loop]); - wnum.top= div_n; - wnum.dmax= snum->dmax+1; /* a bit of a lie */ + wnum.neg = 0; + wnum.d = &(snum->d[loop]); + wnum.top = div_n; + /* only needed when BN_ucmp messes up the values between top and max */ + wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */ /* Get the top 2 words of sdiv */ - /* i=sdiv->top; */ + /* div_n=sdiv->top; */ d0=sdiv->d[div_n-1]; d1=(div_n == 1)?0:sdiv->d[div_n-2]; @@ -250,19 +251,28 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (BN_ucmp(&wnum,sdiv) >= 0) { - if (!BN_usub(&wnum,&wnum,sdiv)) goto err; + /* If BN_DEBUG_RAND is defined BN_ucmp changes (via + * bn_pollute) the const bignum arguments => + * clean the values between top and max again */ + bn_clear_top2max(&wnum); + bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); *resp=1; - res->d[res->top-1]=1; } else res->top--; + /* if res->top == 0 then clear the neg value otherwise decrease + * the resp pointer */ if (res->top == 0) res->neg = 0; - resp--; + else + resp--; - for (i=0; i<loop-1; i++) + for (i=0; i<loop-1; i++, wnump--, resp--) { BN_ULONG q,l0; + /* the first part of the loop uses the top two words of + * snum and sdiv to calculate a BN_ULONG q such that + * | wnum - sdiv * q | < sdiv */ #if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM) BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG); 
q=bn_div_3_words(wnump,d1,d0); @@ -346,27 +356,28 @@ X) -> 0x%08X\n", #endif /* !BN_DIV3W */ l0=bn_mul_words(tmp->d,sdiv->d,div_n,q); - wnum.d--; wnum.top++; tmp->d[div_n]=l0; - for (j=div_n+1; j>0; j--) - if (tmp->d[j-1]) break; - tmp->top=j; - - j=wnum.top; - if (!BN_sub(&wnum,&wnum,tmp)) goto err; - - snum->top=snum->top+wnum.top-j; - - if (wnum.neg) + wnum.d--; + /* ignore top values of the bignums just sub the two + * BN_ULONG arrays with bn_sub_words */ + if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1)) { + /* Note: As we have considered only the leading + * two BN_ULONGs in the calculation of q, sdiv * q + * might be greater than wnum (but then (q-1) * sdiv + * is less than or equal to wnum) + */ q--; - j=wnum.top; - if (!BN_add(&wnum,&wnum,sdiv)) goto err; - snum->top+=wnum.top-j; + if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) + /* we can't have an overflow here (assuming + * that q != 0, but if q == 0 then tmp is + * zero anyway) */ + (*wnump)++; } - *(resp--)=q; - wnump--; + /* store part of the result */ + *resp = q; } + bn_correct_top(snum); if (rm != NULL) { /* Keep a copy of the neg flag in num because if rm==num @@ -376,10 +387,12 @@ X) -> 0x%08X\n", BN_rshift(rm,snum,norm_shift); if (!BN_is_zero(rm)) rm->neg = neg; + bn_check_top(rm); } BN_CTX_end(ctx); return(1); err: + bn_check_top(rm); BN_CTX_end(ctx); return(0); } diff --git a/crypto/openssl/crypto/bn/bn_err.c b/crypto/openssl/crypto/bn/bn_err.c index fb84ee9..a253959 100644 --- a/crypto/openssl/crypto/bn/bn_err.c +++ b/crypto/openssl/crypto/bn/bn_err.c @@ -1,6 +1,6 @@ /* crypto/bn/bn_err.c */ /* ==================================================================== - * Copyright (c) 1999 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2005 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -64,52 +64,72 @@ /* BEGIN ERROR CODES */ #ifndef OPENSSL_NO_ERR + +#define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0) +#define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason) + static ERR_STRING_DATA BN_str_functs[]= { -{ERR_PACK(0,BN_F_BN_BLINDING_CONVERT,0), "BN_BLINDING_convert"}, -{ERR_PACK(0,BN_F_BN_BLINDING_INVERT,0), "BN_BLINDING_invert"}, -{ERR_PACK(0,BN_F_BN_BLINDING_NEW,0), "BN_BLINDING_new"}, -{ERR_PACK(0,BN_F_BN_BLINDING_UPDATE,0), "BN_BLINDING_update"}, -{ERR_PACK(0,BN_F_BN_BN2DEC,0), "BN_bn2dec"}, -{ERR_PACK(0,BN_F_BN_BN2HEX,0), "BN_bn2hex"}, -{ERR_PACK(0,BN_F_BN_CTX_GET,0), "BN_CTX_get"}, -{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, -{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, -{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, -{ERR_PACK(0,BN_F_BN_EXPAND_INTERNAL,0), "BN_EXPAND_INTERNAL"}, -{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"}, -{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, -{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"}, -{ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, -{ERR_PACK(0,BN_F_BN_MOD_LSHIFT_QUICK,0), "BN_mod_lshift_quick"}, -{ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, -{ERR_PACK(0,BN_F_BN_MOD_SQRT,0), "BN_mod_sqrt"}, -{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, -{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"}, -{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"}, -{ERR_PACK(0,BN_F_BN_RAND_RANGE,0), "BN_rand_range"}, -{ERR_PACK(0,BN_F_BN_USUB,0), "BN_usub"}, +{ERR_FUNC(BN_F_BNRAND), "BNRAND"}, +{ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"}, +{ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"}, +{ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"}, +{ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"}, +{ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"}, +{ERR_FUNC(BN_F_BN_BN2DEC), 
"BN_bn2dec"}, +{ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"}, +{ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"}, +{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"}, +{ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"}, +{ERR_FUNC(BN_F_BN_DIV), "BN_div"}, +{ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"}, +{ERR_FUNC(BN_F_BN_EXP), "BN_exp"}, +{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"}, +{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"}, +{ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"}, +{ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"}, +{ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"}, +{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"}, +{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"}, +{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"}, +{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"}, +{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"}, +{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"}, +{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"}, +{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"}, +{ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"}, +{ERR_FUNC(BN_F_BN_NEW), "BN_new"}, +{ERR_FUNC(BN_F_BN_RAND), "BN_rand"}, +{ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"}, +{ERR_FUNC(BN_F_BN_USUB), "BN_usub"}, {0,NULL} }; static ERR_STRING_DATA BN_str_reasons[]= { -{BN_R_ARG2_LT_ARG3 ,"arg2 lt arg3"}, -{BN_R_BAD_RECIPROCAL ,"bad reciprocal"}, -{BN_R_BIGNUM_TOO_LONG ,"bignum too long"}, -{BN_R_CALLED_WITH_EVEN_MODULUS ,"called with even modulus"}, -{BN_R_DIV_BY_ZERO ,"div by zero"}, -{BN_R_ENCODING_ERROR ,"encoding error"}, -{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum 
data"}, -{BN_R_INPUT_NOT_REDUCED ,"input not reduced"}, -{BN_R_INVALID_LENGTH ,"invalid length"}, -{BN_R_INVALID_RANGE ,"invalid range"}, -{BN_R_NOT_A_SQUARE ,"not a square"}, -{BN_R_NOT_INITIALIZED ,"not initialized"}, -{BN_R_NO_INVERSE ,"no inverse"}, -{BN_R_P_IS_NOT_PRIME ,"p is not prime"}, -{BN_R_TOO_MANY_ITERATIONS ,"too many iterations"}, -{BN_R_TOO_MANY_TEMPORARY_VARIABLES ,"too many temporary variables"}, +{ERR_REASON(BN_R_ARG2_LT_ARG3) ,"arg2 lt arg3"}, +{ERR_REASON(BN_R_BAD_RECIPROCAL) ,"bad reciprocal"}, +{ERR_REASON(BN_R_BIGNUM_TOO_LONG) ,"bignum too long"}, +{ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS),"called with even modulus"}, +{ERR_REASON(BN_R_DIV_BY_ZERO) ,"div by zero"}, +{ERR_REASON(BN_R_ENCODING_ERROR) ,"encoding error"}, +{ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),"expand on static bignum data"}, +{ERR_REASON(BN_R_INPUT_NOT_REDUCED) ,"input not reduced"}, +{ERR_REASON(BN_R_INVALID_LENGTH) ,"invalid length"}, +{ERR_REASON(BN_R_INVALID_RANGE) ,"invalid range"}, +{ERR_REASON(BN_R_NOT_A_SQUARE) ,"not a square"}, +{ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"}, +{ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"}, +{ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"}, +{ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"}, +{ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"}, +{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"}, {0,NULL} }; @@ -123,8 +143,8 @@ void ERR_load_BN_strings(void) { init=0; #ifndef OPENSSL_NO_ERR - ERR_load_strings(ERR_LIB_BN,BN_str_functs); - ERR_load_strings(ERR_LIB_BN,BN_str_reasons); + ERR_load_strings(0,BN_str_functs); + ERR_load_strings(0,BN_str_reasons); #endif } diff --git a/crypto/openssl/crypto/bn/bn_exp.c b/crypto/openssl/crypto/bn/bn_exp.c index afdfd58..8f8c694 100644 --- a/crypto/openssl/crypto/bn/bn_exp.c +++ b/crypto/openssl/crypto/bn/bn_exp.c @@ -56,7 +56,7 @@ * [including the GNU Public Licence.] 
*/ /* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -113,6 +113,7 @@ #include "cryptlib.h" #include "bn_lcl.h" +/* maximum precomputation table size for *variable* sliding windows */ #define TABLE_SIZE 32 /* this one works - simple but works */ @@ -121,6 +122,13 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) int i,bits,ret=0; BIGNUM *v,*rr; + if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0) + { + /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */ + BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return -1; + } + BN_CTX_start(ctx); if ((r == a) || (r == p)) rr = BN_CTX_get(ctx); @@ -147,6 +155,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) err: if (r != rr) BN_copy(r,rr); BN_CTX_end(ctx); + bn_check_top(r); return(ret); } @@ -204,7 +213,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, if (BN_is_odd(m)) { # ifdef MONT_EXP_WORD - if (a->top == 1 && !a->neg) + if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) == 0)) { BN_ULONG A = a->d[0]; ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); @@ -221,6 +230,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, { ret=BN_mod_exp_simple(r,a,p,m,ctx); } #endif + bn_check_top(r); return(ret); } @@ -229,11 +239,19 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx) { int i,j,bits,ret=0,wstart,wend,window,wvalue; - int start=1,ts=0; + int start=1; BIGNUM *aa; - BIGNUM val[TABLE_SIZE]; + /* Table of variables obtained from 'ctx' */ + BIGNUM *val[TABLE_SIZE]; BN_RECP_CTX recp; + if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0) + { + /* 
BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */ + BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return -1; + } + bits=BN_num_bits(p); if (bits == 0) @@ -243,7 +261,9 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, } BN_CTX_start(ctx); - if ((aa = BN_CTX_get(ctx)) == NULL) goto err; + aa = BN_CTX_get(ctx); + val[0] = BN_CTX_get(ctx); + if(!aa || !val[0]) goto err; BN_RECP_CTX_init(&recp); if (m->neg) @@ -258,29 +278,27 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err; } - BN_init(&(val[0])); - ts=1; - - if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */ - if (BN_is_zero(&(val[0]))) + if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ + if (BN_is_zero(val[0])) { - ret = BN_zero(r); + BN_zero(r); + ret = 1; goto err; } window = BN_window_bits_for_exponent_size(bits); if (window > 1) { - if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx)) + if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx)) goto err; /* 2 */ j=1<<(window-1); for (i=1; i<j; i++) { - BN_init(&val[i]); - if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx)) + if(((val[i] = BN_CTX_get(ctx)) == NULL) || + !BN_mod_mul_reciprocal(val[i],val[i-1], + aa,&recp,ctx)) goto err; } - ts=i; } start=1; /* This is used to avoid multiplication etc @@ -332,7 +350,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, } /* wvalue will be an odd number < 2^window */ - if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx)) + if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx)) goto err; /* move the 'window' down further */ @@ -344,9 +362,8 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, ret=1; err: BN_CTX_end(ctx); - for (i=0; i<ts; i++) - BN_clear_free(&(val[i])); BN_RECP_CTX_free(&recp); + bn_check_top(r); return(ret); } @@ -355,17 +372,23 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, 
BN_MONT_CTX *in_mont) { int i,j,bits,ret=0,wstart,wend,window,wvalue; - int start=1,ts=0; + int start=1; BIGNUM *d,*r; const BIGNUM *aa; - BIGNUM val[TABLE_SIZE]; + /* Table of variables obtained from 'ctx' */ + BIGNUM *val[TABLE_SIZE]; BN_MONT_CTX *mont=NULL; + if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0) + { + return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); + } + bn_check_top(a); bn_check_top(p); bn_check_top(m); - if (!(m->d[0] & 1)) + if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); return(0); @@ -380,7 +403,8 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, BN_CTX_start(ctx); d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); - if (d == NULL || r == NULL) goto err; + val[0] = BN_CTX_get(ctx); + if (!d || !r || !val[0]) goto err; /* If this is not done, things will break in the montgomery * part */ @@ -393,35 +417,34 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; } - BN_init(&val[0]); - ts=1; if (a->neg || BN_ucmp(a,m) >= 0) { - if (!BN_nnmod(&(val[0]),a,m,ctx)) + if (!BN_nnmod(val[0],a,m,ctx)) goto err; - aa= &(val[0]); + aa= val[0]; } else aa=a; if (BN_is_zero(aa)) { - ret = BN_zero(rr); + BN_zero(rr); + ret = 1; goto err; } - if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ + if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */ window = BN_window_bits_for_exponent_size(bits); if (window > 1) { - if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */ + if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ j=1<<(window-1); for (i=1; i<j; i++) { - BN_init(&(val[i])); - if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx)) + if(((val[i] = BN_CTX_get(ctx)) == NULL) || + !BN_mod_mul_montgomery(val[i],val[i-1], + d,mont,ctx)) goto err; } - ts=i; } start=1; /* This is used to avoid multiplication etc @@ -474,7 +497,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const 
BIGNUM *p, } /* wvalue will be an odd number < 2^window */ - if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx)) + if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx)) goto err; /* move the 'window' down further */ @@ -488,8 +511,213 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); - for (i=0; i<ts; i++) - BN_clear_free(&(val[i])); + bn_check_top(rr); + return(ret); + } + + +/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout + * so that accessing any of these table values shows the same access pattern as far + * as cache lines are concerned. The following functions are used to transfer a BIGNUM + * from/to that table. */ + +static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) + { + size_t i, j; + + if (bn_wexpand(b, top) == NULL) + return 0; + while (b->top < top) + { + b->d[b->top++] = 0; + } + + for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) + { + buf[j] = ((unsigned char*)b->d)[i]; + } + + bn_correct_top(b); + return 1; + } + +static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) + { + size_t i, j; + + if (bn_wexpand(b, top) == NULL) + return 0; + + for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) + { + ((unsigned char*)b->d)[i] = buf[j]; + } + + b->top = top; + bn_correct_top(b); + return 1; + } + +/* Given a pointer value, compute the next address that is a cache line multiple. */ +#define MOD_EXP_CTIME_ALIGN(x_) \ + ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) + +/* This variant of BN_mod_exp_mont() uses fixed windows and the special + * precomputation memory layout to limit data-dependency to a minimum + * to protect secret exponents (cf. 
the hyper-threading timing attacks + * pointed out by Colin Percival, + * http://www.daemonology.net/hyperthreading-considered-harmful/) + */ +int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) + { + int i,bits,ret=0,idx,window,wvalue; + int top; + BIGNUM *r; + const BIGNUM *aa; + BN_MONT_CTX *mont=NULL; + + int numPowers; + unsigned char *powerbufFree=NULL; + int powerbufLen = 0; + unsigned char *powerbuf=NULL; + BIGNUM *computeTemp=NULL, *am=NULL; + + bn_check_top(a); + bn_check_top(p); + bn_check_top(m); + + top = m->top; + + if (!(m->d[0] & 1)) + { + BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS); + return(0); + } + bits=BN_num_bits(p); + if (bits == 0) + { + ret = BN_one(rr); + return ret; + } + + /* Initialize BIGNUM context and allocate intermediate result */ + BN_CTX_start(ctx); + r = BN_CTX_get(ctx); + if (r == NULL) goto err; + + /* Allocate a montgomery context if it was not supplied by the caller. + * If this is not done, things will break in the montgomery part. + */ + if (in_mont != NULL) + mont=in_mont; + else + { + if ((mont=BN_MONT_CTX_new()) == NULL) goto err; + if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; + } + + /* Get the window size to use with size of p. */ + window = BN_window_bits_for_ctime_exponent_size(bits); + + /* Allocate a buffer large enough to hold all of the pre-computed + * powers of a. + */ + numPowers = 1 << window; + powerbufLen = sizeof(m->d[0])*top*numPowers; + if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) + goto err; + + powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); + memset(powerbuf, 0, powerbufLen); + + /* Initialize the intermediate result. Do this early to save double conversion, + * once each for a^0 and intermediate result. 
+ */ + if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err; + + /* Initialize computeTemp as a^1 with montgomery precalcs */ + computeTemp = BN_CTX_get(ctx); + am = BN_CTX_get(ctx); + if (computeTemp==NULL || am==NULL) goto err; + + if (a->neg || BN_ucmp(a,m) >= 0) + { + if (!BN_mod(am,a,m,ctx)) + goto err; + aa= am; + } + else + aa=a; + if (!BN_to_montgomery(am,aa,mont,ctx)) goto err; + if (!BN_copy(computeTemp, am)) goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err; + + /* If the window size is greater than 1, then calculate + * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) + * (even powers could instead be computed as (a^(i/2))^2 + * to use the slight performance advantage of sqr over mul). + */ + if (window > 1) + { + for (i=2; i<numPowers; i++) + { + /* Calculate a^i = a^(i-1) * a */ + if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx)) + goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err; + } + } + + /* Adjust the number of bits up to a multiple of the window size. + * If the exponent length is not a multiple of the window size, then + * this pads the most significant bits with zeros to normalize the + * scanning loop so that there are no special cases. + * + * * NOTE: Making the window size a power of two less than the native + * * word size ensures that the padded bits won't go past the last + * * word in the internal BIGNUM structure. Going past the end will + * * still produce the correct result, but causes a different branch + * * to be taken in the BN_is_bit_set function. + */ + bits = ((bits+window-1)/window)*window; + idx=bits-1; /* The top bit of the window */ + + /* Scan the exponent one window at a time starting from the most + * significant bits. 
+ */ + while (idx >= 0) + { + wvalue=0; /* The 'value' of the window */ + + /* Scan the window, squaring the result as we go */ + for (i=0; i<window; i++,idx--) + { + if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err; + wvalue = (wvalue<<1)+BN_is_bit_set(p,idx); + } + + /* Fetch the appropriate pre-computed value from the pre-buf */ + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err; + + /* Multiply the result into the intermediate result */ + if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err; + } + + /* Convert the final result from montgomery to standard format */ + if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; + ret=1; +err: + if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); + if (powerbuf!=NULL) + { + OPENSSL_cleanse(powerbuf,powerbufLen); + OPENSSL_free(powerbufFree); + } + if (am!=NULL) BN_clear(am); + if (computeTemp!=NULL) BN_clear(computeTemp); + BN_CTX_end(ctx); return(ret); } @@ -517,10 +745,17 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) + if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0) + { + /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */ + BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return -1; + } + bn_check_top(p); bn_check_top(m); - if (m->top == 0 || !(m->d[0] & 1)) + if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); return(0); @@ -536,7 +771,8 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, } if (a == 0) { - ret = BN_zero(rr); + BN_zero(rr); + ret = 1; return ret; } @@ -630,19 +866,27 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); + bn_check_top(rr); return(ret); } /* The old fallback, simple version :-) */ -int BN_mod_exp_simple(BIGNUM *r, 
- const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, - BN_CTX *ctx) +int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx) { - int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0; + int i,j,bits,ret=0,wstart,wend,window,wvalue; int start=1; BIGNUM *d; - BIGNUM val[TABLE_SIZE]; + /* Table of variables obtained from 'ctx' */ + BIGNUM *val[TABLE_SIZE]; + + if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0) + { + /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */ + BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return -1; + } bits=BN_num_bits(p); @@ -653,30 +897,30 @@ int BN_mod_exp_simple(BIGNUM *r, } BN_CTX_start(ctx); - if ((d = BN_CTX_get(ctx)) == NULL) goto err; + d = BN_CTX_get(ctx); + val[0] = BN_CTX_get(ctx); + if(!d || !val[0]) goto err; - BN_init(&(val[0])); - ts=1; - if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */ - if (BN_is_zero(&(val[0]))) + if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ + if (BN_is_zero(val[0])) { - ret = BN_zero(r); + BN_zero(r); + ret = 1; goto err; } window = BN_window_bits_for_exponent_size(bits); if (window > 1) { - if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx)) + if (!BN_mod_mul(d,val[0],val[0],m,ctx)) goto err; /* 2 */ j=1<<(window-1); for (i=1; i<j; i++) { - BN_init(&(val[i])); - if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx)) + if(((val[i] = BN_CTX_get(ctx)) == NULL) || + !BN_mod_mul(val[i],val[i-1],d,m,ctx)) goto err; } - ts=i; } start=1; /* This is used to avoid multiplication etc @@ -728,7 +972,7 @@ int BN_mod_exp_simple(BIGNUM *r, } /* wvalue will be an odd number < 2^window */ - if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx)) + if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx)) goto err; /* move the 'window' down further */ @@ -740,8 +984,7 @@ int BN_mod_exp_simple(BIGNUM *r, ret=1; err: BN_CTX_end(ctx); - for (i=0; i<ts; i++) - BN_clear_free(&(val[i])); + bn_check_top(r); return(ret); } diff --git a/crypto/openssl/crypto/bn/bn_exp2.c 
b/crypto/openssl/crypto/bn/bn_exp2.c index 73ccd58..b3f43ce 100644 --- a/crypto/openssl/crypto/bn/bn_exp2.c +++ b/crypto/openssl/crypto/bn/bn_exp2.c @@ -120,10 +120,11 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, BN_CTX *ctx, BN_MONT_CTX *in_mont) { int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2; - int r_is_one=1,ts1=0,ts2=0; + int r_is_one=1; BIGNUM *d,*r; const BIGNUM *a_mod_m; - BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE]; + /* Tables of variables obtained from 'ctx' */ + BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE]; BN_MONT_CTX *mont=NULL; bn_check_top(a1); @@ -150,7 +151,9 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, BN_CTX_start(ctx); d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); - if (d == NULL || r == NULL) goto err; + val1[0] = BN_CTX_get(ctx); + val2[0] = BN_CTX_get(ctx); + if(!d || !r || !val1[0] || !val2[0]) goto err; if (in_mont != NULL) mont=in_mont; @@ -166,69 +169,67 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, /* * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 
2^(window1-1) */ - BN_init(&val1[0]); - ts1=1; if (a1->neg || BN_ucmp(a1,m) >= 0) { - if (!BN_mod(&(val1[0]),a1,m,ctx)) + if (!BN_mod(val1[0],a1,m,ctx)) goto err; - a_mod_m = &(val1[0]); + a_mod_m = val1[0]; } else a_mod_m = a1; if (BN_is_zero(a_mod_m)) { - ret = BN_zero(rr); + BN_zero(rr); + ret = 1; goto err; } - if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err; + if (!BN_to_montgomery(val1[0],a_mod_m,mont,ctx)) goto err; if (window1 > 1) { - if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err; + if (!BN_mod_mul_montgomery(d,val1[0],val1[0],mont,ctx)) goto err; j=1<<(window1-1); for (i=1; i<j; i++) { - BN_init(&(val1[i])); - if (!BN_mod_mul_montgomery(&(val1[i]),&(val1[i-1]),d,mont,ctx)) + if(((val1[i] = BN_CTX_get(ctx)) == NULL) || + !BN_mod_mul_montgomery(val1[i],val1[i-1], + d,mont,ctx)) goto err; } - ts1=i; } /* * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1) */ - BN_init(&val2[0]); - ts2=1; if (a2->neg || BN_ucmp(a2,m) >= 0) { - if (!BN_mod(&(val2[0]),a2,m,ctx)) + if (!BN_mod(val2[0],a2,m,ctx)) goto err; - a_mod_m = &(val2[0]); + a_mod_m = val2[0]; } else a_mod_m = a2; if (BN_is_zero(a_mod_m)) { - ret = BN_zero(rr); + BN_zero(rr); + ret = 1; goto err; } - if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err; + if (!BN_to_montgomery(val2[0],a_mod_m,mont,ctx)) goto err; if (window2 > 1) { - if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err; + if (!BN_mod_mul_montgomery(d,val2[0],val2[0],mont,ctx)) goto err; j=1<<(window2-1); for (i=1; i<j; i++) { - BN_init(&(val2[i])); - if (!BN_mod_mul_montgomery(&(val2[i]),&(val2[i-1]),d,mont,ctx)) + if(((val2[i] = BN_CTX_get(ctx)) == NULL) || + !BN_mod_mul_montgomery(val2[i],val2[i-1], + d,mont,ctx)) goto err; } - ts2=i; } @@ -285,7 +286,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, if (wvalue1 && b == wpos1) { /* wvalue1 is odd and < 2^window1 */ - if 
(!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx)) + if (!BN_mod_mul_montgomery(r,r,val1[wvalue1>>1],mont,ctx)) goto err; wvalue1 = 0; r_is_one = 0; @@ -294,7 +295,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, if (wvalue2 && b == wpos2) { /* wvalue2 is odd and < 2^window2 */ - if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx)) + if (!BN_mod_mul_montgomery(r,r,val2[wvalue2>>1],mont,ctx)) goto err; wvalue2 = 0; r_is_one = 0; @@ -305,9 +306,6 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); - for (i=0; i<ts1; i++) - BN_clear_free(&(val1[i])); - for (i=0; i<ts2; i++) - BN_clear_free(&(val2[i])); + bn_check_top(rr); return(ret); } diff --git a/crypto/openssl/crypto/bn/bn_gcd.c b/crypto/openssl/crypto/bn/bn_gcd.c index 7649f63..f02e6fc 100644 --- a/crypto/openssl/crypto/bn/bn_gcd.c +++ b/crypto/openssl/crypto/bn/bn_gcd.c @@ -140,6 +140,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx) ret=1; err: BN_CTX_end(ctx); + bn_check_top(r); return(ret); } @@ -194,6 +195,7 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b) { if (!BN_lshift(a,a,shifts)) goto err; } + bn_check_top(a); return(a); err: return(NULL); @@ -486,5 +488,6 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, err: if ((ret == NULL) && (in == NULL)) BN_free(R); BN_CTX_end(ctx); + bn_check_top(ret); return(ret); } diff --git a/crypto/openssl/crypto/bn/bn_gf2m.c b/crypto/openssl/crypto/bn/bn_gf2m.c new file mode 100644 index 0000000..6a79385 --- /dev/null +++ b/crypto/openssl/crypto/bn/bn_gf2m.c @@ -0,0 +1,1091 @@ +/* crypto/bn/bn_gf2m.c */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * The Elliptic Curve Public-Key Crypto Library (ECC Code) included + * herein is developed by SUN MICROSYSTEMS, INC., and is contributed + * to the OpenSSL project. 
+ * + * The ECC Code is licensed pursuant to the OpenSSL open source + * license provided below. + * + * In addition, Sun covenants to all licensees who provide a reciprocal + * covenant with respect to their own patents if any, not to sue under + * current and future patent claims necessarily infringed by the making, + * using, practicing, selling, offering for sale and/or otherwise + * disposing of the ECC Code as delivered hereunder (or portions thereof), + * provided that such covenant shall not apply: + * 1) for code that a licensee deletes from the ECC Code; + * 2) separates from the ECC Code; or + * 3) for infringements caused by: + * i) the modification of the ECC Code or + * ii) the combination of the ECC Code with other software or + * devices where such combination causes the infringement. + * + * The software is originally written by Sheueling Chang Shantz and + * Douglas Stebila of Sun Microsystems Laboratories. + * + */ + +/* NOTE: This file is licensed pursuant to the OpenSSL license below + * and may be modified; but after modifications, the above covenant + * may no longer apply! In such cases, the corresponding paragraph + * ["In addition, Sun covenants ... causes the infringement."] and + * this note can be edited out; but please keep the Sun copyright + * notice and attribution. */ + +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include "cryptlib.h" +#include "bn_lcl.h" + +/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */ +#define MAX_ITERATIONS 50 + +static const BN_ULONG SQR_tb[16] = + { 0, 1, 4, 5, 16, 17, 20, 21, + 64, 65, 68, 69, 80, 81, 84, 85 }; +/* Platform-specific macros to accelerate squaring. */ +#if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) +#define SQR1(w) \ + SQR_tb[(w) >> 60 & 0xF] << 56 | SQR_tb[(w) >> 56 & 0xF] << 48 | \ + SQR_tb[(w) >> 52 & 0xF] << 40 | SQR_tb[(w) >> 48 & 0xF] << 32 | \ + SQR_tb[(w) >> 44 & 0xF] << 24 | SQR_tb[(w) >> 40 & 0xF] << 16 | \ + SQR_tb[(w) >> 36 & 0xF] << 8 | SQR_tb[(w) >> 32 & 0xF] +#define SQR0(w) \ + SQR_tb[(w) >> 28 & 0xF] << 56 | SQR_tb[(w) >> 24 & 0xF] << 48 | \ + SQR_tb[(w) >> 20 & 0xF] << 40 | SQR_tb[(w) >> 16 & 0xF] << 32 | \ + SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ + SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] +#endif +#ifdef THIRTY_TWO_BIT +#define SQR1(w) \ + SQR_tb[(w) >> 28 & 0xF] << 24 | SQR_tb[(w) >> 24 & 0xF] << 16 | \ + SQR_tb[(w) >> 20 & 0xF] << 8 | SQR_tb[(w) >> 16 & 0xF] +#define SQR0(w) \ + SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ + SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] +#endif +#ifdef SIXTEEN_BIT +#define SQR1(w) \ + SQR_tb[(w) >> 12 & 0xF] << 8 | SQR_tb[(w) >> 8 & 0xF] +#define SQR0(w) \ + SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] +#endif +#ifdef EIGHT_BIT +#define SQR1(w) \ + SQR_tb[(w) >> 4 & 0xF] +#define SQR0(w) \ + SQR_tb[(w) & 15] +#endif + +/* Product of two polynomials a, b each with degree < BN_BITS2 - 1, + * result is a polynomial r with degree < 2 * BN_BITS - 1 + * The caller MUST ensure that the variables have the right amount + * of space allocated. 
+ */ +#ifdef EIGHT_BIT +static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) + { + register BN_ULONG h, l, s; + BN_ULONG tab[4], top1b = a >> 7; + register BN_ULONG a1, a2; + + a1 = a & (0x7F); a2 = a1 << 1; + + tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; + + s = tab[b & 0x3]; l = s; + s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 6; + s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4; + s = tab[b >> 6 ]; l ^= s << 6; h ^= s >> 2; + + /* compensate for the top bit of a */ + + if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } + + *r1 = h; *r0 = l; + } +#endif +#ifdef SIXTEEN_BIT +static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) + { + register BN_ULONG h, l, s; + BN_ULONG tab[4], top1b = a >> 15; + register BN_ULONG a1, a2; + + a1 = a & (0x7FFF); a2 = a1 << 1; + + tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; + + s = tab[b & 0x3]; l = s; + s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 14; + s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 12; + s = tab[b >> 6 & 0x3]; l ^= s << 6; h ^= s >> 10; + s = tab[b >> 8 & 0x3]; l ^= s << 8; h ^= s >> 8; + s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >> 6; + s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >> 4; + s = tab[b >>14 ]; l ^= s << 14; h ^= s >> 2; + + /* compensate for the top bit of a */ + + if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } + + *r1 = h; *r0 = l; + } +#endif +#ifdef THIRTY_TWO_BIT +static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) + { + register BN_ULONG h, l, s; + BN_ULONG tab[8], top2b = a >> 30; + register BN_ULONG a1, a2, a4; + + a1 = a & (0x3FFFFFFF); a2 = a1 << 1; a4 = a2 << 1; + + tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; + tab[4] = a4; tab[5] = a1^a4; tab[6] = a2^a4; tab[7] = a1^a2^a4; + + s = tab[b & 0x7]; l = s; + s = tab[b >> 3 & 0x7]; l ^= s << 3; h = s >> 29; + s = tab[b >> 6 & 0x7]; l ^= s << 6; h ^= s >> 26; + s = tab[b >> 9 & 0x7]; l ^= s << 9; h ^= s 
>> 23; + s = tab[b >> 12 & 0x7]; l ^= s << 12; h ^= s >> 20; + s = tab[b >> 15 & 0x7]; l ^= s << 15; h ^= s >> 17; + s = tab[b >> 18 & 0x7]; l ^= s << 18; h ^= s >> 14; + s = tab[b >> 21 & 0x7]; l ^= s << 21; h ^= s >> 11; + s = tab[b >> 24 & 0x7]; l ^= s << 24; h ^= s >> 8; + s = tab[b >> 27 & 0x7]; l ^= s << 27; h ^= s >> 5; + s = tab[b >> 30 ]; l ^= s << 30; h ^= s >> 2; + + /* compensate for the top two bits of a */ + + if (top2b & 01) { l ^= b << 30; h ^= b >> 2; } + if (top2b & 02) { l ^= b << 31; h ^= b >> 1; } + + *r1 = h; *r0 = l; + } +#endif +#if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) +static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) + { + register BN_ULONG h, l, s; + BN_ULONG tab[16], top3b = a >> 61; + register BN_ULONG a1, a2, a4, a8; + + a1 = a & (0x1FFFFFFFFFFFFFFFULL); a2 = a1 << 1; a4 = a2 << 1; a8 = a4 << 1; + + tab[ 0] = 0; tab[ 1] = a1; tab[ 2] = a2; tab[ 3] = a1^a2; + tab[ 4] = a4; tab[ 5] = a1^a4; tab[ 6] = a2^a4; tab[ 7] = a1^a2^a4; + tab[ 8] = a8; tab[ 9] = a1^a8; tab[10] = a2^a8; tab[11] = a1^a2^a8; + tab[12] = a4^a8; tab[13] = a1^a4^a8; tab[14] = a2^a4^a8; tab[15] = a1^a2^a4^a8; + + s = tab[b & 0xF]; l = s; + s = tab[b >> 4 & 0xF]; l ^= s << 4; h = s >> 60; + s = tab[b >> 8 & 0xF]; l ^= s << 8; h ^= s >> 56; + s = tab[b >> 12 & 0xF]; l ^= s << 12; h ^= s >> 52; + s = tab[b >> 16 & 0xF]; l ^= s << 16; h ^= s >> 48; + s = tab[b >> 20 & 0xF]; l ^= s << 20; h ^= s >> 44; + s = tab[b >> 24 & 0xF]; l ^= s << 24; h ^= s >> 40; + s = tab[b >> 28 & 0xF]; l ^= s << 28; h ^= s >> 36; + s = tab[b >> 32 & 0xF]; l ^= s << 32; h ^= s >> 32; + s = tab[b >> 36 & 0xF]; l ^= s << 36; h ^= s >> 28; + s = tab[b >> 40 & 0xF]; l ^= s << 40; h ^= s >> 24; + s = tab[b >> 44 & 0xF]; l ^= s << 44; h ^= s >> 20; + s = tab[b >> 48 & 0xF]; l ^= s << 48; h ^= s >> 16; + s = tab[b >> 52 & 0xF]; l ^= s << 52; h ^= s >> 12; + s = tab[b >> 56 & 0xF]; l ^= s << 56; h ^= s >> 8; + s = tab[b >> 60 ]; l ^= s << 60; h 
^= s >> 4; + + /* compensate for the top three bits of a */ + + if (top3b & 01) { l ^= b << 61; h ^= b >> 3; } + if (top3b & 02) { l ^= b << 62; h ^= b >> 2; } + if (top3b & 04) { l ^= b << 63; h ^= b >> 1; } + + *r1 = h; *r0 = l; + } +#endif + +/* Product of two polynomials a, b each with degree < 2 * BN_BITS2 - 1, + * result is a polynomial r with degree < 4 * BN_BITS2 - 1 + * The caller MUST ensure that the variables have the right amount + * of space allocated. + */ +static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, const BN_ULONG b1, const BN_ULONG b0) + { + BN_ULONG m1, m0; + /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */ + bn_GF2m_mul_1x1(r+3, r+2, a1, b1); + bn_GF2m_mul_1x1(r+1, r, a0, b0); + bn_GF2m_mul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1); + /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */ + r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ + r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ + } + + +/* Add polynomials a and b and store result in r; r could be a or b, a and b + * could be equal; r is the bitwise XOR of a and b. + */ +int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) + { + int i; + const BIGNUM *at, *bt; + + bn_check_top(a); + bn_check_top(b); + + if (a->top < b->top) { at = b; bt = a; } + else { at = a; bt = b; } + + bn_wexpand(r, at->top); + + for (i = 0; i < bt->top; i++) + { + r->d[i] = at->d[i] ^ bt->d[i]; + } + for (; i < at->top; i++) + { + r->d[i] = at->d[i]; + } + + r->top = at->top; + bn_correct_top(r); + + return 1; + } + + +/* Some functions allow for representation of the irreducible polynomials + * as an int[], say p. The irreducible f(t) is then of the form: + * t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. + */ + + +/* Performs modular reduction of a and store result in r. r could be a. 
*/ +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) + { + int j, k; + int n, dN, d0, d1; + BN_ULONG zz, *z; + + bn_check_top(a); + + if (!p[0]) + { + /* reduction mod 1 => return 0 */ + BN_zero(r); + return 1; + } + + /* Since the algorithm does reduction in the r value, if a != r, copy + * the contents of a into r so we can do reduction in r. + */ + if (a != r) + { + if (!bn_wexpand(r, a->top)) return 0; + for (j = 0; j < a->top; j++) + { + r->d[j] = a->d[j]; + } + r->top = a->top; + } + z = r->d; + + /* start reduction */ + dN = p[0] / BN_BITS2; + for (j = r->top - 1; j > dN;) + { + zz = z[j]; + if (z[j] == 0) { j--; continue; } + z[j] = 0; + + for (k = 1; p[k] != 0; k++) + { + /* reducing component t^p[k] */ + n = p[0] - p[k]; + d0 = n % BN_BITS2; d1 = BN_BITS2 - d0; + n /= BN_BITS2; + z[j-n] ^= (zz>>d0); + if (d0) z[j-n-1] ^= (zz<<d1); + } + + /* reducing component t^0 */ + n = dN; + d0 = p[0] % BN_BITS2; + d1 = BN_BITS2 - d0; + z[j-n] ^= (zz >> d0); + if (d0) z[j-n-1] ^= (zz << d1); + } + + /* final round of reduction */ + while (j == dN) + { + + d0 = p[0] % BN_BITS2; + zz = z[dN] >> d0; + if (zz == 0) break; + d1 = BN_BITS2 - d0; + + if (d0) z[dN] = (z[dN] << d1) >> d1; /* clear up the top d1 bits */ + z[0] ^= zz; /* reduction t^0 component */ + + for (k = 1; p[k] != 0; k++) + { + BN_ULONG tmp_ulong; + + /* reducing component t^p[k]*/ + n = p[k] / BN_BITS2; + d0 = p[k] % BN_BITS2; + d1 = BN_BITS2 - d0; + z[n] ^= (zz << d0); + tmp_ulong = zz >> d1; + if (d0 && tmp_ulong) + z[n+1] ^= tmp_ulong; + } + + + } + + bn_correct_top(r); + return 1; + } + +/* Performs modular reduction of a by p and store result in r. r could be a. + * + * This function calls down to the BN_GF2m_mod_arr implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_arr function. 
+ */ +int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) + { + int ret = 0; + const int max = BN_num_bits(p); + unsigned int *arr=NULL; + bn_check_top(a); + bn_check_top(p); + if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + ret = BN_GF2m_poly2arr(p, arr, max); + if (!ret || ret > max) + { + BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH); + goto err; + } + ret = BN_GF2m_mod_arr(r, a, arr); + bn_check_top(r); +err: + if (arr) OPENSSL_free(arr); + return ret; + } + + +/* Compute the product of two polynomials a and b, reduce modulo p, and store + * the result in r. r could be a or b; a could be b. + */ +int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) + { + int zlen, i, j, k, ret = 0; + BIGNUM *s; + BN_ULONG x1, x0, y1, y0, zz[4]; + + bn_check_top(a); + bn_check_top(b); + + if (a == b) + { + return BN_GF2m_mod_sqr_arr(r, a, p, ctx); + } + + BN_CTX_start(ctx); + if ((s = BN_CTX_get(ctx)) == NULL) goto err; + + zlen = a->top + b->top + 4; + if (!bn_wexpand(s, zlen)) goto err; + s->top = zlen; + + for (i = 0; i < zlen; i++) s->d[i] = 0; + + for (j = 0; j < b->top; j += 2) + { + y0 = b->d[j]; + y1 = ((j+1) == b->top) ? 0 : b->d[j+1]; + for (i = 0; i < a->top; i += 2) + { + x0 = a->d[i]; + x1 = ((i+1) == a->top) ? 0 : a->d[i+1]; + bn_GF2m_mul_2x2(zz, x1, x0, y1, y0); + for (k = 0; k < 4; k++) s->d[i+j+k] ^= zz[k]; + } + } + + bn_correct_top(s); + if (BN_GF2m_mod_arr(r, s, p)) + ret = 1; + bn_check_top(r); + +err: + BN_CTX_end(ctx); + return ret; + } + +/* Compute the product of two polynomials a and b, reduce modulo p, and store + * the result in r. r could be a or b; a could equal b. + * + * This function calls down to the BN_GF2m_mod_mul_arr implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_mul_arr function. 
+ */ +int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) + { + int ret = 0; + const int max = BN_num_bits(p); + unsigned int *arr=NULL; + bn_check_top(a); + bn_check_top(b); + bn_check_top(p); + if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + ret = BN_GF2m_poly2arr(p, arr, max); + if (!ret || ret > max) + { + BNerr(BN_F_BN_GF2M_MOD_MUL,BN_R_INVALID_LENGTH); + goto err; + } + ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx); + bn_check_top(r); +err: + if (arr) OPENSSL_free(arr); + return ret; + } + + +/* Square a, reduce the result mod p, and store it in a. r could be a. */ +int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) + { + int i, ret = 0; + BIGNUM *s; + + bn_check_top(a); + BN_CTX_start(ctx); + if ((s = BN_CTX_get(ctx)) == NULL) return 0; + if (!bn_wexpand(s, 2 * a->top)) goto err; + + for (i = a->top - 1; i >= 0; i--) + { + s->d[2*i+1] = SQR1(a->d[i]); + s->d[2*i ] = SQR0(a->d[i]); + } + + s->top = 2 * a->top; + bn_correct_top(s); + if (!BN_GF2m_mod_arr(r, s, p)) goto err; + bn_check_top(r); + ret = 1; +err: + BN_CTX_end(ctx); + return ret; + } + +/* Square a, reduce the result mod p, and store it in a. r could be a. + * + * This function calls down to the BN_GF2m_mod_sqr_arr implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_sqr_arr function. 
+ */ +int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) + { + int ret = 0; + const int max = BN_num_bits(p); + unsigned int *arr=NULL; + + bn_check_top(a); + bn_check_top(p); + if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + ret = BN_GF2m_poly2arr(p, arr, max); + if (!ret || ret > max) + { + BNerr(BN_F_BN_GF2M_MOD_SQR,BN_R_INVALID_LENGTH); + goto err; + } + ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx); + bn_check_top(r); +err: + if (arr) OPENSSL_free(arr); + return ret; + } + + +/* Invert a, reduce modulo p, and store the result in r. r could be a. + * Uses Modified Almost Inverse Algorithm (Algorithm 10) from + * Hankerson, D., Hernandez, J.L., and Menezes, A. "Software Implementation + * of Elliptic Curve Cryptography Over Binary Fields". + */ +int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) + { + BIGNUM *b, *c, *u, *v, *tmp; + int ret = 0; + + bn_check_top(a); + bn_check_top(p); + + BN_CTX_start(ctx); + + b = BN_CTX_get(ctx); + c = BN_CTX_get(ctx); + u = BN_CTX_get(ctx); + v = BN_CTX_get(ctx); + if (v == NULL) goto err; + + if (!BN_one(b)) goto err; + if (!BN_GF2m_mod(u, a, p)) goto err; + if (!BN_copy(v, p)) goto err; + + if (BN_is_zero(u)) goto err; + + while (1) + { + while (!BN_is_odd(u)) + { + if (!BN_rshift1(u, u)) goto err; + if (BN_is_odd(b)) + { + if (!BN_GF2m_add(b, b, p)) goto err; + } + if (!BN_rshift1(b, b)) goto err; + } + + if (BN_abs_is_word(u, 1)) break; + + if (BN_num_bits(u) < BN_num_bits(v)) + { + tmp = u; u = v; v = tmp; + tmp = b; b = c; c = tmp; + } + + if (!BN_GF2m_add(u, u, v)) goto err; + if (!BN_GF2m_add(b, b, c)) goto err; + } + + + if (!BN_copy(r, b)) goto err; + bn_check_top(r); + ret = 1; + +err: + BN_CTX_end(ctx); + return ret; + } + +/* Invert xx, reduce modulo p, and store the result in r. r could be xx. 
+ * + * This function calls down to the BN_GF2m_mod_inv implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_inv function. + */ +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) + { + BIGNUM *field; + int ret = 0; + + bn_check_top(xx); + BN_CTX_start(ctx); + if ((field = BN_CTX_get(ctx)) == NULL) goto err; + if (!BN_GF2m_arr2poly(p, field)) goto err; + + ret = BN_GF2m_mod_inv(r, xx, field, ctx); + bn_check_top(r); + +err: + BN_CTX_end(ctx); + return ret; + } + + +#ifndef OPENSSL_SUN_GF2M_DIV +/* Divide y by x, reduce modulo p, and store the result in r. r could be x + * or y, x could equal y. + */ +int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, BN_CTX *ctx) + { + BIGNUM *xinv = NULL; + int ret = 0; + + bn_check_top(y); + bn_check_top(x); + bn_check_top(p); + + BN_CTX_start(ctx); + xinv = BN_CTX_get(ctx); + if (xinv == NULL) goto err; + + if (!BN_GF2m_mod_inv(xinv, x, p, ctx)) goto err; + if (!BN_GF2m_mod_mul(r, y, xinv, p, ctx)) goto err; + bn_check_top(r); + ret = 1; + +err: + BN_CTX_end(ctx); + return ret; + } +#else +/* Divide y by x, reduce modulo p, and store the result in r. r could be x + * or y, x could equal y. + * Uses algorithm Modular_Division_GF(2^m) from + * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to + * the Great Divide". 
+ */ +int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, BN_CTX *ctx) + { + BIGNUM *a, *b, *u, *v; + int ret = 0; + + bn_check_top(y); + bn_check_top(x); + bn_check_top(p); + + BN_CTX_start(ctx); + + a = BN_CTX_get(ctx); + b = BN_CTX_get(ctx); + u = BN_CTX_get(ctx); + v = BN_CTX_get(ctx); + if (v == NULL) goto err; + + /* reduce x and y mod p */ + if (!BN_GF2m_mod(u, y, p)) goto err; + if (!BN_GF2m_mod(a, x, p)) goto err; + if (!BN_copy(b, p)) goto err; + + while (!BN_is_odd(a)) + { + if (!BN_rshift1(a, a)) goto err; + if (BN_is_odd(u)) if (!BN_GF2m_add(u, u, p)) goto err; + if (!BN_rshift1(u, u)) goto err; + } + + do + { + if (BN_GF2m_cmp(b, a) > 0) + { + if (!BN_GF2m_add(b, b, a)) goto err; + if (!BN_GF2m_add(v, v, u)) goto err; + do + { + if (!BN_rshift1(b, b)) goto err; + if (BN_is_odd(v)) if (!BN_GF2m_add(v, v, p)) goto err; + if (!BN_rshift1(v, v)) goto err; + } while (!BN_is_odd(b)); + } + else if (BN_abs_is_word(a, 1)) + break; + else + { + if (!BN_GF2m_add(a, a, b)) goto err; + if (!BN_GF2m_add(u, u, v)) goto err; + do + { + if (!BN_rshift1(a, a)) goto err; + if (BN_is_odd(u)) if (!BN_GF2m_add(u, u, p)) goto err; + if (!BN_rshift1(u, u)) goto err; + } while (!BN_is_odd(a)); + } + } while (1); + + if (!BN_copy(r, u)) goto err; + bn_check_top(r); + ret = 1; + +err: + BN_CTX_end(ctx); + return ret; + } +#endif + +/* Divide yy by xx, reduce modulo p, and store the result in r. r could be xx + * or yy, xx could equal yy. + * + * This function calls down to the BN_GF2m_mod_div implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_div function. 
+ */ +int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) + { + BIGNUM *field; + int ret = 0; + + bn_check_top(yy); + bn_check_top(xx); + + BN_CTX_start(ctx); + if ((field = BN_CTX_get(ctx)) == NULL) goto err; + if (!BN_GF2m_arr2poly(p, field)) goto err; + + ret = BN_GF2m_mod_div(r, yy, xx, field, ctx); + bn_check_top(r); + +err: + BN_CTX_end(ctx); + return ret; + } + + +/* Compute the bth power of a, reduce modulo p, and store + * the result in r. r could be a. + * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363. + */ +int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) + { + int ret = 0, i, n; + BIGNUM *u; + + bn_check_top(a); + bn_check_top(b); + + if (BN_is_zero(b)) + return(BN_one(r)); + + if (BN_abs_is_word(b, 1)) + return (BN_copy(r, a) != NULL); + + BN_CTX_start(ctx); + if ((u = BN_CTX_get(ctx)) == NULL) goto err; + + if (!BN_GF2m_mod_arr(u, a, p)) goto err; + + n = BN_num_bits(b) - 1; + for (i = n - 1; i >= 0; i--) + { + if (!BN_GF2m_mod_sqr_arr(u, u, p, ctx)) goto err; + if (BN_is_bit_set(b, i)) + { + if (!BN_GF2m_mod_mul_arr(u, u, a, p, ctx)) goto err; + } + } + if (!BN_copy(r, u)) goto err; + bn_check_top(r); + ret = 1; +err: + BN_CTX_end(ctx); + return ret; + } + +/* Compute the bth power of a, reduce modulo p, and store + * the result in r. r could be a. + * + * This function calls down to the BN_GF2m_mod_exp_arr implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_exp_arr function. 
+ */ +int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) + { + int ret = 0; + const int max = BN_num_bits(p); + unsigned int *arr=NULL; + bn_check_top(a); + bn_check_top(b); + bn_check_top(p); + if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + ret = BN_GF2m_poly2arr(p, arr, max); + if (!ret || ret > max) + { + BNerr(BN_F_BN_GF2M_MOD_EXP,BN_R_INVALID_LENGTH); + goto err; + } + ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx); + bn_check_top(r); +err: + if (arr) OPENSSL_free(arr); + return ret; + } + +/* Compute the square root of a, reduce modulo p, and store + * the result in r. r could be a. + * Uses exponentiation as in algorithm A.4.1 from IEEE P1363. + */ +int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) + { + int ret = 0; + BIGNUM *u; + + bn_check_top(a); + + if (!p[0]) + { + /* reduction mod 1 => return 0 */ + BN_zero(r); + return 1; + } + + BN_CTX_start(ctx); + if ((u = BN_CTX_get(ctx)) == NULL) goto err; + + if (!BN_set_bit(u, p[0] - 1)) goto err; + ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx); + bn_check_top(r); + +err: + BN_CTX_end(ctx); + return ret; + } + +/* Compute the square root of a, reduce modulo p, and store + * the result in r. r could be a. + * + * This function calls down to the BN_GF2m_mod_sqrt_arr implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_sqrt_arr function. 
+ */ +int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) + { + int ret = 0; + const int max = BN_num_bits(p); + unsigned int *arr=NULL; + bn_check_top(a); + bn_check_top(p); + if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + ret = BN_GF2m_poly2arr(p, arr, max); + if (!ret || ret > max) + { + BNerr(BN_F_BN_GF2M_MOD_SQRT,BN_R_INVALID_LENGTH); + goto err; + } + ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx); + bn_check_top(r); +err: + if (arr) OPENSSL_free(arr); + return ret; + } + +/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. + * Uses algorithms A.4.7 and A.4.6 from IEEE P1363. + */ +int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx) + { + int ret = 0, count = 0; + unsigned int j; + BIGNUM *a, *z, *rho, *w, *w2, *tmp; + + bn_check_top(a_); + + if (!p[0]) + { + /* reduction mod 1 => return 0 */ + BN_zero(r); + return 1; + } + + BN_CTX_start(ctx); + a = BN_CTX_get(ctx); + z = BN_CTX_get(ctx); + w = BN_CTX_get(ctx); + if (w == NULL) goto err; + + if (!BN_GF2m_mod_arr(a, a_, p)) goto err; + + if (BN_is_zero(a)) + { + BN_zero(r); + ret = 1; + goto err; + } + + if (p[0] & 0x1) /* m is odd */ + { + /* compute half-trace of a */ + if (!BN_copy(z, a)) goto err; + for (j = 1; j <= (p[0] - 1) / 2; j++) + { + if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err; + if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err; + if (!BN_GF2m_add(z, z, a)) goto err; + } + + } + else /* m is even */ + { + rho = BN_CTX_get(ctx); + w2 = BN_CTX_get(ctx); + tmp = BN_CTX_get(ctx); + if (tmp == NULL) goto err; + do + { + if (!BN_rand(rho, p[0], 0, 0)) goto err; + if (!BN_GF2m_mod_arr(rho, rho, p)) goto err; + BN_zero(z); + if (!BN_copy(w, rho)) goto err; + for (j = 1; j <= p[0] - 1; j++) + { + if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err; + if (!BN_GF2m_mod_sqr_arr(w2, w, p, ctx)) goto err; + if (!BN_GF2m_mod_mul_arr(tmp, w2, a, p, ctx)) goto err; + if 
(!BN_GF2m_add(z, z, tmp)) goto err; + if (!BN_GF2m_add(w, w2, rho)) goto err; + } + count++; + } while (BN_is_zero(w) && (count < MAX_ITERATIONS)); + if (BN_is_zero(w)) + { + BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR,BN_R_TOO_MANY_ITERATIONS); + goto err; + } + } + + if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx)) goto err; + if (!BN_GF2m_add(w, z, w)) goto err; + if (BN_GF2m_cmp(w, a)) + { + BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_NO_SOLUTION); + goto err; + } + + if (!BN_copy(r, z)) goto err; + bn_check_top(r); + + ret = 1; + +err: + BN_CTX_end(ctx); + return ret; + } + +/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. + * + * This function calls down to the BN_GF2m_mod_solve_quad_arr implementation; this wrapper + * function is only provided for convenience; for best performance, use the + * BN_GF2m_mod_solve_quad_arr function. + */ +int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) + { + int ret = 0; + const int max = BN_num_bits(p); + unsigned int *arr=NULL; + bn_check_top(a); + bn_check_top(p); + if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * + max)) == NULL) goto err; + ret = BN_GF2m_poly2arr(p, arr, max); + if (!ret || ret > max) + { + BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD,BN_R_INVALID_LENGTH); + goto err; + } + ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx); + bn_check_top(r); +err: + if (arr) OPENSSL_free(arr); + return ret; + } + +/* Convert the bit-string representation of a polynomial + * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array + * of integers corresponding to the bits with non-zero coefficient. + * Up to max elements of the array will be filled. Return value is total + * number of coefficients that would be extracted if array was large enough. 
+ */ +int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) + { + int i, j, k = 0; + BN_ULONG mask; + + if (BN_is_zero(a) || !BN_is_bit_set(a, 0)) + /* a_0 == 0 => return error (the unsigned int array + * must be terminated by 0) + */ + return 0; + + for (i = a->top - 1; i >= 0; i--) + { + if (!a->d[i]) + /* skip word if a->d[i] == 0 */ + continue; + mask = BN_TBIT; + for (j = BN_BITS2 - 1; j >= 0; j--) + { + if (a->d[i] & mask) + { + if (k < max) p[k] = BN_BITS2 * i + j; + k++; + } + mask >>= 1; + } + } + + return k; + } + +/* Convert the coefficient array representation of a polynomial to a + * bit-string. The array must be terminated by 0. + */ +int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a) + { + int i; + + bn_check_top(a); + BN_zero(a); + for (i = 0; p[i] != 0; i++) + { + if (BN_set_bit(a, p[i]) == 0) + return 0; + } + BN_set_bit(a, 0); + bn_check_top(a); + + return 1; + } + diff --git a/crypto/openssl/crypto/bn/bn_kron.c b/crypto/openssl/crypto/bn/bn_kron.c index 49f7559..740359b 100644 --- a/crypto/openssl/crypto/bn/bn_kron.c +++ b/crypto/openssl/crypto/bn/bn_kron.c @@ -53,9 +53,9 @@ * */ +#include "cryptlib.h" #include "bn_lcl.h" - /* least significant word */ #define BN_lsw(n) (((n)->top == 0) ? 
(BN_ULONG) 0 : (n)->d[0]) @@ -74,6 +74,9 @@ int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) */ static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1}; + bn_check_top(a); + bn_check_top(b); + BN_CTX_start(ctx); A = BN_CTX_get(ctx); B = BN_CTX_get(ctx); @@ -172,8 +175,7 @@ int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) tmp = A; A = B; B = tmp; tmp->neg = 0; } - - end: +end: BN_CTX_end(ctx); if (err) return -2; diff --git a/crypto/openssl/crypto/bn/bn_lcl.h b/crypto/openssl/crypto/bn/bn_lcl.h index 253e195..ad4ca7f 100644 --- a/crypto/openssl/crypto/bn/bn_lcl.h +++ b/crypto/openssl/crypto/bn/bn_lcl.h @@ -119,20 +119,6 @@ extern "C" { #endif -/* Used for temp variables */ -#define BN_CTX_NUM 32 -#define BN_CTX_NUM_POS 12 -struct bignum_ctx - { - int tos; - BIGNUM bn[BN_CTX_NUM]; - int flags; - int depth; - int pos[BN_CTX_NUM_POS]; - int too_many; - } /* BN_CTX */; - - /* * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions * @@ -177,6 +163,45 @@ struct bignum_ctx +/* BN_mod_exp_mont_conttime is based on the assumption that the + * L1 data cache line width of the target processor is at least + * the following value. + */ +#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 ) +#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) + +/* Window sizes optimized for fixed window size modular exponentiation + * algorithm (BN_mod_exp_mont_consttime). + * + * To achieve the security goals of BN_mode_exp_mont_consttime, the + * maximum size of the window must not exceed + * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). + * + * Window size thresholds are defined for cache line sizes of 32 and 64, + * cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A + * window size of 7 should only be used on processors that have a 128 + * byte or greater cache line size. + */ +#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 + +# define BN_window_bits_for_ctime_exponent_size(b) \ + ((b) > 937 ? 
6 : \ + (b) > 306 ? 5 : \ + (b) > 89 ? 4 : \ + (b) > 22 ? 3 : 1) +# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) + +#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 + +# define BN_window_bits_for_ctime_exponent_size(b) \ + ((b) > 306 ? 5 : \ + (b) > 89 ? 4 : \ + (b) > 22 ? 3 : 1) +# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) + +#endif + + /* Pentium pro 16,16,16,32,64 */ /* Alpha 16,16,16,16.64 */ #define BN_MULL_SIZE_NORMAL (16) /* 32 */ @@ -245,6 +270,15 @@ struct bignum_ctx : "a"(a),"g"(b) \ : "cc"); # endif +# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT) +# if defined(_MSC_VER) && _MSC_VER>=1400 + unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b); + unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b, + unsigned __int64 *h); +# pragma intrinsic(__umulh,_umul128) +# define BN_UMULT_HIGH(a,b) __umulh((a),(b)) +# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) +# endif # endif /* cpu */ #endif /* OPENSSL_NO_ASM */ @@ -254,44 +288,17 @@ struct bignum_ctx #define Lw(t) (((BN_ULONG)(t))&BN_MASK2) #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) -/* This is used for internal error checking and is not normally used */ -#ifdef BN_DEBUG -# include <assert.h> -# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax); -#else -# define bn_check_top(a) -#endif - -/* This macro is to add extra stuff for development checking */ -#ifdef BN_DEBUG -#define bn_set_max(r) ((r)->max=(r)->top,BN_set_flags((r),BN_FLG_STATIC_DATA)) -#else -#define bn_set_max(r) -#endif - -/* These macros are used to 'take' a section of a bignum for read only use */ -#define bn_set_low(r,a,n) \ - { \ - (r)->top=((a)->top > (n))?(n):(a)->top; \ - (r)->d=(a)->d; \ - (r)->neg=(a)->neg; \ - (r)->flags|=BN_FLG_STATIC_DATA; \ - bn_set_max(r); \ - } - -#define bn_set_high(r,a,n) \ +#ifdef BN_DEBUG_RAND +#define bn_clear_top2max(a) \ { \ - if ((a)->top > (n)) \ - { \ - (r)->top=(a)->top-n; \ - 
(r)->d= &((a)->d[n]); \ - } \ - else \ - (r)->top=0; \ - (r)->neg=(a)->neg; \ - (r)->flags|=BN_FLG_STATIC_DATA; \ - bn_set_max(r); \ + int ind = (a)->dmax - (a)->top; \ + BN_ULONG *ftl = &(a)->d[(a)->top-1]; \ + for (; ind != 0; ind--) \ + *(++ftl) = 0x0; \ } +#else +#define bn_clear_top2max(a) +#endif #ifdef BN_LLONG #define mul_add(r,a,w,c) { \ @@ -315,6 +322,33 @@ struct bignum_ctx (r1)=Hw(t); \ } +#elif defined(BN_UMULT_LOHI) +#define mul_add(r,a,w,c) { \ + BN_ULONG high,low,ret,tmp=(a); \ + ret = (r); \ + BN_UMULT_LOHI(low,high,w,tmp); \ + ret += (c); \ + (c) = (ret<(c))?1:0; \ + (c) += high; \ + ret += low; \ + (c) += (ret<low)?1:0; \ + (r) = ret; \ + } + +#define mul(r,a,w,c) { \ + BN_ULONG high,low,ret,ta=(a); \ + BN_UMULT_LOHI(low,high,w,ta); \ + ret = low + (c); \ + (c) = high; \ + (c) += (ret<low)?1:0; \ + (r) = ret; \ + } + +#define sqr(r0,r1,a) { \ + BN_ULONG tmp=(a); \ + BN_UMULT_LOHI(r0,r1,tmp,tmp); \ + } + #elif defined(BN_UMULT_HIGH) #define mul_add(r,a,w,c) { \ BN_ULONG high,low,ret,tmp=(a); \ @@ -433,18 +467,20 @@ void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a); int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n); int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); -#ifdef BN_RECURSION -void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, - BN_ULONG *t); -void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, - int n, BN_ULONG *t); +void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, + int dna,int dnb,BN_ULONG *t); +void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, + int n,int tna,int tnb,BN_ULONG *t); +void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t); +void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n); void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, BN_ULONG *t); void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2, BN_ULONG *t); -void bn_sqr_recursive(BN_ULONG 
*r,const BN_ULONG *a, int n2, BN_ULONG *t); -#endif -void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n); +BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl); +BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl); #ifdef __cplusplus } diff --git a/crypto/openssl/crypto/bn/bn_lib.c b/crypto/openssl/crypto/bn/bn_lib.c index e166045..3c4d545 100644 --- a/crypto/openssl/crypto/bn/bn_lib.c +++ b/crypto/openssl/crypto/bn/bn_lib.c @@ -69,6 +69,8 @@ const char *BN_version="Big Number" OPENSSL_VERSION_PTEXT; +/* This stuff appears to be completely unused, so is deprecated */ +#ifndef OPENSSL_NO_DEPRECATED /* For a 32 bit machine * 2 - 4 == 128 * 3 - 8 == 256 @@ -91,28 +93,28 @@ void BN_set_params(int mult, int high, int low, int mont) { if (mult >= 0) { - if (mult > (sizeof(int)*8)-1) + if (mult > (int)(sizeof(int)*8)-1) mult=sizeof(int)*8-1; bn_limit_bits=mult; bn_limit_num=1<<mult; } if (high >= 0) { - if (high > (sizeof(int)*8)-1) + if (high > (int)(sizeof(int)*8)-1) high=sizeof(int)*8-1; bn_limit_bits_high=high; bn_limit_num_high=1<<high; } if (low >= 0) { - if (low > (sizeof(int)*8)-1) + if (low > (int)(sizeof(int)*8)-1) low=sizeof(int)*8-1; bn_limit_bits_low=low; bn_limit_num_low=1<<low; } if (mont >= 0) { - if (mont > (sizeof(int)*8)-1) + if (mont > (int)(sizeof(int)*8)-1) mont=sizeof(int)*8-1; bn_limit_bits_mont=mont; bn_limit_num_mont=1<<mont; @@ -127,11 +129,12 @@ int BN_get_params(int which) else if (which == 3) return(bn_limit_bits_mont); else return(0); } +#endif const BIGNUM *BN_value_one(void) { static BN_ULONG data_one=1L; - static BIGNUM const_one={&data_one,1,1,0}; + static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA}; return(&const_one); } @@ -244,16 +247,11 @@ int BN_num_bits_word(BN_ULONG l) int BN_num_bits(const BIGNUM *a) { - BN_ULONG l; - int i; - + int i = a->top - 1; bn_check_top(a); - if (a->top == 0) return(0); - l=a->d[a->top-1]; - 
assert(l != 0); - i=(a->top-1)*BN_BITS2; - return(i+BN_num_bits_word(l)); + if (BN_is_zero(a)) return 0; + return ((i*BN_BITS2) + BN_num_bits_word(a->d[i])); } void BN_clear_free(BIGNUM *a) @@ -261,6 +259,7 @@ void BN_clear_free(BIGNUM *a) int i; if (a == NULL) return; + bn_check_top(a); if (a->d != NULL) { OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0])); @@ -276,16 +275,24 @@ void BN_clear_free(BIGNUM *a) void BN_free(BIGNUM *a) { if (a == NULL) return; + bn_check_top(a); if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) OPENSSL_free(a->d); - a->flags|=BN_FLG_FREE; /* REMOVE? */ if (a->flags & BN_FLG_MALLOCED) OPENSSL_free(a); + else + { +#ifndef OPENSSL_NO_DEPRECATED + a->flags|=BN_FLG_FREE; +#endif + a->d = NULL; + } } void BN_init(BIGNUM *a) { memset(a,0,sizeof(BIGNUM)); + bn_check_top(a); } BIGNUM *BN_new(void) @@ -302,6 +309,7 @@ BIGNUM *BN_new(void) ret->neg=0; ret->dmax=0; ret->d=NULL; + bn_check_top(ret); return(ret); } @@ -313,19 +321,19 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) const BN_ULONG *B; int i; + bn_check_top(b); + if (words > (INT_MAX/(4*BN_BITS2))) { BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG); return NULL; } - - bn_check_top(b); if (BN_get_flags(b,BN_FLG_STATIC_DATA)) { BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); return(NULL); } - a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1)); + a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*words); if (A == NULL) { BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE); @@ -363,19 +371,8 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) } } - /* Now need to zero any data between b->top and b->max */ - /* XXX Why? 
*/ - - A= &(a[b->top]); - for (i=(words - b->top)>>3; i>0; i--,A+=8) - { - A[0]=0; A[1]=0; A[2]=0; A[3]=0; - A[4]=0; A[5]=0; A[6]=0; A[7]=0; - } - for (i=(words - b->top)&7; i>0; i--,A++) - A[0]=0; #else - memset(A,0,sizeof(BN_ULONG)*(words+1)); + memset(A,0,sizeof(BN_ULONG)*words); memcpy(A,b->d,sizeof(b->d[0])*b->top); #endif @@ -393,16 +390,19 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) * while bn_dup_expand() makes sure allocation is made only once. */ +#ifndef OPENSSL_NO_DEPRECATED BIGNUM *bn_dup_expand(const BIGNUM *b, int words) { BIGNUM *r = NULL; + bn_check_top(b); + /* This function does not work if * words <= b->dmax && top < words * because BN_dup() does not preserve 'dmax'! * (But bn_dup_expand() is not used anywhere yet.) */ - + if (words > b->dmax) { BN_ULONG *a = bn_expand_internal(b, words); @@ -431,48 +431,67 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, int words) r = BN_dup(b); } + bn_check_top(r); return r; } +#endif /* This is an internal function that should not be used in applications. - * It ensures that 'b' has enough room for a 'words' word number number. + * It ensures that 'b' has enough room for a 'words' word number + * and initialises any unused part of b->d with leading zeros. * It is mostly used by the various BIGNUM routines. If there is an error, * NULL is returned. If not, 'b' is returned. */ BIGNUM *bn_expand2(BIGNUM *b, int words) { + bn_check_top(b); + if (words > b->dmax) { BN_ULONG *a = bn_expand_internal(b, words); + if(!a) return NULL; + if(b->d) OPENSSL_free(b->d); + b->d=a; + b->dmax=words; + } - if (a) +/* None of this should be necessary because of what b->top means! 
*/ +#if 0 + /* NB: bn_wexpand() calls this only if the BIGNUM really has to grow */ + if (b->top < b->dmax) + { + int i; + BN_ULONG *A = &(b->d[b->top]); + for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8) { - if (b->d) - OPENSSL_free(b->d); - b->d=a; - b->dmax=words; + A[0]=0; A[1]=0; A[2]=0; A[3]=0; + A[4]=0; A[5]=0; A[6]=0; A[7]=0; } - else - b = NULL; + for (i=(b->dmax - b->top)&7; i>0; i--,A++) + A[0]=0; + assert(A == &(b->d[b->dmax])); } +#endif + bn_check_top(b); return b; } BIGNUM *BN_dup(const BIGNUM *a) { - BIGNUM *r, *t; + BIGNUM *t; if (a == NULL) return NULL; - bn_check_top(a); t = BN_new(); - if (t == NULL) return(NULL); - r = BN_copy(t, a); - /* now r == t || r == NULL */ - if (r == NULL) + if (t == NULL) return NULL; + if(!BN_copy(t, a)) + { BN_free(t); - return r; + return NULL; + } + bn_check_top(t); + return t; } BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) @@ -506,11 +525,9 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) memcpy(a->d,b->d,sizeof(b->d[0])*b->top); #endif -/* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/ a->top=b->top; - if ((a->top == 0) && (a->d != NULL)) - a->d[0]=0; a->neg=b->neg; + bn_check_top(a); return(a); } @@ -520,6 +537,9 @@ void BN_swap(BIGNUM *a, BIGNUM *b) BN_ULONG *tmp_d; int tmp_top, tmp_dmax, tmp_neg; + bn_check_top(a); + bn_check_top(b); + flags_old_a = a->flags; flags_old_b = b->flags; @@ -540,11 +560,13 @@ void BN_swap(BIGNUM *a, BIGNUM *b) a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA); b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA); + bn_check_top(a); + bn_check_top(b); } - void BN_clear(BIGNUM *a) { + bn_check_top(a); if (a->d != NULL) memset(a->d,0,a->dmax*sizeof(a->d[0])); a->top=0; @@ -553,49 +575,22 @@ void BN_clear(BIGNUM *a) BN_ULONG BN_get_word(const BIGNUM *a) { - int i,n; - BN_ULONG ret=0; - - n=BN_num_bytes(a); - if (n > sizeof(BN_ULONG)) - return(BN_MASK2); - for (i=a->top-1; i>=0; i--) - { -#ifndef SIXTY_FOUR_BIT /* the data 
item > unsigned long */ - ret<<=BN_BITS4; /* stops the compiler complaining */ - ret<<=BN_BITS4; -#else - ret=0; -#endif - ret|=a->d[i]; - } - return(ret); + if (a->top > 1) + return BN_MASK2; + else if (a->top == 1) + return a->d[0]; + /* a->top == 0 */ + return 0; } int BN_set_word(BIGNUM *a, BN_ULONG w) { - int i,n; - if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0); - - n=sizeof(BN_ULONG)/BN_BYTES; - a->neg=0; - a->top=0; - a->d[0]=(BN_ULONG)w&BN_MASK2; - if (a->d[0] != 0) a->top=1; - for (i=1; i<n; i++) - { - /* the following is done instead of - * w>>=BN_BITS2 so compilers don't complain - * on builds where sizeof(long) == BN_TYPES */ -#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ - w>>=BN_BITS4; - w>>=BN_BITS4; -#else - w=0; -#endif - a->d[i]=(BN_ULONG)w&BN_MASK2; - if (a->d[i] != 0) a->top=i+1; - } + bn_check_top(a); + if (bn_expand(a,(int)sizeof(BN_ULONG)*8) == NULL) return(0); + a->neg = 0; + a->d[0] = w; + a->top = (w ? 1 : 0); + bn_check_top(a); return(1); } @@ -604,9 +599,12 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) unsigned int i,m; unsigned int n; BN_ULONG l; + BIGNUM *bn = NULL; - if (ret == NULL) ret=BN_new(); + if (ret == NULL) + ret = bn = BN_new(); if (ret == NULL) return(NULL); + bn_check_top(ret); l=0; n=len; if (n == 0) @@ -614,13 +612,16 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) ret->top=0; return(ret); } - if (bn_expand(ret,(int)(n+2)*8) == NULL) - return(NULL); i=((n-1)/BN_BYTES)+1; m=((n-1)%(BN_BYTES)); + if (bn_wexpand(ret, (int)i) == NULL) + { + if (bn) BN_free(bn); + return NULL; + } ret->top=i; ret->neg=0; - while (n-- > 0) + while (n--) { l=(l<<8L)| *(s++); if (m-- == 0) @@ -632,7 +633,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) } /* need to call this due to clear byte at top if avoiding * having the top bit set (-ve number) */ - bn_fix_top(ret); + bn_correct_top(ret); return(ret); } @@ -642,8 +643,9 @@ int BN_bn2bin(const BIGNUM *a, 
unsigned char *to) int n,i; BN_ULONG l; + bn_check_top(a); n=i=BN_num_bytes(a); - while (i-- > 0) + while (i--) { l=a->d[i/BN_BYTES]; *(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff; @@ -668,7 +670,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b) t1= ap[i]; t2= bp[i]; if (t1 != t2) - return(t1 > t2?1:-1); + return((t1 > t2) ? 1 : -1); } return(0); } @@ -718,6 +720,9 @@ int BN_set_bit(BIGNUM *a, int n) { int i,j,k; + if (n < 0) + return 0; + i=n/BN_BITS2; j=n%BN_BITS2; if (a->top <= i) @@ -729,6 +734,7 @@ int BN_set_bit(BIGNUM *a, int n) } a->d[i]|=(((BN_ULONG)1)<<j); + bn_check_top(a); return(1); } @@ -736,12 +742,15 @@ int BN_clear_bit(BIGNUM *a, int n) { int i,j; + bn_check_top(a); + if (n < 0) return 0; + i=n/BN_BITS2; j=n%BN_BITS2; if (a->top <= i) return(0); a->d[i]&=(~(((BN_ULONG)1)<<j)); - bn_fix_top(a); + bn_correct_top(a); return(1); } @@ -749,10 +758,11 @@ int BN_is_bit_set(const BIGNUM *a, int n) { int i,j; - if (n < 0) return(0); + bn_check_top(a); + if (n < 0) return 0; i=n/BN_BITS2; j=n%BN_BITS2; - if (a->top <= i) return(0); + if (a->top <= i) return 0; return((a->d[i]&(((BN_ULONG)1)<<j))?1:0); } @@ -760,9 +770,12 @@ int BN_mask_bits(BIGNUM *a, int n) { int b,w; + bn_check_top(a); + if (n < 0) return 0; + w=n/BN_BITS2; b=n%BN_BITS2; - if (w >= a->top) return(0); + if (w >= a->top) return 0; if (b == 0) a->top=w; else @@ -770,10 +783,18 @@ int BN_mask_bits(BIGNUM *a, int n) a->top=w+1; a->d[w]&= ~(BN_MASK2<<b); } - bn_fix_top(a); + bn_correct_top(a); return(1); } +void BN_set_negative(BIGNUM *a, int b) + { + if (b && !BN_is_zero(a)) + a->neg = 1; + else + a->neg = 0; + } + int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n) { int i; diff --git a/crypto/openssl/crypto/bn/bn_mod.c b/crypto/openssl/crypto/bn/bn_mod.c index 5cf8248..77d6ddb 100644 --- a/crypto/openssl/crypto/bn/bn_mod.c +++ b/crypto/openssl/crypto/bn/bn_mod.c @@ -149,7 +149,7 @@ int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_ * and less than 
m */ int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m) { - if (!BN_add(r, a, b)) return 0; + if (!BN_uadd(r, a, b)) return 0; if (BN_ucmp(r, m) >= 0) return BN_usub(r, r, m); return 1; @@ -192,6 +192,7 @@ int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, else { if (!BN_mul(t,a,b,ctx)) goto err; } if (!BN_nnmod(r,t,m,ctx)) goto err; + bn_check_top(r); ret=1; err: BN_CTX_end(ctx); @@ -210,6 +211,7 @@ int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) { if (!BN_lshift1(r, a)) return 0; + bn_check_top(r); return BN_nnmod(r, r, m, ctx); } @@ -219,6 +221,7 @@ int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m) { if (!BN_lshift1(r, a)) return 0; + bn_check_top(r); if (BN_cmp(r, m) >= 0) return BN_sub(r, r, m); return 1; @@ -240,6 +243,7 @@ int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ct } ret = BN_mod_lshift_quick(r, r, n, (abs_m ? 
abs_m : m)); + bn_check_top(r); if (abs_m) BN_free(abs_m); @@ -291,6 +295,7 @@ int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m) if (!BN_sub(r, r, m)) return 0; } } + bn_check_top(r); return 1; } diff --git a/crypto/openssl/crypto/bn/bn_mont.c b/crypto/openssl/crypto/bn/bn_mont.c index c9ebdba..42376da 100644 --- a/crypto/openssl/crypto/bn/bn_mont.c +++ b/crypto/openssl/crypto/bn/bn_mont.c @@ -90,6 +90,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, } /* reduce from aRR to aR */ if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; + bn_check_top(r); ret=1; err: BN_CTX_end(ctx); @@ -172,7 +173,7 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; } } - bn_fix_top(r); + bn_correct_top(r); /* mont->ri will be a multiple of the word size */ #if 0 @@ -229,6 +230,7 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, if (!BN_usub(ret,ret,&(mont->N))) goto err; } retn=1; + bn_check_top(ret); err: BN_CTX_end(ctx); return(retn); @@ -269,11 +271,13 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { - BIGNUM Ri,*R; + int ret = 0; + BIGNUM *Ri,*R; - BN_init(&Ri); + BN_CTX_start(ctx); + if((Ri = BN_CTX_get(ctx)) == NULL) goto err; R= &(mont->RR); /* grab RR as a temp */ - BN_copy(&(mont->N),mod); /* Set N */ + if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */ mont->N.neg = 0; #ifdef MONT_WORD @@ -282,57 +286,56 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BN_ULONG buf[2]; mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; - if (!(BN_zero(R))) goto err; + BN_zero(R); if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ buf[0]=mod->d[0]; /* tmod = N mod word size */ buf[1]=0; tmod.d=buf; - tmod.top=1; + tmod.top = buf[0] != 0 ? 
1 : 0; tmod.dmax=2; tmod.neg=0; /* Ri = R^-1 mod N*/ - if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) + if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) goto err; - if (!BN_lshift(&Ri,&Ri,BN_BITS2)) goto err; /* R*Ri */ - if (!BN_is_zero(&Ri)) + if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */ + if (!BN_is_zero(Ri)) { - if (!BN_sub_word(&Ri,1)) goto err; + if (!BN_sub_word(Ri,1)) goto err; } else /* if N mod word size == 1 */ { - if (!BN_set_word(&Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */ + if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */ } - if (!BN_div(&Ri,NULL,&Ri,&tmod,ctx)) goto err; + if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; /* Ni = (R*Ri-1)/N, * keep only least significant word: */ - mont->n0 = (Ri.top > 0) ? Ri.d[0] : 0; - BN_free(&Ri); + mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; } #else /* !MONT_WORD */ { /* bignum version */ mont->ri=BN_num_bits(&mont->N); - if (!BN_zero(R)) goto err; + BN_zero(R); if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */ /* Ri = R^-1 mod N*/ - if ((BN_mod_inverse(&Ri,R,&mont->N,ctx)) == NULL) + if ((BN_mod_inverse(Ri,R,&mont->N,ctx)) == NULL) goto err; - if (!BN_lshift(&Ri,&Ri,mont->ri)) goto err; /* R*Ri */ - if (!BN_sub_word(&Ri,1)) goto err; + if (!BN_lshift(Ri,Ri,mont->ri)) goto err; /* R*Ri */ + if (!BN_sub_word(Ri,1)) goto err; /* Ni = (R*Ri-1) / N */ - if (!BN_div(&(mont->Ni),NULL,&Ri,&mont->N,ctx)) goto err; - BN_free(&Ri); + if (!BN_div(&(mont->Ni),NULL,Ri,&mont->N,ctx)) goto err; } #endif /* setup RR for conversions */ - if (!BN_zero(&(mont->RR))) goto err; + BN_zero(&(mont->RR)); if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err; if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err; - return(1); + ret = 1; err: - return(0); + BN_CTX_end(ctx); + return ret; } BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) @@ -347,3 +350,21 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) return(to); } +BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX 
**pmont, int lock, + const BIGNUM *mod, BN_CTX *ctx) + { + if (*pmont) + return *pmont; + CRYPTO_w_lock(lock); + if (!*pmont) + { + BN_MONT_CTX *mtmp; + mtmp = BN_MONT_CTX_new(); + if (mtmp && !BN_MONT_CTX_set(mtmp, mod, ctx)) + BN_MONT_CTX_free(mtmp); + else + *pmont = mtmp; + } + CRYPTO_w_unlock(lock); + return *pmont; + } diff --git a/crypto/openssl/crypto/bn/bn_mpi.c b/crypto/openssl/crypto/bn/bn_mpi.c index 05fa9d1..a054d21a 100644 --- a/crypto/openssl/crypto/bn/bn_mpi.c +++ b/crypto/openssl/crypto/bn/bn_mpi.c @@ -124,6 +124,7 @@ BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a) { BN_clear_bit(a,BN_num_bits(a)-1); } + bn_check_top(a); return(a); } diff --git a/crypto/openssl/crypto/bn/bn_mul.c b/crypto/openssl/crypto/bn/bn_mul.c index 3ae3822..aec1eaf 100644 --- a/crypto/openssl/crypto/bn/bn_mul.c +++ b/crypto/openssl/crypto/bn/bn_mul.c @@ -56,10 +56,325 @@ * [including the GNU Public Licence.] */ +#ifndef BN_DEBUG +# undef NDEBUG /* avoid conflicting definitions */ +# define NDEBUG +#endif + #include <stdio.h> +#include <assert.h> #include "cryptlib.h" #include "bn_lcl.h" +#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) +/* Here follows specialised variants of bn_add_words() and + bn_sub_words(). They have the property performing operations on + arrays of different sizes. The sizes of those arrays is expressed through + cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl, + which is the delta between the two lengths, calculated as len(a)-len(b). + All lengths are the number of BN_ULONGs... For the operations that require + a result array as parameter, it must have the length cl+abs(dl). + These functions should probably end up in bn_asm.c as soon as there are + assembler counterparts for the systems that use assembler files. 
*/ + +BN_ULONG bn_sub_part_words(BN_ULONG *r, + const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl) + { + BN_ULONG c, t; + + assert(cl >= 0); + c = bn_sub_words(r, a, b, cl); + + if (dl == 0) + return c; + + r += cl; + a += cl; + b += cl; + + if (dl < 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); +#endif + for (;;) + { + t = b[0]; + r[0] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; + + t = b[1]; + r[1] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; + + t = b[2]; + r[2] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; + + t = b[3]; + r[3] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; + + b += 4; + r += 4; + } + } + else + { + int save_dl = dl; +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c); +#endif + while(c) + { + t = a[0]; + r[0] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + t = a[1]; + r[1] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + t = a[2]; + r[2] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + t = a[3]; + r[3] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + save_dl = dl; + a += 4; + r += 4; + } + if (dl > 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); +#endif + if (save_dl > dl) + { + switch (save_dl - dl) + { + case 1: + r[1] = a[1]; + if (--dl <= 0) break; + case 2: + r[2] = a[2]; + if (--dl <= 0) break; + case 3: + r[3] = a[3]; + if (--dl <= 0) break; + } + a += 4; + r += 4; + } + } + if (dl > 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl); +#endif + for(;;) + { + r[0] = a[0]; + if (--dl <= 0) break; + r[1] = a[1]; + if (--dl <= 0) break; + r[2] = a[2]; + if (--dl <= 0) break; + r[3] = a[3]; + if (--dl <= 0) break; + + a += 4; + r += 4; + } + } + } + return c; + } +#endif + +BN_ULONG bn_add_part_words(BN_ULONG 
*r, + const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl) + { + BN_ULONG c, l, t; + + assert(cl >= 0); + c = bn_add_words(r, a, b, cl); + + if (dl == 0) + return c; + + r += cl; + a += cl; + b += cl; + + if (dl < 0) + { + int save_dl = dl; +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); +#endif + while (c) + { + l=(c+b[0])&BN_MASK2; + c=(l < c); + r[0]=l; + if (++dl >= 0) break; + + l=(c+b[1])&BN_MASK2; + c=(l < c); + r[1]=l; + if (++dl >= 0) break; + + l=(c+b[2])&BN_MASK2; + c=(l < c); + r[2]=l; + if (++dl >= 0) break; + + l=(c+b[3])&BN_MASK2; + c=(l < c); + r[3]=l; + if (++dl >= 0) break; + + save_dl = dl; + b+=4; + r+=4; + } + if (dl < 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl); +#endif + if (save_dl < dl) + { + switch (dl - save_dl) + { + case 1: + r[1] = b[1]; + if (++dl >= 0) break; + case 2: + r[2] = b[2]; + if (++dl >= 0) break; + case 3: + r[3] = b[3]; + if (++dl >= 0) break; + } + b += 4; + r += 4; + } + } + if (dl < 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl); +#endif + for(;;) + { + r[0] = b[0]; + if (++dl >= 0) break; + r[1] = b[1]; + if (++dl >= 0) break; + r[2] = b[2]; + if (++dl >= 0) break; + r[3] = b[3]; + if (++dl >= 0) break; + + b += 4; + r += 4; + } + } + } + else + { + int save_dl = dl; +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl); +#endif + while (c) + { + t=(a[0]+c)&BN_MASK2; + c=(t < c); + r[0]=t; + if (--dl <= 0) break; + + t=(a[1]+c)&BN_MASK2; + c=(t < c); + r[1]=t; + if (--dl <= 0) break; + + t=(a[2]+c)&BN_MASK2; + c=(t < c); + r[2]=t; + if (--dl <= 0) break; + + t=(a[3]+c)&BN_MASK2; + c=(t < c); + r[3]=t; + if (--dl <= 0) break; + + save_dl = dl; + a+=4; + r+=4; + } +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); +#endif + if (dl > 0) + { + if (save_dl > dl) + { + switch (save_dl - dl) + { + 
case 1: + r[1] = a[1]; + if (--dl <= 0) break; + case 2: + r[2] = a[2]; + if (--dl <= 0) break; + case 3: + r[3] = a[3]; + if (--dl <= 0) break; + } + a += 4; + r += 4; + } + } + if (dl > 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl); +#endif + for(;;) + { + r[0] = a[0]; + if (--dl <= 0) break; + r[1] = a[1]; + if (--dl <= 0) break; + r[2] = a[2]; + if (--dl <= 0) break; + r[3] = a[3]; + if (--dl <= 0) break; + + a += 4; + r += 4; + } + } + } + return c; + } + #ifdef BN_RECURSION /* Karatsuba recursive multiplication algorithm * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ @@ -75,14 +390,15 @@ * a[1]*b[1] */ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, - BN_ULONG *t) + int dna, int dnb, BN_ULONG *t) { int n=n2/2,c1,c2; + int tna=n+dna, tnb=n+dnb; unsigned int neg,zero; BN_ULONG ln,lo,*p; # ifdef BN_COUNT - printf(" bn_mul_recursive %d * %d\n",n2,n2); + fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2); # endif # ifdef BN_MUL_COMBA # if 0 @@ -92,34 +408,40 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, return; } # endif - if (n2 == 8) + /* Only call bn_mul_comba 8 if n2 == 8 and the + * two arrays are complete [steve] + */ + if (n2 == 8 && dna == 0 && dnb == 0) { bn_mul_comba8(r,a,b); return; } # endif /* BN_MUL_COMBA */ + /* Else do normal multiply */ if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { - /* This should not happen */ - bn_mul_normal(r,a,n2,b,n2); + bn_mul_normal(r,a,n2+dna,b,n2+dnb); + if ((dna + dnb) < 0) + memset(&r[2*n2 + dna + dnb], 0, + sizeof(BN_ULONG) * -(dna + dnb)); return; } /* r=(a[0]-a[1])*(b[1]-b[0]) */ - c1=bn_cmp_words(a,&(a[n]),n); - c2=bn_cmp_words(&(b[n]),b,n); + c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); + c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); zero=neg=0; switch (c1*3+c2) { case -4: - bn_sub_words(t, &(a[n]),a, n); /* - */ - bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + 
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ break; case -3: zero=1; break; case -2: - bn_sub_words(t, &(a[n]),a, n); /* - */ - bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ neg=1; break; case -1: @@ -128,21 +450,22 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, zero=1; break; case 2: - bn_sub_words(t, a, &(a[n]),n); /* + */ - bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ neg=1; break; case 3: zero=1; break; case 4: - bn_sub_words(t, a, &(a[n]),n); - bn_sub_words(&(t[n]),&(b[n]),b, n); + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); break; } # ifdef BN_MUL_COMBA - if (n == 4) + if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take + extra args to do this well */ { if (!zero) bn_mul_comba4(&(t[n2]),t,&(t[n])); @@ -152,7 +475,9 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, bn_mul_comba4(r,a,b); bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); } - else if (n == 8) + else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could + take extra args to do this + well */ { if (!zero) bn_mul_comba8(&(t[n2]),t,&(t[n])); @@ -167,11 +492,11 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, { p= &(t[n2*2]); if (!zero) - bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); + bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); else memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); - bn_mul_recursive(r,a,b,n,p); - bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); + bn_mul_recursive(r,a,b,n,0,0,p); + bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p); } /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign @@ -220,39 +545,39 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, /* n+tn is the word length * t needs to be n*4 
is size, as does r */ -void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, - int n, BN_ULONG *t) +void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, + int tna, int tnb, BN_ULONG *t) { int i,j,n2=n*2; int c1,c2,neg,zero; BN_ULONG ln,lo,*p; # ifdef BN_COUNT - printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); + fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n", + tna, n, tnb, n); # endif if (n < 8) { - i=tn+n; - bn_mul_normal(r,a,i,b,i); + bn_mul_normal(r,a,n+tna,b,n+tnb); return; } /* r=(a[0]-a[1])*(b[1]-b[0]) */ - c1=bn_cmp_words(a,&(a[n]),n); - c2=bn_cmp_words(&(b[n]),b,n); + c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); + c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); zero=neg=0; switch (c1*3+c2) { case -4: - bn_sub_words(t, &(a[n]),a, n); /* - */ - bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ break; case -3: zero=1; /* break; */ case -2: - bn_sub_words(t, &(a[n]),a, n); /* - */ - bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ neg=1; break; case -1: @@ -261,16 +586,16 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, zero=1; /* break; */ case 2: - bn_sub_words(t, a, &(a[n]),n); /* + */ - bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ neg=1; break; case 3: zero=1; /* break; */ case 4: - bn_sub_words(t, a, &(a[n]),n); - bn_sub_words(&(t[n]),&(b[n]),b, n); + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); break; } /* The zero case isn't yet implemented here. 
The speedup @@ -289,54 +614,59 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, { bn_mul_comba8(&(t[n2]),t,&(t[n])); bn_mul_comba8(r,a,b); - bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); - memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); + bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); + memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb)); } else { p= &(t[n2*2]); - bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); - bn_mul_recursive(r,a,b,n,p); + bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); + bn_mul_recursive(r,a,b,n,0,0,p); i=n/2; /* If there is only a bottom half to the number, * just do it */ - j=tn-i; + if (tna > tnb) + j = tna - i; + else + j = tnb - i; if (j == 0) { - bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); + bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]), + i,tna-i,tnb-i,p); memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); } else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ { bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), - j,i,p); - memset(&(r[n2+tn*2]),0, - sizeof(BN_ULONG)*(n2-tn*2)); + i,tna-i,tnb-i,p); + memset(&(r[n2+tna+tnb]),0, + sizeof(BN_ULONG)*(n2-tna-tnb)); } else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ { memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); - if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL) + if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL + && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { - bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); + bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); } else { for (;;) { i/=2; - if (i < tn) + if (i < tna && i < tnb) { bn_mul_part_recursive(&(r[n2]), &(a[n]),&(b[n]), - tn-i,i,p); + i,tna-i,tnb-i,p); break; } - else if (i == tn) + else if (i <= tna && i <= tnb) { bn_mul_recursive(&(r[n2]), &(a[n]),&(b[n]), - i,p); + i,tna-i,tnb-i,p); break; } } @@ -397,10 +727,10 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int n=n2/2; # ifdef BN_COUNT - printf(" bn_mul_low_recursive %d * %d\n",n2,n2); + fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2); # endif - 
bn_mul_recursive(r,a,b,n,&(t[0])); + bn_mul_recursive(r,a,b,n,0,0,&(t[0])); if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); @@ -431,7 +761,7 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, BN_ULONG ll,lc,*lp,*mp; # ifdef BN_COUNT - printf(" bn_mul_high %d * %d\n",n2,n2); + fprintf(stderr," bn_mul_high %d * %d\n",n2,n2); # endif n=n2/2; @@ -484,8 +814,8 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, else # endif { - bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); - bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); + bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2])); + bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2])); } /* s0 == low(al*bl) @@ -610,19 +940,19 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { + int ret=0; int top,al,bl; BIGNUM *rr; - int ret = 0; #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) int i; #endif #ifdef BN_RECURSION - BIGNUM *t; - int j,k; + BIGNUM *t=NULL; + int j=0,k; #endif #ifdef BN_COUNT - printf("BN_mul %d * %d\n",a->top,b->top); + fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top); #endif bn_check_top(a); @@ -634,7 +964,7 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) if ((al == 0) || (bl == 0)) { - if (!BN_zero(r)) goto err; + BN_zero(r); return(1); } top=al+bl; @@ -675,21 +1005,55 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) #ifdef BN_RECURSION if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { - if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA) && bl<b->dmax) + if (i >= -1 && i <= 1) { -#if 0 /* tribute to const-ification, bl<b->dmax above covers for this */ - if (bn_wexpand(b,al) == NULL) goto err; -#endif - b->d[bl]=0; + int sav_j =0; + /* Find out the power of two lower or equal + to the longest of the two numbers */ + if (i >= 0) + 
{ + j = BN_num_bits_word((BN_ULONG)al); + } + if (i == -1) + { + j = BN_num_bits_word((BN_ULONG)bl); + } + sav_j = j; + j = 1<<(j-1); + assert(j <= al || j <= bl); + k = j+j; + t = BN_CTX_get(ctx); + if (al > j || bl > j) + { + bn_wexpand(t,k*4); + bn_wexpand(rr,k*4); + bn_mul_part_recursive(rr->d,a->d,b->d, + j,al-j,bl-j,t->d); + } + else /* al <= j || bl <= j */ + { + bn_wexpand(t,k*2); + bn_wexpand(rr,k*2); + bn_mul_recursive(rr->d,a->d,b->d, + j,al-j,bl-j,t->d); + } + rr->top=top; + goto end; + } +#if 0 + if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA)) + { + BIGNUM *tmp_bn = (BIGNUM *)b; + if (bn_wexpand(tmp_bn,al) == NULL) goto err; + tmp_bn->d[bl]=0; bl++; i--; } - else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA) && al<a->dmax) + else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA)) { -#if 0 /* tribute to const-ification, al<a->dmax above covers for this */ - if (bn_wexpand(a,bl) == NULL) goto err; -#endif - a->d[al]=0; + BIGNUM *tmp_bn = (BIGNUM *)a; + if (bn_wexpand(tmp_bn,bl) == NULL) goto err; + tmp_bn->d[al]=0; al++; i++; } @@ -706,26 +1070,17 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) if (bn_wexpand(t,k*2) == NULL) goto err; if (bn_wexpand(rr,k*2) == NULL) goto err; bn_mul_recursive(rr->d,a->d,b->d,al,t->d); - rr->top=top; - goto end; } -#if 0 /* tribute to const-ification, rsa/dsa performance is not affected */ else { - if (bn_wexpand(a,k) == NULL ) goto err; - if (bn_wexpand(b,k) == NULL ) goto err; - if (bn_wexpand(t,k*4) == NULL ) goto err; - if (bn_wexpand(rr,k*4) == NULL ) goto err; - for (i=a->top; i<k; i++) - a->d[i]=0; - for (i=b->top; i<k; i++) - b->d[i]=0; + if (bn_wexpand(t,k*4) == NULL) goto err; + if (bn_wexpand(rr,k*4) == NULL) goto err; bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); } rr->top=top; goto end; -#endif } +#endif } #endif /* BN_RECURSION */ if (bn_wexpand(rr,top) == NULL) goto err; @@ -735,10 +1090,11 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) #if 
defined(BN_MUL_COMBA) || defined(BN_RECURSION) end: #endif - bn_fix_top(rr); + bn_correct_top(rr); if (r != rr) BN_copy(r,rr); ret=1; err: + bn_check_top(r); BN_CTX_end(ctx); return(ret); } @@ -748,7 +1104,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) BN_ULONG *rr; #ifdef BN_COUNT - printf(" bn_mul_normal %d * %d\n",na,nb); + fprintf(stderr," bn_mul_normal %d * %d\n",na,nb); #endif if (na < nb) @@ -761,7 +1117,13 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) } rr= &(r[na]); - rr[0]=bn_mul_words(r,a,na,b[0]); + if (nb <= 0) + { + (void)bn_mul_words(r,a,na,0); + return; + } + else + rr[0]=bn_mul_words(r,a,na,b[0]); for (;;) { @@ -782,7 +1144,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) { #ifdef BN_COUNT - printf(" bn_mul_low_normal %d * %d\n",n,n); + fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n); #endif bn_mul_words(r,a,n,b[0]); diff --git a/crypto/openssl/crypto/bn/bn_nist.c b/crypto/openssl/crypto/bn/bn_nist.c new file mode 100644 index 0000000..f8e306b --- /dev/null +++ b/crypto/openssl/crypto/bn/bn_nist.c @@ -0,0 +1,775 @@ +/* crypto/bn/bn_nist.c */ +/* + * Written by Nils Larsch for the OpenSSL project + */ +/* ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ + +#include "bn_lcl.h" +#include "cryptlib.h" + +#define BN_NIST_192_TOP (192+BN_BITS2-1)/BN_BITS2 +#define BN_NIST_224_TOP (224+BN_BITS2-1)/BN_BITS2 +#define BN_NIST_256_TOP (256+BN_BITS2-1)/BN_BITS2 +#define BN_NIST_384_TOP (384+BN_BITS2-1)/BN_BITS2 +#define BN_NIST_521_TOP (521+BN_BITS2-1)/BN_BITS2 + +#if BN_BITS2 == 64 +static const BN_ULONG _nist_p_192[] = + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL, + 0xFFFFFFFFFFFFFFFFULL}; +static const BN_ULONG _nist_p_224[] = + {0x0000000000000001ULL,0xFFFFFFFF00000000ULL, + 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL}; +static const BN_ULONG _nist_p_256[] = + {0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL, + 0x0000000000000000ULL,0xFFFFFFFF00000001ULL}; +static const BN_ULONG _nist_p_384[] = + {0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL, + 0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}; +static const BN_ULONG _nist_p_521[] = + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL, + 0x00000000000001FFULL}; +#elif BN_BITS2 == 32 +static const BN_ULONG _nist_p_192[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE, + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; +static const BN_ULONG _nist_p_224[] = {0x00000001,0x00000000,0x00000000, + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; +static const BN_ULONG _nist_p_256[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, + 0x00000000,0x00000000,0x00000000,0x00000001,0xFFFFFFFF}; +static const BN_ULONG _nist_p_384[] = {0xFFFFFFFF,0x00000000,0x00000000, + 0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; +static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, + 0xFFFFFFFF,0x000001FF}; +#elif BN_BITS2 == 16 +static const 
BN_ULONG _nist_p_192[] = {0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFE, + 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF}; +static const BN_ULONG _nist_p_224[] = {0x0001,0x0000,0x0000,0x0000,0x0000, + 0x0000,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF}; +static const BN_ULONG _nist_p_256[] = {0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF, + 0xFFFF,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0000,0xFFFF, + 0xFFFF}; +static const BN_ULONG _nist_p_384[] = {0xFFFF,0xFFFF,0x0000,0x0000,0x0000, + 0x0000,0xFFFF,0xFFFF,0xFFFE,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF, + 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF}; +static const BN_ULONG _nist_p_521[] = {0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF, + 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF, + 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF, + 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0x01FF}; +#elif BN_BITS2 == 8 +static const BN_ULONG _nist_p_192[] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFE,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF}; +static const BN_ULONG _nist_p_224[] = {0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; +static const BN_ULONG _nist_p_256[] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x01,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF}; +static const BN_ULONG _nist_p_384[] = {0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; +static const BN_ULONG _nist_p_521[] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0x01}; +#endif + +const BIGNUM *BN_get0_nist_prime_192(void) + { + static BIGNUM const_nist_192 = { (BN_ULONG *)_nist_p_192, + BN_NIST_192_TOP, BN_NIST_192_TOP, 0, BN_FLG_STATIC_DATA }; + return &const_nist_192; + } + +const BIGNUM *BN_get0_nist_prime_224(void) + { + static BIGNUM const_nist_224 = { (BN_ULONG *)_nist_p_224, + BN_NIST_224_TOP, BN_NIST_224_TOP, 0, BN_FLG_STATIC_DATA }; + return &const_nist_224; + } + +const BIGNUM *BN_get0_nist_prime_256(void) + { + static BIGNUM const_nist_256 = { (BN_ULONG *)_nist_p_256, + BN_NIST_256_TOP, BN_NIST_256_TOP, 0, BN_FLG_STATIC_DATA }; + return &const_nist_256; + } + +const BIGNUM *BN_get0_nist_prime_384(void) + { + static BIGNUM const_nist_384 = { (BN_ULONG *)_nist_p_384, + BN_NIST_384_TOP, BN_NIST_384_TOP, 0, BN_FLG_STATIC_DATA }; + return &const_nist_384; + } + +const BIGNUM *BN_get0_nist_prime_521(void) + { + static BIGNUM const_nist_521 = { (BN_ULONG *)_nist_p_521, + BN_NIST_521_TOP, BN_NIST_521_TOP, 0, BN_FLG_STATIC_DATA }; + return &const_nist_521; + } + +/* some misc internal functions */ +#if BN_BITS2 != 64 +static BN_ULONG _256_data[BN_NIST_256_TOP*6]; +static int _is_set_256_data = 0; +static void _init_256_data(void); + +static BN_ULONG _384_data[BN_NIST_384_TOP*8]; +static int _is_set_384_data = 0; +static void _init_384_data(void); +#endif + +#define BN_NIST_ADD_ONE(a) while (!(++(*(a)))) ++(a); + +static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max) + { + int i; + BN_ULONG *_tmp1 = (buf), *_tmp2 = (a); + for (i = (top); i != 0; i--) + *_tmp1++ = *_tmp2++; + for (i = (max) - (top); i != 0; i--) + *_tmp1++ = (BN_ULONG) 0; + } + +static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top) + { + int i; + BN_ULONG *_tmp1 = (buf), *_tmp2 = (a); + for (i = (top); i != 0; 
i--) + *_tmp1++ = *_tmp2++; + } + +#if BN_BITS2 == 64 +#define bn_cp_64(to, n, from, m) (to)[n] = (from)[m]; +#define bn_64_set_0(to, n) (to)[n] = (BN_ULONG)0; +/* TBD */ +#define bn_cp_32(to, n, from, m) (to)[n] = (from)[m]; +#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0; +#else +#define bn_cp_64(to, n, from, m) \ + { \ + bn_cp_32(to, (n)*2, from, (m)*2); \ + bn_cp_32(to, (n)*2+1, from, (m)*2+1); \ + } +#define bn_64_set_0(to, n) \ + { \ + bn_32_set_0(to, (n)*2); \ + bn_32_set_0(to, (n)*2+1); \ + } +#if BN_BITS2 == 32 +#define bn_cp_32(to, n, from, m) (to)[n] = (from)[m]; +#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0; +#elif BN_BITS2 == 16 +#define bn_cp_32(to, n, from, m) \ + { \ + (to)[(n)*2] = (from)[(m)*2]; \ + (to)[(n)*2+1] = (from)[(m)*2+1];\ + } +#define bn_32_set_0(to, n) { (to)[(n)*2] = 0; (to)[(n)*2+1] = 0; } +#elif BN_BITS2 == 8 +#define bn_cp_32(to, n, from, m) \ + { \ + (to)[(n)*4] = (from)[(m)*4]; \ + (to)[(n)*4+1] = (from)[(m)*4+1];\ + (to)[(n)*4+2] = (from)[(m)*4+2];\ + (to)[(n)*4+3] = (from)[(m)*4+3];\ + } +#define bn_32_set_0(to, n) \ + { (to)[(n)*4] = (BN_ULONG)0; (to)[(n)*4+1] = (BN_ULONG)0; \ + (to)[(n)*4+2] = (BN_ULONG)0; (to)[(n)*4+3] = (BN_ULONG)0; } +#endif +#endif /* BN_BITS2 != 64 */ + + +#define nist_set_192(to, from, a1, a2, a3) \ + { \ + if (a3 != 0) bn_cp_64(to, 0, from, (a3) - 3) else bn_64_set_0(to, 0)\ + bn_cp_64(to, 1, from, (a2) - 3) \ + if (a1 != 0) bn_cp_64(to, 2, from, (a1) - 3) else bn_64_set_0(to, 2)\ + } + +int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, + BN_CTX *ctx) + { + int top = a->top, i; + BN_ULONG carry = 0; + register BN_ULONG *r_d, *a_d = a->d; + BN_ULONG t_d[BN_NIST_192_TOP], + buf[BN_NIST_192_TOP]; + + i = BN_ucmp(field, a); + if (i == 0) + { + BN_zero(r); + return 1; + } + else if (i > 0) + return (r == a) ? 
1 : (BN_copy(r ,a) != NULL); + + if (top == BN_NIST_192_TOP) + return BN_usub(r, a, field); + + if (r != a) + { + if (!bn_wexpand(r, BN_NIST_192_TOP)) + return 0; + r_d = r->d; + nist_cp_bn(r_d, a_d, BN_NIST_192_TOP); + } + else + r_d = a_d; + + nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP); + +#if defined(OPENSSL_SYS_VMS) && defined(__DECC) +# pragma message save +# pragma message disable BADSUBSCRIPT +#endif + + nist_set_192(t_d, buf, 0, 3, 3); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP)) + ++carry; + + nist_set_192(t_d, buf, 4, 4, 0); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP)) + ++carry; + +#if defined(OPENSSL_SYS_VMS) && defined(__DECC) +# pragma message restore +#endif + + nist_set_192(t_d, buf, 5, 5, 5) + if (bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP)) + ++carry; + + while (carry) + { + if (bn_sub_words(r_d, r_d, _nist_p_192, BN_NIST_192_TOP)) + --carry; + } + r->top = BN_NIST_192_TOP; + bn_correct_top(r); + if (BN_ucmp(r, field) >= 0) + { + bn_sub_words(r_d, r_d, _nist_p_192, BN_NIST_192_TOP); + bn_correct_top(r); + } + + bn_check_top(r); + return 1; + } + +#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \ + { \ + if (a7 != 0) bn_cp_32(to, 0, from, (a7) - 7) else bn_32_set_0(to, 0)\ + if (a6 != 0) bn_cp_32(to, 1, from, (a6) - 7) else bn_32_set_0(to, 1)\ + if (a5 != 0) bn_cp_32(to, 2, from, (a5) - 7) else bn_32_set_0(to, 2)\ + if (a4 != 0) bn_cp_32(to, 3, from, (a4) - 7) else bn_32_set_0(to, 3)\ + if (a3 != 0) bn_cp_32(to, 4, from, (a3) - 7) else bn_32_set_0(to, 4)\ + if (a2 != 0) bn_cp_32(to, 5, from, (a2) - 7) else bn_32_set_0(to, 5)\ + if (a1 != 0) bn_cp_32(to, 6, from, (a1) - 7) else bn_32_set_0(to, 6)\ + } + +int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, + BN_CTX *ctx) + { +#if BN_BITS2 != 64 + int top = a->top, i; + int carry = 0; + BN_ULONG *r_d, *a_d = a->d; + BN_ULONG t_d[BN_NIST_224_TOP], + buf[BN_NIST_224_TOP]; + + i = BN_ucmp(field, a); + if (i == 0) + { + 
BN_zero(r); + return 1; + } + else if (i > 0) + return (r == a)? 1 : (BN_copy(r ,a) != NULL); + + if (top == BN_NIST_224_TOP) + return BN_usub(r, a, field); + + if (r != a) + { + if (!bn_wexpand(r, BN_NIST_224_TOP)) + return 0; + r_d = r->d; + nist_cp_bn(r_d, a_d, BN_NIST_224_TOP); + } + else + r_d = a_d; + + nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); + + nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP)) + ++carry; + nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP)) + ++carry; + nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP)) + --carry; + nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP)) + --carry; + + if (carry > 0) + while (carry) + { + if (bn_sub_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP)) + --carry; + } + else if (carry < 0) + while (carry) + { + if (bn_add_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP)) + ++carry; + } + + r->top = BN_NIST_224_TOP; + bn_correct_top(r); + if (BN_ucmp(r, field) >= 0) + { + bn_sub_words(r_d, r_d, _nist_p_224, BN_NIST_224_TOP); + bn_correct_top(r); + } + bn_check_top(r); + return 1; +#else + return 0; +#endif + } + +#if BN_BITS2 != 64 +static void _init_256_data(void) + { + int i; + BN_ULONG *tmp1 = _256_data; + const BN_ULONG *tmp2 = tmp1; + + memcpy(tmp1, _nist_p_256, BN_NIST_256_TOP * sizeof(BN_ULONG)); + tmp1 += BN_NIST_256_TOP; + + for (i=0; i<5; i++) + { + bn_add_words(tmp1, _nist_p_256, tmp2, BN_NIST_256_TOP); + tmp2 = tmp1; + tmp1 += BN_NIST_256_TOP; + } + _is_set_256_data = 1; + } +#endif + +#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \ + { \ + if (a8 != 0) bn_cp_32(to, 0, from, (a8) - 8) else bn_32_set_0(to, 0)\ + if (a7 != 0) bn_cp_32(to, 1, from, (a7) - 8) else bn_32_set_0(to, 1)\ + if (a6 != 0) bn_cp_32(to, 2, from, (a6) - 8) else bn_32_set_0(to, 2)\ + if (a5 != 0) 
bn_cp_32(to, 3, from, (a5) - 8) else bn_32_set_0(to, 3)\ + if (a4 != 0) bn_cp_32(to, 4, from, (a4) - 8) else bn_32_set_0(to, 4)\ + if (a3 != 0) bn_cp_32(to, 5, from, (a3) - 8) else bn_32_set_0(to, 5)\ + if (a2 != 0) bn_cp_32(to, 6, from, (a2) - 8) else bn_32_set_0(to, 6)\ + if (a1 != 0) bn_cp_32(to, 7, from, (a1) - 8) else bn_32_set_0(to, 7)\ + } + +int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, + BN_CTX *ctx) + { +#if BN_BITS2 != 64 + int i, top = a->top; + int carry = 0; + register BN_ULONG *a_d = a->d, *r_d; + BN_ULONG t_d[BN_NIST_256_TOP], + t_d2[BN_NIST_256_TOP], + buf[BN_NIST_256_TOP]; + + if (!_is_set_256_data) + { + CRYPTO_w_lock(CRYPTO_LOCK_BN); + + if (!_is_set_256_data) + _init_256_data(); + + CRYPTO_w_unlock(CRYPTO_LOCK_BN); + } + + i = BN_ucmp(field, a); + if (i == 0) + { + BN_zero(r); + return 1; + } + else if (i > 0) + return (r == a)? 1 : (BN_copy(r ,a) != NULL); + + if (top == BN_NIST_256_TOP) + return BN_usub(r, a, field); + + if (r != a) + { + if (!bn_wexpand(r, BN_NIST_256_TOP)) + return 0; + r_d = r->d; + nist_cp_bn(r_d, a_d, BN_NIST_256_TOP); + } + else + r_d = a_d; + + nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP); + + /*S1*/ + nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0); + /*S2*/ + nist_set_256(t_d2,buf, 0, 15, 14, 13, 12, 0, 0, 0); + if (bn_add_words(t_d, t_d, t_d2, BN_NIST_256_TOP)) + carry = 2; + /* left shift */ + { + register BN_ULONG *ap,t,c; + ap = t_d; + c=0; + for (i = BN_NIST_256_TOP; i != 0; --i) + { + t= *ap; + *(ap++)=((t<<1)|c)&BN_MASK2; + c=(t & BN_TBIT)?1:0; + } + if (c) + ++carry; + } + + if (bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + ++carry; + /*S3*/ + nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + ++carry; + /*S4*/ + nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + ++carry; + /*D1*/ + nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11); + 
if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + --carry; + /*D2*/ + nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + --carry; + /*D3*/ + nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + --carry; + /*D4*/ + nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP)) + --carry; + + if (carry) + { + if (carry > 0) + bn_sub_words(r_d, r_d, _256_data + BN_NIST_256_TOP * + --carry, BN_NIST_256_TOP); + else + { + carry = -carry; + bn_add_words(r_d, r_d, _256_data + BN_NIST_256_TOP * + --carry, BN_NIST_256_TOP); + } + } + + r->top = BN_NIST_256_TOP; + bn_correct_top(r); + if (BN_ucmp(r, field) >= 0) + { + bn_sub_words(r_d, r_d, _nist_p_256, BN_NIST_256_TOP); + bn_correct_top(r); + } + bn_check_top(r); + return 1; +#else + return 0; +#endif + } + +#if BN_BITS2 != 64 +static void _init_384_data(void) + { + int i; + BN_ULONG *tmp1 = _384_data; + const BN_ULONG *tmp2 = tmp1; + + memcpy(tmp1, _nist_p_384, BN_NIST_384_TOP * sizeof(BN_ULONG)); + tmp1 += BN_NIST_384_TOP; + + for (i=0; i<7; i++) + { + bn_add_words(tmp1, _nist_p_384, tmp2, BN_NIST_384_TOP); + tmp2 = tmp1; + tmp1 += BN_NIST_384_TOP; + } + _is_set_384_data = 1; + } +#endif + +#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \ + { \ + if (a12 != 0) bn_cp_32(to, 0, from, (a12) - 12) else bn_32_set_0(to, 0)\ + if (a11 != 0) bn_cp_32(to, 1, from, (a11) - 12) else bn_32_set_0(to, 1)\ + if (a10 != 0) bn_cp_32(to, 2, from, (a10) - 12) else bn_32_set_0(to, 2)\ + if (a9 != 0) bn_cp_32(to, 3, from, (a9) - 12) else bn_32_set_0(to, 3)\ + if (a8 != 0) bn_cp_32(to, 4, from, (a8) - 12) else bn_32_set_0(to, 4)\ + if (a7 != 0) bn_cp_32(to, 5, from, (a7) - 12) else bn_32_set_0(to, 5)\ + if (a6 != 0) bn_cp_32(to, 6, from, (a6) - 12) else bn_32_set_0(to, 6)\ + if (a5 != 0) bn_cp_32(to, 7, from, (a5) - 12) else bn_32_set_0(to, 7)\ + if (a4 != 0) 
bn_cp_32(to, 8, from, (a4) - 12) else bn_32_set_0(to, 8)\ + if (a3 != 0) bn_cp_32(to, 9, from, (a3) - 12) else bn_32_set_0(to, 9)\ + if (a2 != 0) bn_cp_32(to, 10, from, (a2) - 12) else bn_32_set_0(to, 10)\ + if (a1 != 0) bn_cp_32(to, 11, from, (a1) - 12) else bn_32_set_0(to, 11)\ + } + +int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, + BN_CTX *ctx) + { +#if BN_BITS2 != 64 + int i, top = a->top; + int carry = 0; + register BN_ULONG *r_d, *a_d = a->d; + BN_ULONG t_d[BN_NIST_384_TOP], + buf[BN_NIST_384_TOP]; + + if (!_is_set_384_data) + { + CRYPTO_w_lock(CRYPTO_LOCK_BN); + + if (!_is_set_384_data) + _init_384_data(); + + CRYPTO_w_unlock(CRYPTO_LOCK_BN); + } + + i = BN_ucmp(field, a); + if (i == 0) + { + BN_zero(r); + return 1; + } + else if (i > 0) + return (r == a)? 1 : (BN_copy(r ,a) != NULL); + + if (top == BN_NIST_384_TOP) + return BN_usub(r, a, field); + + if (r != a) + { + if (!bn_wexpand(r, BN_NIST_384_TOP)) + return 0; + r_d = r->d; + nist_cp_bn(r_d, a_d, BN_NIST_384_TOP); + } + else + r_d = a_d; + + nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP); + + /*S1*/ + nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4); + /* left shift */ + { + register BN_ULONG *ap,t,c; + ap = t_d; + c=0; + for (i = BN_NIST_256_TOP; i != 0; --i) + { + t= *ap; + *(ap++)=((t<<1)|c)&BN_MASK2; + c=(t & BN_TBIT)?1:0; + } + } + if (bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2), + t_d, BN_NIST_256_TOP)) + ++carry; + /*S2 */ + if (bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP)) + ++carry; + /*S3*/ + nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP)) + ++carry; + /*S4*/ + nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP)) + ++carry; + /*S5*/ + nist_set_256(t_d, buf, 0, 0, 0, 0, 23-4, 22-4, 21-4, 20-4); + if (bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2), + t_d, BN_NIST_256_TOP)) + ++carry; + /*S6*/ + 
nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20); + if (bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP)) + ++carry; + /*D1*/ + nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP)) + --carry; + /*D2*/ + nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP)) + --carry; + /*D3*/ + nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0); + if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP)) + --carry; + + if (carry) + { + if (carry > 0) + bn_sub_words(r_d, r_d, _384_data + BN_NIST_384_TOP * + --carry, BN_NIST_384_TOP); + else + { + carry = -carry; + bn_add_words(r_d, r_d, _384_data + BN_NIST_384_TOP * + --carry, BN_NIST_384_TOP); + } + } + + r->top = BN_NIST_384_TOP; + bn_correct_top(r); + if (BN_ucmp(r, field) >= 0) + { + bn_sub_words(r_d, r_d, _nist_p_384, BN_NIST_384_TOP); + bn_correct_top(r); + } + bn_check_top(r); + return 1; +#else + return 0; +#endif + } + +int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, + BN_CTX *ctx) + { +#if BN_BITS2 == 64 +#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF +#elif BN_BITS2 == 32 +#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF +#elif BN_BITS2 == 16 +#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF +#elif BN_BITS2 == 8 +#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1 +#endif + int top, ret = 0; + BN_ULONG *r_d; + BIGNUM *tmp; + + /* check whether a reduction is necessary */ + top = a->top; + if (top < BN_NIST_521_TOP || ( top == BN_NIST_521_TOP && + (!(a->d[BN_NIST_521_TOP-1] & ~(BN_NIST_521_TOP_MASK))))) + return (r == a)? 
1 : (BN_copy(r ,a) != NULL); + + BN_CTX_start(ctx); + tmp = BN_CTX_get(ctx); + if (!tmp) + goto err; + + if (!bn_wexpand(tmp, BN_NIST_521_TOP)) + goto err; + nist_cp_bn(tmp->d, a->d, BN_NIST_521_TOP); + + tmp->top = BN_NIST_521_TOP; + tmp->d[BN_NIST_521_TOP-1] &= BN_NIST_521_TOP_MASK; + bn_correct_top(tmp); + + if (!BN_rshift(r, a, 521)) + goto err; + + if (!BN_uadd(r, tmp, r)) + goto err; + top = r->top; + r_d = r->d; + if (top == BN_NIST_521_TOP && + (r_d[BN_NIST_521_TOP-1] & ~(BN_NIST_521_TOP_MASK))) + { + BN_NIST_ADD_ONE(r_d) + r_d[BN_NIST_521_TOP-1] &= BN_NIST_521_TOP_MASK; + } + bn_correct_top(r); + + ret = 1; +err: + BN_CTX_end(ctx); + + bn_check_top(r); + return ret; + } diff --git a/crypto/openssl/crypto/bn/bn_prime.c b/crypto/openssl/crypto/bn/bn_prime.c index e072d92..d57f658 100644 --- a/crypto/openssl/crypto/bn/bn_prime.c +++ b/crypto/openssl/crypto/bn/bn_prime.c @@ -115,6 +115,11 @@ #include "bn_lcl.h" #include <openssl/rand.h> +/* NB: these functions have been "upgraded", the deprecated versions (which are + * compatibility wrappers using these functions) are in bn_depr.c. + * - Geoff + */ + /* The quick sieve algorithm approach to weeding out primes is * Philip Zimmermann's, as implemented in PGP. I have had a read of * his comments and implemented my own version. 
@@ -129,51 +134,69 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, static int probable_prime_dh_safe(BIGNUM *rnd, int bits, const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx); -BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, - const BIGNUM *add, const BIGNUM *rem, - void (*callback)(int,int,void *), void *cb_arg) +int BN_GENCB_call(BN_GENCB *cb, int a, int b) + { + /* No callback means continue */ + if(!cb) return 1; + switch(cb->ver) + { + case 1: + /* Deprecated-style callbacks */ + if(!cb->cb.cb_1) + return 1; + cb->cb.cb_1(a, b, cb->arg); + return 1; + case 2: + /* New-style callbacks */ + return cb->cb.cb_2(a, b, cb); + default: + break; + } + /* Unrecognised callback type */ + return 0; + } + +int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, + const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb) { - BIGNUM *rnd=NULL; - BIGNUM t; + BIGNUM *t; int found=0; int i,j,c1=0; BN_CTX *ctx; int checks = BN_prime_checks_for_size(bits); - BN_init(&t); ctx=BN_CTX_new(); if (ctx == NULL) goto err; - if (ret == NULL) - { - if ((rnd=BN_new()) == NULL) goto err; - } - else - rnd=ret; + BN_CTX_start(ctx); + t = BN_CTX_get(ctx); + if(!t) goto err; loop: /* make a random number and set the top and bottom bits */ if (add == NULL) { - if (!probable_prime(rnd,bits)) goto err; + if (!probable_prime(ret,bits)) goto err; } else { if (safe) { - if (!probable_prime_dh_safe(rnd,bits,add,rem,ctx)) + if (!probable_prime_dh_safe(ret,bits,add,rem,ctx)) goto err; } else { - if (!probable_prime_dh(rnd,bits,add,rem,ctx)) + if (!probable_prime_dh(ret,bits,add,rem,ctx)) goto err; } } - /* if (BN_mod_word(rnd,(BN_ULONG)3) == 1) goto loop; */ - if (callback != NULL) callback(0,c1++,cb_arg); + /* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */ + if(!BN_GENCB_call(cb, 0, c1++)) + /* aborted */ + goto err; if (!safe) { - i=BN_is_prime_fasttest(rnd,checks,callback,ctx,cb_arg,0); + i=BN_is_prime_fasttest_ex(ret,checks,ctx,0,cb); if (i == -1) goto err; if (i == 0) goto 
loop; } @@ -183,41 +206,42 @@ loop: * check that (p-1)/2 is prime. * Since a prime is odd, We just * need to divide by 2 */ - if (!BN_rshift1(&t,rnd)) goto err; + if (!BN_rshift1(t,ret)) goto err; for (i=0; i<checks; i++) { - j=BN_is_prime_fasttest(rnd,1,callback,ctx,cb_arg,0); + j=BN_is_prime_fasttest_ex(ret,1,ctx,0,cb); if (j == -1) goto err; if (j == 0) goto loop; - j=BN_is_prime_fasttest(&t,1,callback,ctx,cb_arg,0); + j=BN_is_prime_fasttest_ex(t,1,ctx,0,cb); if (j == -1) goto err; if (j == 0) goto loop; - if (callback != NULL) callback(2,c1-1,cb_arg); + if(!BN_GENCB_call(cb, 2, c1-1)) + goto err; /* We have a safe prime test pass */ } } /* we have a prime :-) */ found = 1; err: - if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd); - BN_free(&t); - if (ctx != NULL) BN_CTX_free(ctx); - return(found ? rnd : NULL); + if (ctx != NULL) + { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + bn_check_top(ret); + return found; } -int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *), - BN_CTX *ctx_passed, void *cb_arg) +int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_GENCB *cb) { - return BN_is_prime_fasttest(a, checks, callback, ctx_passed, cb_arg, 0); + return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb); } -int BN_is_prime_fasttest(const BIGNUM *a, int checks, - void (*callback)(int,int,void *), - BN_CTX *ctx_passed, void *cb_arg, - int do_trial_division) +int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, + int do_trial_division, BN_GENCB *cb) { int i, j, ret = -1; int k; @@ -234,13 +258,15 @@ int BN_is_prime_fasttest(const BIGNUM *a, int checks, /* first look for small factors */ if (!BN_is_odd(a)) - return 0; + /* a is even => a is prime if and only if a == 2 */ + return BN_is_word(a, 2); if (do_trial_division) { for (i = 1; i < NUMPRIMES; i++) if (BN_mod_word(a, primes[i]) == 0) return 0; - if (callback != NULL) callback(1, -1, cb_arg); + if(!BN_GENCB_call(cb, 1, -1)) + goto err; 
} if (ctx_passed != NULL) @@ -306,7 +332,8 @@ int BN_is_prime_fasttest(const BIGNUM *a, int checks, ret=0; goto err; } - if (callback != NULL) callback(1,i,cb_arg); + if(!BN_GENCB_call(cb, 1, i)) + goto err; } ret=1; err: @@ -343,6 +370,7 @@ static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1, } /* If we get here, 'w' is the (a-1)/2-th power of the original 'w', * and it is neither -1 nor +1 -- so 'a' cannot be prime */ + bn_check_top(w); return 1; } @@ -374,6 +402,7 @@ again: } } if (!BN_add_word(rnd,delta)) return(0); + bn_check_top(rnd); return(1); } @@ -411,6 +440,7 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, ret=1; err: BN_CTX_end(ctx); + bn_check_top(rnd); return(ret); } @@ -462,5 +492,6 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, ret=1; err: BN_CTX_end(ctx); + bn_check_top(p); return(ret); } diff --git a/crypto/openssl/crypto/bn/bn_prime.pl b/crypto/openssl/crypto/bn/bn_prime.pl index 9fc3765..e583d1d 100644 --- a/crypto/openssl/crypto/bn/bn_prime.pl +++ b/crypto/openssl/crypto/bn/bn_prime.pl @@ -11,7 +11,7 @@ loop: while ($#primes < $num-1) $p+=2; $s=int(sqrt($p)); - for ($i=0; $primes[$i]<=$s; $i++) + for ($i=0; defined($primes[$i]) && $primes[$i]<=$s; $i++) { next loop if (($p%$primes[$i]) == 0); } diff --git a/crypto/openssl/crypto/bn/bn_print.c b/crypto/openssl/crypto/bn/bn_print.c index 0d94260..055d048 100644 --- a/crypto/openssl/crypto/bn/bn_print.c +++ b/crypto/openssl/crypto/bn/bn_print.c @@ -79,7 +79,7 @@ char *BN_bn2hex(const BIGNUM *a) } p=buf; if (a->neg) *(p++)='-'; - if (a->top == 0) *(p++)='0'; + if (BN_is_zero(a)) *(p++)='0'; for (i=a->top-1; i >=0; i--) { for (j=BN_BITS2-8; j >= 0; j-=8) @@ -102,14 +102,19 @@ err: /* Must 'OPENSSL_free' the returned data */ char *BN_bn2dec(const BIGNUM *a) { - int i=0,num; + int i=0,num, ok = 0; char *buf=NULL; char *p; BIGNUM *t=NULL; BN_ULONG *bn_data=NULL,*lp; + /* get an upper bound for the length of the decimal integer + * num <= (BN_num_bits(a) 
+ 1) * log(2) + * <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1 (rounding error) + * <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1 + */ i=BN_num_bits(a)*3; - num=(i/10+i/1000+3)+1; + num=(i/10+i/1000+1)+1; bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); buf=(char *)OPENSSL_malloc(num+3); if ((buf == NULL) || (bn_data == NULL)) @@ -122,14 +127,16 @@ char *BN_bn2dec(const BIGNUM *a) #define BUF_REMAIN (num+3 - (size_t)(p - buf)) p=buf; lp=bn_data; - if (t->neg) *(p++)='-'; - if (t->top == 0) + if (BN_is_zero(t)) { *(p++)='0'; *(p++)='\0'; } else { + if (BN_is_negative(t)) + *p++ = '-'; + i=0; while (!BN_is_zero(t)) { @@ -149,9 +156,16 @@ char *BN_bn2dec(const BIGNUM *a) while (*p) p++; } } + ok = 1; err: if (bn_data != NULL) OPENSSL_free(bn_data); if (t != NULL) BN_free(t); + if (!ok && buf) + { + OPENSSL_free(buf); + buf = NULL; + } + return(buf); } @@ -211,10 +225,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a) j-=(BN_BYTES*2); } ret->top=h; - bn_fix_top(ret); + bn_correct_top(ret); ret->neg=neg; *bn=ret; + bn_check_top(ret); return(num); err: if (*bn == NULL) BN_free(ret); @@ -270,8 +285,9 @@ int BN_dec2bn(BIGNUM **bn, const char *a) } ret->neg=neg; - bn_fix_top(ret); + bn_correct_top(ret); *bn=ret; + bn_check_top(ret); return(num); err: if (*bn == NULL) BN_free(ret); @@ -300,7 +316,7 @@ int BN_print(BIO *bp, const BIGNUM *a) int ret=0; if ((a->neg) && (BIO_write(bp,"-",1) != 1)) goto end; - if ((a->top == 0) && (BIO_write(bp,"0",1) != 1)) goto end; + if (BN_is_zero(a) && (BIO_write(bp,"0",1) != 1)) goto end; for (i=a->top-1; i >=0; i--) { for (j=BN_BITS2-4; j >= 0; j-=4) @@ -320,14 +336,3 @@ end: return(ret); } #endif - -#ifdef BN_DEBUG -void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n) - { - int i; - fprintf(o, "%s=", a); - for (i=n-1;i>=0;i--) - fprintf(o, "%08lX", b[i]); /* assumes 32-bit BN_ULONG */ - fprintf(o, "\n"); - } -#endif diff --git a/crypto/openssl/crypto/bn/bn_rand.c b/crypto/openssl/crypto/bn/bn_rand.c index 
893c9d2..f51830b 100644 --- a/crypto/openssl/crypto/bn/bn_rand.c +++ b/crypto/openssl/crypto/bn/bn_rand.c @@ -134,13 +134,13 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) buf=(unsigned char *)OPENSSL_malloc(bytes); if (buf == NULL) { - BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE); + BNerr(BN_F_BNRAND,ERR_R_MALLOC_FAILURE); goto err; } /* make a random number and set the top and bottom bits */ time(&tim); - RAND_add(&tim,sizeof(tim),0); + RAND_add(&tim,sizeof(tim),0.0); if (pseudorand) { @@ -204,6 +204,7 @@ err: OPENSSL_cleanse(buf,bytes); OPENSSL_free(buf); } + bn_check_top(rnd); return(ret); } @@ -230,6 +231,7 @@ static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range) { int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand; int n; + int count = 100; if (range->neg || BN_is_zero(range)) { @@ -242,9 +244,7 @@ static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range) /* BN_is_bit_set(range, n - 1) always holds */ if (n == 1) - { - if (!BN_zero(r)) return 0; - } + BN_zero(r); else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) { /* range = 100..._2, @@ -263,6 +263,13 @@ static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range) if (BN_cmp(r, range) >= 0) if (!BN_sub(r, r, range)) return 0; } + + if (!--count) + { + BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); + return 0; + } + } while (BN_cmp(r, range) >= 0); } @@ -272,10 +279,17 @@ static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range) { /* range = 11..._2 or range = 101..._2 */ if (!bn_rand(r, n, -1, 0)) return 0; + + if (!--count) + { + BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); + return 0; + } } while (BN_cmp(r, range) >= 0); } + bn_check_top(r); return 1; } diff --git a/crypto/openssl/crypto/bn/bn_recp.c b/crypto/openssl/crypto/bn/bn_recp.c index ef5fdd4..2e8efb8 100644 --- a/crypto/openssl/crypto/bn/bn_recp.c +++ b/crypto/openssl/crypto/bn/bn_recp.c @@ -94,7 +94,7 @@ void BN_RECP_CTX_free(BN_RECP_CTX 
*recp) int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) { if (!BN_copy(&(recp->N),d)) return 0; - if (!BN_zero(&(recp->Nr))) return 0; + BN_zero(&(recp->Nr)); recp->num_bits=BN_num_bits(d); recp->shift=0; return(1); @@ -123,6 +123,7 @@ int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, ret = BN_div_recp(NULL,r,ca,recp,ctx); err: BN_CTX_end(ctx); + bn_check_top(r); return(ret); } @@ -147,7 +148,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, if (BN_ucmp(m,&(recp->N)) < 0) { - if (!BN_zero(d)) return 0; + BN_zero(d); if (!BN_copy(r,m)) return 0; BN_CTX_end(ctx); return(1); @@ -190,7 +191,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, { if (j++ > 2) { - BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL); + BNerr(BN_F_BN_DIV_RECP,BN_R_BAD_RECIPROCAL); goto err; } if (!BN_usub(r,r,&(recp->N))) goto err; @@ -203,6 +204,8 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, ret=1; err: BN_CTX_end(ctx); + bn_check_top(dv); + bn_check_top(rem); return(ret); } @@ -214,17 +217,18 @@ err: int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) { int ret= -1; - BIGNUM t; + BIGNUM *t; - BN_init(&t); + BN_CTX_start(ctx); + if((t = BN_CTX_get(ctx)) == NULL) goto err; - if (!BN_zero(&t)) goto err; - if (!BN_set_bit(&t,len)) goto err; + if (!BN_set_bit(t,len)) goto err; - if (!BN_div(r,NULL,&t,m,ctx)) goto err; + if (!BN_div(r,NULL,t,m,ctx)) goto err; ret=len; err: - BN_free(&t); + bn_check_top(r); + BN_CTX_end(ctx); return(ret); } diff --git a/crypto/openssl/crypto/bn/bn_shift.c b/crypto/openssl/crypto/bn/bn_shift.c index 70f785e..de9312d 100644 --- a/crypto/openssl/crypto/bn/bn_shift.c +++ b/crypto/openssl/crypto/bn/bn_shift.c @@ -65,6 +65,9 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) register BN_ULONG *ap,*rp,t,c; int i; + bn_check_top(r); + bn_check_top(a); + if (r != a) { r->neg=a->neg; @@ -89,6 +92,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) *rp=1; r->top++; } + bn_check_top(r); 
return(1); } @@ -97,6 +101,9 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) BN_ULONG *ap,*rp,t,c; int i; + bn_check_top(r); + bn_check_top(a); + if (BN_is_zero(a)) { BN_zero(r); @@ -117,7 +124,8 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) rp[i]=((t>>1)&BN_MASK2)|c; c=(t&1)?BN_TBIT:0; } - bn_fix_top(r); + bn_correct_top(r); + bn_check_top(r); return(1); } @@ -127,6 +135,9 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) BN_ULONG *t,*f; BN_ULONG l; + bn_check_top(r); + bn_check_top(a); + r->neg=a->neg; nw=n/BN_BITS2; if (bn_wexpand(r,a->top+nw+1) == NULL) return(0); @@ -149,7 +160,8 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) /* for (i=0; i<nw; i++) t[i]=0;*/ r->top=a->top+nw+1; - bn_fix_top(r); + bn_correct_top(r); + bn_check_top(r); return(1); } @@ -159,6 +171,9 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) BN_ULONG *t,*f; BN_ULONG l,tmp; + bn_check_top(r); + bn_check_top(a); + nw=n/BN_BITS2; rb=n%BN_BITS2; lb=BN_BITS2-rb; @@ -185,13 +200,13 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) if (rb == 0) { - for (i=j+1; i > 0; i--) + for (i=j; i != 0; i--) *(t++)= *(f++); } else { l= *(f++); - for (i=1; i<j; i++) + for (i=j-1; i != 0; i--) { tmp =(l>>rb)&BN_MASK2; l= *(f++); @@ -199,7 +214,7 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) } *(t++) =(l>>rb)&BN_MASK2; } - *t=0; - bn_fix_top(r); + bn_correct_top(r); + bn_check_top(r); return(1); } diff --git a/crypto/openssl/crypto/bn/bn_sqr.c b/crypto/openssl/crypto/bn/bn_sqr.c index c1d0cca..270d0cd 100644 --- a/crypto/openssl/crypto/bn/bn_sqr.c +++ b/crypto/openssl/crypto/bn/bn_sqr.c @@ -77,16 +77,16 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) if (al <= 0) { r->top=0; - return(1); + return 1; } BN_CTX_start(ctx); rr=(a != r) ? 
r : BN_CTX_get(ctx); tmp=BN_CTX_get(ctx); - if (tmp == NULL) goto err; + if (!rr || !tmp) goto err; - max=(al+al); - if (bn_wexpand(rr,max+1) == NULL) goto err; + max = 2 * al; /* Non-zero (from above) */ + if (bn_wexpand(rr,max) == NULL) goto err; if (al == 4) { @@ -138,12 +138,18 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) #endif } - rr->top=max; rr->neg=0; - if ((max > 0) && (rr->d[max-1] == 0)) rr->top--; + /* If the most-significant half of the top word of 'a' is zero, then + * the square of 'a' will max-1 words. */ + if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) + rr->top = max - 1; + else + rr->top = max; if (rr != r) BN_copy(r,rr); ret = 1; err: + bn_check_top(rr); + bn_check_top(tmp); BN_CTX_end(ctx); return(ret); } diff --git a/crypto/openssl/crypto/bn/bn_sqrt.c b/crypto/openssl/crypto/bn/bn_sqrt.c index e2a1105..6beaf9e 100644 --- a/crypto/openssl/crypto/bn/bn_sqrt.c +++ b/crypto/openssl/crypto/bn/bn_sqrt.c @@ -1,4 +1,4 @@ -/* crypto/bn/bn_mod.c */ +/* crypto/bn/bn_sqrt.c */ /* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> * and Bodo Moeller for the OpenSSL project. */ /* ==================================================================== @@ -65,14 +65,12 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) * using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course * in Algebraic Computational Number Theory", algorithm 1.5.1). * 'p' must be prime! - * If 'a' is not a square, this is not necessarily detected by - * the algorithms; a bogus result must be expected in this case. 
*/ { BIGNUM *ret = in; int err = 1; int r; - BIGNUM *b, *q, *t, *x, *y; + BIGNUM *A, *b, *q, *t, *x, *y; int e, i, j; if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) @@ -85,9 +83,11 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) goto end; if (!BN_set_word(ret, BN_is_bit_set(a, 0))) { - BN_free(ret); + if (ret != in) + BN_free(ret); return NULL; } + bn_check_top(ret); return ret; } @@ -103,23 +103,16 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) goto end; if (!BN_set_word(ret, BN_is_one(a))) { - BN_free(ret); + if (ret != in) + BN_free(ret); return NULL; } + bn_check_top(ret); return ret; } -#if 0 /* if BN_mod_sqrt is used with correct input, this just wastes time */ - r = BN_kronecker(a, p, ctx); - if (r < -1) return NULL; - if (r == -1) - { - BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE); - return(NULL); - } -#endif - BN_CTX_start(ctx); + A = BN_CTX_get(ctx); b = BN_CTX_get(ctx); q = BN_CTX_get(ctx); t = BN_CTX_get(ctx); @@ -131,6 +124,9 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) ret = BN_new(); if (ret == NULL) goto end; + /* A = a mod p */ + if (!BN_nnmod(A, a, p, ctx)) goto end; + /* now write |p| - 1 as 2^e*q where q is odd */ e = 1; while (!BN_is_bit_set(p, e)) @@ -149,9 +145,9 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) if (!BN_rshift(q, p, 2)) goto end; q->neg = 0; if (!BN_add_word(q, 1)) goto end; - if (!BN_mod_exp(ret, a, q, p, ctx)) goto end; + if (!BN_mod_exp(ret, A, q, p, ctx)) goto end; err = 0; - goto end; + goto vrfy; } if (e == 2) @@ -182,15 +178,8 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) * November 1992.) 
*/ - /* make sure that a is reduced modulo p */ - if (a->neg || BN_ucmp(a, p) >= 0) - { - if (!BN_nnmod(x, a, p, ctx)) goto end; - a = x; /* use x as temporary variable */ - } - /* t := 2*a */ - if (!BN_mod_lshift1_quick(t, a, p)) goto end; + if (!BN_mod_lshift1_quick(t, A, p)) goto end; /* b := (2*a)^((|p|-5)/8) */ if (!BN_rshift(q, p, 3)) goto end; @@ -205,12 +194,12 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) if (!BN_sub_word(t, 1)) goto end; /* x = a*b*t */ - if (!BN_mod_mul(x, a, b, p, ctx)) goto end; + if (!BN_mod_mul(x, A, b, p, ctx)) goto end; if (!BN_mod_mul(x, x, t, p, ctx)) goto end; if (!BN_copy(ret, x)) goto end; err = 0; - goto end; + goto vrfy; } /* e > 2, so we really have to use the Tonelli/Shanks algorithm. @@ -297,11 +286,11 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) /* x := a^((q-1)/2) */ if (BN_is_zero(t)) /* special case: p = 2^e + 1 */ { - if (!BN_nnmod(t, a, p, ctx)) goto end; + if (!BN_nnmod(t, A, p, ctx)) goto end; if (BN_is_zero(t)) { /* special case: a == 0 (mod p) */ - if (!BN_zero(ret)) goto end; + BN_zero(ret); err = 0; goto end; } @@ -310,11 +299,11 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) } else { - if (!BN_mod_exp(x, a, t, p, ctx)) goto end; + if (!BN_mod_exp(x, A, t, p, ctx)) goto end; if (BN_is_zero(x)) { /* special case: a == 0 (mod p) */ - if (!BN_zero(ret)) goto end; + BN_zero(ret); err = 0; goto end; } @@ -322,10 +311,10 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) /* b := a*x^2 (= a^q) */ if (!BN_mod_sqr(b, x, p, ctx)) goto end; - if (!BN_mod_mul(b, b, a, p, ctx)) goto end; + if (!BN_mod_mul(b, b, A, p, ctx)) goto end; /* x := a*x (= a^((q+1)/2)) */ - if (!BN_mod_mul(x, x, a, p, ctx)) goto end; + if (!BN_mod_mul(x, x, A, p, ctx)) goto end; while (1) { @@ -342,7 +331,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { if (!BN_copy(ret, x)) 
goto end; err = 0; - goto end; + goto vrfy; } @@ -373,6 +362,22 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) e = i; } + vrfy: + if (!err) + { + /* verify the result -- the input might not have been a square + * (test added in 0.9.8) */ + + if (!BN_mod_sqr(x, ret, p, ctx)) + err = 1; + + if (!err && 0 != BN_cmp(x, A)) + { + BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE); + err = 1; + } + } + end: if (err) { @@ -383,5 +388,6 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) ret = NULL; } BN_CTX_end(ctx); + bn_check_top(ret); return ret; } diff --git a/crypto/openssl/crypto/bn/bn_word.c b/crypto/openssl/crypto/bn/bn_word.c index 988e0ca..ee7b87c 100644 --- a/crypto/openssl/crypto/bn/bn_word.c +++ b/crypto/openssl/crypto/bn/bn_word.c @@ -69,6 +69,10 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) #endif int i; + if (w == 0) + return (BN_ULONG)-1; + + bn_check_top(a); w&=BN_MASK2; for (i=a->top-1; i>=0; i--) { @@ -85,12 +89,24 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) { - BN_ULONG ret; - int i; + BN_ULONG ret = 0; + int i, j; + + bn_check_top(a); + w &= BN_MASK2; + + if (!w) + /* actually this is an error (division by zero) */ + return (BN_ULONG)-1; + if (a->top == 0) + return 0; + + /* normalize input (so bn_div_words doesn't complain) */ + j = BN_BITS2 - BN_num_bits_word(w); + w <<= j; + if (!BN_lshift(a, a, j)) + return (BN_ULONG)-1; - if (a->top == 0) return(0); - ret=0; - w&=BN_MASK2; for (i=a->top-1; i>=0; i--) { BN_ULONG l,d; @@ -102,6 +118,8 @@ BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) } if ((a->top > 0) && (a->d[a->top-1] == 0)) a->top--; + ret >>= j; + bn_check_top(a); return(ret); } @@ -110,6 +128,14 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) BN_ULONG l; int i; + bn_check_top(a); + w &= BN_MASK2; + + /* degenerate case: w is zero */ + if (!w) return 1; + /* degenerate case: a is zero */ + if(BN_is_zero(a)) return BN_set_word(a, w); + /* 
handle 'a' when negative */ if (a->neg) { a->neg=0; @@ -118,15 +144,17 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) a->neg=!(a->neg); return(i); } - w&=BN_MASK2; - if (bn_wexpand(a,a->top+1) == NULL) return(0); + /* Only expand (and risk failing) if it's possibly necessary */ + if (((BN_ULONG)(a->d[a->top - 1] + 1) == 0) && + (bn_wexpand(a,a->top+1) == NULL)) + return(0); i=0; for (;;) { if (i >= a->top) l=w; else - l=(a->d[i]+(BN_ULONG)w)&BN_MASK2; + l=(a->d[i]+w)&BN_MASK2; a->d[i]=l; if (w > l) w=1; @@ -136,6 +164,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) } if (i >= a->top) a->top++; + bn_check_top(a); return(1); } @@ -143,7 +172,21 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) { int i; - if (BN_is_zero(a) || a->neg) + bn_check_top(a); + w &= BN_MASK2; + + /* degenerate case: w is zero */ + if (!w) return 1; + /* degenerate case: a is zero */ + if(BN_is_zero(a)) + { + i = BN_set_word(a,w); + if (i != 0) + BN_set_negative(a, 1); + return i; + } + /* handle 'a' when negative */ + if (a->neg) { a->neg=0; i=BN_add_word(a,w); @@ -151,7 +194,6 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) return(i); } - w&=BN_MASK2; if ((a->top == 1) && (a->d[0] < w)) { a->d[0]=w-a->d[0]; @@ -175,6 +217,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) } if ((a->d[i] == 0) && (i == (a->top-1))) a->top--; + bn_check_top(a); return(1); } @@ -182,6 +225,7 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w) { BN_ULONG ll; + bn_check_top(a); w&=BN_MASK2; if (a->top) { @@ -197,6 +241,7 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w) } } } + bn_check_top(a); return(1); } diff --git a/crypto/openssl/crypto/bn/bntest.c b/crypto/openssl/crypto/bn/bntest.c index 28cd333..c885300 100644 --- a/crypto/openssl/crypto/bn/bntest.c +++ b/crypto/openssl/crypto/bn/bntest.c @@ -55,6 +55,25 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. 
+ * + * Portions of the attached software ("Contribution") are developed by + * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. + * + * The Contribution is licensed pursuant to the Eric Young open source + * license provided above. + * + * The binary polynomial arithmetic software is originally written by + * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. + * + */ + +/* Until the key-gen callbacks are modified to use newer prototypes, we allow + * deprecated functions for openssl-internal code */ +#ifdef OPENSSL_NO_DEPRECATED +#undef OPENSSL_NO_DEPRECATED +#endif #include <stdio.h> #include <stdlib.h> @@ -79,6 +98,7 @@ int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_); int test_rshift1(BIO *bp); int test_rshift(BIO *bp,BN_CTX *ctx); int test_div(BIO *bp,BN_CTX *ctx); +int test_div_word(BIO *bp); int test_div_recp(BIO *bp,BN_CTX *ctx); int test_mul(BIO *bp); int test_sqr(BIO *bp,BN_CTX *ctx); @@ -86,7 +106,17 @@ int test_mont(BIO *bp,BN_CTX *ctx); int test_mod(BIO *bp,BN_CTX *ctx); int test_mod_mul(BIO *bp,BN_CTX *ctx); int test_mod_exp(BIO *bp,BN_CTX *ctx); +int test_mod_exp_mont_consttime(BIO *bp,BN_CTX *ctx); int test_exp(BIO *bp,BN_CTX *ctx); +int test_gf2m_add(BIO *bp); +int test_gf2m_mod(BIO *bp); +int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx); +int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx); +int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx); +int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx); +int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx); +int test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx); +int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx); int test_kron(BIO *bp,BN_CTX *ctx); int test_sqrt(BIO *bp,BN_CTX *ctx); int rand_neg(void); @@ -193,6 +223,10 @@ int main(int argc, char *argv[]) if (!test_div(out,ctx)) goto err; BIO_flush(out); + message(out,"BN_div_word"); + if (!test_div_word(out)) goto err; + BIO_flush(out); + message(out,"BN_div_recp"); if (!test_div_recp(out,ctx)) goto err; BIO_flush(out); @@ -213,6 +247,10 @@ int main(int argc, 
char *argv[]) if (!test_mod_exp(out,ctx)) goto err; BIO_flush(out); + message(out,"BN_mod_exp_mont_consttime"); + if (!test_mod_exp_mont_consttime(out,ctx)) goto err; + BIO_flush(out); + message(out,"BN_exp"); if (!test_exp(out,ctx)) goto err; BIO_flush(out); @@ -225,6 +263,42 @@ int main(int argc, char *argv[]) if (!test_sqrt(out,ctx)) goto err; BIO_flush(out); + message(out,"BN_GF2m_add"); + if (!test_gf2m_add(out)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod"); + if (!test_gf2m_mod(out)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_mul"); + if (!test_gf2m_mod_mul(out,ctx)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_sqr"); + if (!test_gf2m_mod_sqr(out,ctx)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_inv"); + if (!test_gf2m_mod_inv(out,ctx)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_div"); + if (!test_gf2m_mod_div(out,ctx)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_exp"); + if (!test_gf2m_mod_exp(out,ctx)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_sqrt"); + if (!test_gf2m_mod_sqrt(out,ctx)) goto err; + BIO_flush(out); + + message(out,"BN_GF2m_mod_solve_quad"); + if (!test_gf2m_mod_solve_quad(out,ctx)) goto err; + BIO_flush(out); + BN_CTX_free(ctx); BIO_free(out); @@ -232,7 +306,7 @@ int main(int argc, char *argv[]) EXIT(0); err: BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices - * the failure, see test_bn in test/Makefile */ + * the failure, see test_bn in test/Makefile.ssl*/ BIO_flush(out); ERR_load_crypto_strings(); ERR_print_errors_fp(stderr); @@ -399,6 +473,78 @@ int test_div(BIO *bp, BN_CTX *ctx) return(1); } +static void print_word(BIO *bp,BN_ULONG w) + { +#ifdef SIXTY_FOUR_BIT + if (sizeof(w) > sizeof(unsigned long)) + { + unsigned long h=(unsigned long)(w>>32), + l=(unsigned long)(w); + + if (h) BIO_printf(bp,"%lX%08lX",h,l); + else BIO_printf(bp,"%lX",l); + return; + } +#endif + BIO_printf(bp,"%lX",w); + } + +int test_div_word(BIO *bp) + { 
+ BIGNUM a,b; + BN_ULONG r,s; + int i; + + BN_init(&a); + BN_init(&b); + + for (i=0; i<num0; i++) + { + do { + BN_bntest_rand(&a,512,-1,0); + BN_bntest_rand(&b,BN_BITS2,-1,0); + s = b.d[0]; + } while (!s); + + BN_copy(&b, &a); + r = BN_div_word(&b, s); + + if (bp != NULL) + { + if (!results) + { + BN_print(bp,&a); + BIO_puts(bp," / "); + print_word(bp,s); + BIO_puts(bp," - "); + } + BN_print(bp,&b); + BIO_puts(bp,"\n"); + + if (!results) + { + BN_print(bp,&a); + BIO_puts(bp," % "); + print_word(bp,s); + BIO_puts(bp," - "); + } + print_word(bp,r); + BIO_puts(bp,"\n"); + } + BN_mul_word(&b,s); + BN_add_word(&b,r); + BN_sub(&b,&a,&b); + if(!BN_is_zero(&b)) + { + fprintf(stderr,"Division (word) test failed!\n"); + return 0; + } + } + BN_free(&a); + BN_free(&b); + return(1); + } + int test_div_recp(BIO *bp, BN_CTX *ctx) { BIGNUM a,b,c,d,e; @@ -813,6 +959,57 @@ int test_mod_exp(BIO *bp, BN_CTX *ctx) return(1); } +int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx) + { + BIGNUM *a,*b,*c,*d,*e; + int i; + + a=BN_new(); + b=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + + BN_bntest_rand(c,30,0,1); /* must be odd for montgomery */ + for (i=0; i<num2; i++) + { + BN_bntest_rand(a,20+i*5,0,0); /**/ + BN_bntest_rand(b,2+i,0,0); /**/ + + if (!BN_mod_exp_mont_consttime(d,a,b,c,ctx,NULL)) + return(00); + + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp," ^ "); + BN_print(bp,b); + BIO_puts(bp," % "); + BN_print(bp,c); + BIO_puts(bp," - "); + } + BN_print(bp,d); + BIO_puts(bp,"\n"); + } + BN_exp(e,a,b,ctx); + BN_sub(e,e,d); + BN_div(a,b,e,c,ctx); + if(!BN_is_zero(b)) + { + fprintf(stderr,"Modulo exponentiation test failed!\n"); + return 0; + } + } + BN_free(a); + BN_free(b); + BN_free(c); + BN_free(d); + BN_free(e); + return(1); + } + int test_exp(BIO *bp, BN_CTX *ctx) { BIGNUM *a,*b,*d,*e,*one; @@ -863,7 +1060,582 @@ int test_exp(BIO *bp, BN_CTX *ctx) return(1); } -static void genprime_cb(int p, int n, void *arg) +int test_gf2m_add(BIO *bp) + { + 
BIGNUM a,b,c; + int i, ret = 0; + + BN_init(&a); + BN_init(&b); + BN_init(&c); + + for (i=0; i<num0; i++) + { + BN_rand(&a,512,0,0); + BN_copy(&b, BN_value_one()); + a.neg=rand_neg(); + b.neg=rand_neg(); + BN_GF2m_add(&c,&a,&b); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,&a); + BIO_puts(bp," ^ "); + BN_print(bp,&b); + BIO_puts(bp," = "); + } + BN_print(bp,&c); + BIO_puts(bp,"\n"); + } +#endif + /* Test that two added values have the correct parity. */ + if((BN_is_odd(&a) && BN_is_odd(&c)) || (!BN_is_odd(&a) && !BN_is_odd(&c))) + { + fprintf(stderr,"GF(2^m) addition test (a) failed!\n"); + goto err; + } + BN_GF2m_add(&c,&c,&c); + /* Test that c + c = 0. */ + if(!BN_is_zero(&c)) + { + fprintf(stderr,"GF(2^m) addition test (b) failed!\n"); + goto err; + } + } + ret = 1; + err: + BN_free(&a); + BN_free(&b); + BN_free(&c); + return ret; + } + +int test_gf2m_mod(BIO *bp) + { + BIGNUM *a,*b[2],*c,*d,*e; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 1024, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod(c, a, b[j]); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp," % "); + BN_print(bp,b[j]); + BIO_puts(bp," - "); + BN_print(bp,c); + BIO_puts(bp,"\n"); + } + } +#endif + BN_GF2m_add(d, a, c); + BN_GF2m_mod(e, d, b[j]); + /* Test that a + (a mod p) mod p == 0. 
*/ + if(!BN_is_zero(e)) + { + fprintf(stderr,"GF(2^m) modulo test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + BN_free(e); + return ret; + } + +int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d,*e,*f,*g,*h; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + f=BN_new(); + g=BN_new(); + h=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 1024, 0, 0); + BN_bntest_rand(c, 1024, 0, 0); + BN_bntest_rand(d, 1024, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod_mul(e, a, c, b[j], ctx); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp," * "); + BN_print(bp,c); + BIO_puts(bp," % "); + BN_print(bp,b[j]); + BIO_puts(bp," - "); + BN_print(bp,e); + BIO_puts(bp,"\n"); + } + } +#endif + BN_GF2m_add(f, a, d); + BN_GF2m_mod_mul(g, f, c, b[j], ctx); + BN_GF2m_mod_mul(h, d, c, b[j], ctx); + BN_GF2m_add(f, e, g); + BN_GF2m_add(f, f, h); + /* Test that (a+d)*c = a*c + d*c. 
*/ + if(!BN_is_zero(f)) + { + fprintf(stderr,"GF(2^m) modular multiplication test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + BN_free(e); + BN_free(f); + BN_free(g); + BN_free(h); + return ret; + } + +int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 1024, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod_sqr(c, a, b[j], ctx); + BN_copy(d, a); + BN_GF2m_mod_mul(d, a, d, b[j], ctx); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp," ^ 2 % "); + BN_print(bp,b[j]); + BIO_puts(bp, " = "); + BN_print(bp,c); + BIO_puts(bp,"; a * a = "); + BN_print(bp,d); + BIO_puts(bp,"\n"); + } + } +#endif + BN_GF2m_add(d, c, d); + /* Test that a*a = a^2. 
*/ + if(!BN_is_zero(d)) + { + fprintf(stderr,"GF(2^m) modular squaring test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + return ret; + } + +int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 512, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod_inv(c, a, b[j], ctx); + BN_GF2m_mod_mul(d, a, c, b[j], ctx); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp, " * "); + BN_print(bp,c); + BIO_puts(bp," - 1 % "); + BN_print(bp,b[j]); + BIO_puts(bp,"\n"); + } + } +#endif + /* Test that ((1/a)*a) = 1. */ + if(!BN_is_one(d)) + { + fprintf(stderr,"GF(2^m) modular inversion test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + return ret; + } + +int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d,*e,*f; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + f=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 512, 0, 0); + BN_bntest_rand(c, 512, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod_div(d, a, c, b[j], ctx); + BN_GF2m_mod_mul(e, d, c, b[j], ctx); + BN_GF2m_mod_div(f, a, e, b[j], ctx); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp, " = "); + BN_print(bp,c); + BIO_puts(bp," * "); 
+ BN_print(bp,d); + BIO_puts(bp, " % "); + BN_print(bp,b[j]); + BIO_puts(bp,"\n"); + } + } +#endif + /* Test that ((a/c)*c)/a = 1. */ + if(!BN_is_one(f)) + { + fprintf(stderr,"GF(2^m) modular division test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + BN_free(e); + BN_free(f); + return ret; + } + +int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d,*e,*f; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + f=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 512, 0, 0); + BN_bntest_rand(c, 512, 0, 0); + BN_bntest_rand(d, 512, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod_exp(e, a, c, b[j], ctx); + BN_GF2m_mod_exp(f, a, d, b[j], ctx); + BN_GF2m_mod_mul(e, e, f, b[j], ctx); + BN_add(f, c, d); + BN_GF2m_mod_exp(f, a, f, b[j], ctx); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp, " ^ ("); + BN_print(bp,c); + BIO_puts(bp," + "); + BN_print(bp,d); + BIO_puts(bp, ") = "); + BN_print(bp,e); + BIO_puts(bp, "; - "); + BN_print(bp,f); + BIO_puts(bp, " % "); + BN_print(bp,b[j]); + BIO_puts(bp,"\n"); + } + } +#endif + BN_GF2m_add(f, e, f); + /* Test that a^(c+d)=a^c*a^d. 
*/ + if(!BN_is_zero(f)) + { + fprintf(stderr,"GF(2^m) modular exponentiation test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + BN_free(e); + BN_free(f); + return ret; + } + +int test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d,*e,*f; + int i, j, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + f=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 512, 0, 0); + for (j=0; j < 2; j++) + { + BN_GF2m_mod(c, a, b[j]); + BN_GF2m_mod_sqrt(d, a, b[j], ctx); + BN_GF2m_mod_sqr(e, d, b[j], ctx); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,d); + BIO_puts(bp, " ^ 2 - "); + BN_print(bp,a); + BIO_puts(bp,"\n"); + } + } +#endif + BN_GF2m_add(f, c, e); + /* Test that d^2 = a, where d = sqrt(a). 
*/ + if(!BN_is_zero(f)) + { + fprintf(stderr,"GF(2^m) modular square root test failed!\n"); + goto err; + } + } + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + BN_free(e); + BN_free(f); + return ret; + } + +int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx) + { + BIGNUM *a,*b[2],*c,*d,*e; + int i, j, s = 0, t, ret = 0; + unsigned int p0[] = {163,7,6,3,0}; + unsigned int p1[] = {193,15,0}; + + a=BN_new(); + b[0]=BN_new(); + b[1]=BN_new(); + c=BN_new(); + d=BN_new(); + e=BN_new(); + + BN_GF2m_arr2poly(p0, b[0]); + BN_GF2m_arr2poly(p1, b[1]); + + for (i=0; i<num0; i++) + { + BN_bntest_rand(a, 512, 0, 0); + for (j=0; j < 2; j++) + { + t = BN_GF2m_mod_solve_quad(c, a, b[j], ctx); + if (t) + { + s++; + BN_GF2m_mod_sqr(d, c, b[j], ctx); + BN_GF2m_add(d, c, d); + BN_GF2m_mod(e, a, b[j]); +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BN_print(bp,c); + BIO_puts(bp, " is root of z^2 + z = "); + BN_print(bp,a); + BIO_puts(bp, " % "); + BN_print(bp,b[j]); + BIO_puts(bp, "\n"); + } + } +#endif + BN_GF2m_add(e, e, d); + /* Test that solution of quadratic c satisfies c^2 + c = a. 
*/ + if(!BN_is_zero(e)) + { + fprintf(stderr,"GF(2^m) modular solve quadratic test failed!\n"); + goto err; + } + + } + else + { +#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */ + if (bp != NULL) + { + if (!results) + { + BIO_puts(bp, "There are no roots of z^2 + z = "); + BN_print(bp,a); + BIO_puts(bp, " % "); + BN_print(bp,b[j]); + BIO_puts(bp, "\n"); + } + } +#endif + } + } + } + if (s == 0) + { + fprintf(stderr,"All %i tests of GF(2^m) modular solve quadratic resulted in no roots;\n", num0); + fprintf(stderr,"this is very unlikely and probably indicates an error.\n"); + goto err; + } + ret = 1; + err: + BN_free(a); + BN_free(b[0]); + BN_free(b[1]); + BN_free(c); + BN_free(d); + BN_free(e); + return ret; + } + +static int genprime_cb(int p, int n, BN_GENCB *arg) { char c='*'; @@ -873,12 +1645,12 @@ static void genprime_cb(int p, int n, void *arg) if (p == 3) c='\n'; putc(c, stderr); fflush(stderr); - (void)n; - (void)arg; + return 1; } int test_kron(BIO *bp, BN_CTX *ctx) { + BN_GENCB cb; BIGNUM *a,*b,*r,*t; int i; int legendre, kronecker; @@ -889,6 +1661,8 @@ int test_kron(BIO *bp, BN_CTX *ctx) r = BN_new(); t = BN_new(); if (a == NULL || b == NULL || r == NULL || t == NULL) goto err; + + BN_GENCB_set(&cb, genprime_cb, NULL); /* We test BN_kronecker(a, b, ctx) just for b odd (Jacobi symbol). * In this case we know that if b is prime, then BN_kronecker(a, b, ctx) @@ -899,7 +1673,7 @@ int test_kron(BIO *bp, BN_CTX *ctx) * don't want to test whether b is prime but whether BN_kronecker * works.) 
*/ - if (!BN_generate_prime(b, 512, 0, NULL, NULL, genprime_cb, NULL)) goto err; + if (!BN_generate_prime_ex(b, 512, 0, NULL, NULL, &cb)) goto err; b->neg = rand_neg(); putc('\n', stderr); @@ -967,6 +1741,7 @@ int test_kron(BIO *bp, BN_CTX *ctx) int test_sqrt(BIO *bp, BN_CTX *ctx) { + BN_GENCB cb; BIGNUM *a,*p,*r; int i, j; int ret = 0; @@ -975,7 +1750,9 @@ int test_sqrt(BIO *bp, BN_CTX *ctx) p = BN_new(); r = BN_new(); if (a == NULL || p == NULL || r == NULL) goto err; - + + BN_GENCB_set(&cb, genprime_cb, NULL); + for (i = 0; i < 16; i++) { if (i < 8) @@ -989,7 +1766,7 @@ int test_sqrt(BIO *bp, BN_CTX *ctx) if (!BN_set_word(a, 32)) goto err; if (!BN_set_word(r, 2*i + 1)) goto err; - if (!BN_generate_prime(p, 256, 0, a, r, genprime_cb, NULL)) goto err; + if (!BN_generate_prime_ex(p, 256, 0, a, r, &cb)) goto err; putc('\n', stderr); } p->neg = rand_neg(); diff --git a/crypto/openssl/crypto/bn/expspeed.c b/crypto/openssl/crypto/bn/expspeed.c index 07a1bcf..4d5f221 100644 --- a/crypto/openssl/crypto/bn/expspeed.c +++ b/crypto/openssl/crypto/bn/expspeed.c @@ -321,7 +321,7 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx) #else /* TEST_SQRT */ "2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d" #endif - " -> %8.3fms %5.1f (%ld)\n", + " -> %8.6fms %5.1f (%ld)\n", #ifdef TEST_SQRT P_MOD_64, #endif diff --git a/crypto/openssl/crypto/bn/exptest.c b/crypto/openssl/crypto/bn/exptest.c index b09cf88..f598a07 100644 --- a/crypto/openssl/crypto/bn/exptest.c +++ b/crypto/openssl/crypto/bn/exptest.c @@ -77,7 +77,7 @@ int main(int argc, char *argv[]) BIO *out=NULL; int i,ret; unsigned char c; - BIGNUM *r_mont,*r_recp,*r_simple,*a,*b,*m; + BIGNUM *r_mont,*r_mont_const,*r_recp,*r_simple,*a,*b,*m; RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_rand may fail, and we don't * even check its return value @@ -88,6 +88,7 @@ int main(int argc, char *argv[]) ctx=BN_CTX_new(); if (ctx == NULL) EXIT(1); r_mont=BN_new(); + r_mont_const=BN_new(); r_recp=BN_new(); 
r_simple=BN_new(); a=BN_new(); @@ -143,8 +144,17 @@ int main(int argc, char *argv[]) EXIT(1); } + ret=BN_mod_exp_mont_consttime(r_mont_const,a,b,m,ctx,NULL); + if (ret <= 0) + { + printf("BN_mod_exp_mont_consttime() problems\n"); + ERR_print_errors(out); + EXIT(1); + } + if (BN_cmp(r_simple, r_mont) == 0 - && BN_cmp(r_simple,r_recp) == 0) + && BN_cmp(r_simple,r_recp) == 0 + && BN_cmp(r_simple,r_mont_const) == 0) { printf("."); fflush(stdout); @@ -153,6 +163,8 @@ int main(int argc, char *argv[]) { if (BN_cmp(r_simple,r_mont) != 0) printf("\nsimple and mont results differ\n"); + if (BN_cmp(r_simple,r_mont) != 0) + printf("\nsimple and mont const time results differ\n"); if (BN_cmp(r_simple,r_recp) != 0) printf("\nsimple and recp results differ\n"); @@ -162,11 +174,13 @@ int main(int argc, char *argv[]) printf("\nsimple ="); BN_print(out,r_simple); printf("\nrecp ="); BN_print(out,r_recp); printf("\nmont ="); BN_print(out,r_mont); + printf("\nmont_ct ="); BN_print(out,r_mont_const); printf("\n"); EXIT(1); } } BN_free(r_mont); + BN_free(r_mont_const); BN_free(r_recp); BN_free(r_simple); BN_free(a); @@ -181,6 +195,9 @@ int main(int argc, char *argv[]) err: ERR_load_crypto_strings(); ERR_print_errors(out); +#ifdef OPENSSL_SYS_NETWARE + printf("ERROR\n"); +#endif EXIT(1); return(1); } |